Diffstat (limited to 'contrib/llvm/lib/Target/Hexagon')
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp | 2152
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/BitTracker.cpp | 1127
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/BitTracker.h | 435
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | 1605
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/Hexagon.h | 56
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/Hexagon.td | 263
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp | 598
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h | 62
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp | 2778
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp | 1175
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h | 64
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp | 247
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td | 35
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp | 1310
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp | 754
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp | 1063
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp | 1357
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp | 357
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 188
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp | 1479
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h | 109
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp | 259
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp | 1599
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp | 319
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp | 525
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp | 1965
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 1563
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 2894
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h | 252
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td | 462
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td | 1019
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td | 448
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td | 155
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td | 238
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 3828
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h | 402
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td | 5809
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td | 266
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td | 4251
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td | 937
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td | 2241
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td | 526
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td | 1293
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td | 40
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td | 27
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td | 318
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td | 111
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td | 836
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td | 728
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp | 146
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp | 16
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h | 85
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp | 699
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h | 244
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp | 689
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonOperands.td | 603
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp | 150
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp | 338
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp | 203
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h | 89
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td | 270
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td | 24
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td | 206
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td | 170
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td | 310
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td | 121
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp | 63
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h | 35
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp | 168
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp | 1209
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp | 616
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 125
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h | 121
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp | 299
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h | 50
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 98
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h | 41
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h | 31
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp | 38
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h | 70
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 1601
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h | 114
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 360
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 285
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp | 251
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h | 137
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp | 233
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h | 92
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 37
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h | 32
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 581
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h | 218
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp | 741
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h | 70
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp | 427
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp | 1087
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp | 150
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h | 45
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp | 49
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h | 35
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp | 648
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h | 289
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp | 237
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h | 65
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 244
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h | 75
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp | 470
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h | 184
-rw-r--r--  contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp | 19
109 files changed, 66628 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
new file mode 100644
index 0000000..a8622a9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -0,0 +1,2152 @@
+//===-- HexagonAsmParser.cpp - Parse Hexagon asm to MCInst instructions----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mcasmparser"
+
+#include "Hexagon.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonTargetStreamer.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCELFStreamer.h"
+#include "MCTargetDesc/HexagonMCChecker.h"
+#include "MCTargetDesc/HexagonMCExpr.h"
+#include "MCTargetDesc/HexagonMCShuffler.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "MCTargetDesc/HexagonMCAsmInfo.h"
+#include "MCTargetDesc/HexagonShuffler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include <sstream>
+
+using namespace llvm;
+
+static cl::opt<bool> EnableFutureRegs("mfuture-regs",
+ cl::desc("Enable future registers"));
+
+static cl::opt<bool> WarnMissingParenthesis("mwarn-missing-parenthesis",
+cl::desc("Warn for missing parenthesis around predicate registers"),
+cl::init(true));
+static cl::opt<bool> ErrorMissingParenthesis("merror-missing-parenthesis",
+cl::desc("Error for missing parenthesis around predicate registers"),
+cl::init(false));
+static cl::opt<bool> WarnSignedMismatch("mwarn-sign-mismatch",
+cl::desc("Warn for mismatching a signed and unsigned value"),
+cl::init(true));
+static cl::opt<bool> WarnNoncontigiousRegister("mwarn-noncontigious-register",
+cl::desc("Warn for register names that aren't contigious"),
+cl::init(true));
+static cl::opt<bool> ErrorNoncontigiousRegister("merror-noncontigious-register",
+cl::desc("Error for register names that aren't contigious"),
+cl::init(false));
+
+
+namespace {
+struct HexagonOperand;
+
+class HexagonAsmParser : public MCTargetAsmParser {
+
+ HexagonTargetStreamer &getTargetStreamer() {
+ MCTargetStreamer &TS = *Parser.getStreamer().getTargetStreamer();
+ return static_cast<HexagonTargetStreamer &>(TS);
+ }
+
+ MCAsmParser &Parser;
+ MCAssembler *Assembler;
+ MCInstrInfo const &MCII;
+ MCInst MCB;
+ bool InBrackets;
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAssembler *getAssembler() const { return Assembler; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ bool equalIsAsmAssignment() override { return false; }
+ bool isLabel(AsmToken &Token) override;
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+ bool ParseDirectiveFalign(unsigned Size, SMLoc L);
+
+ virtual bool ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
+ bool ParseDirectiveSubsection(SMLoc L);
+ bool ParseDirectiveValue(unsigned Size, SMLoc L);
+ bool ParseDirectiveComm(bool IsLocal, SMLoc L);
+ bool RegisterMatchesArch(unsigned MatchNum) const;
+
+ bool matchBundleOptions();
+ bool handleNoncontigiousRegister(bool Contigious, SMLoc &Loc);
+ bool finishBundle(SMLoc IDLoc, MCStreamer &Out);
+ void canonicalizeImmediates(MCInst &MCI);
+ bool matchOneInstruction(MCInst &MCB, SMLoc IDLoc,
+ OperandVector &InstOperands, uint64_t &ErrorInfo,
+ bool MatchingInlineAsm, bool &MustExtend);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo, bool MatchingInlineAsm) override;
+
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override;
+ void OutOfRange(SMLoc IDLoc, long long Val, long long Max);
+ int processInstruction(MCInst &Inst, OperandVector const &Operands,
+ SMLoc IDLoc, bool &MustExtend);
+
+ // Check if we have an assembler and, if so, set the ELF e_header flags.
+ void chksetELFHeaderEFlags(unsigned flags) {
+ if (getAssembler())
+ getAssembler()->setELFHeaderEFlags(flags);
+ }
+
+/// @name Auto-generated Match Functions
+/// {
+
+#define GET_ASSEMBLER_HEADER
+#include "HexagonGenAsmMatcher.inc"
+
+ /// }
+
+public:
+ HexagonAsmParser(const MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, _STI), Parser(_Parser),
+ MCII (MII), MCB(HexagonMCInstrInfo::createBundle()), InBrackets(false) {
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
+
+ MCAsmParserExtension::Initialize(_Parser);
+
+ Assembler = nullptr;
+ // FIXME: need better way to detect AsmStreamer (upstream removed getKind())
+ if (!Parser.getStreamer().hasRawTextSupport()) {
+ MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer());
+ Assembler = &MES->getAssembler();
+ }
+ }
+
+ bool mustExtend(OperandVector &Operands);
+ bool splitIdentifier(OperandVector &Operands);
+ bool parseOperand(OperandVector &Operands);
+ bool parseInstruction(OperandVector &Operands);
+ bool implicitExpressionLocation(OperandVector &Operands);
+ bool parseExpressionOrOperand(OperandVector &Operands);
+ bool parseExpression(MCExpr const *& Expr);
+ virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override
+ {
+ llvm_unreachable("Unimplemented");
+ }
+ virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ AsmToken ID, OperandVector &Operands) override;
+
+ virtual bool ParseDirective(AsmToken DirectiveID) override;
+};
+
+/// HexagonOperand - Instances of this class represent a parsed Hexagon machine
+/// instruction.
+struct HexagonOperand : public MCParsedAsmOperand {
+ enum KindTy { Token, Immediate, Register } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+ struct TokTy {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegTy {
+ unsigned RegNum;
+ };
+
+ struct ImmTy {
+ const MCExpr *Val;
+ bool MustExtend;
+ };
+
+ struct InstTy {
+ OperandVector *SubInsts;
+ };
+
+ union {
+ struct TokTy Tok;
+ struct RegTy Reg;
+ struct ImmTy Imm;
+ };
+
+ HexagonOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+
+public:
+ HexagonOperand(const HexagonOperand &o) : MCParsedAsmOperand() {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ switch (Kind) {
+ case Register:
+ Reg = o.Reg;
+ break;
+ case Immediate:
+ Imm = o.Imm;
+ break;
+ case Token:
+ Tok = o.Tok;
+ break;
+ }
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ unsigned getReg() const {
+ assert(Kind == Register && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ bool isToken() const { return Kind == Token; }
+ bool isImm() const { return Kind == Immediate; }
+ bool isMem() const { llvm_unreachable("No isMem"); }
+ bool isReg() const { return Kind == Register; }
+
+ bool CheckImmRange(int immBits, int zeroBits, bool isSigned,
+ bool isRelocatable, bool Extendable) const {
+ if (Kind == Immediate) {
+ const MCExpr *myMCExpr = getImm();
+ if (Imm.MustExtend && !Extendable)
+ return false;
+ int64_t Res;
+ if (myMCExpr->evaluateAsAbsolute(Res)) {
+ int bits = immBits + zeroBits;
+        // The encoded field spans zeroBits + immBits bits; the low zeroBits
+        // bits of the value must be zero.
+ if (Res & ((1 << zeroBits) - 1))
+ return false;
+ if (isSigned) {
+ if (Res < (1LL << (bits - 1)) && Res >= -(1LL << (bits - 1)))
+ return true;
+ } else {
+ if (bits == 64)
+ return true;
+ if (Res >= 0)
+ return ((uint64_t)Res < (uint64_t)(1ULL << bits)) ? true : false;
+ else {
+ const int64_t high_bit_set = 1ULL << 63;
+ const uint64_t mask = (high_bit_set >> (63 - bits));
+ return (((uint64_t)Res & mask) == mask) ? true : false;
+ }
+ }
+ } else if (myMCExpr->getKind() == MCExpr::SymbolRef && isRelocatable)
+ return true;
+ else if (myMCExpr->getKind() == MCExpr::Binary ||
+ myMCExpr->getKind() == MCExpr::Unary)
+ return true;
+ }
+ return false;
+ }
+
+ bool isf32Ext() const { return false; }
+ bool iss32Imm() const { return CheckImmRange(32, 0, true, true, false); }
+ bool iss8Imm() const { return CheckImmRange(8, 0, true, false, false); }
+ bool iss8Imm64() const { return CheckImmRange(8, 0, true, true, false); }
+ bool iss7Imm() const { return CheckImmRange(7, 0, true, false, false); }
+ bool iss6Imm() const { return CheckImmRange(6, 0, true, false, false); }
+ bool iss4Imm() const { return CheckImmRange(4, 0, true, false, false); }
+ bool iss4_0Imm() const { return CheckImmRange(4, 0, true, false, false); }
+ bool iss4_1Imm() const { return CheckImmRange(4, 1, true, false, false); }
+ bool iss4_2Imm() const { return CheckImmRange(4, 2, true, false, false); }
+ bool iss4_3Imm() const { return CheckImmRange(4, 3, true, false, false); }
+ bool iss4_6Imm() const { return CheckImmRange(4, 0, true, false, false); }
+ bool iss3_6Imm() const { return CheckImmRange(3, 0, true, false, false); }
+ bool iss3Imm() const { return CheckImmRange(3, 0, true, false, false); }
+
+ bool isu64Imm() const { return CheckImmRange(64, 0, false, true, true); }
+ bool isu32Imm() const { return CheckImmRange(32, 0, false, true, false); }
+ bool isu26_6Imm() const { return CheckImmRange(26, 6, false, true, false); }
+ bool isu16Imm() const { return CheckImmRange(16, 0, false, true, false); }
+ bool isu16_0Imm() const { return CheckImmRange(16, 0, false, true, false); }
+ bool isu16_1Imm() const { return CheckImmRange(16, 1, false, true, false); }
+ bool isu16_2Imm() const { return CheckImmRange(16, 2, false, true, false); }
+ bool isu16_3Imm() const { return CheckImmRange(16, 3, false, true, false); }
+ bool isu11_3Imm() const { return CheckImmRange(11, 3, false, false, false); }
+ bool isu6_0Imm() const { return CheckImmRange(6, 0, false, false, false); }
+ bool isu6_1Imm() const { return CheckImmRange(6, 1, false, false, false); }
+ bool isu6_2Imm() const { return CheckImmRange(6, 2, false, false, false); }
+ bool isu6_3Imm() const { return CheckImmRange(6, 3, false, false, false); }
+ bool isu10Imm() const { return CheckImmRange(10, 0, false, false, false); }
+ bool isu9Imm() const { return CheckImmRange(9, 0, false, false, false); }
+ bool isu8Imm() const { return CheckImmRange(8, 0, false, false, false); }
+ bool isu7Imm() const { return CheckImmRange(7, 0, false, false, false); }
+ bool isu6Imm() const { return CheckImmRange(6, 0, false, false, false); }
+ bool isu5Imm() const { return CheckImmRange(5, 0, false, false, false); }
+ bool isu4Imm() const { return CheckImmRange(4, 0, false, false, false); }
+ bool isu3Imm() const { return CheckImmRange(3, 0, false, false, false); }
+ bool isu2Imm() const { return CheckImmRange(2, 0, false, false, false); }
+ bool isu1Imm() const { return CheckImmRange(1, 0, false, false, false); }
+
+ bool ism6Imm() const { return CheckImmRange(6, 0, false, false, false); }
+ bool isn8Imm() const { return CheckImmRange(8, 0, false, false, false); }
+
+ bool iss16Ext() const { return CheckImmRange(16 + 26, 0, true, true, true); }
+ bool iss12Ext() const { return CheckImmRange(12 + 26, 0, true, true, true); }
+ bool iss10Ext() const { return CheckImmRange(10 + 26, 0, true, true, true); }
+ bool iss9Ext() const { return CheckImmRange(9 + 26, 0, true, true, true); }
+ bool iss8Ext() const { return CheckImmRange(8 + 26, 0, true, true, true); }
+ bool iss7Ext() const { return CheckImmRange(7 + 26, 0, true, true, true); }
+ bool iss6Ext() const { return CheckImmRange(6 + 26, 0, true, true, true); }
+ bool iss11_0Ext() const {
+ return CheckImmRange(11 + 26, 0, true, true, true);
+ }
+ bool iss11_1Ext() const {
+ return CheckImmRange(11 + 26, 1, true, true, true);
+ }
+ bool iss11_2Ext() const {
+ return CheckImmRange(11 + 26, 2, true, true, true);
+ }
+ bool iss11_3Ext() const {
+ return CheckImmRange(11 + 26, 3, true, true, true);
+ }
+
+ bool isu6Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); }
+ bool isu7Ext() const { return CheckImmRange(7 + 26, 0, false, true, true); }
+ bool isu8Ext() const { return CheckImmRange(8 + 26, 0, false, true, true); }
+ bool isu9Ext() const { return CheckImmRange(9 + 26, 0, false, true, true); }
+ bool isu10Ext() const { return CheckImmRange(10 + 26, 0, false, true, true); }
+ bool isu6_0Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); }
+ bool isu6_1Ext() const { return CheckImmRange(6 + 26, 1, false, true, true); }
+ bool isu6_2Ext() const { return CheckImmRange(6 + 26, 2, false, true, true); }
+ bool isu6_3Ext() const { return CheckImmRange(6 + 26, 3, false, true, true); }
+ bool isu32MustExt() const { return isImm() && Imm.MustExtend; }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createExpr(getImm()));
+ }
+
+ void addSignedImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ MCExpr const *Expr = getImm();
+ int64_t Value;
+ if (!Expr->evaluateAsAbsolute(Value)) {
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ return;
+ }
+ int64_t Extended = SignExtend64 (Value, 32);
+ if ((Extended < 0) == (Value < 0)) {
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ return;
+ }
+ // Flip bit 33 to signal signed unsigned mismatch
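+    // Illustrative case: #0xffffffff fits an unsigned 32-bit field but
+    // sign-extends to -1, so Extended and Value disagree in sign; the operand
+    // is emitted with bit 33 flipped so that canonicalizeImmediates() can
+    // detect the mismatch and warn.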
+ Extended ^= 0x100000000;
+ Inst.addOperand(MCOperand::createImm(Extended));
+ }
+
+ void addf32ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void adds32ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds8ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds8Imm64Operands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds6ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds4ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds4_0ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds4_1ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds4_2ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds4_3ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds3ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+
+ void addu64ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu32ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu26_6ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu16ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu16_0ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu16_1ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu16_2ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu16_3ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu11_3ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu10ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu9ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu8ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu7ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_0ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_1ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_2ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_3ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu5ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu4ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu3ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu2ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu1ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void addm6ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addn8ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void adds16ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds12ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds10ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds9ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds8ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds6ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds11_0ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds11_1ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds11_2ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds11_3ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+
+ void addu6ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu7ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu8ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu9ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu10ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_0ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_1ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_2ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_3ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu32MustExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void adds4_6ImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(CE->getValue() * 64));
+ }
+
+ void adds3_6ImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(CE->getValue() * 64));
+ }
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ virtual void print(raw_ostream &OS) const;
+
+ static std::unique_ptr<HexagonOperand> CreateToken(StringRef Str, SMLoc S) {
+ HexagonOperand *Op = new HexagonOperand(Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return std::unique_ptr<HexagonOperand>(Op);
+ }
+
+ static std::unique_ptr<HexagonOperand> CreateReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ HexagonOperand *Op = new HexagonOperand(Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return std::unique_ptr<HexagonOperand>(Op);
+ }
+
+ static std::unique_ptr<HexagonOperand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E) {
+ HexagonOperand *Op = new HexagonOperand(Immediate);
+ Op->Imm.Val = Val;
+ Op->Imm.MustExtend = false;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return std::unique_ptr<HexagonOperand>(Op);
+ }
+};
+
+} // end anonymous namespace.
+
+void HexagonOperand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case Immediate:
+ getImm()->print(OS, nullptr);
+ break;
+ case Register:
+ OS << "<register R";
+ OS << getReg() << ">";
+ break;
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+static unsigned MatchRegisterName(StringRef Name);
+
+bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) {
+ DEBUG(dbgs() << "Bundle:");
+ DEBUG(MCB.dump_pretty(dbgs()));
+ DEBUG(dbgs() << "--\n");
+
+ // Check the bundle for errors.
+ const MCRegisterInfo *RI = getContext().getRegisterInfo();
+ HexagonMCChecker Check(MCII, getSTI(), MCB, MCB, *RI);
+
+ bool CheckOk = HexagonMCInstrInfo::canonicalizePacket(MCII, getSTI(),
+ getContext(), MCB,
+ &Check);
+
+ while (Check.getNextErrInfo() == true) {
+ unsigned Reg = Check.getErrRegister();
+ Twine R(RI->getName(Reg));
+
+ uint64_t Err = Check.getError();
+ if (Err != HexagonMCErrInfo::CHECK_SUCCESS) {
+ if (HexagonMCErrInfo::CHECK_ERROR_BRANCHES & Err)
+ Error(IDLoc,
+ "unconditional branch cannot precede another branch in packet");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_NEWP & Err ||
+ HexagonMCErrInfo::CHECK_ERROR_NEWV & Err)
+ Error(IDLoc, "register `" + R +
+ "' used with `.new' "
+ "but not validly modified in the same packet");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_REGISTERS & Err)
+ Error(IDLoc, "register `" + R + "' modified more than once");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_READONLY & Err)
+ Error(IDLoc, "cannot write to read-only register `" + R + "'");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_LOOP & Err)
+ Error(IDLoc, "loop-setup and some branch instructions "
+ "cannot be in the same packet");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_ENDLOOP & Err) {
+ Twine N(HexagonMCInstrInfo::isInnerLoop(MCB) ? '0' : '1');
+ Error(IDLoc, "packet marked with `:endloop" + N + "' " +
+ "cannot contain instructions that modify register " +
+ "`" + R + "'");
+ }
+
+ if (HexagonMCErrInfo::CHECK_ERROR_SOLO & Err)
+ Error(IDLoc,
+ "instruction cannot appear in packet with other instructions");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_NOSLOTS & Err)
+ Error(IDLoc, "too many slots used in packet");
+
+ if (Err & HexagonMCErrInfo::CHECK_ERROR_SHUFFLE) {
+ uint64_t Erm = Check.getShuffleError();
+
+ if (HexagonShuffler::SHUFFLE_ERROR_INVALID == Erm)
+ Error(IDLoc, "invalid instruction packet");
+ else if (HexagonShuffler::SHUFFLE_ERROR_STORES == Erm)
+ Error(IDLoc, "invalid instruction packet: too many stores");
+ else if (HexagonShuffler::SHUFFLE_ERROR_LOADS == Erm)
+ Error(IDLoc, "invalid instruction packet: too many loads");
+ else if (HexagonShuffler::SHUFFLE_ERROR_BRANCHES == Erm)
+ Error(IDLoc, "too many branches in packet");
+ else if (HexagonShuffler::SHUFFLE_ERROR_NOSLOTS == Erm)
+ Error(IDLoc, "invalid instruction packet: out of slots");
+ else if (HexagonShuffler::SHUFFLE_ERROR_SLOTS == Erm)
+ Error(IDLoc, "invalid instruction packet: slot error");
+ else if (HexagonShuffler::SHUFFLE_ERROR_ERRATA2 == Erm)
+ Error(IDLoc, "v60 packet violation");
+ else if (HexagonShuffler::SHUFFLE_ERROR_STORE_LOAD_CONFLICT == Erm)
+ Error(IDLoc, "slot 0 instruction does not allow slot 1 store");
+ else
+ Error(IDLoc, "unknown error in instruction packet");
+ }
+ }
+
+ unsigned Warn = Check.getWarning();
+ if (Warn != HexagonMCErrInfo::CHECK_SUCCESS) {
+ if (HexagonMCErrInfo::CHECK_WARN_CURRENT & Warn)
+ Warning(IDLoc, "register `" + R + "' used with `.cur' "
+ "but not used in the same packet");
+ else if (HexagonMCErrInfo::CHECK_WARN_TEMPORARY & Warn)
+ Warning(IDLoc, "register `" + R + "' used with `.tmp' "
+ "but not used in the same packet");
+ }
+ }
+
+ if (CheckOk) {
+ MCB.setLoc(IDLoc);
+ if (HexagonMCInstrInfo::bundleSize(MCB) == 0) {
+ assert(!HexagonMCInstrInfo::isInnerLoop(MCB));
+ assert(!HexagonMCInstrInfo::isOuterLoop(MCB));
+ // Empty packets are valid yet aren't emitted
+ return false;
+ }
+ Out.EmitInstruction(MCB, getSTI());
+ } else {
+    // If compounding and duplexing didn't reduce the packet to four or fewer
+    // instructions, we have a packet that is too big.
+ if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) {
+ Error(IDLoc, "invalid instruction packet: out of slots");
+ return true; // Error
+ }
+ }
+
+ return false; // No error
+}
+
+bool HexagonAsmParser::matchBundleOptions() {
+ MCAsmParser &Parser = getParser();
+ MCAsmLexer &Lexer = getLexer();
+ while (true) {
+ if (!Parser.getTok().is(AsmToken::Colon))
+ return false;
+ Lexer.Lex();
+ StringRef Option = Parser.getTok().getString();
+ if (Option.compare_lower("endloop0") == 0)
+ HexagonMCInstrInfo::setInnerLoop(MCB);
+ else if (Option.compare_lower("endloop1") == 0)
+ HexagonMCInstrInfo::setOuterLoop(MCB);
+ else if (Option.compare_lower("mem_noshuf") == 0)
+ HexagonMCInstrInfo::setMemReorderDisabled(MCB);
+ else if (Option.compare_lower("mem_shuf") == 0)
+ HexagonMCInstrInfo::setMemStoreReorderEnabled(MCB);
+ else
+ return true;
+ Lexer.Lex();
+ }
+}
+
+// For instruction aliases, immediates are generated as plain integers rather
+// than as MCConstantExprs. Convert them so that every immediate operand is a
+// uniform MCExpr, and also check for signed/unsigned mismatches and warn.
+void HexagonAsmParser::canonicalizeImmediates(MCInst &MCI) {
+ MCInst NewInst;
+ NewInst.setOpcode(MCI.getOpcode());
+ for (MCOperand &I : MCI)
+ if (I.isImm()) {
+ int64_t Value (I.getImm());
+ if ((Value & 0x100000000) != (Value & 0x80000000)) {
+ // Detect flipped bit 33 wrt bit 32 and signal warning
+ Value ^= 0x100000000;
+ if (WarnSignedMismatch)
+ Warning (MCI.getLoc(), "Signed/Unsigned mismatch");
+ }
+ NewInst.addOperand(MCOperand::createExpr(
+ MCConstantExpr::create(Value, getContext())));
+ }
+ else
+ NewInst.addOperand(I);
+ MCI = NewInst;
+}
+
+bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc,
+ OperandVector &InstOperands,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm,
+ bool &MustExtend) {
+ // Perform matching with tablegen asmmatcher generated function
+ int result =
+ MatchInstructionImpl(InstOperands, MCI, ErrorInfo, MatchingInlineAsm);
+ if (result == Match_Success) {
+ MCI.setLoc(IDLoc);
+ MustExtend = mustExtend(InstOperands);
+ canonicalizeImmediates(MCI);
+ result = processInstruction(MCI, InstOperands, IDLoc, MustExtend);
+
+ DEBUG(dbgs() << "Insn:");
+ DEBUG(MCI.dump_pretty(dbgs()));
+ DEBUG(dbgs() << "\n\n");
+
+ MCI.setLoc(IDLoc);
+ }
+
+  // Create the instruction operand for the bundle instruction.
+  // TODO: Break this into a separate function; the code here is less readable.
+  // Think about how to get an instruction error to report correctly:
+  // SMLoc will currently return the location of the "{".
+ switch (result) {
+ default:
+ break;
+ case Match_Success:
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "invalid instruction");
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction");
+ case Match_InvalidOperand:
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= InstOperands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = (static_cast<HexagonOperand *>(InstOperands[ErrorInfo].get()))
+ ->getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ }
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ llvm_unreachable("Implement any new match types added!");
+}
+
+bool HexagonAsmParser::mustExtend(OperandVector &Operands) {
+ unsigned Count = 0;
+ for (std::unique_ptr<MCParsedAsmOperand> &i : Operands)
+ if (i->isImm())
+ if (static_cast<HexagonOperand *>(i.get())->Imm.MustExtend)
+ ++Count;
+  // Multiple extenders should have been filtered by iss9Ext et al.
+ assert(Count < 2 && "Multiple extenders");
+ return Count == 1;
+}
+
+bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ if (!InBrackets) {
+ MCB.clear();
+ MCB.addOperand(MCOperand::createImm(0));
+ }
+ HexagonOperand &FirstOperand = static_cast<HexagonOperand &>(*Operands[0]);
+ if (FirstOperand.isToken() && FirstOperand.getToken() == "{") {
+ assert(Operands.size() == 1 && "Brackets should be by themselves");
+ if (InBrackets) {
+ getParser().Error(IDLoc, "Already in a packet");
+ return true;
+ }
+ InBrackets = true;
+ return false;
+ }
+ if (FirstOperand.isToken() && FirstOperand.getToken() == "}") {
+ assert(Operands.size() == 1 && "Brackets should be by themselves");
+ if (!InBrackets) {
+ getParser().Error(IDLoc, "Not in a packet");
+ return true;
+ }
+ InBrackets = false;
+ if (matchBundleOptions())
+ return true;
+ return finishBundle(IDLoc, Out);
+ }
+ MCInst *SubInst = new (getParser().getContext()) MCInst;
+ bool MustExtend = false;
+ if (matchOneInstruction(*SubInst, IDLoc, Operands, ErrorInfo,
+ MatchingInlineAsm, MustExtend))
+ return true;
+ HexagonMCInstrInfo::extendIfNeeded(
+ getParser().getContext(), MCII, MCB, *SubInst,
+ HexagonMCInstrInfo::isExtended(MCII, *SubInst) || MustExtend);
+ MCB.addOperand(MCOperand::createInst(SubInst));
+ if (!InBrackets)
+ return finishBundle(IDLoc, Out);
+ return false;
+}
+
+/// ParseDirective parses the Hexagon specific directives
+bool HexagonAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if ((IDVal.lower() == ".word") || (IDVal.lower() == ".4byte"))
+ return ParseDirectiveValue(4, DirectiveID.getLoc());
+ if (IDVal.lower() == ".short" || IDVal.lower() == ".hword" ||
+ IDVal.lower() == ".half")
+ return ParseDirectiveValue(2, DirectiveID.getLoc());
+ if (IDVal.lower() == ".falign")
+ return ParseDirectiveFalign(256, DirectiveID.getLoc());
+ if ((IDVal.lower() == ".lcomm") || (IDVal.lower() == ".lcommon"))
+ return ParseDirectiveComm(true, DirectiveID.getLoc());
+ if ((IDVal.lower() == ".comm") || (IDVal.lower() == ".common"))
+ return ParseDirectiveComm(false, DirectiveID.getLoc());
+ if (IDVal.lower() == ".subsection")
+ return ParseDirectiveSubsection(DirectiveID.getLoc());
+
+ return true;
+}
+bool HexagonAsmParser::ParseDirectiveSubsection(SMLoc L) {
+ const MCExpr *Subsection = 0;
+ int64_t Res;
+
+ assert((getLexer().isNot(AsmToken::EndOfStatement)) &&
+ "Invalid subsection directive");
+ getParser().parseExpression(Subsection);
+
+ if (!Subsection->evaluateAsAbsolute(Res))
+ return Error(L, "Cannot evaluate subsection number");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+  // 0-8192 is the hard-coded range in MCObjectStreamer.cpp; this keeps the
+  // negative subsections together, and in the same order, but at the opposite
+  // end of the section. Only legacy hexagon-gcc generated assembly code
+  // used negative subsections.
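+  // For example, ".subsection -1" is emitted as subsection 8191 and
+  // ".subsection -8192" as subsection 0.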
+ if ((Res < 0) && (Res > -8193))
+ Subsection = MCConstantExpr::create(8192 + Res, this->getContext());
+
+ getStreamer().SubSection(Subsection);
+ return false;
+}
+
+/// ::= .falign [expression]
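+/// The directive requests 16-byte alignment; the optional expression appears
+/// to bound the number of padding bytes emitted (default 15), e.g. ".falign 8".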
+bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) {
+
+ int64_t MaxBytesToFill = 15;
+
+  // If there is an argument.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ const MCExpr *Value;
+ SMLoc ExprLoc = L;
+
+    // Make sure we have a number (parseExpression returns false on success).
+ if (getParser().parseExpression(Value) == false) {
+ // Make sure this is a number that is in range
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(Size, IntValue) && !isIntN(Size, IntValue))
+ return Error(ExprLoc, "literal value out of range (256) for falign");
+ MaxBytesToFill = IntValue;
+ Lex();
+ } else {
+ return Error(ExprLoc, "not a valid expression for falign directive");
+ }
+ }
+
+ getTargetStreamer().emitFAlign(16, MaxBytesToFill);
+ Lex();
+
+ return false;
+}
+
+/// ::= .word [ expression (, expression)* ]
+bool HexagonAsmParser::ParseDirectiveValue(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+
+ for (;;) {
+ const MCExpr *Value;
+ SMLoc ExprLoc = L;
+ if (getParser().parseExpression(Value))
+ return true;
+
+ // Special case constant expressions to match code generator.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ assert(Size <= 8 && "Invalid size");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+ return Error(ExprLoc, "literal value out of range for directive");
+ getStreamer().EmitIntValue(IntValue, Size);
+ } else
+ getStreamer().EmitValue(Value, Size);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+// This is largely a copy of AsmParser's ParseDirectiveComm, extended to
+// accept a third argument, AccessAlignment, which indicates the smallest
+// memory access made to the symbol, expressed in bytes. If no
+// AccessAlignment is specified it defaults to the Alignment value.
+// Hexagon's .lcomm:
+// .lcomm Symbol, Length, Alignment, AccessAlignment
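+// e.g. (illustrative): .lcomm my_buf, 256, 8, 4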
+bool HexagonAsmParser::ParseDirectiveComm(bool IsLocal, SMLoc Loc) {
+ // FIXME: need better way to detect if AsmStreamer (upstream removed
+ // getKind())
+ if (getStreamer().hasRawTextSupport())
+ return true; // Only object file output requires special treatment.
+
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(Size))
+ return true;
+
+ int64_t ByteAlignment = 1;
+ SMLoc ByteAlignmentLoc;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ ByteAlignmentLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(ByteAlignment))
+ return true;
+ if (!isPowerOf2_64(ByteAlignment))
+ return Error(ByteAlignmentLoc, "alignment must be a power of 2");
+ }
+
+ int64_t AccessAlignment = 0;
+ if (getLexer().is(AsmToken::Comma)) {
+ // The optional access argument specifies the size of the smallest memory
+ // access to be made to the symbol, expressed in bytes.
+ SMLoc AccessAlignmentLoc;
+ Lex();
+ AccessAlignmentLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(AccessAlignment))
+ return true;
+
+ if (!isPowerOf2_64(AccessAlignment))
+ return Error(AccessAlignmentLoc, "access alignment must be a power of 2");
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.comm' or '.lcomm' directive");
+
+ Lex();
+
+  // NOTE: a size of zero for .comm should create an undefined symbol,
+  // but a zero-size .lcomm creates a BSS symbol of size zero.
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
+ "be less than zero");
+
+ // NOTE: The alignment in the directive is a power of 2 value, the assembler
+ // may internally end up wanting an alignment in bytes.
+ // FIXME: Diagnose overflow.
+ if (ByteAlignment < 0)
+ return Error(ByteAlignmentLoc, "invalid '.comm' or '.lcomm' directive "
+ "alignment, can't be less than zero");
+
+ if (!Sym->isUndefined())
+ return Error(Loc, "invalid symbol redefinition");
+
+ HexagonMCELFStreamer &HexagonELFStreamer =
+ static_cast<HexagonMCELFStreamer &>(getStreamer());
+ if (IsLocal) {
+ HexagonELFStreamer.HexagonMCEmitLocalCommonSymbol(Sym, Size, ByteAlignment,
+ AccessAlignment);
+ return false;
+ }
+
+ HexagonELFStreamer.HexagonMCEmitCommonSymbol(Sym, Size, ByteAlignment,
+ AccessAlignment);
+ return false;
+}
+
+// validate register against architecture
+bool HexagonAsmParser::RegisterMatchesArch(unsigned MatchNum) const {
+ return true;
+}
+
+// extern "C" void LLVMInitializeHexagonAsmLexer();
+
+/// Force static initialization.
+extern "C" void LLVMInitializeHexagonAsmParser() {
+ RegisterMCAsmParser<HexagonAsmParser> X(TheHexagonTarget);
+}
+
+#define GET_MATCHER_IMPLEMENTATION
+#define GET_REGISTER_MATCHER
+#include "HexagonGenAsmMatcher.inc"
+
+namespace {
+bool previousEqual(OperandVector &Operands, size_t Index, StringRef String) {
+ if (Index >= Operands.size())
+ return false;
+ MCParsedAsmOperand &Operand = *Operands[Operands.size() - Index - 1];
+ if (!Operand.isToken())
+ return false;
+ return static_cast<HexagonOperand &>(Operand).getToken().equals_lower(String);
+}
+bool previousIsLoop(OperandVector &Operands, size_t Index) {
+ return previousEqual(Operands, Index, "loop0") ||
+ previousEqual(Operands, Index, "loop1") ||
+ previousEqual(Operands, Index, "sp1loop0") ||
+ previousEqual(Operands, Index, "sp2loop0") ||
+ previousEqual(Operands, Index, "sp3loop0");
+}
+}
+
+bool HexagonAsmParser::splitIdentifier(OperandVector &Operands) {
+ AsmToken const &Token = getParser().getTok();
+ StringRef String = Token.getString();
+ SMLoc Loc = Token.getLoc();
+ getLexer().Lex();
+ do {
+ std::pair<StringRef, StringRef> HeadTail = String.split('.');
+ if (!HeadTail.first.empty())
+ Operands.push_back(HexagonOperand::CreateToken(HeadTail.first, Loc));
+ if (!HeadTail.second.empty())
+ Operands.push_back(HexagonOperand::CreateToken(
+ String.substr(HeadTail.first.size(), 1), Loc));
+ String = HeadTail.second;
+ } while (!String.empty());
+ return false;
+}
+
+bool HexagonAsmParser::parseOperand(OperandVector &Operands) {
+ unsigned Register;
+ SMLoc Begin;
+ SMLoc End;
+ MCAsmLexer &Lexer = getLexer();
+ if (!ParseRegister(Register, Begin, End)) {
+ if (!ErrorMissingParenthesis)
+ switch (Register) {
+ default:
+ break;
+ case Hexagon::P0:
+ case Hexagon::P1:
+ case Hexagon::P2:
+ case Hexagon::P3:
+ if (previousEqual(Operands, 0, "if")) {
+ if (WarnMissingParenthesis)
+ Warning (Begin, "Missing parenthesis around predicate register");
+ static char const *LParen = "(";
+ static char const *RParen = ")";
+ Operands.push_back(HexagonOperand::CreateToken(LParen, Begin));
+ Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End));
+ AsmToken MaybeDotNew = Lexer.getTok();
+ if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) &&
+ MaybeDotNew.getString().equals_lower(".new"))
+ splitIdentifier(Operands);
+ Operands.push_back(HexagonOperand::CreateToken(RParen, Begin));
+ return false;
+ }
+ if (previousEqual(Operands, 0, "!") &&
+ previousEqual(Operands, 1, "if")) {
+ if (WarnMissingParenthesis)
+ Warning (Begin, "Missing parenthesis around predicate register");
+ static char const *LParen = "(";
+ static char const *RParen = ")";
+ Operands.insert(Operands.end () - 1,
+ HexagonOperand::CreateToken(LParen, Begin));
+ Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End));
+ AsmToken MaybeDotNew = Lexer.getTok();
+ if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) &&
+ MaybeDotNew.getString().equals_lower(".new"))
+ splitIdentifier(Operands);
+ Operands.push_back(HexagonOperand::CreateToken(RParen, Begin));
+ return false;
+ }
+ break;
+ }
+ Operands.push_back(HexagonOperand::CreateReg(
+ Register, Begin, End));
+ return false;
+ }
+ return splitIdentifier(Operands);
+}
+
+bool HexagonAsmParser::isLabel(AsmToken &Token) {
+ MCAsmLexer &Lexer = getLexer();
+ AsmToken const &Second = Lexer.getTok();
+ AsmToken Third = Lexer.peekTok();
+ StringRef String = Token.getString();
+ if (Token.is(AsmToken::TokenKind::LCurly) ||
+ Token.is(AsmToken::TokenKind::RCurly))
+ return false;
+ if (!Token.is(AsmToken::TokenKind::Identifier))
+ return true;
+ if (!MatchRegisterName(String.lower()))
+ return true;
+ (void)Second;
+ assert(Second.is(AsmToken::Colon));
+ StringRef Raw (String.data(), Third.getString().data() - String.data() +
+ Third.getString().size());
+ std::string Collapsed = Raw;
+ Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace),
+ Collapsed.end());
+ StringRef Whole = Collapsed;
+ std::pair<StringRef, StringRef> DotSplit = Whole.split('.');
+ if (!MatchRegisterName(DotSplit.first.lower()))
+ return true;
+ return false;
+}
+
+bool HexagonAsmParser::handleNoncontigiousRegister(bool Contigious, SMLoc &Loc) {
+ if (!Contigious && ErrorNoncontigiousRegister) {
+ Error(Loc, "Register name is not contigious");
+ return true;
+ }
+ if (!Contigious && WarnNoncontigiousRegister)
+ Warning(Loc, "Register name is not contigious");
+ return false;
+}
+
+bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ MCAsmLexer &Lexer = getLexer();
+ StartLoc = getLexer().getLoc();
+ SmallVector<AsmToken, 5> Lookahead;
+ StringRef RawString(Lexer.getTok().getString().data(), 0);
+ bool Again = Lexer.is(AsmToken::Identifier);
+ bool NeededWorkaround = false;
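+  // Lex ahead and glue adjacent tokens back into one string so that
+  // multi-token register names (e.g. pairs such as "r1:0" or dotted forms
+  // such as "r0.h") can be matched as a single register name.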
+ while (Again) {
+ AsmToken const &Token = Lexer.getTok();
+ RawString = StringRef(RawString.data(),
+ Token.getString().data() - RawString.data () +
+ Token.getString().size());
+ Lookahead.push_back(Token);
+ Lexer.Lex();
+ bool Contigious = Lexer.getTok().getString().data() ==
+ Lookahead.back().getString().data() +
+ Lookahead.back().getString().size();
+ bool Type = Lexer.is(AsmToken::Identifier) || Lexer.is(AsmToken::Dot) ||
+ Lexer.is(AsmToken::Integer) || Lexer.is(AsmToken::Real) ||
+ Lexer.is(AsmToken::Colon);
+ bool Workaround = Lexer.is(AsmToken::Colon) ||
+ Lookahead.back().is(AsmToken::Colon);
+ Again = (Contigious && Type) || (Workaround && Type);
+ NeededWorkaround = NeededWorkaround || (Again && !(Contigious && Type));
+ }
+ std::string Collapsed = RawString;
+ Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace),
+ Collapsed.end());
+ StringRef FullString = Collapsed;
+ std::pair<StringRef, StringRef> DotSplit = FullString.split('.');
+ unsigned DotReg = MatchRegisterName(DotSplit.first.lower());
+ if (DotReg != Hexagon::NoRegister && RegisterMatchesArch(DotReg)) {
+ if (DotSplit.second.empty()) {
+ RegNo = DotReg;
+ EndLoc = Lexer.getLoc();
+ if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
+ return true;
+ return false;
+ } else {
+ RegNo = DotReg;
+ size_t First = RawString.find('.');
+ StringRef DotString (RawString.data() + First, RawString.size() - First);
+ Lexer.UnLex(AsmToken(AsmToken::Identifier, DotString));
+ EndLoc = Lexer.getLoc();
+ if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
+ return true;
+ return false;
+ }
+ }
+ std::pair<StringRef, StringRef> ColonSplit = StringRef(FullString).split(':');
+ unsigned ColonReg = MatchRegisterName(ColonSplit.first.lower());
+  if (ColonReg != Hexagon::NoRegister && RegisterMatchesArch(ColonReg)) {
+ Lexer.UnLex(Lookahead.back());
+ Lookahead.pop_back();
+ Lexer.UnLex(Lookahead.back());
+ Lookahead.pop_back();
+ RegNo = ColonReg;
+ EndLoc = Lexer.getLoc();
+ if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
+ return true;
+ return false;
+ }
+ while (!Lookahead.empty()) {
+ Lexer.UnLex(Lookahead.back());
+ Lookahead.pop_back();
+ }
+ return true;
+}
+
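+// A few operand positions take a bare expression with no leading '#':
+// call targets, jump targets (including jump:t / jump:nt), and the label
+// operand of the loop instructions.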
+bool HexagonAsmParser::implicitExpressionLocation(OperandVector &Operands) {
+ if (previousEqual(Operands, 0, "call"))
+ return true;
+ if (previousEqual(Operands, 0, "jump"))
+ if (!getLexer().getTok().is(AsmToken::Colon))
+ return true;
+ if (previousEqual(Operands, 0, "(") && previousIsLoop(Operands, 1))
+ return true;
+ if (previousEqual(Operands, 1, ":") && previousEqual(Operands, 2, "jump") &&
+ (previousEqual(Operands, 0, "nt") || previousEqual(Operands, 0, "t")))
+ return true;
+ return false;
+}
+
+bool HexagonAsmParser::parseExpression(MCExpr const *& Expr) {
+ llvm::SmallVector<AsmToken, 4> Tokens;
+ MCAsmLexer &Lexer = getLexer();
+ bool Done = false;
+ static char const * Comma = ",";
+ do {
+ Tokens.emplace_back (Lexer.getTok());
+ Lexer.Lex();
+ switch (Tokens.back().getKind())
+ {
+ case AsmToken::TokenKind::Hash:
+ if (Tokens.size () > 1)
+ if ((Tokens.end () - 2)->getKind() == AsmToken::TokenKind::Plus) {
+ Tokens.insert(Tokens.end() - 2,
+ AsmToken(AsmToken::TokenKind::Comma, Comma));
+ Done = true;
+ }
+ break;
+ case AsmToken::TokenKind::RCurly:
+ case AsmToken::TokenKind::EndOfStatement:
+ case AsmToken::TokenKind::Eof:
+ Done = true;
+ break;
+ default:
+ break;
+ }
+ } while (!Done);
+ while (!Tokens.empty()) {
+ Lexer.UnLex(Tokens.back());
+ Tokens.pop_back();
+ }
+ return getParser().parseExpression(Expr);
+}
+
+bool HexagonAsmParser::parseExpressionOrOperand(OperandVector &Operands) {
+ if (implicitExpressionLocation(Operands)) {
+ MCAsmParser &Parser = getParser();
+ SMLoc Loc = Parser.getLexer().getLoc();
+ std::unique_ptr<HexagonOperand> Expr =
+ HexagonOperand::CreateImm(nullptr, Loc, Loc);
+ MCExpr const *& Val = Expr->Imm.Val;
+ Operands.push_back(std::move(Expr));
+ return parseExpression(Val);
+ }
+ return parseOperand(Operands);
+}
+
+/// Parse an instruction.
+bool HexagonAsmParser::parseInstruction(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ MCAsmLexer &Lexer = getLexer();
+ while (true) {
+ AsmToken const &Token = Parser.getTok();
+ switch (Token.getKind()) {
+ case AsmToken::EndOfStatement: {
+ Lexer.Lex();
+ return false;
+ }
+ case AsmToken::LCurly: {
+ if (!Operands.empty())
+ return true;
+ Operands.push_back(
+ HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
+ Lexer.Lex();
+ return false;
+ }
+ case AsmToken::RCurly: {
+ if (Operands.empty()) {
+ Operands.push_back(
+ HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
+ Lexer.Lex();
+ }
+ return false;
+ }
+ case AsmToken::Comma: {
+ Lexer.Lex();
+ continue;
+ }
+ case AsmToken::EqualEqual:
+ case AsmToken::ExclaimEqual:
+ case AsmToken::GreaterEqual:
+ case AsmToken::GreaterGreater:
+ case AsmToken::LessEqual:
+ case AsmToken::LessLess: {
+ Operands.push_back(HexagonOperand::CreateToken(
+ Token.getString().substr(0, 1), Token.getLoc()));
+ Operands.push_back(HexagonOperand::CreateToken(
+ Token.getString().substr(1, 1), Token.getLoc()));
+ Lexer.Lex();
+ continue;
+ }
+ case AsmToken::Hash: {
+ bool MustNotExtend = false;
+ bool ImplicitExpression = implicitExpressionLocation(Operands);
+ std::unique_ptr<HexagonOperand> Expr = HexagonOperand::CreateImm(
+ nullptr, Lexer.getLoc(), Lexer.getLoc());
+ if (!ImplicitExpression)
+ Operands.push_back(
+ HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
+ Lexer.Lex();
+ bool MustExtend = false;
+ bool HiOnly = false;
+ bool LoOnly = false;
+ if (Lexer.is(AsmToken::Hash)) {
+ Lexer.Lex();
+ MustExtend = true;
+ } else if (ImplicitExpression)
+ MustNotExtend = true;
+ AsmToken const &Token = Parser.getTok();
+ if (Token.is(AsmToken::Identifier)) {
+ StringRef String = Token.getString();
+ AsmToken IDToken = Token;
+ if (String.lower() == "hi") {
+ HiOnly = true;
+ } else if (String.lower() == "lo") {
+ LoOnly = true;
+ }
+ if (HiOnly || LoOnly) {
+ AsmToken LParen = Lexer.peekTok();
+ if (!LParen.is(AsmToken::LParen)) {
+ HiOnly = false;
+ LoOnly = false;
+ } else {
+ Lexer.Lex();
+ }
+ }
+ }
+ if (parseExpression(Expr->Imm.Val))
+ return true;
+ int64_t Value;
+ MCContext &Context = Parser.getContext();
+ assert(Expr->Imm.Val != nullptr);
+ if (Expr->Imm.Val->evaluateAsAbsolute(Value)) {
+ if (HiOnly)
+ Expr->Imm.Val = MCBinaryExpr::createLShr(
+ Expr->Imm.Val, MCConstantExpr::create(16, Context), Context);
+ if (HiOnly || LoOnly)
+ Expr->Imm.Val = MCBinaryExpr::createAnd(
+ Expr->Imm.Val, MCConstantExpr::create(0xffff, Context), Context);
+ }
+ if (MustNotExtend)
+ Expr->Imm.Val = HexagonNoExtendOperand::Create(Expr->Imm.Val, Context);
+ Expr->Imm.MustExtend = MustExtend;
+ Operands.push_back(std::move(Expr));
+ continue;
+ }
+ default:
+ break;
+ }
+ if (parseExpressionOrOperand(Operands))
+ return true;
+ }
+}
+
+bool HexagonAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name,
+ AsmToken ID,
+ OperandVector &Operands) {
+ getLexer().UnLex(ID);
+ return parseInstruction(Operands);
+}
+
+namespace {
+MCInst makeCombineInst(int opCode, MCOperand &Rdd,
+ MCOperand &MO1, MCOperand &MO2) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(opCode);
+ TmpInst.addOperand(Rdd);
+ TmpInst.addOperand(MO1);
+ TmpInst.addOperand(MO2);
+
+ return TmpInst;
+}
+}
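+
+// For illustration: makeCombineInst is used below to rewrite transfer pseudos
+// into combine() forms; e.g. in the A2_tfrpi handling,
+//   MCOperand imm(MCOperand::createExpr(MCConstantExpr::create(0, Context)));
+//   Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, imm, MO);
+// builds the MCInst for "Rdd = combine(#0, #imm)".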
+
+// Define this matcher function after the auto-generated include so we
+// have the match class enum definitions.
+unsigned HexagonAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
+ unsigned Kind) {
+ HexagonOperand *Op = static_cast<HexagonOperand *>(&AsmOp);
+
+ switch (Kind) {
+ case MCK_0: {
+ int64_t Value;
+ return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == 0
+ ? Match_Success
+ : Match_InvalidOperand;
+ }
+ case MCK_1: {
+ int64_t Value;
+ return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == 1
+ ? Match_Success
+ : Match_InvalidOperand;
+ }
+ case MCK__MINUS_1: {
+ int64_t Value;
+ return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == -1
+ ? Match_Success
+ : Match_InvalidOperand;
+ }
+ }
+ if (Op->Kind == HexagonOperand::Token && Kind != InvalidMatchClass) {
+ StringRef myStringRef = StringRef(Op->Tok.Data, Op->Tok.Length);
+ if (matchTokenString(myStringRef.lower()) == (MatchClassKind)Kind)
+ return Match_Success;
+ if (matchTokenString(myStringRef.upper()) == (MatchClassKind)Kind)
+ return Match_Success;
+ }
+
+ DEBUG(dbgs() << "Unmatched Operand:");
+ DEBUG(Op->dump());
+ DEBUG(dbgs() << "\n");
+
+ return Match_InvalidOperand;
+}
+
+void HexagonAsmParser::OutOfRange(SMLoc IDLoc, long long Val, long long Max) {
+ std::string errStr;
+ raw_string_ostream ES(errStr);
+ ES << "value " << Val << "(" << format_hex(Val, 0) << ") out of range: ";
+ if (Max >= 0)
+ ES << "0-" << Max;
+ else
+ ES << Max << "-" << (-Max - 1);
+ Error(IDLoc, ES.str().c_str());
+}
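+
+// For illustration (a hypothetical call): OutOfRange(IDLoc, 130, -128) reports
+//   value 130(0x82) out of range: -128-127
+// i.e. a negative Max encodes the signed range [Max, -Max-1].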
+
+int HexagonAsmParser::processInstruction(MCInst &Inst,
+ OperandVector const &Operands,
+ SMLoc IDLoc, bool &MustExtend) {
+ MCContext &Context = getParser().getContext();
+ const MCRegisterInfo *RI = getContext().getRegisterInfo();
+ std::string r = "r";
+ std::string v = "v";
+ std::string Colon = ":";
+
+ bool is32bit = false; // used to distinguish between CONST32 and CONST64
+ switch (Inst.getOpcode()) {
+ default:
+ break;
+
+ case Hexagon::M4_mpyrr_addr:
+ case Hexagon::S4_addi_asl_ri:
+ case Hexagon::S4_addi_lsr_ri:
+ case Hexagon::S4_andi_asl_ri:
+ case Hexagon::S4_andi_lsr_ri:
+ case Hexagon::S4_ori_asl_ri:
+ case Hexagon::S4_ori_lsr_ri:
+ case Hexagon::S4_or_andix:
+ case Hexagon::S4_subi_asl_ri:
+ case Hexagon::S4_subi_lsr_ri: {
+ MCOperand &Ry = Inst.getOperand(0);
+ MCOperand &src = Inst.getOperand(2);
+ if (RI->getEncodingValue(Ry.getReg()) != RI->getEncodingValue(src.getReg()))
+ return Match_InvalidOperand;
+ break;
+ }
+
+ case Hexagon::C2_cmpgei: {
+ MCOperand &MO = Inst.getOperand(2);
+ MO.setExpr(MCBinaryExpr::createSub(
+ MO.getExpr(), MCConstantExpr::create(1, Context), Context));
+ Inst.setOpcode(Hexagon::C2_cmpgti);
+ break;
+ }
+
+ case Hexagon::C2_cmpgeui: {
+ MCOperand &MO = Inst.getOperand(2);
+ int64_t Value;
+ bool Success = MO.getExpr()->evaluateAsAbsolute(Value);
+ (void)Success;
+ assert(Success && "Assured by matcher");
+ if (Value == 0) {
+ MCInst TmpInst;
+ MCOperand &Pd = Inst.getOperand(0);
+ MCOperand &Rt = Inst.getOperand(1);
+ TmpInst.setOpcode(Hexagon::C2_cmpeq);
+ TmpInst.addOperand(Pd);
+ TmpInst.addOperand(Rt);
+ TmpInst.addOperand(Rt);
+ Inst = TmpInst;
+ } else {
+ MO.setExpr(MCBinaryExpr::createSub(
+ MO.getExpr(), MCConstantExpr::create(1, Context), Context));
+ Inst.setOpcode(Hexagon::C2_cmpgtui);
+ }
+ break;
+ }
+ case Hexagon::J2_loop1r:
+ case Hexagon::J2_loop1i:
+ case Hexagon::J2_loop0r:
+ case Hexagon::J2_loop0i: {
+ MCOperand &MO = Inst.getOperand(0);
+ // Loop has different opcodes for extended vs not extended, but we should
+ // not use the other opcode as it is a legacy artifact of TD files.
+ int64_t Value;
+ if (MO.getExpr()->evaluateAsAbsolute(Value)) {
+ // if the operand can fit within a 7:2 field
+ if (Value < (1 << 8) && Value >= -(1 << 8)) {
+ SMLoc myLoc = Operands[2]->getStartLoc();
+ // # is left in startLoc in the case of ##
+ // If '##' found then force extension.
+ if (*myLoc.getPointer() == '#') {
+ MustExtend = true;
+ break;
+ }
+ } else {
+ // If immediate and out of 7:2 range.
+ MustExtend = true;
+ }
+ }
+ break;
+ }
+
+ // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)"
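+  // For illustration (an assumed example, with the usual pair encoding where
+  // a double register's encoding value is its even base register):
+  // "r1:0 = r5:4" becomes "r1:0 = combine(r5, r4)", i.e. A2_combinew.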
+ case Hexagon::A2_tfrp: {
+ MCOperand &MO = Inst.getOperand(1);
+ unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
+ std::string R1 = r + llvm::utostr_32(RegPairNum + 1);
+ StringRef Reg1(R1);
+ MO.setReg(MatchRegisterName(Reg1));
+ // Add a new operand for the second register in the pair.
+ std::string R2 = r + llvm::utostr_32(RegPairNum);
+ StringRef Reg2(R2);
+ Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2)));
+ Inst.setOpcode(Hexagon::A2_combinew);
+ break;
+ }
+
+ case Hexagon::A2_tfrpt:
+ case Hexagon::A2_tfrpf: {
+ MCOperand &MO = Inst.getOperand(2);
+ unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
+ std::string R1 = r + llvm::utostr_32(RegPairNum + 1);
+ StringRef Reg1(R1);
+ MO.setReg(MatchRegisterName(Reg1));
+ // Add a new operand for the second register in the pair.
+ std::string R2 = r + llvm::utostr_32(RegPairNum);
+ StringRef Reg2(R2);
+ Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2)));
+ Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt)
+ ? Hexagon::C2_ccombinewt
+ : Hexagon::C2_ccombinewf);
+ break;
+ }
+ case Hexagon::A2_tfrptnew:
+ case Hexagon::A2_tfrpfnew: {
+ MCOperand &MO = Inst.getOperand(2);
+ unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
+ std::string R1 = r + llvm::utostr_32(RegPairNum + 1);
+ StringRef Reg1(R1);
+ MO.setReg(MatchRegisterName(Reg1));
+ // Add a new operand for the second register in the pair.
+ std::string R2 = r + llvm::utostr_32(RegPairNum);
+ StringRef Reg2(R2);
+ Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2)));
+ Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew)
+ ? Hexagon::C2_ccombinewnewt
+ : Hexagon::C2_ccombinewnewf);
+ break;
+ }
+
+ // Translate a "$Rx = CONST32(#imm)" to "$Rx = memw(gp+#LABEL) "
+ case Hexagon::CONST32:
+ case Hexagon::CONST32_Float_Real:
+ case Hexagon::CONST32_Int_Real:
+ case Hexagon::FCONST32_nsdata:
+ is32bit = true;
+ // Translate a "$Rx:y = CONST64(#imm)" to "$Rx:y = memd(gp+#LABEL) "
+ case Hexagon::CONST64_Float_Real:
+ case Hexagon::CONST64_Int_Real:
+
+ // FIXME: need better way to detect AsmStreamer (upstream removed getKind())
+ if (!Parser.getStreamer().hasRawTextSupport()) {
+ MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer());
+ MCOperand &MO_1 = Inst.getOperand(1);
+ MCOperand &MO_0 = Inst.getOperand(0);
+
+ // push section onto section stack
+ MES->PushSection();
+
+ std::string myCharStr;
+ MCSectionELF *mySection;
+
+      // check if this is an immediate or a symbol
+ int64_t Value;
+ bool Absolute = MO_1.getExpr()->evaluateAsAbsolute(Value);
+ if (Absolute) {
+ // Create a new section - one for each constant
+ // Some or all of the zeros are replaced with the given immediate.
+ if (is32bit) {
+ std::string myImmStr = utohexstr(static_cast<uint32_t>(Value));
+ myCharStr = StringRef(".gnu.linkonce.l4.CONST_00000000")
+ .drop_back(myImmStr.size())
+ .str() +
+ myImmStr;
+ } else {
+ std::string myImmStr = utohexstr(Value);
+ myCharStr = StringRef(".gnu.linkonce.l8.CONST_0000000000000000")
+ .drop_back(myImmStr.size())
+ .str() +
+ myImmStr;
+ }
+
+ mySection = getContext().getELFSection(myCharStr, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
+ } else if (MO_1.isExpr()) {
+ // .lita - for expressions
+ myCharStr = ".lita";
+ mySection = getContext().getELFSection(myCharStr, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
+ } else
+ llvm_unreachable("unexpected type of machine operand!");
+
+ MES->SwitchSection(mySection);
+ unsigned byteSize = is32bit ? 4 : 8;
+ getStreamer().EmitCodeAlignment(byteSize, byteSize);
+
+ MCSymbol *Sym;
+
+ // for symbols, get rid of prepended ".gnu.linkonce.lx."
+
+ // emit symbol if needed
+ if (Absolute) {
+ Sym = getContext().getOrCreateSymbol(StringRef(myCharStr.c_str() + 16));
+ if (Sym->isUndefined()) {
+ getStreamer().EmitLabel(Sym);
+ getStreamer().EmitSymbolAttribute(Sym, MCSA_Global);
+ getStreamer().EmitIntValue(Value, byteSize);
+ }
+ } else if (MO_1.isExpr()) {
+ const char *StringStart = 0;
+ const char *StringEnd = 0;
+ if (*Operands[4]->getStartLoc().getPointer() == '#') {
+ StringStart = Operands[5]->getStartLoc().getPointer();
+ StringEnd = Operands[6]->getStartLoc().getPointer();
+ } else { // no pound
+ StringStart = Operands[4]->getStartLoc().getPointer();
+ StringEnd = Operands[5]->getStartLoc().getPointer();
+ }
+
+ unsigned size = StringEnd - StringStart;
+ std::string DotConst = ".CONST_";
+ Sym = getContext().getOrCreateSymbol(DotConst +
+ StringRef(StringStart, size));
+
+ if (Sym->isUndefined()) {
+ // case where symbol is not yet defined: emit symbol
+ getStreamer().EmitLabel(Sym);
+ getStreamer().EmitSymbolAttribute(Sym, MCSA_Local);
+ getStreamer().EmitValue(MO_1.getExpr(), 4);
+ }
+ } else
+ llvm_unreachable("unexpected type of machine operand!");
+
+ MES->PopSection();
+
+ if (Sym) {
+ MCInst TmpInst;
+ if (is32bit) // 32 bit
+ TmpInst.setOpcode(Hexagon::L2_loadrigp);
+ else // 64 bit
+ TmpInst.setOpcode(Hexagon::L2_loadrdgp);
+
+ TmpInst.addOperand(MO_0);
+ TmpInst.addOperand(
+ MCOperand::createExpr(MCSymbolRefExpr::create(Sym, getContext())));
+ Inst = TmpInst;
+ }
+ }
+ break;
+
+ // Translate a "$Rdd = #-imm" to "$Rdd = combine(#[-1,0], #-imm)"
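+  // For illustration: "r1:0 = #-3" is rewritten here as
+  // "r1:0 = combine(#-1, #-3)", while a non-negative immediate gets #0 as the
+  // upper word.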
+ case Hexagon::A2_tfrpi: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &MO = Inst.getOperand(1);
+ int64_t Value;
+ int sVal = (MO.getExpr()->evaluateAsAbsolute(Value) && Value < 0) ? -1 : 0;
+ MCOperand imm(MCOperand::createExpr(MCConstantExpr::create(sVal, Context)));
+ Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, imm, MO);
+ break;
+ }
+
+ // Translate a "$Rdd = [#]#imm" to "$Rdd = combine(#, [#]#imm)"
+ case Hexagon::TFRI64_V4: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &MO = Inst.getOperand(1);
+ int64_t Value;
+ if (MO.getExpr()->evaluateAsAbsolute(Value)) {
+ unsigned long long u64 = Value;
+ signed int s8 = (u64 >> 32) & 0xFFFFFFFF;
+ if (s8 < -128 || s8 > 127)
+ OutOfRange(IDLoc, s8, -128);
+ MCOperand imm(MCOperand::createExpr(
+ MCConstantExpr::create(s8, Context))); // upper 32
+ MCOperand imm2(MCOperand::createExpr(
+ MCConstantExpr::create(u64 & 0xFFFFFFFF, Context))); // lower 32
+ Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, imm2);
+ } else {
+ MCOperand imm(MCOperand::createExpr(
+ MCConstantExpr::create(0, Context))); // upper 32
+ Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, MO);
+ }
+ break;
+ }
+
+  // Handle "$Rdd = combine(##imm, #imm)"
+ case Hexagon::TFRI64_V2_ext: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &MO1 = Inst.getOperand(1);
+ MCOperand &MO2 = Inst.getOperand(2);
+ int64_t Value;
+ if (MO2.getExpr()->evaluateAsAbsolute(Value)) {
+ int s8 = Value;
+ if (s8 < -128 || s8 > 127)
+ OutOfRange(IDLoc, s8, -128);
+ }
+ Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, MO1, MO2);
+ break;
+ }
+
+  // Handle "$Rdd = combine(#imm, ##imm)"
+ case Hexagon::A4_combineii: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &MO1 = Inst.getOperand(1);
+ int64_t Value;
+ if (MO1.getExpr()->evaluateAsAbsolute(Value)) {
+ int s8 = Value;
+ if (s8 < -128 || s8 > 127)
+ OutOfRange(IDLoc, s8, -128);
+ }
+ MCOperand &MO2 = Inst.getOperand(2);
+ Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, MO1, MO2);
+ break;
+ }
+
+ case Hexagon::S2_tableidxb_goodsyntax: {
+ Inst.setOpcode(Hexagon::S2_tableidxb);
+ break;
+ }
+
+ case Hexagon::S2_tableidxh_goodsyntax: {
+ MCInst TmpInst;
+ MCOperand &Rx = Inst.getOperand(0);
+ MCOperand &_dst_ = Inst.getOperand(1);
+ MCOperand &Rs = Inst.getOperand(2);
+ MCOperand &Imm4 = Inst.getOperand(3);
+ MCOperand &Imm6 = Inst.getOperand(4);
+ Imm6.setExpr(MCBinaryExpr::createSub(
+ Imm6.getExpr(), MCConstantExpr::create(1, Context), Context));
+ TmpInst.setOpcode(Hexagon::S2_tableidxh);
+ TmpInst.addOperand(Rx);
+ TmpInst.addOperand(_dst_);
+ TmpInst.addOperand(Rs);
+ TmpInst.addOperand(Imm4);
+ TmpInst.addOperand(Imm6);
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::S2_tableidxw_goodsyntax: {
+ MCInst TmpInst;
+ MCOperand &Rx = Inst.getOperand(0);
+ MCOperand &_dst_ = Inst.getOperand(1);
+ MCOperand &Rs = Inst.getOperand(2);
+ MCOperand &Imm4 = Inst.getOperand(3);
+ MCOperand &Imm6 = Inst.getOperand(4);
+ Imm6.setExpr(MCBinaryExpr::createSub(
+ Imm6.getExpr(), MCConstantExpr::create(2, Context), Context));
+ TmpInst.setOpcode(Hexagon::S2_tableidxw);
+ TmpInst.addOperand(Rx);
+ TmpInst.addOperand(_dst_);
+ TmpInst.addOperand(Rs);
+ TmpInst.addOperand(Imm4);
+ TmpInst.addOperand(Imm6);
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::S2_tableidxd_goodsyntax: {
+ MCInst TmpInst;
+ MCOperand &Rx = Inst.getOperand(0);
+ MCOperand &_dst_ = Inst.getOperand(1);
+ MCOperand &Rs = Inst.getOperand(2);
+ MCOperand &Imm4 = Inst.getOperand(3);
+ MCOperand &Imm6 = Inst.getOperand(4);
+ Imm6.setExpr(MCBinaryExpr::createSub(
+ Imm6.getExpr(), MCConstantExpr::create(3, Context), Context));
+ TmpInst.setOpcode(Hexagon::S2_tableidxd);
+ TmpInst.addOperand(Rx);
+ TmpInst.addOperand(_dst_);
+ TmpInst.addOperand(Rs);
+ TmpInst.addOperand(Imm4);
+ TmpInst.addOperand(Imm6);
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::M2_mpyui: {
+ Inst.setOpcode(Hexagon::M2_mpyi);
+ break;
+ }
+ case Hexagon::M2_mpysmi: {
+ MCInst TmpInst;
+ MCOperand &Rd = Inst.getOperand(0);
+ MCOperand &Rs = Inst.getOperand(1);
+ MCOperand &Imm = Inst.getOperand(2);
+ int64_t Value;
+ bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);
+ (void)Absolute;
+ if (!MustExtend) {
+ if (Value < 0 && Value > -256) {
+ Imm.setExpr(MCConstantExpr::create(Value * -1, Context));
+ TmpInst.setOpcode(Hexagon::M2_mpysin);
+ } else if (Value < 256 && Value >= 0)
+ TmpInst.setOpcode(Hexagon::M2_mpysip);
+ else
+ return Match_InvalidOperand;
+ } else {
+ if (Value >= 0)
+ TmpInst.setOpcode(Hexagon::M2_mpysip);
+ else
+ return Match_InvalidOperand;
+ }
+ TmpInst.addOperand(Rd);
+ TmpInst.addOperand(Rs);
+ TmpInst.addOperand(Imm);
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::S2_asr_i_r_rnd_goodsyntax: {
+ MCOperand &Imm = Inst.getOperand(2);
+ MCInst TmpInst;
+ int64_t Value;
+ bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);
+ (void)Absolute;
+ if (Value == 0) { // convert to $Rd = $Rs
+ TmpInst.setOpcode(Hexagon::A2_tfr);
+ MCOperand &Rd = Inst.getOperand(0);
+ MCOperand &Rs = Inst.getOperand(1);
+ TmpInst.addOperand(Rd);
+ TmpInst.addOperand(Rs);
+ } else {
+ Imm.setExpr(MCBinaryExpr::createSub(
+ Imm.getExpr(), MCConstantExpr::create(1, Context), Context));
+ TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd);
+ MCOperand &Rd = Inst.getOperand(0);
+ MCOperand &Rs = Inst.getOperand(1);
+ TmpInst.addOperand(Rd);
+ TmpInst.addOperand(Rs);
+ TmpInst.addOperand(Imm);
+ }
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::S2_asr_i_p_rnd_goodsyntax: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &Rss = Inst.getOperand(1);
+ MCOperand &Imm = Inst.getOperand(2);
+ int64_t Value;
+ bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);
+ (void)Absolute;
+ if (Value == 0) { // convert to $Rdd = combine ($Rs[0], $Rs[1])
+ MCInst TmpInst;
+ unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg());
+ std::string R1 = r + llvm::utostr_32(RegPairNum + 1);
+ StringRef Reg1(R1);
+ Rss.setReg(MatchRegisterName(Reg1));
+ // Add a new operand for the second register in the pair.
+ std::string R2 = r + llvm::utostr_32(RegPairNum);
+ StringRef Reg2(R2);
+ TmpInst.setOpcode(Hexagon::A2_combinew);
+ TmpInst.addOperand(Rdd);
+ TmpInst.addOperand(Rss);
+ TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2)));
+ Inst = TmpInst;
+ } else {
+ Imm.setExpr(MCBinaryExpr::createSub(
+ Imm.getExpr(), MCConstantExpr::create(1, Context), Context));
+ Inst.setOpcode(Hexagon::S2_asr_i_p_rnd);
+ }
+ break;
+ }
+
+ case Hexagon::A4_boundscheck: {
+ MCOperand &Rs = Inst.getOperand(1);
+ unsigned int RegNum = RI->getEncodingValue(Rs.getReg());
+ if (RegNum & 1) { // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2
+ Inst.setOpcode(Hexagon::A4_boundscheck_hi);
+ std::string Name =
+ r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1);
+ StringRef RegPair = Name;
+ Rs.setReg(MatchRegisterName(RegPair));
+ } else { // raw:lo
+ Inst.setOpcode(Hexagon::A4_boundscheck_lo);
+ std::string Name =
+ r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum);
+ StringRef RegPair = Name;
+ Rs.setReg(MatchRegisterName(RegPair));
+ }
+ break;
+ }
+
+ case Hexagon::A2_addsp: {
+ MCOperand &Rs = Inst.getOperand(1);
+ unsigned int RegNum = RI->getEncodingValue(Rs.getReg());
+ if (RegNum & 1) { // Odd mapped to raw:hi
+ Inst.setOpcode(Hexagon::A2_addsph);
+ std::string Name =
+ r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1);
+ StringRef RegPair = Name;
+ Rs.setReg(MatchRegisterName(RegPair));
+ } else { // Even mapped raw:lo
+ Inst.setOpcode(Hexagon::A2_addspl);
+ std::string Name =
+ r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum);
+ StringRef RegPair = Name;
+ Rs.setReg(MatchRegisterName(RegPair));
+ }
+ break;
+ }
+
+ case Hexagon::M2_vrcmpys_s1: {
+ MCOperand &Rt = Inst.getOperand(2);
+ unsigned int RegNum = RI->getEncodingValue(Rt.getReg());
+ if (RegNum & 1) { // Odd mapped to sat:raw:hi
+ Inst.setOpcode(Hexagon::M2_vrcmpys_s1_h);
+ std::string Name =
+ r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ } else { // Even mapped sat:raw:lo
+ Inst.setOpcode(Hexagon::M2_vrcmpys_s1_l);
+ std::string Name =
+ r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ }
+ break;
+ }
+
+ case Hexagon::M2_vrcmpys_acc_s1: {
+ MCInst TmpInst;
+ MCOperand &Rxx = Inst.getOperand(0);
+ MCOperand &Rss = Inst.getOperand(2);
+ MCOperand &Rt = Inst.getOperand(3);
+ unsigned int RegNum = RI->getEncodingValue(Rt.getReg());
+ if (RegNum & 1) { // Odd mapped to sat:raw:hi
+ TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h);
+ std::string Name =
+ r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ } else { // Even mapped sat:raw:lo
+ TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l);
+ std::string Name =
+ r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ }
+ // Registers are in different positions
+ TmpInst.addOperand(Rxx);
+ TmpInst.addOperand(Rxx);
+ TmpInst.addOperand(Rss);
+ TmpInst.addOperand(Rt);
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::M2_vrcmpys_s1rp: {
+ MCOperand &Rt = Inst.getOperand(2);
+ unsigned int RegNum = RI->getEncodingValue(Rt.getReg());
+ if (RegNum & 1) { // Odd mapped to rnd:sat:raw:hi
+ Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h);
+ std::string Name =
+ r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ } else { // Even mapped rnd:sat:raw:lo
+ Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l);
+ std::string Name =
+ r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ }
+ break;
+ }
+
+ case Hexagon::S5_asrhub_rnd_sat_goodsyntax: {
+ MCOperand &Imm = Inst.getOperand(2);
+ int64_t Value;
+ bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);
+ (void)Absolute;
+ if (Value == 0)
+ Inst.setOpcode(Hexagon::S2_vsathub);
+ else {
+ Imm.setExpr(MCBinaryExpr::createSub(
+ Imm.getExpr(), MCConstantExpr::create(1, Context), Context));
+ Inst.setOpcode(Hexagon::S5_asrhub_rnd_sat);
+ }
+ break;
+ }
+
+ case Hexagon::S5_vasrhrnd_goodsyntax: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &Rss = Inst.getOperand(1);
+ MCOperand &Imm = Inst.getOperand(2);
+ int64_t Value;
+ bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);
+ (void)Absolute;
+ if (Value == 0) {
+ MCInst TmpInst;
+ unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg());
+ std::string R1 = r + llvm::utostr_32(RegPairNum + 1);
+ StringRef Reg1(R1);
+ Rss.setReg(MatchRegisterName(Reg1));
+ // Add a new operand for the second register in the pair.
+ std::string R2 = r + llvm::utostr_32(RegPairNum);
+ StringRef Reg2(R2);
+ TmpInst.setOpcode(Hexagon::A2_combinew);
+ TmpInst.addOperand(Rdd);
+ TmpInst.addOperand(Rss);
+ TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2)));
+ Inst = TmpInst;
+ } else {
+ Imm.setExpr(MCBinaryExpr::createSub(
+ Imm.getExpr(), MCConstantExpr::create(1, Context), Context));
+ Inst.setOpcode(Hexagon::S5_vasrhrnd);
+ }
+ break;
+ }
+
+ case Hexagon::A2_not: {
+ MCInst TmpInst;
+ MCOperand &Rd = Inst.getOperand(0);
+ MCOperand &Rs = Inst.getOperand(1);
+ TmpInst.setOpcode(Hexagon::A2_subri);
+ TmpInst.addOperand(Rd);
+ TmpInst.addOperand(
+ MCOperand::createExpr(MCConstantExpr::create(-1, Context)));
+ TmpInst.addOperand(Rs);
+ Inst = TmpInst;
+ break;
+ }
+ } // switch
+
+ return Match_Success;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp
new file mode 100644
index 0000000..ea96eb0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp
@@ -0,0 +1,1127 @@
+//===--- BitTracker.cpp ---------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// SSA-based bit propagation.
+//
+// The purpose of this code is, for a given virtual register, to provide
+// information about the value of each bit in the register. The values
+// of bits are represented by the class BitValue, and take one of four
+// cases: 0, 1, "ref" and "bottom". The 0 and 1 are rather clear, the
+// "ref" value means that the bit is a copy of another bit (which itself
+// cannot be a copy of yet another bit---such chains are not allowed).
+// A "ref" value is associated with a BitRef structure, which indicates
+// which virtual register, and which bit in that register is the origin
+// of the value. For example, given an instruction
+// vreg2 = ASL vreg1, 1
+// assuming that nothing is known about bits of vreg1, bit 1 of vreg2
+// will be a "ref" to (vreg1, 0). If there is a subsequent instruction
+// vreg3 = ASL vreg2, 2
+// then bit 3 of vreg3 will be a "ref" to (vreg1, 0) as well.
+// The "bottom" case means that the bit's value cannot be determined,
+// and that this virtual register actually defines it. The "bottom" case
+// is discussed in detail in BitTracker.h. In fact, "bottom" is a "ref
+// to self", so for the vreg1 above, the bit 0 of it will be a "ref" to
+// (vreg1, 0), bit 1 will be a "ref" to (vreg1, 1), etc.
+//
+// The tracker implements the Wegman-Zadeck algorithm, originally developed
+// for SSA-based constant propagation. Each register is represented as
+// a sequence of bits, with the convention that bit 0 is the least signi-
+// ficant bit. Each bit is propagated individually. The class RegisterCell
+// implements the register's representation, and is also the subject of
+// the lattice operations in the tracker.
+//
+// The intended usage of the bit tracker is to create a target-specific
+// machine instruction evaluator, pass the evaluator to the BitTracker
+// object, and run the tracker. The tracker will then collect the bit
+// value information for a given machine function. After that, it can be
+// queried for the cells for each virtual register.
+// Sample code:
+// const TargetSpecificEvaluator TSE(TRI, MRI);
+// BitTracker BT(TSE, MF);
+// BT.run();
+// ...
+// unsigned Reg = interestingRegister();
+// RegisterCell RC = BT.get(Reg);
+// if (RC[3].is(1))
+// Reg0bit3 = 1;
+//
+// The code below is intended to be fully target-independent.
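+//
+// For illustration only ("TargetSpecificEvaluator" above is a hypothetical
+// name, not a class defined in this file): such an evaluator derives from
+// BitTracker::MachineEvaluator and supplies the two evaluate() overloads, one
+// for ordinary instructions and one for branches. A minimal sketch, assuming
+// both overloads are virtual in the base class and that its constructor takes
+// the TargetRegisterInfo and MachineRegisterInfo it exposes as TRI and MRI:
+//
+//   struct TargetSpecificEvaluator : public BitTracker::MachineEvaluator {
+//     TargetSpecificEvaluator(const TargetRegisterInfo &T,
+//                             MachineRegisterInfo &M)
+//         : MachineEvaluator(T, M) {}
+//     bool evaluate(const MachineInstr *MI,
+//                   const BitTracker::CellMapType &Inputs,
+//                   BitTracker::CellMapType &Outputs) const override;
+//     bool evaluate(const MachineInstr *BI,
+//                   const BitTracker::CellMapType &Inputs,
+//                   BitTracker::BranchTargetList &Targets,
+//                   bool &FallsThrough) const override;
+//   };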
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include "BitTracker.h"
+
+using namespace llvm;
+
+typedef BitTracker BT;
+
+namespace {
+ // Local trickery to pretty print a register (without the whole "%vreg"
+ // business).
+ struct printv {
+ printv(unsigned r) : R(r) {}
+ unsigned R;
+ };
+ raw_ostream &operator<< (raw_ostream &OS, const printv &PV) {
+ if (PV.R)
+ OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R);
+ else
+ OS << 's';
+ return OS;
+ }
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const BT::BitValue &BV) {
+ switch (BV.Type) {
+ case BT::BitValue::Top:
+ OS << 'T';
+ break;
+ case BT::BitValue::Zero:
+ OS << '0';
+ break;
+ case BT::BitValue::One:
+ OS << '1';
+ break;
+ case BT::BitValue::Ref:
+ OS << printv(BV.RefI.Reg) << '[' << BV.RefI.Pos << ']';
+ break;
+ }
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const BT::RegisterCell &RC) {
+ unsigned n = RC.Bits.size();
+ OS << "{ w:" << n;
+ // Instead of printing each bit value individually, try to group them
+ // into logical segments, such as sequences of 0 or 1 bits or references
+ // to consecutive bits (e.g. "bits 3-5 are same as bits 7-9 of reg xyz").
+ // "Start" will be the index of the beginning of the most recent segment.
+ unsigned Start = 0;
+ bool SeqRef = false; // A sequence of refs to consecutive bits.
+ bool ConstRef = false; // A sequence of refs to the same bit.
+
+ for (unsigned i = 1, n = RC.Bits.size(); i < n; ++i) {
+ const BT::BitValue &V = RC[i];
+ const BT::BitValue &SV = RC[Start];
+ bool IsRef = (V.Type == BT::BitValue::Ref);
+ // If the current value is the same as Start, skip to the next one.
+ if (!IsRef && V == SV)
+ continue;
+ if (IsRef && SV.Type == BT::BitValue::Ref && V.RefI.Reg == SV.RefI.Reg) {
+ if (Start+1 == i) {
+ SeqRef = (V.RefI.Pos == SV.RefI.Pos+1);
+ ConstRef = (V.RefI.Pos == SV.RefI.Pos);
+ }
+ if (SeqRef && V.RefI.Pos == SV.RefI.Pos+(i-Start))
+ continue;
+ if (ConstRef && V.RefI.Pos == SV.RefI.Pos)
+ continue;
+ }
+
+ // The current value is different. Print the previous one and reset
+ // the Start.
+ OS << " [" << Start;
+ unsigned Count = i - Start;
+ if (Count == 1) {
+ OS << "]:" << SV;
+ } else {
+ OS << '-' << i-1 << "]:";
+ if (SV.Type == BT::BitValue::Ref && SeqRef)
+ OS << printv(SV.RefI.Reg) << '[' << SV.RefI.Pos << '-'
+ << SV.RefI.Pos+(Count-1) << ']';
+ else
+ OS << SV;
+ }
+ Start = i;
+ SeqRef = ConstRef = false;
+ }
+
+ OS << " [" << Start;
+ unsigned Count = n - Start;
+ if (n-Start == 1) {
+ OS << "]:" << RC[Start];
+ } else {
+ OS << '-' << n-1 << "]:";
+ const BT::BitValue &SV = RC[Start];
+ if (SV.Type == BT::BitValue::Ref && SeqRef)
+ OS << printv(SV.RefI.Reg) << '[' << SV.RefI.Pos << '-'
+ << SV.RefI.Pos+(Count-1) << ']';
+ else
+ OS << SV;
+ }
+ OS << " }";
+
+ return OS;
+}
+
+BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F)
+ : Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {}
+
+BitTracker::~BitTracker() {
+ delete &Map;
+}
+
+
+// If we were allowed to update a cell for a part of a register, the meet
+// operation would need to be parametrized by the register number and the
+// exact part of the register, so that the computed BitRefs correspond to
+// the actual bits of the "self" register.
+// While this cannot happen in the current implementation, I'm not sure
+// if this should be ruled out in the future.
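+//
+// For illustration (values chosen arbitrarily): meeting the two-bit cells
+// { 0, 1 } and { 0, 0 } for a virtual register R leaves bit 0 as a known 0,
+// while bit 1, where the inputs disagree, drops to "bottom", i.e. becomes a
+// ref to bit 1 of R itself.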
+bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) {
+ // An example when "meet" can be invoked with SelfR == 0 is a phi node
+ // with a physical register as an operand.
+ assert(SelfR == 0 || TargetRegisterInfo::isVirtualRegister(SelfR));
+ bool Changed = false;
+ for (uint16_t i = 0, n = Bits.size(); i < n; ++i) {
+ const BitValue &RCV = RC[i];
+ Changed |= Bits[i].meet(RCV, BitRef(SelfR, i));
+ }
+ return Changed;
+}
+
+
+// Insert the entire cell RC into the current cell at position given by M.
+BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC,
+ const BitMask &M) {
+ uint16_t B = M.first(), E = M.last(), W = width();
+ // Sanity: M must be a valid mask for *this.
+ assert(B < W && E < W);
+ // Sanity: the masked part of *this must have the same number of bits
+ // as the source.
+ assert(B > E || E-B+1 == RC.width()); // B <= E => E-B+1 = |RC|.
+ assert(B <= E || E+(W-B)+1 == RC.width()); // E < B => E+(W-B)+1 = |RC|.
+ if (B <= E) {
+ for (uint16_t i = 0; i <= E-B; ++i)
+ Bits[i+B] = RC[i];
+ } else {
+ for (uint16_t i = 0; i < W-B; ++i)
+ Bits[i+B] = RC[i];
+ for (uint16_t i = 0; i <= E; ++i)
+ Bits[i] = RC[i+(W-B)];
+ }
+ return *this;
+}
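+
+// For illustration (an assumed example): with width() == 8, inserting a
+// four-bit cell under the mask (first = 6, last = 1) places the source bits
+// at positions 6, 7, 0 and 1 of *this; a mask whose "first" exceeds its
+// "last" wraps around the end of the register.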
+
+
+BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const {
+ uint16_t B = M.first(), E = M.last(), W = width();
+ assert(B < W && E < W);
+ if (B <= E) {
+ RegisterCell RC(E-B+1);
+ for (uint16_t i = B; i <= E; ++i)
+ RC.Bits[i-B] = Bits[i];
+ return RC;
+ }
+
+ RegisterCell RC(E+(W-B)+1);
+ for (uint16_t i = 0; i < W-B; ++i)
+ RC.Bits[i] = Bits[i+B];
+ for (uint16_t i = 0; i <= E; ++i)
+ RC.Bits[i+(W-B)] = Bits[i];
+ return RC;
+}
+
+
+BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) {
+ // Rotate left (i.e. towards increasing bit indices).
+ // Swap the two parts: [0..W-Sh-1] [W-Sh..W-1]
+ uint16_t W = width();
+ Sh = Sh % W;
+ if (Sh == 0)
+ return *this;
+
+ RegisterCell Tmp(W-Sh);
+ // Tmp = [0..W-Sh-1].
+ for (uint16_t i = 0; i < W-Sh; ++i)
+ Tmp[i] = Bits[i];
+ // Shift [W-Sh..W-1] to [0..Sh-1].
+ for (uint16_t i = 0; i < Sh; ++i)
+ Bits[i] = Bits[W-Sh+i];
+ // Copy Tmp to [Sh..W-1].
+ for (uint16_t i = 0; i < W-Sh; ++i)
+ Bits[i+Sh] = Tmp.Bits[i];
+ return *this;
+}
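+
+// For illustration: on a 4-bit cell holding the bits of 0b1000, rol(1)
+// produces 0b0001; the bit at position W-1 wraps around to position 0 and
+// every other bit moves up by one position.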
+
+
+BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E,
+ const BitValue &V) {
+ assert(B <= E);
+ while (B < E)
+ Bits[B++] = V;
+ return *this;
+}
+
+
+BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) {
+ // Append the cell given as the argument to the "this" cell.
+ // Bit 0 of RC becomes bit W of the result, where W is this->width().
+ uint16_t W = width(), WRC = RC.width();
+ Bits.resize(W+WRC);
+ for (uint16_t i = 0; i < WRC; ++i)
+ Bits[i+W] = RC.Bits[i];
+ return *this;
+}
+
+
+uint16_t BT::RegisterCell::ct(bool B) const {
+ uint16_t W = width();
+ uint16_t C = 0;
+ BitValue V = B;
+ while (C < W && Bits[C] == V)
+ C++;
+ return C;
+}
+
+
+uint16_t BT::RegisterCell::cl(bool B) const {
+ uint16_t W = width();
+ uint16_t C = 0;
+ BitValue V = B;
+ while (C < W && Bits[W-(C+1)] == V)
+ C++;
+ return C;
+}
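+
+// For illustration: for the cell produced by eIMM(8, 8) (binary 00001000),
+// ct(false) == 3 (known-zero bits counted up from bit 0) and cl(false) == 4
+// (known-zero bits counted down from the most significant bit).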
+
+
+bool BT::RegisterCell::operator== (const RegisterCell &RC) const {
+ uint16_t W = Bits.size();
+ if (RC.Bits.size() != W)
+ return false;
+ for (uint16_t i = 0; i < W; ++i)
+ if (Bits[i] != RC[i])
+ return false;
+ return true;
+}
+
+
+uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
+ // The general problem is with finding a register class that corresponds
+ // to a given reference reg:sub. There can be several such classes, and
+ // since we only care about the register size, it does not matter which
+ // such class we would find.
+ // The easiest way to accomplish what we want is to
+ // 1. find a physical register PhysR from the same class as RR.Reg,
+ // 2. find a physical register PhysS that corresponds to PhysR:RR.Sub,
+ // 3. find a register class that contains PhysS.
+ unsigned PhysR;
+ if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) {
+ const TargetRegisterClass *VC = MRI.getRegClass(RR.Reg);
+ assert(VC->begin() != VC->end() && "Empty register class");
+ PhysR = *VC->begin();
+ } else {
+ assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg));
+ PhysR = RR.Reg;
+ }
+
+ unsigned PhysS = (RR.Sub == 0) ? PhysR : TRI.getSubReg(PhysR, RR.Sub);
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PhysS);
+ uint16_t BW = RC->getSize()*8;
+ return BW;
+}
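+
+// For illustration: a reference to a 64-bit virtual register taken through a
+// 32-bit sub-register index resolves, via steps 1-3 above, to a 32-bit
+// physical register class, so the returned width is 32.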
+
+
+BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR,
+ const CellMapType &M) const {
+ uint16_t BW = getRegBitWidth(RR);
+
+ // Physical registers are assumed to be present in the map with an unknown
+ // value. Don't actually insert anything in the map, just return the cell.
+ if (TargetRegisterInfo::isPhysicalRegister(RR.Reg))
+ return RegisterCell::self(0, BW);
+
+ assert(TargetRegisterInfo::isVirtualRegister(RR.Reg));
+ // For virtual registers that belong to a class that is not tracked,
+ // generate an "unknown" value as well.
+ const TargetRegisterClass *C = MRI.getRegClass(RR.Reg);
+ if (!track(C))
+ return RegisterCell::self(0, BW);
+
+ CellMapType::const_iterator F = M.find(RR.Reg);
+ if (F != M.end()) {
+ if (!RR.Sub)
+ return F->second;
+ BitMask M = mask(RR.Reg, RR.Sub);
+ return F->second.extract(M);
+ }
+ // If not found, create a "top" entry, but do not insert it in the map.
+ return RegisterCell::top(BW);
+}
+
+
+void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC,
+ CellMapType &M) const {
+ // While updating the cell map can be done in a meaningful way for
+  // a part of a register, it makes little sense to implement it, since the
+  // SSA representation would never contain such "partial definitions".
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return;
+ assert(RR.Sub == 0 && "Unexpected sub-register in definition");
+ // Eliminate all ref-to-reg-0 bit values: replace them with "self".
+ for (unsigned i = 0, n = RC.width(); i < n; ++i) {
+ const BitValue &V = RC[i];
+ if (V.Type == BitValue::Ref && V.RefI.Reg == 0)
+ RC[i].RefI = BitRef(RR.Reg, i);
+ }
+ M[RR.Reg] = RC;
+}
+
+
+// Check if the cell represents a compile-time integer value.
+bool BT::MachineEvaluator::isInt(const RegisterCell &A) const {
+ uint16_t W = A.width();
+ for (uint16_t i = 0; i < W; ++i)
+ if (!A[i].is(0) && !A[i].is(1))
+ return false;
+ return true;
+}
+
+
+// Convert a cell to the integer value. The result must fit in uint64_t.
+uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const {
+ assert(isInt(A));
+ uint64_t Val = 0;
+ uint16_t W = A.width();
+ for (uint16_t i = 0; i < W; ++i) {
+ Val <<= 1;
+ Val |= A[i].is(1);
+ }
+ return Val;
+}
+
+
+// Evaluator helper functions. These implement some common operations on
+// register cells that can be used to implement target-specific instructions
+// in a target-specific evaluator.
+
+BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const {
+ RegisterCell Res(W);
+ // For bits beyond the 63rd, this will generate the sign bit of V.
+ for (uint16_t i = 0; i < W; ++i) {
+ Res[i] = BitValue(V & 1);
+ V >>= 1;
+ }
+ return Res;
+}
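+
+// For illustration: eIMM(5, 4) yields the bits 1,0,1,0 (bit 0 first), and
+// eIMM(-1, 8) yields eight 1-bits, since the sign bit of V is replicated
+// beyond the bits explicitly present in the 64-bit input.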
+
+
+BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const {
+ APInt A = CI->getValue();
+ uint16_t BW = A.getBitWidth();
+ assert((unsigned)BW == A.getBitWidth() && "BitWidth overflow");
+ RegisterCell Res(BW);
+ for (uint16_t i = 0; i < BW; ++i)
+ Res[i] = A[i];
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1,
+ const RegisterCell &A2) const {
+ uint16_t W = A1.width();
+ assert(W == A2.width());
+ RegisterCell Res(W);
+ bool Carry = false;
+ uint16_t I;
+ for (I = 0; I < W; ++I) {
+ const BitValue &V1 = A1[I];
+ const BitValue &V2 = A2[I];
+ if (!V1.num() || !V2.num())
+ break;
+ unsigned S = bool(V1) + bool(V2) + Carry;
+ Res[I] = BitValue(S & 1);
+ Carry = (S > 1);
+ }
+ for (; I < W; ++I) {
+ const BitValue &V1 = A1[I];
+ const BitValue &V2 = A2[I];
+    // If the next bit is the same as Carry, the result bit will be equal to
+    // the other bit, and the Carry bit will remain unchanged.
+ if (V1.is(Carry))
+ Res[I] = BitValue::ref(V2);
+ else if (V2.is(Carry))
+ Res[I] = BitValue::ref(V1);
+ else
+ break;
+ }
+ for (; I < W; ++I)
+ Res[I] = BitValue::self();
+ return Res;
+}
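+
+// For illustration: eADD(A, eIMM(0, W)) reproduces A; once the constant
+// low-order bits have been added, the carry is known to remain 0, so each
+// remaining result bit mirrors the corresponding bit of A (unknown bits
+// become "ref"s to it).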
+
+
+BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1,
+ const RegisterCell &A2) const {
+ uint16_t W = A1.width();
+ assert(W == A2.width());
+ RegisterCell Res(W);
+ bool Borrow = false;
+ uint16_t I;
+ for (I = 0; I < W; ++I) {
+ const BitValue &V1 = A1[I];
+ const BitValue &V2 = A2[I];
+ if (!V1.num() || !V2.num())
+ break;
+ unsigned S = bool(V1) - bool(V2) - Borrow;
+ Res[I] = BitValue(S & 1);
+ Borrow = (S > 1);
+ }
+ for (; I < W; ++I) {
+ const BitValue &V1 = A1[I];
+ const BitValue &V2 = A2[I];
+ if (V1.is(Borrow)) {
+ Res[I] = BitValue::ref(V2);
+ break;
+ }
+ if (V2.is(Borrow))
+ Res[I] = BitValue::ref(V1);
+ else
+ break;
+ }
+ for (; I < W; ++I)
+ Res[I] = BitValue::self();
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eMLS(const RegisterCell &A1,
+ const RegisterCell &A2) const {
+ uint16_t W = A1.width() + A2.width();
+ uint16_t Z = A1.ct(0) + A2.ct(0);
+ RegisterCell Res(W);
+ Res.fill(0, Z, BitValue::Zero);
+ Res.fill(Z, W, BitValue::self());
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eMLU(const RegisterCell &A1,
+ const RegisterCell &A2) const {
+ uint16_t W = A1.width() + A2.width();
+ uint16_t Z = A1.ct(0) + A2.ct(0);
+ RegisterCell Res(W);
+ Res.fill(0, Z, BitValue::Zero);
+ Res.fill(Z, W, BitValue::self());
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1,
+ uint16_t Sh) const {
+ assert(Sh <= A1.width());
+ RegisterCell Res = RegisterCell::ref(A1);
+ Res.rol(Sh);
+ Res.fill(0, Sh, BitValue::Zero);
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1,
+ uint16_t Sh) const {
+ uint16_t W = A1.width();
+ assert(Sh <= W);
+ RegisterCell Res = RegisterCell::ref(A1);
+ Res.rol(W-Sh);
+ Res.fill(W-Sh, W, BitValue::Zero);
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1,
+ uint16_t Sh) const {
+ uint16_t W = A1.width();
+ assert(Sh <= W);
+ RegisterCell Res = RegisterCell::ref(A1);
+ BitValue Sign = Res[W-1];
+ Res.rol(W-Sh);
+ Res.fill(W-Sh, W, Sign);
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1,
+ const RegisterCell &A2) const {
+ uint16_t W = A1.width();
+ assert(W == A2.width());
+ RegisterCell Res(W);
+ for (uint16_t i = 0; i < W; ++i) {
+ const BitValue &V1 = A1[i];
+ const BitValue &V2 = A2[i];
+ if (V1.is(1))
+ Res[i] = BitValue::ref(V2);
+ else if (V2.is(1))
+ Res[i] = BitValue::ref(V1);
+ else if (V1.is(0) || V2.is(0))
+ Res[i] = BitValue::Zero;
+ else if (V1 == V2)
+ Res[i] = V1;
+ else
+ Res[i] = BitValue::self();
+ }
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1,
+ const RegisterCell &A2) const {
+ uint16_t W = A1.width();
+ assert(W == A2.width());
+ RegisterCell Res(W);
+ for (uint16_t i = 0; i < W; ++i) {
+ const BitValue &V1 = A1[i];
+ const BitValue &V2 = A2[i];
+ if (V1.is(1) || V2.is(1))
+ Res[i] = BitValue::One;
+ else if (V1.is(0))
+ Res[i] = BitValue::ref(V2);
+ else if (V2.is(0))
+ Res[i] = BitValue::ref(V1);
+ else if (V1 == V2)
+ Res[i] = V1;
+ else
+ Res[i] = BitValue::self();
+ }
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1,
+ const RegisterCell &A2) const {
+ uint16_t W = A1.width();
+ assert(W == A2.width());
+ RegisterCell Res(W);
+ for (uint16_t i = 0; i < W; ++i) {
+ const BitValue &V1 = A1[i];
+ const BitValue &V2 = A2[i];
+ if (V1.is(0))
+ Res[i] = BitValue::ref(V2);
+ else if (V2.is(0))
+ Res[i] = BitValue::ref(V1);
+ else if (V1 == V2)
+ Res[i] = BitValue::Zero;
+ else
+ Res[i] = BitValue::self();
+ }
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const {
+ uint16_t W = A1.width();
+ RegisterCell Res(W);
+ for (uint16_t i = 0; i < W; ++i) {
+ const BitValue &V = A1[i];
+ if (V.is(0))
+ Res[i] = BitValue::One;
+ else if (V.is(1))
+ Res[i] = BitValue::Zero;
+ else
+ Res[i] = BitValue::self();
+ }
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1,
+ uint16_t BitN) const {
+ assert(BitN < A1.width());
+ RegisterCell Res = RegisterCell::ref(A1);
+ Res[BitN] = BitValue::One;
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1,
+ uint16_t BitN) const {
+ assert(BitN < A1.width());
+ RegisterCell Res = RegisterCell::ref(A1);
+ Res[BitN] = BitValue::Zero;
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B,
+ uint16_t W) const {
+ uint16_t C = A1.cl(B), AW = A1.width();
+  // If the bit that terminates the run of leading B bits is not a known
+  // constant, then we don't know the real count.
+ if ((C < AW && A1[AW-1-C].num()) || C == AW)
+ return eIMM(C, W);
+ return RegisterCell::self(0, W);
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B,
+ uint16_t W) const {
+ uint16_t C = A1.ct(B), AW = A1.width();
+  // If the bit that terminates the run of trailing B bits is not a known
+  // constant, then we don't know the real count.
+ if ((C < AW && A1[C].num()) || C == AW)
+ return eIMM(C, W);
+ return RegisterCell::self(0, W);
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1,
+ uint16_t FromN) const {
+ uint16_t W = A1.width();
+ assert(FromN <= W);
+ RegisterCell Res = RegisterCell::ref(A1);
+ BitValue Sign = Res[FromN-1];
+ // Sign-extend "inreg".
+ Res.fill(FromN, W, Sign);
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1,
+ uint16_t FromN) const {
+ uint16_t W = A1.width();
+ assert(FromN <= W);
+ RegisterCell Res = RegisterCell::ref(A1);
+ Res.fill(FromN, W, BitValue::Zero);
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1,
+ uint16_t B, uint16_t E) const {
+ uint16_t W = A1.width();
+ assert(B < W && E <= W);
+ if (B == E)
+ return RegisterCell(0);
+ uint16_t Last = (E > 0) ? E-1 : W-1;
+ RegisterCell Res = RegisterCell::ref(A1).extract(BT::BitMask(B, Last));
+ // Return shorter cell.
+ return Res;
+}
+
+
+BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1,
+ const RegisterCell &A2, uint16_t AtN) const {
+ uint16_t W1 = A1.width(), W2 = A2.width();
+ (void)W1;
+ assert(AtN < W1 && AtN+W2 <= W1);
+ // Copy bits from A1, insert A2 at position AtN.
+ RegisterCell Res = RegisterCell::ref(A1);
+ if (W2 > 0)
+ Res.insert(RegisterCell::ref(A2), BT::BitMask(AtN, AtN+W2-1));
+ return Res;
+}
+
+
+BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const {
+ assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0");
+ uint16_t W = getRegBitWidth(Reg);
+ assert(W > 0 && "Cannot generate mask for empty register");
+ return BitMask(0, W-1);
+}
+
+
+bool BT::MachineEvaluator::evaluate(const MachineInstr *MI,
+ const CellMapType &Inputs, CellMapType &Outputs) const {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case TargetOpcode::REG_SEQUENCE: {
+ RegisterRef RD = MI->getOperand(0);
+ assert(RD.Sub == 0);
+ RegisterRef RS = MI->getOperand(1);
+ unsigned SS = MI->getOperand(2).getImm();
+ RegisterRef RT = MI->getOperand(3);
+ unsigned ST = MI->getOperand(4).getImm();
+ assert(SS != ST);
+
+ uint16_t W = getRegBitWidth(RD);
+ RegisterCell Res(W);
+ Res.insert(RegisterCell::ref(getCell(RS, Inputs)), mask(RD.Reg, SS));
+ Res.insert(RegisterCell::ref(getCell(RT, Inputs)), mask(RD.Reg, ST));
+ putCell(RD, Res, Outputs);
+ break;
+ }
+
+ case TargetOpcode::COPY: {
+ // COPY can transfer a smaller register into a wider one.
+ // If that is the case, fill the remaining high bits with 0.
+ RegisterRef RD = MI->getOperand(0);
+ RegisterRef RS = MI->getOperand(1);
+ assert(RD.Sub == 0);
+ uint16_t WD = getRegBitWidth(RD);
+ uint16_t WS = getRegBitWidth(RS);
+ assert(WD >= WS);
+ RegisterCell Src = getCell(RS, Inputs);
+ RegisterCell Res(WD);
+ Res.insert(Src, BitMask(0, WS-1));
+ Res.fill(WS, WD, BitValue::Zero);
+ putCell(RD, Res, Outputs);
+ break;
+ }
+
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+
+// Main W-Z implementation.
+
+void BT::visitPHI(const MachineInstr *PI) {
+ int ThisN = PI->getParent()->getNumber();
+ if (Trace)
+    dbgs() << "Visit PHI(BB#" << ThisN << "): " << *PI;
+
+ const MachineOperand &MD = PI->getOperand(0);
+ assert(MD.getSubReg() == 0 && "Unexpected sub-register in definition");
+ RegisterRef DefRR(MD);
+ uint16_t DefBW = ME.getRegBitWidth(DefRR);
+
+ RegisterCell DefC = ME.getCell(DefRR, Map);
+ if (DefC == RegisterCell::self(DefRR.Reg, DefBW)) // XXX slow
+ return;
+
+ bool Changed = false;
+
+ for (unsigned i = 1, n = PI->getNumOperands(); i < n; i += 2) {
+ const MachineBasicBlock *PB = PI->getOperand(i+1).getMBB();
+ int PredN = PB->getNumber();
+ if (Trace)
+ dbgs() << " edge BB#" << PredN << "->BB#" << ThisN;
+ if (!EdgeExec.count(CFGEdge(PredN, ThisN))) {
+ if (Trace)
+ dbgs() << " not executable\n";
+ continue;
+ }
+
+ RegisterRef RU = PI->getOperand(i);
+ RegisterCell ResC = ME.getCell(RU, Map);
+ if (Trace)
+ dbgs() << " input reg: " << PrintReg(RU.Reg, &ME.TRI, RU.Sub)
+ << " cell: " << ResC << "\n";
+ Changed |= DefC.meet(ResC, DefRR.Reg);
+ }
+
+ if (Changed) {
+ if (Trace)
+ dbgs() << "Output: " << PrintReg(DefRR.Reg, &ME.TRI, DefRR.Sub)
+ << " cell: " << DefC << "\n";
+ ME.putCell(DefRR, DefC, Map);
+ visitUsesOf(DefRR.Reg);
+ }
+}
+
+
+void BT::visitNonBranch(const MachineInstr *MI) {
+ if (Trace) {
+ int ThisN = MI->getParent()->getNumber();
+ dbgs() << "Visit MI(BB#" << ThisN << "): " << *MI;
+ }
+ if (MI->isDebugValue())
+ return;
+ assert(!MI->isBranch() && "Unexpected branch instruction");
+
+ CellMapType ResMap;
+ bool Eval = ME.evaluate(MI, Map, ResMap);
+
+ if (Trace && Eval) {
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ RegisterRef RU(MO);
+ dbgs() << " input reg: " << PrintReg(RU.Reg, &ME.TRI, RU.Sub)
+ << " cell: " << ME.getCell(RU, Map) << "\n";
+ }
+ dbgs() << "Outputs:\n";
+ for (CellMapType::iterator I = ResMap.begin(), E = ResMap.end();
+ I != E; ++I) {
+ RegisterRef RD(I->first);
+ dbgs() << " " << PrintReg(I->first, &ME.TRI) << " cell: "
+ << ME.getCell(RD, ResMap) << "\n";
+ }
+ }
+
+ // Iterate over all definitions of the instruction, and update the
+ // cells accordingly.
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Visit register defs only.
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ RegisterRef RD(MO);
+ assert(RD.Sub == 0 && "Unexpected sub-register in definition");
+ if (!TargetRegisterInfo::isVirtualRegister(RD.Reg))
+ continue;
+
+ bool Changed = false;
+ if (!Eval || ResMap.count(RD.Reg) == 0) {
+ // Set to "ref" (aka "bottom").
+ uint16_t DefBW = ME.getRegBitWidth(RD);
+ RegisterCell RefC = RegisterCell::self(RD.Reg, DefBW);
+ if (RefC != ME.getCell(RD, Map)) {
+ ME.putCell(RD, RefC, Map);
+ Changed = true;
+ }
+ } else {
+ RegisterCell DefC = ME.getCell(RD, Map);
+ RegisterCell ResC = ME.getCell(RD, ResMap);
+ // This is a non-phi instruction, so the values of the inputs come
+ // from the same registers each time this instruction is evaluated.
+ // During the propagation, the values of the inputs can become lowered
+ // in the sense of the lattice operation, which may cause different
+ // results to be calculated in subsequent evaluations. This should
+ // not cause the bottoming of the result in the map, since the new
+ // result is already reflecting the lowered inputs.
+ for (uint16_t i = 0, w = DefC.width(); i < w; ++i) {
+ BitValue &V = DefC[i];
+ // Bits that are already "bottom" should not be updated.
+ if (V.Type == BitValue::Ref && V.RefI.Reg == RD.Reg)
+ continue;
+ // Same for those that are identical in DefC and ResC.
+ if (V == ResC[i])
+ continue;
+ V = ResC[i];
+ Changed = true;
+ }
+ if (Changed)
+ ME.putCell(RD, DefC, Map);
+ }
+ if (Changed)
+ visitUsesOf(RD.Reg);
+ }
+}
+
+
+void BT::visitBranchesFrom(const MachineInstr *BI) {
+ const MachineBasicBlock &B = *BI->getParent();
+ MachineBasicBlock::const_iterator It = BI, End = B.end();
+ BranchTargetList Targets, BTs;
+ bool FallsThrough = true, DefaultToAll = false;
+ int ThisN = B.getNumber();
+
+ do {
+ BTs.clear();
+ const MachineInstr *MI = &*It;
+ if (Trace)
+ dbgs() << "Visit BR(BB#" << ThisN << "): " << *MI;
+ assert(MI->isBranch() && "Expecting branch instruction");
+ InstrExec.insert(MI);
+ bool Eval = ME.evaluate(MI, Map, BTs, FallsThrough);
+ if (!Eval) {
+ // If the evaluation failed, we will add all targets. Keep going in
+ // the loop to mark all executable branches as such.
+ DefaultToAll = true;
+ FallsThrough = true;
+ if (Trace)
+ dbgs() << " failed to evaluate: will add all CFG successors\n";
+ } else if (!DefaultToAll) {
+ // If evaluated successfully add the targets to the cumulative list.
+ if (Trace) {
+ dbgs() << " adding targets:";
+ for (unsigned i = 0, n = BTs.size(); i < n; ++i)
+ dbgs() << " BB#" << BTs[i]->getNumber();
+ if (FallsThrough)
+ dbgs() << "\n falls through\n";
+ else
+ dbgs() << "\n does not fall through\n";
+ }
+ Targets.insert(BTs.begin(), BTs.end());
+ }
+ ++It;
+ } while (FallsThrough && It != End);
+
+ typedef MachineBasicBlock::const_succ_iterator succ_iterator;
+ if (!DefaultToAll) {
+ // Need to add all CFG successors that lead to EH landing pads.
+ // There won't be explicit branches to these blocks, but they must
+ // be processed.
+ for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) {
+ const MachineBasicBlock *SB = *I;
+ if (SB->isEHPad())
+ Targets.insert(SB);
+ }
+ if (FallsThrough) {
+ MachineFunction::const_iterator BIt = B.getIterator();
+ MachineFunction::const_iterator Next = std::next(BIt);
+ if (Next != MF.end())
+ Targets.insert(&*Next);
+ }
+ } else {
+ for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I)
+ Targets.insert(*I);
+ }
+
+ for (unsigned i = 0, n = Targets.size(); i < n; ++i) {
+ int TargetN = Targets[i]->getNumber();
+ FlowQ.push(CFGEdge(ThisN, TargetN));
+ }
+}
+
+
+void BT::visitUsesOf(unsigned Reg) {
+ if (Trace)
+ dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n";
+
+ typedef MachineRegisterInfo::use_nodbg_iterator use_iterator;
+ use_iterator End = MRI.use_nodbg_end();
+ for (use_iterator I = MRI.use_nodbg_begin(Reg); I != End; ++I) {
+ MachineInstr *UseI = I->getParent();
+ if (!InstrExec.count(UseI))
+ continue;
+ if (UseI->isPHI())
+ visitPHI(UseI);
+ else if (!UseI->isBranch())
+ visitNonBranch(UseI);
+ else
+ visitBranchesFrom(UseI);
+ }
+}
+
+
+BT::RegisterCell BT::get(RegisterRef RR) const {
+ return ME.getCell(RR, Map);
+}
+
+
+void BT::put(RegisterRef RR, const RegisterCell &RC) {
+ ME.putCell(RR, RC, Map);
+}
+
+
+// Replace all references to bits from OldRR with the corresponding bits
+// in NewRR.
+void BT::subst(RegisterRef OldRR, RegisterRef NewRR) {
+ assert(Map.count(OldRR.Reg) > 0 && "OldRR not present in map");
+ BitMask OM = ME.mask(OldRR.Reg, OldRR.Sub);
+ BitMask NM = ME.mask(NewRR.Reg, NewRR.Sub);
+ uint16_t OMB = OM.first(), OME = OM.last();
+ uint16_t NMB = NM.first(), NME = NM.last();
+ (void)NME;
+ assert((OME-OMB == NME-NMB) &&
+ "Substituting registers of different lengths");
+ for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
+ RegisterCell &RC = I->second;
+ for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
+ BitValue &V = RC[i];
+ if (V.Type != BitValue::Ref || V.RefI.Reg != OldRR.Reg)
+ continue;
+ if (V.RefI.Pos < OMB || V.RefI.Pos > OME)
+ continue;
+ V.RefI.Reg = NewRR.Reg;
+ V.RefI.Pos += NMB-OMB;
+ }
+ }
+}
+
+
+// Check if the block has been "executed" during propagation. (If not, the
+// block is dead, but it may still appear to be reachable.)
+bool BT::reached(const MachineBasicBlock *B) const {
+ int BN = B->getNumber();
+ assert(BN >= 0);
+ for (EdgeSetType::iterator I = EdgeExec.begin(), E = EdgeExec.end();
+ I != E; ++I) {
+ if (I->second == BN)
+ return true;
+ }
+ return false;
+}
+
+
+void BT::reset() {
+ EdgeExec.clear();
+ InstrExec.clear();
+ Map.clear();
+}
+
+
+void BT::run() {
+ reset();
+ assert(FlowQ.empty());
+
+ typedef GraphTraits<const MachineFunction*> MachineFlowGraphTraits;
+ const MachineBasicBlock *Entry = MachineFlowGraphTraits::getEntryNode(&MF);
+
+ unsigned MaxBN = 0;
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ assert(I->getNumber() >= 0 && "Disconnected block");
+ unsigned BN = I->getNumber();
+ if (BN > MaxBN)
+ MaxBN = BN;
+ }
+
+ // Keep track of visited blocks.
+ BitVector BlockScanned(MaxBN+1);
+
+ int EntryN = Entry->getNumber();
+ // Generate a fake edge to get something to start with.
+ FlowQ.push(CFGEdge(-1, EntryN));
+
+ while (!FlowQ.empty()) {
+ CFGEdge Edge = FlowQ.front();
+ FlowQ.pop();
+
+ if (EdgeExec.count(Edge))
+ continue;
+ EdgeExec.insert(Edge);
+
+ const MachineBasicBlock &B = *MF.getBlockNumbered(Edge.second);
+ MachineBasicBlock::const_iterator It = B.begin(), End = B.end();
+ // Visit PHI nodes first.
+ while (It != End && It->isPHI()) {
+ const MachineInstr *PI = &*It++;
+ InstrExec.insert(PI);
+ visitPHI(PI);
+ }
+
+ // If this block has already been visited through a flow graph edge,
+ // then the instructions have already been processed. Any updates to
+ // the cells would now only happen through visitUsesOf...
+ if (BlockScanned[Edge.second])
+ continue;
+ BlockScanned[Edge.second] = true;
+
+ // Visit non-branch instructions.
+ while (It != End && !It->isBranch()) {
+ const MachineInstr *MI = &*It++;
+ InstrExec.insert(MI);
+ visitNonBranch(MI);
+ }
+ // If block end has been reached, add the fall-through edge to the queue.
+ if (It == End) {
+ MachineFunction::const_iterator BIt = B.getIterator();
+ MachineFunction::const_iterator Next = std::next(BIt);
+ if (Next != MF.end() && B.isSuccessor(&*Next)) {
+ int ThisN = B.getNumber();
+ int NextN = Next->getNumber();
+ FlowQ.push(CFGEdge(ThisN, NextN));
+ }
+ } else {
+ // Handle the remaining sequence of branches. This function will update
+ // the work queue.
+ visitBranchesFrom(It);
+ }
+  } // while (!FlowQ.empty())
+
+ if (Trace) {
+ dbgs() << "Cells after propagation:\n";
+ for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I)
+ dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n";
+ }
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.h b/contrib/llvm/lib/Target/Hexagon/BitTracker.h
new file mode 100644
index 0000000..959c831
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.h
@@ -0,0 +1,435 @@
+//===--- BitTracker.h -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITTRACKER_H
+#define BITTRACKER_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+#include <map>
+#include <queue>
+#include <set>
+
+namespace llvm {
+ class ConstantInt;
+ class MachineRegisterInfo;
+ class MachineBasicBlock;
+ class MachineInstr;
+ class MachineOperand;
+ class raw_ostream;
+
+struct BitTracker {
+ struct BitRef;
+ struct RegisterRef;
+ struct BitValue;
+ struct BitMask;
+ struct RegisterCell;
+ struct MachineEvaluator;
+
+ typedef SetVector<const MachineBasicBlock *> BranchTargetList;
+
+ typedef std::map<unsigned, RegisterCell> CellMapType;
+
+ BitTracker(const MachineEvaluator &E, MachineFunction &F);
+ ~BitTracker();
+
+ void run();
+ void trace(bool On = false) { Trace = On; }
+ bool has(unsigned Reg) const;
+ const RegisterCell &lookup(unsigned Reg) const;
+ RegisterCell get(RegisterRef RR) const;
+ void put(RegisterRef RR, const RegisterCell &RC);
+ void subst(RegisterRef OldRR, RegisterRef NewRR);
+ bool reached(const MachineBasicBlock *B) const;
+
+private:
+ void visitPHI(const MachineInstr *PI);
+ void visitNonBranch(const MachineInstr *MI);
+ void visitBranchesFrom(const MachineInstr *BI);
+ void visitUsesOf(unsigned Reg);
+ void reset();
+
+ typedef std::pair<int,int> CFGEdge;
+ typedef std::set<CFGEdge> EdgeSetType;
+ typedef std::set<const MachineInstr *> InstrSetType;
+ typedef std::queue<CFGEdge> EdgeQueueType;
+
+ EdgeSetType EdgeExec; // Executable flow graph edges.
+ InstrSetType InstrExec; // Executable instructions.
+ EdgeQueueType FlowQ; // Work queue of CFG edges.
+ bool Trace; // Enable tracing for debugging.
+
+ const MachineEvaluator &ME;
+ MachineFunction &MF;
+ MachineRegisterInfo &MRI;
+ CellMapType &Map;
+};
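
For orientation, a minimal sketch of how a client would drive this interface, using only the members declared above and assuming a hypothetical MachineEvaluator subclass named MyEvaluator (the real Hexagon one lives in HexagonBitTracker.h):

    #include "BitTracker.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    // Sketch only: MyEvaluator stands in for a target-specific evaluator.
    void runTrackerOn(MachineFunction &MF, const TargetRegisterInfo &TRI,
                      MachineRegisterInfo &MRI) {
      MyEvaluator ME(TRI, MRI);     // hypothetical MachineEvaluator subclass
      BitTracker BT(ME, MF);
      BT.trace(false);              // set to true for debug dumps
      BT.run();                     // propagate bit values over the CFG
      unsigned SomeReg = 0;         // a virtual register of interest
      if (BT.has(SomeReg)) {
        const BitTracker::RegisterCell &RC = BT.lookup(SomeReg);
        (void)RC;                   // inspect individual bits via RC[i]
      }
    }
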
+
+
+// Abstraction of a reference to a bit at position Pos in register Reg.
+struct BitTracker::BitRef {
+ BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {}
+ bool operator== (const BitRef &BR) const {
+ // If Reg is 0, disregard Pos.
+ return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos);
+ }
+ unsigned Reg;
+ uint16_t Pos;
+};
+
+
+// Abstraction of a register reference in MachineOperand. It contains the
+// register number and the subregister index.
+struct BitTracker::RegisterRef {
+ RegisterRef(unsigned R = 0, unsigned S = 0)
+ : Reg(R), Sub(S) {}
+ RegisterRef(const MachineOperand &MO)
+ : Reg(MO.getReg()), Sub(MO.getSubReg()) {}
+ unsigned Reg, Sub;
+};
+
+
+// Value that a single bit can take. This is outside of the context of
+// any register, it is more of an abstraction of the two-element set of
+// possible bit values. One extension here is the "Ref" type, which
+// indicates that this bit takes the same value as the bit described by
+// RefInfo.
+struct BitTracker::BitValue {
+ enum ValueType {
+ Top, // Bit not yet defined.
+ Zero, // Bit = 0.
+ One, // Bit = 1.
+ Ref // Bit value same as the one described in RefI.
+ // Conceptually, there is no explicit "bottom" value: the lattice's
+ // bottom will be expressed as a "ref to itself", which, in the context
+ // of registers, could be read as "this value of this bit is defined by
+ // this bit".
+ // The ordering is:
+ // x <= Top,
+ // Self <= x, where "Self" is "ref to itself".
+ // This makes the value lattice different for each virtual register
+ // (even for each bit in the same virtual register), since the "bottom"
+ // for one register will be a simple "ref" for another register.
+ // Since we do not store the "Self" bit and register number, the meet
+ // operation will need to take it as a parameter.
+ //
+ // In practice there is a special case for values that are not associated
+ // with any specific virtual register. An example would be a value
+ // corresponding to a bit of a physical register, or an intermediate
+ // value obtained in some computation (such as instruction evaluation).
+ // Such cases are identical to the usual Ref type, but the register
+ // number is 0. In such a case the Pos field of the reference is ignored.
+ //
+ // Note that in a value V that is a "ref", as long as RefI.Reg is not 0,
+ // it may actually be the same register as the one in which V will be
+ // contained. If RefI.Pos refers to the position of V, then V is assumed
+ // to be "bottom" (i.e. a "ref to itself"); otherwise V is taken to be
+ // identical to the referenced bit of the same register.
+ // If RefI.Reg is 0, however, such a reference to the same register is
+ // not possible. Any value V that is a "ref", and whose RefI.Reg is 0
+ // is treated as "bottom".
+ };
+ ValueType Type;
+ BitRef RefI;
+
+ BitValue(ValueType T = Top) : Type(T) {}
+ BitValue(bool B) : Type(B ? One : Zero) {}
+ BitValue(unsigned Reg, uint16_t Pos) : Type(Ref), RefI(Reg, Pos) {}
+
+ bool operator== (const BitValue &V) const {
+ if (Type != V.Type)
+ return false;
+ if (Type == Ref && !(RefI == V.RefI))
+ return false;
+ return true;
+ }
+ bool operator!= (const BitValue &V) const {
+ return !operator==(V);
+ }
+ bool is(unsigned T) const {
+ assert(T == 0 || T == 1);
+ return T == 0 ? Type == Zero
+ : (T == 1 ? Type == One : false);
+ }
+
+ // The "meet" operation is the "." operation in a semilattice (L, ., T, B):
+ // (1) x.x = x
+ // (2) x.y = y.x
+ // (3) x.(y.z) = (x.y).z
+ // (4) x.T = x (i.e. T = "top")
+ // (5) x.B = B (i.e. B = "bottom")
+ //
+ // This "meet" function will update the value of the "*this" object with
+ // the newly calculated one, and return "true" if the value of *this has
+ // changed, and "false" otherwise.
+ // To prove that it satisfies the conditions (1)-(5), it is sufficient
+ // to show that a relation
+ // x <= y <=> x.y = x
+ // defines a partial order (i.e. that "meet" is same as "infimum").
+ bool meet(const BitValue &V, const BitRef &Self) {
+ // First, check the cases where there is nothing to be done.
+ if (Type == Ref && RefI == Self) // Bottom.meet(V) = Bottom (i.e. This)
+ return false;
+ if (V.Type == Top) // This.meet(Top) = This
+ return false;
+ if (*this == V) // This.meet(This) = This
+ return false;
+
+ // At this point, we know that the value of "this" will change.
+ // If it is Top, it will become the same as V, otherwise it will
+ // become "bottom" (i.e. Self).
+ if (Type == Top) {
+ Type = V.Type;
+ RefI = V.RefI; // This may be irrelevant, but copy anyway.
+ return true;
+ }
+ // Become "bottom".
+ Type = Ref;
+ RefI = Self;
+ return true;
+ }
+
+ // Create a reference to the bit value V.
+ static BitValue ref(const BitValue &V);
+ // Create a "self".
+ static BitValue self(const BitRef &Self = BitRef());
+
+ bool num() const {
+ return Type == Zero || Type == One;
+ }
+ operator bool() const {
+ assert(Type == Zero || Type == One);
+ return Type == One;
+ }
+
+ friend raw_ostream &operator<<(raw_ostream &OS, const BitValue &BV);
+};
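
A short worked example of the meet operation described above: meeting Top with a constant yields that constant, while meeting two different constants drives the bit to "bottom", i.e. a ref to itself. A sketch using only the members declared in this header:

    #include "BitTracker.h"
    using namespace llvm;

    // Sketch only: exercises BitValue::meet() on a single bit.
    void meetExample() {
      BitTracker::BitRef Self(/*Reg=*/1, /*Pos=*/0); // the bit being computed
      BitTracker::BitValue V;                        // starts as Top
      V.meet(BitTracker::BitValue(true), Self);      // Top . One  -> One
      bool Changed = V.meet(BitTracker::BitValue(false), Self);
      // One . Zero -> conflicting constants, so V becomes "bottom",
      // i.e. a self-reference; Changed is true.
      (void)Changed;
    }
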
+
+
+// This operation must be idempotent, i.e. ref(ref(V)) == ref(V).
+inline BitTracker::BitValue
+BitTracker::BitValue::ref(const BitValue &V) {
+ if (V.Type != Ref)
+ return BitValue(V.Type);
+ if (V.RefI.Reg != 0)
+ return BitValue(V.RefI.Reg, V.RefI.Pos);
+ return self();
+}
+
+
+inline BitTracker::BitValue
+BitTracker::BitValue::self(const BitRef &Self) {
+ return BitValue(Self.Reg, Self.Pos);
+}
+
+
+// A sequence of bits starting from index B up to and including index E.
+// If E < B, the mask represents two sections: [0..E] and [B..W) where
+// W is the width of the register.
+struct BitTracker::BitMask {
+ BitMask() : B(0), E(0) {}
+ BitMask(uint16_t b, uint16_t e) : B(b), E(e) {}
+ uint16_t first() const { return B; }
+ uint16_t last() const { return E; }
+private:
+ uint16_t B, E;
+};
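
To make the wrap-around rule concrete: BitMask(8, 15) covers the contiguous bits 8..15, while BitMask(24, 7) of a 32-bit register covers the two ranges [0..7] and [24..31]. The mask itself only stores its two endpoints:

    #include "BitTracker.h"
    using namespace llvm;

    // Sketch only.
    void maskExample() {
      BitTracker::BitMask Contig(8, 15);  // bits 8..15
      BitTracker::BitMask Split(24, 7);   // bits [0..7] and [24..31] of a 32-bit reg
      (void)Contig.first();               // 8
      (void)Split.last();                 // 7
    }
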
+
+
+// Representation of a register: a list of BitValues.
+struct BitTracker::RegisterCell {
+ RegisterCell(uint16_t Width = DefaultBitN) : Bits(Width) {}
+
+ uint16_t width() const {
+ return Bits.size();
+ }
+ const BitValue &operator[](uint16_t BitN) const {
+ assert(BitN < Bits.size());
+ return Bits[BitN];
+ }
+ BitValue &operator[](uint16_t BitN) {
+ assert(BitN < Bits.size());
+ return Bits[BitN];
+ }
+
+ bool meet(const RegisterCell &RC, unsigned SelfR);
+ RegisterCell &insert(const RegisterCell &RC, const BitMask &M);
+ RegisterCell extract(const BitMask &M) const; // Returns a new cell.
+ RegisterCell &rol(uint16_t Sh); // Rotate left.
+ RegisterCell &fill(uint16_t B, uint16_t E, const BitValue &V);
+ RegisterCell &cat(const RegisterCell &RC); // Concatenate.
+ uint16_t cl(bool B) const;
+ uint16_t ct(bool B) const;
+
+ bool operator== (const RegisterCell &RC) const;
+ bool operator!= (const RegisterCell &RC) const {
+ return !operator==(RC);
+ }
+
+ // Generate a "ref" cell for the corresponding register. In the resulting
+ // cell each bit will be described as being the same as the corresponding
+ // bit in register Reg (i.e. the cell is "defined" by register Reg).
+ static RegisterCell self(unsigned Reg, uint16_t Width);
+ // Generate a "top" cell of given size.
+ static RegisterCell top(uint16_t Width);
+ // Generate a cell that is a "ref" to another cell.
+ static RegisterCell ref(const RegisterCell &C);
+
+private:
+ // The DefaultBitN is here only to avoid frequent reallocation of the
+ // memory in the vector.
+ static const unsigned DefaultBitN = 32;
+ typedef SmallVector<BitValue, DefaultBitN> BitValueList;
+ BitValueList Bits;
+
+ friend raw_ostream &operator<<(raw_ostream &OS, const RegisterCell &RC);
+};
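
The cell operations compose naturally. For example, a 32-bit cell that mirrors virtual register Reg but has its low byte forced to zero can be built from the members declared above; a sketch, assuming fill covers the half-open range [B, E):

    #include "BitTracker.h"
    using namespace llvm;

    // Sketch only.
    BitTracker::RegisterCell lowByteCleared(unsigned Reg) {
      // Each bit i starts out as a ref to bit i of Reg ("self").
      BitTracker::RegisterCell RC = BitTracker::RegisterCell::self(Reg, 32);
      // Overwrite the low byte with constant zero bits.
      RC.fill(0, 8, BitTracker::BitValue(false));
      return RC;
    }
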
+
+
+inline bool BitTracker::has(unsigned Reg) const {
+ return Map.find(Reg) != Map.end();
+}
+
+
+inline const BitTracker::RegisterCell&
+BitTracker::lookup(unsigned Reg) const {
+ CellMapType::const_iterator F = Map.find(Reg);
+ assert(F != Map.end());
+ return F->second;
+}
+
+
+inline BitTracker::RegisterCell
+BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) {
+ RegisterCell RC(Width);
+ for (uint16_t i = 0; i < Width; ++i)
+ RC.Bits[i] = BitValue::self(BitRef(Reg, i));
+ return RC;
+}
+
+
+inline BitTracker::RegisterCell
+BitTracker::RegisterCell::top(uint16_t Width) {
+ RegisterCell RC(Width);
+ for (uint16_t i = 0; i < Width; ++i)
+ RC.Bits[i] = BitValue(BitValue::Top);
+ return RC;
+}
+
+
+inline BitTracker::RegisterCell
+BitTracker::RegisterCell::ref(const RegisterCell &C) {
+ uint16_t W = C.width();
+ RegisterCell RC(W);
+ for (unsigned i = 0; i < W; ++i)
+ RC[i] = BitValue::ref(C[i]);
+ return RC;
+}
+
+// A class to evaluate a target's instructions and update the cell maps.
+// This is used internally by the bit tracker. A target that wants to
+// utilize this should implement the evaluation functions (noted below)
+// in a subclass of this class.
+struct BitTracker::MachineEvaluator {
+ MachineEvaluator(const TargetRegisterInfo &T, MachineRegisterInfo &M)
+ : TRI(T), MRI(M) {}
+ virtual ~MachineEvaluator() {}
+
+ uint16_t getRegBitWidth(const RegisterRef &RR) const;
+
+ RegisterCell getCell(const RegisterRef &RR, const CellMapType &M) const;
+ void putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const;
+ // A result of any operation should use refs to the source cells, not
+ // the cells directly. This function is a convenience wrapper to quickly
+ // generate a ref for a cell corresponding to a register reference.
+ RegisterCell getRef(const RegisterRef &RR, const CellMapType &M) const {
+ RegisterCell RC = getCell(RR, M);
+ return RegisterCell::ref(RC);
+ }
+
+ // Helper functions.
+ // Check if a cell is an immediate value (i.e. all bits are either 0 or 1).
+ bool isInt(const RegisterCell &A) const;
+ // Convert cell to an immediate value.
+ uint64_t toInt(const RegisterCell &A) const;
+
+ // Generate cell from an immediate value.
+ RegisterCell eIMM(int64_t V, uint16_t W) const;
+ RegisterCell eIMM(const ConstantInt *CI) const;
+
+ // Arithmetic.
+ RegisterCell eADD(const RegisterCell &A1, const RegisterCell &A2) const;
+ RegisterCell eSUB(const RegisterCell &A1, const RegisterCell &A2) const;
+ RegisterCell eMLS(const RegisterCell &A1, const RegisterCell &A2) const;
+ RegisterCell eMLU(const RegisterCell &A1, const RegisterCell &A2) const;
+
+ // Shifts.
+ RegisterCell eASL(const RegisterCell &A1, uint16_t Sh) const;
+ RegisterCell eLSR(const RegisterCell &A1, uint16_t Sh) const;
+ RegisterCell eASR(const RegisterCell &A1, uint16_t Sh) const;
+
+ // Logical.
+ RegisterCell eAND(const RegisterCell &A1, const RegisterCell &A2) const;
+ RegisterCell eORL(const RegisterCell &A1, const RegisterCell &A2) const;
+ RegisterCell eXOR(const RegisterCell &A1, const RegisterCell &A2) const;
+ RegisterCell eNOT(const RegisterCell &A1) const;
+
+ // Set bit, clear bit.
+ RegisterCell eSET(const RegisterCell &A1, uint16_t BitN) const;
+ RegisterCell eCLR(const RegisterCell &A1, uint16_t BitN) const;
+
+ // Count leading/trailing bits (zeros/ones).
+ RegisterCell eCLB(const RegisterCell &A1, bool B, uint16_t W) const;
+ RegisterCell eCTB(const RegisterCell &A1, bool B, uint16_t W) const;
+
+ // Sign/zero extension.
+ RegisterCell eSXT(const RegisterCell &A1, uint16_t FromN) const;
+ RegisterCell eZXT(const RegisterCell &A1, uint16_t FromN) const;
+
+ // Extract/insert
+ // XTR R,b,e: extract bits from A1 starting at bit b, ending at e-1.
+ // INS R,S,b: take R and replace bits starting from b with S.
+ RegisterCell eXTR(const RegisterCell &A1, uint16_t B, uint16_t E) const;
+ RegisterCell eINS(const RegisterCell &A1, const RegisterCell &A2,
+ uint16_t AtN) const;
+
+ // User-provided functions for individual targets:
+
+ // Return a sub-register mask that indicates which bits in Reg belong
+ // to the subregister Sub. These bits are assumed to be contiguous in
+ // the super-register, and have the same ordering in the sub-register
+ // as in the super-register. It is valid to call this function with
+ // Sub == 0; in that case the function should return a mask that spans
+ // the entire register Reg (which is what the default implementation
+ // does).
+ virtual BitMask mask(unsigned Reg, unsigned Sub) const;
+ // Indicate whether a given register class should be tracked.
+ virtual bool track(const TargetRegisterClass *RC) const { return true; }
+ // Evaluate a non-branching machine instruction, given the cell map with
+ // the input values. Place the results in the Outputs map. Return "true"
+ // if evaluation succeeded, "false" otherwise.
+ virtual bool evaluate(const MachineInstr *MI, const CellMapType &Inputs,
+ CellMapType &Outputs) const;
+ // Evaluate a branch, given the cell map with the input values. Fill out
+ // a list of all possible branch targets and indicate (through a flag)
+ // whether the branch could fall through. Return "true" if this information
+ // has been successfully computed, "false" otherwise.
+ virtual bool evaluate(const MachineInstr *BI, const CellMapType &Inputs,
+ BranchTargetList &Targets, bool &FallsThru) const = 0;
+
+ const TargetRegisterInfo &TRI;
+ MachineRegisterInfo &MRI;
+};
+
+} // end namespace llvm
+
+#endif
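
A minimal skeleton of the extension point described above, with a hypothetical class name; only the pure-virtual branch evaluator must be provided, and a real target (such as the Hexagon implementation in HexagonBitTracker.h) overrides more of the hooks:

    #include "BitTracker.h"
    using namespace llvm;

    // Sketch only: NullEvaluator is a hypothetical, deliberately trivial subclass.
    struct NullEvaluator : public BitTracker::MachineEvaluator {
      NullEvaluator(const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI)
          : MachineEvaluator(TRI, MRI) {}

      // Required override: report that the branch could not be analyzed and
      // let the tracker fall back to its conservative handling.
      bool evaluate(const MachineInstr *BI, const BitTracker::CellMapType &Inputs,
                    BitTracker::BranchTargetList &Targets,
                    bool &FallsThru) const override {
        return false;
      }
    };
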
diff --git a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
new file mode 100644
index 0000000..4a9c341
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -0,0 +1,1605 @@
+//===-- HexagonDisassembler.cpp - Disassembler for Hexagon ISA ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-disassembler"
+
+#include "Hexagon.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCChecker.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonInstPrinter.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <vector>
+
+using namespace llvm;
+using namespace Hexagon;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+/// \brief Hexagon disassembler for all Hexagon platforms.
+class HexagonDisassembler : public MCDisassembler {
+public:
+ std::unique_ptr<MCInstrInfo const> const MCII;
+ std::unique_ptr<MCInst *> CurrentBundle;
+ HexagonDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+ MCInstrInfo const *MCII)
+ : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *) {}
+
+ DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream, raw_ostream &CStream,
+ bool &Complete) const;
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
+
+ void adjustExtendedInstructions(MCInst &MCI, MCInst const &MCB) const;
+ void addSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) const;
+};
+}
+
+// Forward declare these because the auto-generated code will reference them.
+// Definitions are further down.
+
+static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn);
+static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn,
+ void const *Decoder);
+
+static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op,
+ raw_ostream &os);
+
+static unsigned getRegFromSubinstEncoding(unsigned encoded_reg);
+
+static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+
+#include "HexagonGenDisassemblerTables.inc"
+
+static MCDisassembler *createHexagonDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new HexagonDisassembler(STI, Ctx, T.createMCInstrInfo());
+}
+
+extern "C" void LLVMInitializeHexagonDisassembler() {
+ TargetRegistry::RegisterMCDisassembler(TheHexagonTarget,
+ createHexagonDisassembler);
+}
+
+DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ DecodeStatus Result = DecodeStatus::Success;
+ bool Complete = false;
+ Size = 0;
+
+ *CurrentBundle = &MI;
+ MI = HexagonMCInstrInfo::createBundle();
+ while (Result == Success && Complete == false) {
+ if (Bytes.size() < HEXAGON_INSTR_SIZE)
+ return MCDisassembler::Fail;
+ MCInst *Inst = new (getContext()) MCInst;
+ Result = getSingleInstruction(*Inst, MI, Bytes, Address, os, cs, Complete);
+ MI.addOperand(MCOperand::createInst(Inst));
+ Size += HEXAGON_INSTR_SIZE;
+ Bytes = Bytes.slice(HEXAGON_INSTR_SIZE);
+ }
+ if (Result == MCDisassembler::Fail)
+ return Result;
+ HexagonMCChecker Checker(*MCII, STI, MI, MI, *getContext().getRegisterInfo());
+ if (!Checker.check())
+ return MCDisassembler::Fail;
+ return MCDisassembler::Success;
+}
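
The loop above keeps consuming 32-bit words into the bundle until one of them marks the end of the packet. Each word's parse field (checked in getSingleInstruction below through the INST_PARSE_* constants) drives that decision; a minimal sketch of the classification, assuming the same constants from HexagonBaseInfo.h:

    #include "MCTargetDesc/HexagonBaseInfo.h"
    #include <cstdint>

    // Sketch only: mirrors the checks made in getSingleInstruction().
    static bool endsPacket(uint32_t Word) {
      uint32_t Parse = Word & HexagonII::INST_PARSE_MASK;
      // A packet-end marker terminates the bundle; a duplex word (two
      // sub-instructions in one word) does as well.
      return Parse == HexagonII::INST_PARSE_PACKET_END ||
             Parse == HexagonII::INST_PARSE_DUPLEX;
    }
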
+
+namespace {
+HexagonDisassembler const &disassembler(void const *Decoder) {
+ return *static_cast<HexagonDisassembler const *>(Decoder);
+}
+MCContext &contextFromDecoder(void const *Decoder) {
+ return disassembler(Decoder).getContext();
+}
+}
+
+DecodeStatus HexagonDisassembler::getSingleInstruction(
+ MCInst &MI, MCInst &MCB, ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &os, raw_ostream &cs, bool &Complete) const {
+ assert(Bytes.size() >= HEXAGON_INSTR_SIZE);
+
+ uint32_t Instruction =
+ (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
+
+ auto BundleSize = HexagonMCInstrInfo::bundleSize(MCB);
+ if ((Instruction & HexagonII::INST_PARSE_MASK) ==
+ HexagonII::INST_PARSE_LOOP_END) {
+ if (BundleSize == 0)
+ HexagonMCInstrInfo::setInnerLoop(MCB);
+ else if (BundleSize == 1)
+ HexagonMCInstrInfo::setOuterLoop(MCB);
+ else
+ return DecodeStatus::Fail;
+ }
+
+ DecodeStatus Result = DecodeStatus::Success;
+ if ((Instruction & HexagonII::INST_PARSE_MASK) ==
+ HexagonII::INST_PARSE_DUPLEX) {
+ // Determine the instruction class of each instruction in the duplex.
+ unsigned duplexIClass, IClassLow, IClassHigh;
+
+ duplexIClass = ((Instruction >> 28) & 0xe) | ((Instruction >> 13) & 0x1);
+ switch (duplexIClass) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ IClassLow = HexagonII::HSIG_L1;
+ IClassHigh = HexagonII::HSIG_L1;
+ break;
+ case 1:
+ IClassLow = HexagonII::HSIG_L2;
+ IClassHigh = HexagonII::HSIG_L1;
+ break;
+ case 2:
+ IClassLow = HexagonII::HSIG_L2;
+ IClassHigh = HexagonII::HSIG_L2;
+ break;
+ case 3:
+ IClassLow = HexagonII::HSIG_A;
+ IClassHigh = HexagonII::HSIG_A;
+ break;
+ case 4:
+ IClassLow = HexagonII::HSIG_L1;
+ IClassHigh = HexagonII::HSIG_A;
+ break;
+ case 5:
+ IClassLow = HexagonII::HSIG_L2;
+ IClassHigh = HexagonII::HSIG_A;
+ break;
+ case 6:
+ IClassLow = HexagonII::HSIG_S1;
+ IClassHigh = HexagonII::HSIG_A;
+ break;
+ case 7:
+ IClassLow = HexagonII::HSIG_S2;
+ IClassHigh = HexagonII::HSIG_A;
+ break;
+ case 8:
+ IClassLow = HexagonII::HSIG_S1;
+ IClassHigh = HexagonII::HSIG_L1;
+ break;
+ case 9:
+ IClassLow = HexagonII::HSIG_S1;
+ IClassHigh = HexagonII::HSIG_L2;
+ break;
+ case 10:
+ IClassLow = HexagonII::HSIG_S1;
+ IClassHigh = HexagonII::HSIG_S1;
+ break;
+ case 11:
+ IClassLow = HexagonII::HSIG_S2;
+ IClassHigh = HexagonII::HSIG_S1;
+ break;
+ case 12:
+ IClassLow = HexagonII::HSIG_S2;
+ IClassHigh = HexagonII::HSIG_L1;
+ break;
+ case 13:
+ IClassLow = HexagonII::HSIG_S2;
+ IClassHigh = HexagonII::HSIG_L2;
+ break;
+ case 14:
+ IClassLow = HexagonII::HSIG_S2;
+ IClassHigh = HexagonII::HSIG_S2;
+ break;
+ }
+
+ // Set the MCInst to be a duplex instruction. Which one doesn't matter.
+ MI.setOpcode(Hexagon::DuplexIClass0);
+
+ // Decode each instruction in the duplex.
+ // Create an MCInst for each instruction.
+ unsigned instLow = Instruction & 0x1fff;
+ unsigned instHigh = (Instruction >> 16) & 0x1fff;
+ unsigned opLow;
+ if (GetSubinstOpcode(IClassLow, instLow, opLow, os) !=
+ MCDisassembler::Success)
+ return MCDisassembler::Fail;
+ unsigned opHigh;
+ if (GetSubinstOpcode(IClassHigh, instHigh, opHigh, os) !=
+ MCDisassembler::Success)
+ return MCDisassembler::Fail;
+ MCInst *MILow = new (getContext()) MCInst;
+ MILow->setOpcode(opLow);
+ MCInst *MIHigh = new (getContext()) MCInst;
+ MIHigh->setOpcode(opHigh);
+ addSubinstOperands(MILow, opLow, instLow);
+ addSubinstOperands(MIHigh, opHigh, instHigh);
+ // see ConvertToSubInst() in
+ // lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+
+ // Add the duplex instruction MCInsts as operands to the passed in MCInst.
+ MCOperand OPLow = MCOperand::createInst(MILow);
+ MCOperand OPHigh = MCOperand::createInst(MIHigh);
+ MI.addOperand(OPLow);
+ MI.addOperand(OPHigh);
+ Complete = true;
+ } else {
+ if ((Instruction & HexagonII::INST_PARSE_MASK) ==
+ HexagonII::INST_PARSE_PACKET_END)
+ Complete = true;
+ // Calling the auto-generated decoder function.
+ Result =
+ decodeInstruction(DecoderTable32, MI, Instruction, Address, this, STI);
+
+ // If a "standard" insn isn't found, check the special cases.
+ if (MCDisassembler::Success != Result ||
+ MI.getOpcode() == Hexagon::A4_ext) {
+ Result = decodeImmext(MI, Instruction, this);
+ if (MCDisassembler::Success != Result) {
+ Result = decodeSpecial(MI, Instruction);
+ }
+ } else {
+ // If the instruction is a compound instruction, register values will
+ // follow the duplex model, so the register values in the MCInst are
+ // incorrect. If the instruction is a compound, loop through the
+ // operands and change registers appropriately.
+ if (llvm::HexagonMCInstrInfo::getType(*MCII, MI) ==
+ HexagonII::TypeCOMPOUND) {
+ for (MCInst::iterator i = MI.begin(), last = MI.end(); i < last; ++i) {
+ if (i->isReg()) {
+ unsigned reg = i->getReg() - Hexagon::R0;
+ i->setReg(getRegFromSubinstEncoding(reg));
+ }
+ }
+ }
+ }
+ }
+
+ if (HexagonMCInstrInfo::isNewValue(*MCII, MI)) {
+ unsigned OpIndex = HexagonMCInstrInfo::getNewValueOp(*MCII, MI);
+ MCOperand &MCO = MI.getOperand(OpIndex);
+ assert(MCO.isReg() && "New value consumers must be registers");
+ unsigned Register =
+ getContext().getRegisterInfo()->getEncodingValue(MCO.getReg());
+ if ((Register & 0x6) == 0)
+ // HexagonPRM 10.11: bits 2:1 == 0 is reserved
+ return MCDisassembler::Fail;
+ unsigned Lookback = (Register & 0x6) >> 1;
+ unsigned Offset = 1;
+ bool Vector = HexagonMCInstrInfo::isVector(*MCII, MI);
+ auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
+ auto i = Instructions.end() - 1;
+ for (auto n = Instructions.begin() - 1;; --i, ++Offset) {
+ if (i == n)
+ // Couldn't find producer
+ return MCDisassembler::Fail;
+ if (Vector && !HexagonMCInstrInfo::isVector(*MCII, *i->getInst()))
+ // Skip scalars when calculating distances for vectors
+ ++Lookback;
+ if (HexagonMCInstrInfo::isImmext(*i->getInst()))
+ ++Lookback;
+ if (Offset == Lookback)
+ break;
+ }
+ auto const &Inst = *i->getInst();
+ bool SubregBit = (Register & 0x1) != 0;
+ if (SubregBit && HexagonMCInstrInfo::hasNewValue2(*MCII, Inst)) {
+ // If subreg bit is set we're selecting the second produced newvalue
+ unsigned Producer =
+ HexagonMCInstrInfo::getNewValueOperand2(*MCII, Inst).getReg();
+ assert(Producer != Hexagon::NoRegister);
+ MCO.setReg(Producer);
+ } else if (HexagonMCInstrInfo::hasNewValue(*MCII, Inst)) {
+ unsigned Producer =
+ HexagonMCInstrInfo::getNewValueOperand(*MCII, Inst).getReg();
+ if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15)
+ Producer = ((Producer - Hexagon::W0) << 1) + SubregBit + Hexagon::V0;
+ else if (SubregBit)
+ // Subreg bit should not be set for non-doublevector newvalue producers
+ return MCDisassembler::Fail;
+ assert(Producer != Hexagon::NoRegister);
+ MCO.setReg(Producer);
+ } else
+ return MCDisassembler::Fail;
+ }
+
+ adjustExtendedInstructions(MI, MCB);
+ MCInst const *Extender =
+ HexagonMCInstrInfo::extenderForIndex(MCB,
+ HexagonMCInstrInfo::bundleSize(MCB));
+ if (Extender != nullptr) {
+ MCInst const &Inst = HexagonMCInstrInfo::isDuplex(*MCII, MI) ?
+ *MI.getOperand(1).getInst() : MI;
+ if (!HexagonMCInstrInfo::isExtendable(*MCII, Inst) &&
+ !HexagonMCInstrInfo::isExtended(*MCII, Inst))
+ return MCDisassembler::Fail;
+ }
+ return Result;
+}
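
In the new-value handling above, the consumer's register field encodes a base distance back to its producer within the packet (bits 2:1, with 0 reserved) and, for double-vector producers, which half is read (bit 0); the loop then adjusts that distance for constant extenders and, for vector consumers, intervening scalar instructions. Just the field decoding, written out standalone as a sketch:

    #include <cassert>

    // Sketch only: the struct name is illustrative.
    struct NewValueField {
      unsigned Lookback; // 1..3 instructions back within the packet
      bool SubregBit;    // selects the second register of a double producer
    };

    static NewValueField decodeNewValueField(unsigned Encoded) {
      assert((Encoded & 0x6) != 0 && "a lookback of 0 is reserved");
      return {(Encoded & 0x6) >> 1, (Encoded & 0x1) != 0};
    }
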
+
+void HexagonDisassembler::adjustExtendedInstructions(MCInst &MCI,
+ MCInst const &MCB) const {
+ if (!HexagonMCInstrInfo::hasExtenderForIndex(
+ MCB, HexagonMCInstrInfo::bundleSize(MCB))) {
+ unsigned opcode;
+ // This code is used by the disassembler to disambiguate between GP-
+ // relative and absolute addressing instructions, since they share the
+ // same encoding bits. However, an absolute addressing instruction must
+ // follow an immediate extender. The disassembler always selects the
+ // absolute addressing instruction first and uses this code to change it
+ // into the GP-relative instruction when no corresponding immediate
+ // extender is present.
+ switch (MCI.getOpcode()) {
+ case Hexagon::S2_storerbabs:
+ opcode = Hexagon::S2_storerbgp;
+ break;
+ case Hexagon::S2_storerhabs:
+ opcode = Hexagon::S2_storerhgp;
+ break;
+ case Hexagon::S2_storerfabs:
+ opcode = Hexagon::S2_storerfgp;
+ break;
+ case Hexagon::S2_storeriabs:
+ opcode = Hexagon::S2_storerigp;
+ break;
+ case Hexagon::S2_storerbnewabs:
+ opcode = Hexagon::S2_storerbnewgp;
+ break;
+ case Hexagon::S2_storerhnewabs:
+ opcode = Hexagon::S2_storerhnewgp;
+ break;
+ case Hexagon::S2_storerinewabs:
+ opcode = Hexagon::S2_storerinewgp;
+ break;
+ case Hexagon::S2_storerdabs:
+ opcode = Hexagon::S2_storerdgp;
+ break;
+ case Hexagon::L4_loadrb_abs:
+ opcode = Hexagon::L2_loadrbgp;
+ break;
+ case Hexagon::L4_loadrub_abs:
+ opcode = Hexagon::L2_loadrubgp;
+ break;
+ case Hexagon::L4_loadrh_abs:
+ opcode = Hexagon::L2_loadrhgp;
+ break;
+ case Hexagon::L4_loadruh_abs:
+ opcode = Hexagon::L2_loadruhgp;
+ break;
+ case Hexagon::L4_loadri_abs:
+ opcode = Hexagon::L2_loadrigp;
+ break;
+ case Hexagon::L4_loadrd_abs:
+ opcode = Hexagon::L2_loadrdgp;
+ break;
+ default:
+ opcode = MCI.getOpcode();
+ }
+ MCI.setOpcode(opcode);
+ }
+}
+
+namespace llvm {
+extern const MCInstrDesc HexagonInsts[];
+}
+
+static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
+ ArrayRef<MCPhysReg> Table) {
+ if (RegNo < Table.size()) {
+ Inst.addOperand(MCOperand::createReg(Table[RegNo]));
+ return MCDisassembler::Success;
+ }
+
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeIntRegsRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ static const MCPhysReg IntRegDecoderTable[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9,
+ Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14,
+ Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
+ Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24,
+ Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29,
+ Hexagon::R30, Hexagon::R31};
+
+ return DecodeRegisterClass(Inst, RegNo, IntRegDecoderTable);
+}
+
+static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg VecRegDecoderTable[] = {
+ Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4,
+ Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9,
+ Hexagon::V10, Hexagon::V11, Hexagon::V12, Hexagon::V13, Hexagon::V14,
+ Hexagon::V15, Hexagon::V16, Hexagon::V17, Hexagon::V18, Hexagon::V19,
+ Hexagon::V20, Hexagon::V21, Hexagon::V22, Hexagon::V23, Hexagon::V24,
+ Hexagon::V25, Hexagon::V26, Hexagon::V27, Hexagon::V28, Hexagon::V29,
+ Hexagon::V30, Hexagon::V31};
+
+ return DecodeRegisterClass(Inst, RegNo, VecRegDecoderTable);
+}
+
+static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg DoubleRegDecoderTable[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3,
+ Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7,
+ Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11,
+ Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15};
+
+ return DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable);
+}
+
+static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg VecDblRegDecoderTable[] = {
+ Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3,
+ Hexagon::W4, Hexagon::W5, Hexagon::W6, Hexagon::W7,
+ Hexagon::W8, Hexagon::W9, Hexagon::W10, Hexagon::W11,
+ Hexagon::W12, Hexagon::W13, Hexagon::W14, Hexagon::W15};
+
+ return (DecodeRegisterClass(Inst, RegNo >> 1, VecDblRegDecoderTable));
+}
+
+static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1,
+ Hexagon::P2, Hexagon::P3};
+
+ return DecodeRegisterClass(Inst, RegNo, PredRegDecoderTable);
+}
+
+static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg VecPredRegDecoderTable[] = {Hexagon::Q0, Hexagon::Q1,
+ Hexagon::Q2, Hexagon::Q3};
+
+ return DecodeRegisterClass(Inst, RegNo, VecPredRegDecoderTable);
+}
+
+static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg CtrlRegDecoderTable[] = {
+ Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1,
+ Hexagon::P3_0, Hexagon::C5, Hexagon::C6, Hexagon::C7,
+ Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP,
+ Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPC
+ };
+
+ if (RegNo >= array_lengthof(CtrlRegDecoderTable))
+ return MCDisassembler::Fail;
+
+ if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister)
+ return MCDisassembler::Fail;
+
+ unsigned Register = CtrlRegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg CtrlReg64DecoderTable[] = {
+ Hexagon::C1_0, Hexagon::NoRegister,
+ Hexagon::C3_2, Hexagon::NoRegister,
+ Hexagon::C7_6, Hexagon::NoRegister,
+ Hexagon::C9_8, Hexagon::NoRegister,
+ Hexagon::C11_10, Hexagon::NoRegister,
+ Hexagon::CS, Hexagon::NoRegister,
+ Hexagon::UPC, Hexagon::NoRegister
+ };
+
+ if (RegNo >= array_lengthof(CtrlReg64DecoderTable))
+ return MCDisassembler::Fail;
+
+ if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister)
+ return MCDisassembler::Fail;
+
+ unsigned Register = CtrlReg64DecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ unsigned Register = 0;
+ switch (RegNo) {
+ case 0:
+ Register = Hexagon::M0;
+ break;
+ case 1:
+ Register = Hexagon::M1;
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+namespace {
+uint32_t fullValue(MCInstrInfo const &MCII,
+ MCInst &MCB,
+ MCInst &MI,
+ int64_t Value) {
+ MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex(
+ MCB, HexagonMCInstrInfo::bundleSize(MCB));
+ if (!Extender || MI.size() != HexagonMCInstrInfo::getExtendableOp(MCII, MI))
+ return Value;
+ unsigned Alignment = HexagonMCInstrInfo::getExtentAlignment(MCII, MI);
+ uint32_t Lower6 = static_cast<uint32_t>(Value >> Alignment) & 0x3f;
+ int64_t Bits;
+ bool Success = Extender->getOperand(0).getExpr()->evaluateAsAbsolute(Bits);
+ assert(Success); (void)Success;
+ uint32_t Upper26 = static_cast<uint32_t>(Bits);
+ uint32_t Operand = Upper26 | Lower6;
+ return Operand;
+}
+template <size_t T>
+void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) {
+ HexagonDisassembler const &Disassembler = disassembler(Decoder);
+ int64_t FullValue = fullValue(*Disassembler.MCII,
+ **Disassembler.CurrentBundle,
+ MI, SignExtend64<T>(tmp));
+ int64_t Extended = SignExtend64<32>(FullValue);
+ HexagonMCInstrInfo::addConstant(MI, Extended,
+ Disassembler.getContext());
+}
+}
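
fullValue() above splices a constant-extended operand from two pieces: the extender contributes the upper 26 bits (already positioned by decodeImmext further down), and the extended instruction contributes only its low 6 bits, taken after removing the operand's alignment shift. The splice itself, as a standalone sketch:

    #include <cstdint>

    // Sketch only: mirrors the arithmetic in fullValue().
    static uint32_t spliceExtended(uint32_t ExtenderUpper26, int64_t InsnValue,
                                   unsigned Alignment) {
      uint32_t Lower6 = static_cast<uint32_t>(InsnValue >> Alignment) & 0x3f;
      return ExtenderUpper26 | Lower6;
    }
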
+
+static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ HexagonDisassembler const &Disassembler = disassembler(Decoder);
+ int64_t FullValue = fullValue(*Disassembler.MCII,
+ **Disassembler.CurrentBundle,
+ MI, tmp);
+ assert(FullValue >= 0 && "Negative in unsigned decoder");
+ HexagonMCInstrInfo::addConstant(MI, FullValue, Disassembler.getContext());
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<16>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<12>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<11>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ HexagonMCInstrInfo::addConstant(MI, SignExtend64<12>(tmp), contextFromDecoder(Decoder));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<13>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<14>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<10>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/,
+ const void *Decoder) {
+ signedDecoder<8>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<6>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<4>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<5>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<6>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<7>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<10>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<19>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+// custom decoder for various jump/call immediates
+static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder) {
+ HexagonDisassembler const &Disassembler = disassembler(Decoder);
+ unsigned Bits = HexagonMCInstrInfo::getExtentBits(*Disassembler.MCII, MI);
+ // r13_2 is not extendable, so if there are no extent bits, it's r13_2
+ if (Bits == 0)
+ Bits = 15;
+ uint32_t FullValue = fullValue(*Disassembler.MCII,
+ **Disassembler.CurrentBundle,
+ MI, SignExtend64(tmp, Bits));
+ int64_t Extended = SignExtend64<32>(FullValue) + Address;
+ if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true,
+ 0, 4))
+ HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext());
+ return MCDisassembler::Success;
+}
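
Branch targets are thus formed by sign-extending the (possibly extender-widened) immediate to 32 bits and adding the address of the current word; only when no symbolic operand can be attached is the raw constant emitted. The final arithmetic step, as a sketch:

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    // Sketch only: mirrors the computation of Extended in brtargetDecoder().
    static int64_t branchTarget(uint32_t FullValue, uint64_t Address) {
      return llvm::SignExtend64<32>(FullValue) + Address;
    }
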
+
+// Addressing mode dependent load store opcode map.
+// - If an insn is preceded by an extender the address is absolute.
+// - memw(##symbol) = r0
+// - If an insn is not preceded by an extender the address is GP relative.
+// - memw(gp + #symbol) = r0
+// Please note that the instructions must be ordered in the descending order
+// of their opcode.
+// HexagonII::INST_ICLASS_ST
+static const unsigned int StoreConditionalOpcodeData[][2] = {
+ {S4_pstorerdfnew_abs, 0xafc02084},
+ {S4_pstorerdtnew_abs, 0xafc02080},
+ {S4_pstorerdf_abs, 0xafc00084},
+ {S4_pstorerdt_abs, 0xafc00080},
+ {S4_pstorerinewfnew_abs, 0xafa03084},
+ {S4_pstorerinewtnew_abs, 0xafa03080},
+ {S4_pstorerhnewfnew_abs, 0xafa02884},
+ {S4_pstorerhnewtnew_abs, 0xafa02880},
+ {S4_pstorerbnewfnew_abs, 0xafa02084},
+ {S4_pstorerbnewtnew_abs, 0xafa02080},
+ {S4_pstorerinewf_abs, 0xafa01084},
+ {S4_pstorerinewt_abs, 0xafa01080},
+ {S4_pstorerhnewf_abs, 0xafa00884},
+ {S4_pstorerhnewt_abs, 0xafa00880},
+ {S4_pstorerbnewf_abs, 0xafa00084},
+ {S4_pstorerbnewt_abs, 0xafa00080},
+ {S4_pstorerifnew_abs, 0xaf802084},
+ {S4_pstoreritnew_abs, 0xaf802080},
+ {S4_pstorerif_abs, 0xaf800084},
+ {S4_pstorerit_abs, 0xaf800080},
+ {S4_pstorerhfnew_abs, 0xaf402084},
+ {S4_pstorerhtnew_abs, 0xaf402080},
+ {S4_pstorerhf_abs, 0xaf400084},
+ {S4_pstorerht_abs, 0xaf400080},
+ {S4_pstorerbfnew_abs, 0xaf002084},
+ {S4_pstorerbtnew_abs, 0xaf002080},
+ {S4_pstorerbf_abs, 0xaf000084},
+ {S4_pstorerbt_abs, 0xaf000080}};
+// HexagonII::INST_ICLASS_LD
+
+// HexagonII::INST_ICLASS_LD_ST_2
+static unsigned int LoadStoreOpcodeData[][2] = {{L4_loadrd_abs, 0x49c00000},
+ {L4_loadri_abs, 0x49800000},
+ {L4_loadruh_abs, 0x49600000},
+ {L4_loadrh_abs, 0x49400000},
+ {L4_loadrub_abs, 0x49200000},
+ {L4_loadrb_abs, 0x49000000},
+ {S2_storerdabs, 0x48c00000},
+ {S2_storerinewabs, 0x48a01000},
+ {S2_storerhnewabs, 0x48a00800},
+ {S2_storerbnewabs, 0x48a00000},
+ {S2_storeriabs, 0x48800000},
+ {S2_storerfabs, 0x48600000},
+ {S2_storerhabs, 0x48400000},
+ {S2_storerbabs, 0x48000000}};
+static const size_t NumCondS = array_lengthof(StoreConditionalOpcodeData);
+static const size_t NumLS = array_lengthof(LoadStoreOpcodeData);
+
+static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) {
+
+ unsigned MachineOpcode = 0;
+ unsigned LLVMOpcode = 0;
+
+ if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_ST) {
+ for (size_t i = 0; i < NumCondS; ++i) {
+ if ((insn & StoreConditionalOpcodeData[i][1]) ==
+ StoreConditionalOpcodeData[i][1]) {
+ MachineOpcode = StoreConditionalOpcodeData[i][1];
+ LLVMOpcode = StoreConditionalOpcodeData[i][0];
+ break;
+ }
+ }
+ }
+ if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_LD_ST_2) {
+ for (size_t i = 0; i < NumLS; ++i) {
+ if ((insn & LoadStoreOpcodeData[i][1]) == LoadStoreOpcodeData[i][1]) {
+ MachineOpcode = LoadStoreOpcodeData[i][1];
+ LLVMOpcode = LoadStoreOpcodeData[i][0];
+ break;
+ }
+ }
+ }
+
+ if (MachineOpcode) {
+ unsigned Value = 0;
+ unsigned shift = 0;
+ MI.setOpcode(LLVMOpcode);
+ // Remove the parse bits from the insn.
+ insn &= ~HexagonII::INST_PARSE_MASK;
+
+ switch (LLVMOpcode) {
+ default:
+ return MCDisassembler::Fail;
+
+ case Hexagon::S4_pstorerdf_abs:
+ case Hexagon::S4_pstorerdt_abs:
+ case Hexagon::S4_pstorerdfnew_abs:
+ case Hexagon::S4_pstorerdtnew_abs: {
+ // op: Pv
+ Value = insn & UINT64_C(3);
+ DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ // op: u6
+ Value = (insn >> 12) & UINT64_C(48);
+ Value |= (insn >> 3) & UINT64_C(15);
+ MI.addOperand(MCOperand::createImm(Value));
+ // op: Rtt
+ Value = (insn >> 8) & UINT64_C(31);
+ DecodeDoubleRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+
+ case Hexagon::S4_pstorerbnewf_abs:
+ case Hexagon::S4_pstorerbnewt_abs:
+ case Hexagon::S4_pstorerbnewfnew_abs:
+ case Hexagon::S4_pstorerbnewtnew_abs:
+ case Hexagon::S4_pstorerhnewf_abs:
+ case Hexagon::S4_pstorerhnewt_abs:
+ case Hexagon::S4_pstorerhnewfnew_abs:
+ case Hexagon::S4_pstorerhnewtnew_abs:
+ case Hexagon::S4_pstorerinewf_abs:
+ case Hexagon::S4_pstorerinewt_abs:
+ case Hexagon::S4_pstorerinewfnew_abs:
+ case Hexagon::S4_pstorerinewtnew_abs: {
+ // op: Pv
+ Value = insn & UINT64_C(3);
+ DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ // op: u6
+ Value = (insn >> 12) & UINT64_C(48);
+ Value |= (insn >> 3) & UINT64_C(15);
+ MI.addOperand(MCOperand::createImm(Value));
+ // op: Nt
+ Value = (insn >> 8) & UINT64_C(7);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+
+ case Hexagon::S4_pstorerbf_abs:
+ case Hexagon::S4_pstorerbt_abs:
+ case Hexagon::S4_pstorerbfnew_abs:
+ case Hexagon::S4_pstorerbtnew_abs:
+ case Hexagon::S4_pstorerhf_abs:
+ case Hexagon::S4_pstorerht_abs:
+ case Hexagon::S4_pstorerhfnew_abs:
+ case Hexagon::S4_pstorerhtnew_abs:
+ case Hexagon::S4_pstorerif_abs:
+ case Hexagon::S4_pstorerit_abs:
+ case Hexagon::S4_pstorerifnew_abs:
+ case Hexagon::S4_pstoreritnew_abs: {
+ // op: Pv
+ Value = insn & UINT64_C(3);
+ DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ // op: u6
+ Value = (insn >> 12) & UINT64_C(48);
+ Value |= (insn >> 3) & UINT64_C(15);
+ MI.addOperand(MCOperand::createImm(Value));
+ // op: Rt
+ Value = (insn >> 8) & UINT64_C(31);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+
+ case Hexagon::L4_ploadrdf_abs:
+ case Hexagon::L4_ploadrdt_abs:
+ case Hexagon::L4_ploadrdfnew_abs:
+ case Hexagon::L4_ploadrdtnew_abs: {
+ // op: Rdd
+ Value = insn & UINT64_C(31);
+ DecodeDoubleRegsRegisterClass(MI, Value, 0, 0);
+ // op: Pt
+ Value = ((insn >> 9) & UINT64_C(3));
+ DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ // op: u6
+ Value = ((insn >> 15) & UINT64_C(62));
+ Value |= ((insn >> 8) & UINT64_C(1));
+ MI.addOperand(MCOperand::createImm(Value));
+ break;
+ }
+
+ case Hexagon::L4_ploadrbf_abs:
+ case Hexagon::L4_ploadrbt_abs:
+ case Hexagon::L4_ploadrbfnew_abs:
+ case Hexagon::L4_ploadrbtnew_abs:
+ case Hexagon::L4_ploadrhf_abs:
+ case Hexagon::L4_ploadrht_abs:
+ case Hexagon::L4_ploadrhfnew_abs:
+ case Hexagon::L4_ploadrhtnew_abs:
+ case Hexagon::L4_ploadrubf_abs:
+ case Hexagon::L4_ploadrubt_abs:
+ case Hexagon::L4_ploadrubfnew_abs:
+ case Hexagon::L4_ploadrubtnew_abs:
+ case Hexagon::L4_ploadruhf_abs:
+ case Hexagon::L4_ploadruht_abs:
+ case Hexagon::L4_ploadruhfnew_abs:
+ case Hexagon::L4_ploadruhtnew_abs:
+ case Hexagon::L4_ploadrif_abs:
+ case Hexagon::L4_ploadrit_abs:
+ case Hexagon::L4_ploadrifnew_abs:
+ case Hexagon::L4_ploadritnew_abs:
+ // op: Rd
+ Value = insn & UINT64_C(31);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ // op: Pt
+ Value = (insn >> 9) & UINT64_C(3);
+ DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ // op: u6
+ Value = (insn >> 15) & UINT64_C(62);
+ Value |= (insn >> 8) & UINT64_C(1);
+ MI.addOperand(MCOperand::createImm(Value));
+ break;
+
+ // op: g16_2
+ case Hexagon::L4_loadri_abs:
+ ++shift;
+ // op: g16_1
+ case Hexagon::L4_loadrh_abs:
+ case Hexagon::L4_loadruh_abs:
+ ++shift;
+ // op: g16_0
+ case Hexagon::L4_loadrb_abs:
+ case Hexagon::L4_loadrub_abs: {
+ // op: Rd
+ Value |= insn & UINT64_C(31);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ Value = (insn >> 11) & UINT64_C(49152);
+ Value |= (insn >> 7) & UINT64_C(15872);
+ Value |= (insn >> 5) & UINT64_C(511);
+ MI.addOperand(MCOperand::createImm(Value << shift));
+ break;
+ }
+
+ case Hexagon::L4_loadrd_abs: {
+ Value = insn & UINT64_C(31);
+ DecodeDoubleRegsRegisterClass(MI, Value, 0, 0);
+ Value = (insn >> 11) & UINT64_C(49152);
+ Value |= (insn >> 7) & UINT64_C(15872);
+ Value |= (insn >> 5) & UINT64_C(511);
+ MI.addOperand(MCOperand::createImm(Value << 3));
+ break;
+ }
+
+ case Hexagon::S2_storerdabs: {
+ // op: g16_3
+ Value = (insn >> 11) & UINT64_C(49152);
+ Value |= (insn >> 7) & UINT64_C(15872);
+ Value |= (insn >> 5) & UINT64_C(256);
+ Value |= insn & UINT64_C(255);
+ MI.addOperand(MCOperand::createImm(Value << 3));
+ // op: Rtt
+ Value = (insn >> 8) & UINT64_C(31);
+ DecodeDoubleRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+
+ // op: g16_2
+ case Hexagon::S2_storerinewabs:
+ ++shift;
+ // op: g16_1
+ case Hexagon::S2_storerhnewabs:
+ ++shift;
+ // op: g16_0
+ case Hexagon::S2_storerbnewabs: {
+ Value = (insn >> 11) & UINT64_C(49152);
+ Value |= (insn >> 7) & UINT64_C(15872);
+ Value |= (insn >> 5) & UINT64_C(256);
+ Value |= insn & UINT64_C(255);
+ MI.addOperand(MCOperand::createImm(Value << shift));
+ // op: Nt
+ Value = (insn >> 8) & UINT64_C(7);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+
+ // op: g16_2
+ case Hexagon::S2_storeriabs:
+ ++shift;
+ // op: g16_1
+ case Hexagon::S2_storerhabs:
+ case Hexagon::S2_storerfabs:
+ ++shift;
+ // op: g16_0
+ case Hexagon::S2_storerbabs: {
+ Value = (insn >> 11) & UINT64_C(49152);
+ Value |= (insn >> 7) & UINT64_C(15872);
+ Value |= (insn >> 5) & UINT64_C(256);
+ Value |= insn & UINT64_C(255);
+ MI.addOperand(MCOperand::createImm(Value << shift));
+ // op: Rt
+ Value = (insn >> 8) & UINT64_C(31);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+ }
+ return MCDisassembler::Success;
+ }
+ return MCDisassembler::Fail;
+}
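
The g16_* cases above reassemble a 16-bit GP-relative offset that the encoding scatters across three instruction fields, then scale it by the access size (shift 0/1/2/3 for byte/half/word/double). The load-side reassembly, written out standalone as a sketch:

    #include <cstdint>

    // Sketch only: mirrors the g16 extraction used for the GP-relative loads
    // in decodeSpecial(); the store forms split the low bits differently.
    static uint32_t g16LoadOffset(uint32_t Insn, unsigned Shift) {
      uint32_t Value = (Insn >> 11) & 0xC000; // offset bits 15:14
      Value |= (Insn >> 7) & 0x3E00;          // offset bits 13:9
      Value |= (Insn >> 5) & 0x01FF;          // offset bits 8:0
      return Value << Shift;                  // scale by access size
    }
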
+
+static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn,
+ void const *Decoder) {
+
+ // Instruction class for a constant extender: bits 31:28 = 0x0000.
+ if ((~insn & 0xf0000000) == 0xf0000000) {
+ unsigned Value;
+ // 27:16 High 12 bits of 26-bit extender.
+ Value = (insn & 0x0fff0000) << 4;
+ // 13:0 Low 14 bits of 26-bit extender.
+ Value |= ((insn & 0x3fff) << 6);
+ MI.setOpcode(Hexagon::A4_ext);
+ HexagonMCInstrInfo::addConstant(MI, Value, contextFromDecoder(Decoder));
+ return MCDisassembler::Success;
+ }
+ return MCDisassembler::Fail;
+}
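
A constant extender carries the upper 26 bits of a 32-bit immediate, split in the encoding into a 12-bit field (instruction bits 27:16) and a 14-bit field (bits 13:0); decodeImmext() repositions them so that the extended instruction only has to supply the bottom 6 bits. The payload assembly as a standalone sketch:

    #include <cstdint>

    // Sketch only: mirrors the value computed in decodeImmext().
    static uint32_t immextPayload(uint32_t Insn) {
      uint32_t Value = (Insn & 0x0fff0000) << 4; // insn bits 27:16 -> value bits 31:20
      Value |= (Insn & 0x3fff) << 6;             // insn bits 13:0  -> value bits 19:6
      return Value;                              // bits 5:0 come from the extended insn
    }
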
+
+// These values are from HexagonGenMCCodeEmitter.inc and HexagonIsetDx.td
+enum subInstBinaryValues {
+ V4_SA1_addi_BITS = 0x0000,
+ V4_SA1_addi_MASK = 0x1800,
+ V4_SA1_addrx_BITS = 0x1800,
+ V4_SA1_addrx_MASK = 0x1f00,
+ V4_SA1_addsp_BITS = 0x0c00,
+ V4_SA1_addsp_MASK = 0x1c00,
+ V4_SA1_and1_BITS = 0x1200,
+ V4_SA1_and1_MASK = 0x1f00,
+ V4_SA1_clrf_BITS = 0x1a70,
+ V4_SA1_clrf_MASK = 0x1e70,
+ V4_SA1_clrfnew_BITS = 0x1a50,
+ V4_SA1_clrfnew_MASK = 0x1e70,
+ V4_SA1_clrt_BITS = 0x1a60,
+ V4_SA1_clrt_MASK = 0x1e70,
+ V4_SA1_clrtnew_BITS = 0x1a40,
+ V4_SA1_clrtnew_MASK = 0x1e70,
+ V4_SA1_cmpeqi_BITS = 0x1900,
+ V4_SA1_cmpeqi_MASK = 0x1f00,
+ V4_SA1_combine0i_BITS = 0x1c00,
+ V4_SA1_combine0i_MASK = 0x1d18,
+ V4_SA1_combine1i_BITS = 0x1c08,
+ V4_SA1_combine1i_MASK = 0x1d18,
+ V4_SA1_combine2i_BITS = 0x1c10,
+ V4_SA1_combine2i_MASK = 0x1d18,
+ V4_SA1_combine3i_BITS = 0x1c18,
+ V4_SA1_combine3i_MASK = 0x1d18,
+ V4_SA1_combinerz_BITS = 0x1d08,
+ V4_SA1_combinerz_MASK = 0x1d08,
+ V4_SA1_combinezr_BITS = 0x1d00,
+ V4_SA1_combinezr_MASK = 0x1d08,
+ V4_SA1_dec_BITS = 0x1300,
+ V4_SA1_dec_MASK = 0x1f00,
+ V4_SA1_inc_BITS = 0x1100,
+ V4_SA1_inc_MASK = 0x1f00,
+ V4_SA1_seti_BITS = 0x0800,
+ V4_SA1_seti_MASK = 0x1c00,
+ V4_SA1_setin1_BITS = 0x1a00,
+ V4_SA1_setin1_MASK = 0x1e40,
+ V4_SA1_sxtb_BITS = 0x1500,
+ V4_SA1_sxtb_MASK = 0x1f00,
+ V4_SA1_sxth_BITS = 0x1400,
+ V4_SA1_sxth_MASK = 0x1f00,
+ V4_SA1_tfr_BITS = 0x1000,
+ V4_SA1_tfr_MASK = 0x1f00,
+ V4_SA1_zxtb_BITS = 0x1700,
+ V4_SA1_zxtb_MASK = 0x1f00,
+ V4_SA1_zxth_BITS = 0x1600,
+ V4_SA1_zxth_MASK = 0x1f00,
+ V4_SL1_loadri_io_BITS = 0x0000,
+ V4_SL1_loadri_io_MASK = 0x1000,
+ V4_SL1_loadrub_io_BITS = 0x1000,
+ V4_SL1_loadrub_io_MASK = 0x1000,
+ V4_SL2_deallocframe_BITS = 0x1f00,
+ V4_SL2_deallocframe_MASK = 0x1fc0,
+ V4_SL2_jumpr31_BITS = 0x1fc0,
+ V4_SL2_jumpr31_MASK = 0x1fc4,
+ V4_SL2_jumpr31_f_BITS = 0x1fc5,
+ V4_SL2_jumpr31_f_MASK = 0x1fc7,
+ V4_SL2_jumpr31_fnew_BITS = 0x1fc7,
+ V4_SL2_jumpr31_fnew_MASK = 0x1fc7,
+ V4_SL2_jumpr31_t_BITS = 0x1fc4,
+ V4_SL2_jumpr31_t_MASK = 0x1fc7,
+ V4_SL2_jumpr31_tnew_BITS = 0x1fc6,
+ V4_SL2_jumpr31_tnew_MASK = 0x1fc7,
+ V4_SL2_loadrb_io_BITS = 0x1000,
+ V4_SL2_loadrb_io_MASK = 0x1800,
+ V4_SL2_loadrd_sp_BITS = 0x1e00,
+ V4_SL2_loadrd_sp_MASK = 0x1f00,
+ V4_SL2_loadrh_io_BITS = 0x0000,
+ V4_SL2_loadrh_io_MASK = 0x1800,
+ V4_SL2_loadri_sp_BITS = 0x1c00,
+ V4_SL2_loadri_sp_MASK = 0x1e00,
+ V4_SL2_loadruh_io_BITS = 0x0800,
+ V4_SL2_loadruh_io_MASK = 0x1800,
+ V4_SL2_return_BITS = 0x1f40,
+ V4_SL2_return_MASK = 0x1fc4,
+ V4_SL2_return_f_BITS = 0x1f45,
+ V4_SL2_return_f_MASK = 0x1fc7,
+ V4_SL2_return_fnew_BITS = 0x1f47,
+ V4_SL2_return_fnew_MASK = 0x1fc7,
+ V4_SL2_return_t_BITS = 0x1f44,
+ V4_SL2_return_t_MASK = 0x1fc7,
+ V4_SL2_return_tnew_BITS = 0x1f46,
+ V4_SL2_return_tnew_MASK = 0x1fc7,
+ V4_SS1_storeb_io_BITS = 0x1000,
+ V4_SS1_storeb_io_MASK = 0x1000,
+ V4_SS1_storew_io_BITS = 0x0000,
+ V4_SS1_storew_io_MASK = 0x1000,
+ V4_SS2_allocframe_BITS = 0x1c00,
+ V4_SS2_allocframe_MASK = 0x1e00,
+ V4_SS2_storebi0_BITS = 0x1200,
+ V4_SS2_storebi0_MASK = 0x1f00,
+ V4_SS2_storebi1_BITS = 0x1300,
+ V4_SS2_storebi1_MASK = 0x1f00,
+ V4_SS2_stored_sp_BITS = 0x0a00,
+ V4_SS2_stored_sp_MASK = 0x1e00,
+ V4_SS2_storeh_io_BITS = 0x0000,
+ V4_SS2_storeh_io_MASK = 0x1800,
+ V4_SS2_storew_sp_BITS = 0x0800,
+ V4_SS2_storew_sp_MASK = 0x1e00,
+ V4_SS2_storewi0_BITS = 0x1000,
+ V4_SS2_storewi0_MASK = 0x1f00,
+ V4_SS2_storewi1_BITS = 0x1100,
+ V4_SS2_storewi1_MASK = 0x1f00
+};
+
+static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op,
+ raw_ostream &os) {
+ switch (IClass) {
+ case HexagonII::HSIG_L1:
+ if ((inst & V4_SL1_loadri_io_MASK) == V4_SL1_loadri_io_BITS)
+ op = Hexagon::V4_SL1_loadri_io;
+ else if ((inst & V4_SL1_loadrub_io_MASK) == V4_SL1_loadrub_io_BITS)
+ op = Hexagon::V4_SL1_loadrub_io;
+ else {
+ os << "<unknown subinstruction>";
+ return MCDisassembler::Fail;
+ }
+ break;
+ case HexagonII::HSIG_L2:
+ if ((inst & V4_SL2_deallocframe_MASK) == V4_SL2_deallocframe_BITS)
+ op = Hexagon::V4_SL2_deallocframe;
+ else if ((inst & V4_SL2_jumpr31_MASK) == V4_SL2_jumpr31_BITS)
+ op = Hexagon::V4_SL2_jumpr31;
+ else if ((inst & V4_SL2_jumpr31_f_MASK) == V4_SL2_jumpr31_f_BITS)
+ op = Hexagon::V4_SL2_jumpr31_f;
+ else if ((inst & V4_SL2_jumpr31_fnew_MASK) == V4_SL2_jumpr31_fnew_BITS)
+ op = Hexagon::V4_SL2_jumpr31_fnew;
+ else if ((inst & V4_SL2_jumpr31_t_MASK) == V4_SL2_jumpr31_t_BITS)
+ op = Hexagon::V4_SL2_jumpr31_t;
+ else if ((inst & V4_SL2_jumpr31_tnew_MASK) == V4_SL2_jumpr31_tnew_BITS)
+ op = Hexagon::V4_SL2_jumpr31_tnew;
+ else if ((inst & V4_SL2_loadrb_io_MASK) == V4_SL2_loadrb_io_BITS)
+ op = Hexagon::V4_SL2_loadrb_io;
+ else if ((inst & V4_SL2_loadrd_sp_MASK) == V4_SL2_loadrd_sp_BITS)
+ op = Hexagon::V4_SL2_loadrd_sp;
+ else if ((inst & V4_SL2_loadrh_io_MASK) == V4_SL2_loadrh_io_BITS)
+ op = Hexagon::V4_SL2_loadrh_io;
+ else if ((inst & V4_SL2_loadri_sp_MASK) == V4_SL2_loadri_sp_BITS)
+ op = Hexagon::V4_SL2_loadri_sp;
+ else if ((inst & V4_SL2_loadruh_io_MASK) == V4_SL2_loadruh_io_BITS)
+ op = Hexagon::V4_SL2_loadruh_io;
+ else if ((inst & V4_SL2_return_MASK) == V4_SL2_return_BITS)
+ op = Hexagon::V4_SL2_return;
+ else if ((inst & V4_SL2_return_f_MASK) == V4_SL2_return_f_BITS)
+ op = Hexagon::V4_SL2_return_f;
+ else if ((inst & V4_SL2_return_fnew_MASK) == V4_SL2_return_fnew_BITS)
+ op = Hexagon::V4_SL2_return_fnew;
+ else if ((inst & V4_SL2_return_t_MASK) == V4_SL2_return_t_BITS)
+ op = Hexagon::V4_SL2_return_t;
+ else if ((inst & V4_SL2_return_tnew_MASK) == V4_SL2_return_tnew_BITS)
+ op = Hexagon::V4_SL2_return_tnew;
+ else {
+ os << "<unknown subinstruction>";
+ return MCDisassembler::Fail;
+ }
+ break;
+ case HexagonII::HSIG_A:
+ if ((inst & V4_SA1_addi_MASK) == V4_SA1_addi_BITS)
+ op = Hexagon::V4_SA1_addi;
+ else if ((inst & V4_SA1_addrx_MASK) == V4_SA1_addrx_BITS)
+ op = Hexagon::V4_SA1_addrx;
+ else if ((inst & V4_SA1_addsp_MASK) == V4_SA1_addsp_BITS)
+ op = Hexagon::V4_SA1_addsp;
+ else if ((inst & V4_SA1_and1_MASK) == V4_SA1_and1_BITS)
+ op = Hexagon::V4_SA1_and1;
+ else if ((inst & V4_SA1_clrf_MASK) == V4_SA1_clrf_BITS)
+ op = Hexagon::V4_SA1_clrf;
+ else if ((inst & V4_SA1_clrfnew_MASK) == V4_SA1_clrfnew_BITS)
+ op = Hexagon::V4_SA1_clrfnew;
+ else if ((inst & V4_SA1_clrt_MASK) == V4_SA1_clrt_BITS)
+ op = Hexagon::V4_SA1_clrt;
+ else if ((inst & V4_SA1_clrtnew_MASK) == V4_SA1_clrtnew_BITS)
+ op = Hexagon::V4_SA1_clrtnew;
+ else if ((inst & V4_SA1_cmpeqi_MASK) == V4_SA1_cmpeqi_BITS)
+ op = Hexagon::V4_SA1_cmpeqi;
+ else if ((inst & V4_SA1_combine0i_MASK) == V4_SA1_combine0i_BITS)
+ op = Hexagon::V4_SA1_combine0i;
+ else if ((inst & V4_SA1_combine1i_MASK) == V4_SA1_combine1i_BITS)
+ op = Hexagon::V4_SA1_combine1i;
+ else if ((inst & V4_SA1_combine2i_MASK) == V4_SA1_combine2i_BITS)
+ op = Hexagon::V4_SA1_combine2i;
+ else if ((inst & V4_SA1_combine3i_MASK) == V4_SA1_combine3i_BITS)
+ op = Hexagon::V4_SA1_combine3i;
+ else if ((inst & V4_SA1_combinerz_MASK) == V4_SA1_combinerz_BITS)
+ op = Hexagon::V4_SA1_combinerz;
+ else if ((inst & V4_SA1_combinezr_MASK) == V4_SA1_combinezr_BITS)
+ op = Hexagon::V4_SA1_combinezr;
+ else if ((inst & V4_SA1_dec_MASK) == V4_SA1_dec_BITS)
+ op = Hexagon::V4_SA1_dec;
+ else if ((inst & V4_SA1_inc_MASK) == V4_SA1_inc_BITS)
+ op = Hexagon::V4_SA1_inc;
+ else if ((inst & V4_SA1_seti_MASK) == V4_SA1_seti_BITS)
+ op = Hexagon::V4_SA1_seti;
+ else if ((inst & V4_SA1_setin1_MASK) == V4_SA1_setin1_BITS)
+ op = Hexagon::V4_SA1_setin1;
+ else if ((inst & V4_SA1_sxtb_MASK) == V4_SA1_sxtb_BITS)
+ op = Hexagon::V4_SA1_sxtb;
+ else if ((inst & V4_SA1_sxth_MASK) == V4_SA1_sxth_BITS)
+ op = Hexagon::V4_SA1_sxth;
+ else if ((inst & V4_SA1_tfr_MASK) == V4_SA1_tfr_BITS)
+ op = Hexagon::V4_SA1_tfr;
+ else if ((inst & V4_SA1_zxtb_MASK) == V4_SA1_zxtb_BITS)
+ op = Hexagon::V4_SA1_zxtb;
+ else if ((inst & V4_SA1_zxth_MASK) == V4_SA1_zxth_BITS)
+ op = Hexagon::V4_SA1_zxth;
+ else {
+ os << "<unknown subinstruction>";
+ return MCDisassembler::Fail;
+ }
+ break;
+ case HexagonII::HSIG_S1:
+ if ((inst & V4_SS1_storeb_io_MASK) == V4_SS1_storeb_io_BITS)
+ op = Hexagon::V4_SS1_storeb_io;
+ else if ((inst & V4_SS1_storew_io_MASK) == V4_SS1_storew_io_BITS)
+ op = Hexagon::V4_SS1_storew_io;
+ else {
+ os << "<unknown subinstruction>";
+ return MCDisassembler::Fail;
+ }
+ break;
+ case HexagonII::HSIG_S2:
+ if ((inst & V4_SS2_allocframe_MASK) == V4_SS2_allocframe_BITS)
+ op = Hexagon::V4_SS2_allocframe;
+ else if ((inst & V4_SS2_storebi0_MASK) == V4_SS2_storebi0_BITS)
+ op = Hexagon::V4_SS2_storebi0;
+ else if ((inst & V4_SS2_storebi1_MASK) == V4_SS2_storebi1_BITS)
+ op = Hexagon::V4_SS2_storebi1;
+ else if ((inst & V4_SS2_stored_sp_MASK) == V4_SS2_stored_sp_BITS)
+ op = Hexagon::V4_SS2_stored_sp;
+ else if ((inst & V4_SS2_storeh_io_MASK) == V4_SS2_storeh_io_BITS)
+ op = Hexagon::V4_SS2_storeh_io;
+ else if ((inst & V4_SS2_storew_sp_MASK) == V4_SS2_storew_sp_BITS)
+ op = Hexagon::V4_SS2_storew_sp;
+ else if ((inst & V4_SS2_storewi0_MASK) == V4_SS2_storewi0_BITS)
+ op = Hexagon::V4_SS2_storewi0;
+ else if ((inst & V4_SS2_storewi1_MASK) == V4_SS2_storewi1_BITS)
+ op = Hexagon::V4_SS2_storewi1;
+ else {
+ os << "<unknown subinstruction>";
+ return MCDisassembler::Fail;
+ }
+ break;
+ default:
+ os << "<unknown>";
+ return MCDisassembler::Fail;
+ }
+ return MCDisassembler::Success;
+}
+
+static unsigned getRegFromSubinstEncoding(unsigned encoded_reg) {
+ if (encoded_reg < 8)
+ return Hexagon::R0 + encoded_reg;
+ else if (encoded_reg < 16)
+ return Hexagon::R0 + encoded_reg + 8;
+
+ // patently false value
+ return Hexagon::NoRegister;
+}
+
+static unsigned getDRegFromSubinstEncoding(unsigned encoded_dreg) {
+ if (encoded_dreg < 4)
+ return Hexagon::D0 + encoded_dreg;
+ else if (encoded_dreg < 8)
+ return Hexagon::D0 + encoded_dreg + 4;
+
+ // patently false value
+ return Hexagon::NoRegister;
+}
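+
+// Note (illustrative, not part of this patch): duplex sub-instructions can
+// only address the "low" general registers.  A 4-bit encoding E maps to
+// r0-r7 for E < 8 and to r16-r23 otherwise; the 3-bit double-register
+// encoding maps to d0-d3 (r1:0-r7:6) and d8-d11 (r17:16-r23:22), e.g.:
+//
+//   getRegFromSubinstEncoding(3)  == Hexagon::R3
+//   getRegFromSubinstEncoding(9)  == Hexagon::R17    // 9 + 8
+//   getDRegFromSubinstEncoding(5) == Hexagon::D9     // 5 + 4, i.e. r19:18
+//
+// Hexagon::NoRegister is only a defensive sentinel for out-of-range values.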
+
+void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
+ unsigned inst) const {
+ int64_t operand;
+ MCOperand Op;
+ switch (opcode) {
+ case Hexagon::V4_SL2_deallocframe:
+ case Hexagon::V4_SL2_jumpr31:
+ case Hexagon::V4_SL2_jumpr31_f:
+ case Hexagon::V4_SL2_jumpr31_fnew:
+ case Hexagon::V4_SL2_jumpr31_t:
+ case Hexagon::V4_SL2_jumpr31_tnew:
+ case Hexagon::V4_SL2_return:
+ case Hexagon::V4_SL2_return_f:
+ case Hexagon::V4_SL2_return_fnew:
+ case Hexagon::V4_SL2_return_t:
+ case Hexagon::V4_SL2_return_tnew:
+ // no operands for these instructions
+ break;
+ case Hexagon::V4_SS2_allocframe:
+ // u 8-4{5_3}
+ operand = ((inst & 0x1f0) >> 4) << 3;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ break;
+ case Hexagon::V4_SL1_loadri_io:
+ // Rd 3-0, Rs 7-4, u 11-8{4_2}
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = (inst & 0xf00) >> 6;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ break;
+ case Hexagon::V4_SL1_loadrub_io:
+ // Rd 3-0, Rs 7-4, u 11-8
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = (inst & 0xf00) >> 8;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ break;
+ case Hexagon::V4_SL2_loadrb_io:
+ // Rd 3-0, Rs 7-4, u 10-8
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = (inst & 0x700) >> 8;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ break;
+ case Hexagon::V4_SL2_loadrh_io:
+ case Hexagon::V4_SL2_loadruh_io:
+ // Rd 3-0, Rs 7-4, u 10-8{3_1}
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = ((inst & 0x700) >> 8) << 1;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ break;
+ case Hexagon::V4_SL2_loadrd_sp:
+ // Rdd 2-0, u 7-3{5_3}
+ operand = getDRegFromSubinstEncoding(inst & 0x7);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = ((inst & 0x0f8) >> 3) << 3;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ break;
+ case Hexagon::V4_SL2_loadri_sp:
+ // Rd 3-0, u 8-4{5_2}
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = ((inst & 0x1f0) >> 4) << 2;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ break;
+ case Hexagon::V4_SA1_addi:
+ // Rx 3-0 (x2), s7 10-4
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ MI->addOperand(Op);
+ operand = SignExtend64<7>((inst & 0x7f0) >> 4);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ break;
+ case Hexagon::V4_SA1_addrx:
+ // Rx 3-0 (x2), Rs 7-4
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    break;
+ case Hexagon::V4_SA1_and1:
+ case Hexagon::V4_SA1_dec:
+ case Hexagon::V4_SA1_inc:
+ case Hexagon::V4_SA1_sxtb:
+ case Hexagon::V4_SA1_sxth:
+ case Hexagon::V4_SA1_tfr:
+ case Hexagon::V4_SA1_zxtb:
+ case Hexagon::V4_SA1_zxth:
+ // Rd 3-0, Rs 7-4
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SA1_addsp:
+ // Rd 3-0, u 9-4{6_2}
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = ((inst & 0x3f0) >> 4) << 2;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ break;
+ case Hexagon::V4_SA1_seti:
+ // Rd 3-0, u 9-4
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = (inst & 0x3f0) >> 4;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ break;
+ case Hexagon::V4_SA1_clrf:
+ case Hexagon::V4_SA1_clrfnew:
+ case Hexagon::V4_SA1_clrt:
+ case Hexagon::V4_SA1_clrtnew:
+ case Hexagon::V4_SA1_setin1:
+ // Rd 3-0
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SA1_cmpeqi:
+ // Rs 7-4, u 1-0
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = inst & 0x3;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ break;
+ case Hexagon::V4_SA1_combine0i:
+ case Hexagon::V4_SA1_combine1i:
+ case Hexagon::V4_SA1_combine2i:
+ case Hexagon::V4_SA1_combine3i:
+ // Rdd 2-0, u 6-5
+ operand = getDRegFromSubinstEncoding(inst & 0x7);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = (inst & 0x060) >> 5;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ break;
+ case Hexagon::V4_SA1_combinerz:
+ case Hexagon::V4_SA1_combinezr:
+ // Rdd 2-0, Rs 7-4
+ operand = getDRegFromSubinstEncoding(inst & 0x7);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SS1_storeb_io:
+ // Rs 7-4, u 11-8, Rt 3-0
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = (inst & 0xf00) >> 8;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SS1_storew_io:
+ // Rs 7-4, u 11-8{4_2}, Rt 3-0
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = ((inst & 0xf00) >> 8) << 2;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SS2_storebi0:
+ case Hexagon::V4_SS2_storebi1:
+ // Rs 7-4, u 3-0
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = inst & 0xf;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ break;
+ case Hexagon::V4_SS2_storewi0:
+ case Hexagon::V4_SS2_storewi1:
+ // Rs 7-4, u 3-0{4_2}
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = (inst & 0xf) << 2;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ break;
+ case Hexagon::V4_SS2_stored_sp:
+ // s 8-3{6_3}, Rtt 2-0
+ operand = SignExtend64<9>(((inst & 0x1f8) >> 3) << 3);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ operand = getDRegFromSubinstEncoding(inst & 0x7);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SS2_storeh_io:
+ // Rs 7-4, u 10-8{3_1}, Rt 3-0
+ operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ operand = ((inst & 0x700) >> 8) << 1;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ break;
+ case Hexagon::V4_SS2_storew_sp:
+ // u 8-4{5_2}, Rd 3-0
+ operand = ((inst & 0x1f0) >> 4) << 2;
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
+ operand = getRegFromSubinstEncoding(inst & 0xf);
+ Op = MCOperand::createReg(operand);
+ MI->addOperand(Op);
+ break;
+ default:
+ // don't crash with an invalid subinstruction
+ // llvm_unreachable("Invalid subinstruction in duplex instruction");
+ break;
+ }
+}
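+
+// Illustrative note (not part of this patch): the operand comments above use
+// the notation "u 11-8{4_2}" for a 4-bit unsigned field in bits 11-8 that is
+// scaled by 4 (shifted left by 2).  For V4_SL1_loadri_io ("Rd = memw(Rs+#u4:2)")
+// the extraction performed in addSubinstOperands is equivalent to:
+//
+//   uint32_t inst = 0x0A73;                 // hypothetical sub-instruction
+//   unsigned Rd = inst & 0xf;               // bits 3-0  -> 3  (r3)
+//   unsigned Rs = (inst >> 4) & 0xf;        // bits 7-4  -> 7  (r7)
+//   unsigned u  = ((inst >> 8) & 0xf) << 2; // bits 11-8 -> 10, scaled to 40
+//
+// i.e. the decoded instruction would be "r3 = memw(r7+#40)".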
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
new file mode 100644
index 0000000..ed7d957
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
@@ -0,0 +1,56 @@
+//=-- Hexagon.h - Top-level interface for Hexagon representation --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Hexagon back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
+
+#define Hexagon_POINTER_SIZE 4
+
+#define Hexagon_PointerSize (Hexagon_POINTER_SIZE)
+#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8)
+#define Hexagon_WordSize Hexagon_PointerSize
+#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits
+
+// allocframe saves LR and FP on stack before allocating
+// a new stack frame. This takes 8 bytes.
+#define HEXAGON_LRFP_SIZE 8
+
+// Normal instruction size (in bytes).
+#define HEXAGON_INSTR_SIZE 4
+
+// Maximum number of instructions in a packet, and the corresponding
+// maximum packet size in bytes.
+#define HEXAGON_PACKET_SIZE 4
+#define HEXAGON_MAX_PACKET_SIZE (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE)
+// Minimum number of instructions in an end-loop packet.
+#define HEXAGON_PACKET_INNER_SIZE 2
+#define HEXAGON_PACKET_OUTER_SIZE 3
+// Maximum number of instructions in a packet before shuffling,
+// including compound instructions, duplexes and extenders.
+#define HEXAGON_PRESHUFFLE_PACKET_SIZE (HEXAGON_PACKET_SIZE + 3)
+
+// Name of the global offset table as defined by the Hexagon ABI
+#define HEXAGON_GOT_SYM_NAME "_GLOBAL_OFFSET_TABLE_"
+
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class HexagonTargetMachine;
+
+ /// \brief Creates a Hexagon-specific Target Transformation Info pass.
+ ImmutablePass *createHexagonTargetTransformInfoPass(const HexagonTargetMachine *TM);
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
new file mode 100644
index 0000000..1189cfd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
@@ -0,0 +1,263 @@
+//===-- Hexagon.td - Describe the Hexagon Target Machine --*- tablegen -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the Hexagon target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Hexagon Subtarget features.
+//===----------------------------------------------------------------------===//
+
+// Hexagon Architectures
+def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Hexagon V4">;
+def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">;
+def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "V55", "Hexagon V55">;
+def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "V60", "Hexagon V60">;
+
+// Hexagon ISA Extensions
+def ExtensionHVX: SubtargetFeature<"hvx", "UseHVXOps",
+ "true", "Hexagon HVX instructions">;
+def ExtensionHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps",
+ "true", "Hexagon HVX Double instructions">;
+
+//===----------------------------------------------------------------------===//
+// Hexagon Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasV5T : Predicate<"HST->hasV5TOps()">;
+def NoV5T : Predicate<"!HST->hasV5TOps()">;
+def HasV55T : Predicate<"HST->hasV55TOps()">,
+ AssemblerPredicate<"ArchV55">;
+def HasV60T : Predicate<"HST->hasV60TOps()">,
+ AssemblerPredicate<"ArchV60">;
+def UseMEMOP : Predicate<"HST->useMemOps()">;
+def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">;
+def UseHVXDbl : Predicate<"HST->useHVXDblOps()">,
+ AssemblerPredicate<"ExtensionHVXDbl">;
+def UseHVXSgl : Predicate<"HST->useHVXSglOps()">;
+
+def UseHVX : Predicate<"HST->useHVXSglOps() || HST->useHVXDblOps()">,
+ AssemblerPredicate<"ExtensionHVX">;
+
+//===----------------------------------------------------------------------===//
+// Classes used for relation maps.
+//===----------------------------------------------------------------------===//
+
+class ImmRegShl;
+// PredRel - Filter class used to relate non-predicated instructions with their
+// predicated forms.
+class PredRel;
+// PredNewRel - Filter class used to relate predicated instructions with their
+// predicate-new forms.
+class PredNewRel: PredRel;
+// ImmRegRel - Filter class used to relate instructions having reg-reg form
+// with their reg-imm counterparts.
+class ImmRegRel;
+// NewValueRel - Filter class used to relate regular store instructions with
+// their new-value store form.
+class NewValueRel: PredNewRel;
+// AddrModeRel - Filter class used to relate load/store instructions having
+// different addressing modes with each other.
+class AddrModeRel: NewValueRel;
+class IntrinsicsRel;
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate non-predicated instructions with their
+// predicated forms - true and false.
+//
+
+def getPredOpcode : InstrMapping {
+ let FilterClass = "PredRel";
+  // Instructions with the same BaseOpcode, isNVStore, PNewValue and isNT
+  // values form a row.
+ let RowFields = ["BaseOpcode", "isNVStore", "PNewValue", "isNT"];
+ // Instructions with the same predicate sense form a column.
+ let ColFields = ["PredSense"];
+ // The key column is the unpredicated instructions.
+ let KeyCol = [""];
+ // Value columns are PredSense=true and PredSense=false
+ let ValueCols = [["true"], ["false"]];
+}
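+
+// (Illustrative note, not part of this patch.)  For each InstrMapping below,
+// TableGen emits a C++ lookup table into the generated instruction-info .inc
+// file.  Assuming the usual naming convention, the query for this map would
+// look roughly like:
+//
+//   // int getPredOpcode(uint16_t Opcode, enum PredSense inPredSense);
+//   int NewOpc = Hexagon::getPredOpcode(Opc, Hexagon::PredSense_true);
+//
+// returning -1 when no predicated form exists.  The exact name and signature
+// of the generated function are assumptions here; the generated .inc file is
+// authoritative.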
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate predicate-true instructions with their
+// predicate-false forms
+//
+def getFalsePredOpcode : InstrMapping {
+ let FilterClass = "PredRel";
+ let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken", "isNT"];
+ let ColFields = ["PredSense"];
+ let KeyCol = ["true"];
+ let ValueCols = [["false"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate predicate-false instructions with their
+// predicate-true forms
+//
+def getTruePredOpcode : InstrMapping {
+ let FilterClass = "PredRel";
+ let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken", "isNT"];
+ let ColFields = ["PredSense"];
+ let KeyCol = ["false"];
+ let ValueCols = [["true"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate predicated instructions with their .new
+// format.
+//
+def getPredNewOpcode : InstrMapping {
+ let FilterClass = "PredNewRel";
+ let RowFields = ["BaseOpcode", "PredSense", "isNVStore", "isBrTaken"];
+ let ColFields = ["PNewValue"];
+ let KeyCol = [""];
+ let ValueCols = [["new"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate .new predicated instructions with their old
+// format.
+//
+def getPredOldOpcode : InstrMapping {
+ let FilterClass = "PredNewRel";
+ let RowFields = ["BaseOpcode", "PredSense", "isNVStore"];
+ let ColFields = ["PNewValue"];
+ let KeyCol = ["new"];
+ let ValueCols = [[""]];
+}
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate store instructions with their new-value
+// format.
+//
+def getNewValueOpcode : InstrMapping {
+ let FilterClass = "NewValueRel";
+ let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode", "isNT"];
+ let ColFields = ["NValueST"];
+ let KeyCol = ["false"];
+ let ValueCols = [["true"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate new-value store instructions with their old
+// format.
+//
+def getNonNVStore : InstrMapping {
+ let FilterClass = "NewValueRel";
+ let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode", "isNT"];
+ let ColFields = ["NValueST"];
+ let KeyCol = ["true"];
+ let ValueCols = [["false"]];
+}
+
+def getBaseWithImmOffset : InstrMapping {
+ let FilterClass = "AddrModeRel";
+ let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore",
+ "isFloat"];
+ let ColFields = ["addrMode"];
+ let KeyCol = ["Absolute"];
+ let ValueCols = [["BaseImmOffset"]];
+}
+
+def getBaseWithRegOffset : InstrMapping {
+ let FilterClass = "AddrModeRel";
+ let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"];
+ let ColFields = ["addrMode"];
+ let KeyCol = ["BaseImmOffset"];
+ let ValueCols = [["BaseRegOffset"]];
+}
+
+def getRegForm : InstrMapping {
+ let FilterClass = "ImmRegRel";
+ let RowFields = ["CextOpcode", "PredSense", "PNewValue"];
+ let ColFields = ["InputType"];
+ let KeyCol = ["imm"];
+ let ValueCols = [["reg"]];
+}
+
+def getRegShlForm : InstrMapping {
+ let FilterClass = "ImmRegShl";
+ let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"];
+ let ColFields = ["InputType"];
+ let KeyCol = ["imm"];
+ let ValueCols = [["reg"]];
+}
+
+def notTakenBranchPrediction : InstrMapping {
+ let FilterClass = "PredRel";
+ let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"];
+ let ColFields = ["isBrTaken"];
+ let KeyCol = ["true"];
+ let ValueCols = [["false"]];
+}
+
+def takenBranchPrediction : InstrMapping {
+ let FilterClass = "PredRel";
+ let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"];
+ let ColFields = ["isBrTaken"];
+ let KeyCol = ["false"];
+ let ValueCols = [["true"]];
+}
+
+def getRealHWInstr : InstrMapping {
+ let FilterClass = "IntrinsicsRel";
+ let RowFields = ["BaseOpcode"];
+ let ColFields = ["InstrType"];
+ let KeyCol = ["Pseudo"];
+ let ValueCols = [["Pseudo"], ["Real"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+include "HexagonSchedule.td"
+include "HexagonRegisterInfo.td"
+include "HexagonCallingConv.td"
+include "HexagonInstrInfo.td"
+include "HexagonIntrinsics.td"
+include "HexagonIntrinsicsDerived.td"
+
+def HexagonInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Hexagon processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, SchedMachineModel Model,
+ list<SubtargetFeature> Features>
+ : ProcessorModel<Name, Model, Features>;
+
+def : Proc<"hexagonv4", HexagonModelV4,
+ [ArchV4]>;
+def : Proc<"hexagonv5", HexagonModelV4,
+ [ArchV4, ArchV5]>;
+def : Proc<"hexagonv55", HexagonModelV55,
+ [ArchV4, ArchV5, ArchV55]>;
+def : Proc<"hexagonv60", HexagonModelV60,
+ [ArchV4, ArchV5, ArchV55, ArchV60, ExtensionHVX]>;
+
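+// For reference (illustrative, not part of this patch): these processor
+// names are the values accepted by -mcpu, e.g.
+//   llc -march=hexagon -mcpu=hexagonv60 foo.ll
+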
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def HexagonAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+ string TokenizingCharacters = "#()=:.<>!+*";
+}
+
+def Hexagon : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = HexagonInstrInfo;
+ let AssemblyParserVariants = [HexagonAsmParserVariant];
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
new file mode 100644
index 0000000..e213089
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -0,0 +1,598 @@
+//===-- HexagonAsmPrinter.cpp - Print machine instrs to Hexagon assembly --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Hexagon assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonAsmPrinter.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "MCTargetDesc/HexagonInstPrinter.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCShuffler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+ void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
+ MCInst &MCB, HexagonAsmPrinter &AP);
+}
+
+#define DEBUG_TYPE "asm-printer"
+
+static cl::opt<bool> AlignCalls(
+ "hexagon-align-calls", cl::Hidden, cl::init(true),
+ cl::desc("Insert falign after call instruction for Hexagon target"));
+
+// Given a scalar register return its pair.
+inline static unsigned getHexagonRegisterPair(unsigned Reg,
+ const MCRegisterInfo *RI) {
+ assert(Hexagon::IntRegsRegClass.contains(Reg));
+ MCSuperRegIterator SR(Reg, RI, false);
+ unsigned Pair = *SR;
+ assert(Hexagon::DoubleRegsRegClass.contains(Pair));
+ return Pair;
+}
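+
+// Illustrative usage (not part of this patch):
+//
+//   unsigned Pair = getHexagonRegisterPair(Hexagon::R3, RI); // Hexagon::D1 (r3:2)
+//
+// The assertions above restrict this helper to the scalar integer registers.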
+
+HexagonAsmPrinter::HexagonAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr) {}
+
+void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (MO.getType()) {
+ default: llvm_unreachable ("<unknown operand type>");
+ case MachineOperand::MO_Register:
+ O << HexagonInstPrinter::getRegisterName(MO.getReg());
+ return;
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ MO.getMBB()->getSymbol()->print(O, MAI);
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ GetCPISymbol(MO.getIndex())->print(O, MAI);
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ // Computing the address of a global symbol, not calling it.
+ getSymbol(MO.getGlobal())->print(O, MAI);
+ printOffset(MO.getOffset(), O);
+ return;
+ }
+}
+
+//
+// isBlockOnlyReachableByFallthrough - We need to override this since the
+// default AsmPrinter does not print labels for any basic block that
+// is only reachable by a fall through. That works for all cases except
+// for the case in which the basic block is reachable by a fall through, but
+// also through an indirect branch from a jump table. In that case, the jump
+// table will contain a label not defined by AsmPrinter.
+//
+bool HexagonAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ if (MBB->hasAddressTaken())
+ return false;
+ return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
+}
+
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &OS) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS);
+ case 'c': // Don't print "$" before a global var name or constant.
+ // Hexagon never has a prefix.
+ printOperand(MI, OpNo, OS);
+ return false;
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ OS << "i";
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo, OS);
+ return false;
+}
+
+bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ const MachineOperand &Base = MI->getOperand(OpNo);
+ const MachineOperand &Offset = MI->getOperand(OpNo+1);
+
+ if (Base.isReg())
+ printOperand(MI, OpNo, O);
+ else
+ llvm_unreachable("Unimplemented");
+
+ if (Offset.isImm()) {
+ if (Offset.getImm())
+ O << " + #" << Offset.getImm();
+ }
+ else
+ llvm_unreachable("Unimplemented");
+
+ return false;
+}
+
+MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
+ MCStreamer &OutStreamer,
+ const MCOperand &Imm, int AlignSize) {
+ MCSymbol *Sym;
+ int64_t Value;
+ if (Imm.getExpr()->evaluateAsAbsolute(Value)) {
+ StringRef sectionPrefix;
+ std::string ImmString;
+ StringRef Name;
+ if (AlignSize == 8) {
+ Name = ".CONST_0000000000000000";
+ sectionPrefix = ".gnu.linkonce.l8";
+ ImmString = utohexstr(Value);
+ } else {
+ Name = ".CONST_00000000";
+ sectionPrefix = ".gnu.linkonce.l4";
+ ImmString = utohexstr(static_cast<uint32_t>(Value));
+ }
+
+ std::string symbolName = // Yes, leading zeros are kept.
+ Name.drop_back(ImmString.size()).str() + ImmString;
+ std::string sectionName = sectionPrefix.str() + symbolName;
+
+ MCSectionELF *Section = OutStreamer.getContext().getELFSection(
+ sectionName, ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ OutStreamer.SwitchSection(Section);
+
+ Sym = AP.OutContext.getOrCreateSymbol(Twine(symbolName));
+ if (Sym->isUndefined()) {
+ OutStreamer.EmitLabel(Sym);
+ OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+ OutStreamer.EmitIntValue(Value, AlignSize);
+ OutStreamer.EmitCodeAlignment(AlignSize);
+ }
+ } else {
+ assert(Imm.isExpr() && "Expected expression and found none");
+ const MachineOperand &MO = MI.getOperand(1);
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+ MCSymbol *MOSymbol = nullptr;
+ if (MO.isGlobal())
+ MOSymbol = AP.getSymbol(MO.getGlobal());
+ else if (MO.isCPI())
+ MOSymbol = AP.GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = AP.GetJTISymbol(MO.getIndex());
+ else
+ llvm_unreachable("Unknown operand type!");
+
+ StringRef SymbolName = MOSymbol->getName();
+ std::string LitaName = ".CONST_" + SymbolName.str();
+
+ MCSectionELF *Section = OutStreamer.getContext().getELFSection(
+ ".lita", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+
+ OutStreamer.SwitchSection(Section);
+ Sym = AP.OutContext.getOrCreateSymbol(Twine(LitaName));
+ if (Sym->isUndefined()) {
+ OutStreamer.EmitLabel(Sym);
+ OutStreamer.EmitSymbolAttribute(Sym, MCSA_Local);
+ OutStreamer.EmitValue(Imm.getExpr(), AlignSize);
+ OutStreamer.EmitCodeAlignment(AlignSize);
+ }
+ }
+ return Sym;
+}
+
+void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
+ const MachineInstr &MI) {
+ MCInst &MappedInst = static_cast <MCInst &>(Inst);
+ const MCRegisterInfo *RI = OutStreamer->getContext().getRegisterInfo();
+
+ switch (Inst.getOpcode()) {
+ default: return;
+
+ // "$dst = CONST64(#$src1)",
+ case Hexagon::CONST64_Float_Real:
+ case Hexagon::CONST64_Int_Real:
+ if (!OutStreamer->hasRawTextSupport()) {
+ const MCOperand &Imm = MappedInst.getOperand(1);
+ MCSectionSubPair Current = OutStreamer->getCurrentSection();
+
+ MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 8);
+
+ OutStreamer->SwitchSection(Current.first, Current.second);
+ MCInst TmpInst;
+ MCOperand &Reg = MappedInst.getOperand(0);
+ TmpInst.setOpcode(Hexagon::L2_loadrdgp);
+ TmpInst.addOperand(Reg);
+ TmpInst.addOperand(MCOperand::createExpr(
+ MCSymbolRefExpr::create(Sym, OutContext)));
+ MappedInst = TmpInst;
+
+ }
+ break;
+ case Hexagon::CONST32:
+ case Hexagon::CONST32_Float_Real:
+ case Hexagon::CONST32_Int_Real:
+ case Hexagon::FCONST32_nsdata:
+ if (!OutStreamer->hasRawTextSupport()) {
+ MCOperand &Imm = MappedInst.getOperand(1);
+ MCSectionSubPair Current = OutStreamer->getCurrentSection();
+ MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 4);
+ OutStreamer->SwitchSection(Current.first, Current.second);
+ MCInst TmpInst;
+ MCOperand &Reg = MappedInst.getOperand(0);
+ TmpInst.setOpcode(Hexagon::L2_loadrigp);
+ TmpInst.addOperand(Reg);
+ TmpInst.addOperand(MCOperand::createExpr(
+ MCSymbolRefExpr::create(Sym, OutContext)));
+ MappedInst = TmpInst;
+ }
+ break;
+
+  // C2_pxfer_map maps to the C2_or instruction. It would be possible to use
+  // C2_or during instruction selection itself, but that results in
+  // suboptimal code.
+ case Hexagon::C2_pxfer_map: {
+ MCOperand &Ps = Inst.getOperand(1);
+ MappedInst.setOpcode(Hexagon::C2_or);
+ MappedInst.addOperand(Ps);
+ return;
+ }
+
+  // Vector reduce complex multiply by scalar: Rt & 1 selects the :hi form,
+  // otherwise the :lo form. The insn is mapped from the 4-operand form to
+  // the 3-operand raw form, which takes 3 register pairs.
+ case Hexagon::M2_vrcmpys_acc_s1: {
+ MCOperand &Rt = Inst.getOperand(3);
+ assert (Rt.isReg() && "Expected register and none was found");
+ unsigned Reg = RI->getEncodingValue(Rt.getReg());
+ if (Reg & 1)
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h);
+ else
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l);
+ Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI));
+ return;
+ }
+ case Hexagon::M2_vrcmpys_s1: {
+ MCOperand &Rt = Inst.getOperand(2);
+ assert (Rt.isReg() && "Expected register and none was found");
+ unsigned Reg = RI->getEncodingValue(Rt.getReg());
+ if (Reg & 1)
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1_h);
+ else
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1_l);
+ Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI));
+ return;
+ }
+
+ case Hexagon::M2_vrcmpys_s1rp: {
+ MCOperand &Rt = Inst.getOperand(2);
+ assert (Rt.isReg() && "Expected register and none was found");
+ unsigned Reg = RI->getEncodingValue(Rt.getReg());
+ if (Reg & 1)
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h);
+ else
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l);
+ Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI));
+ return;
+ }
+
+ case Hexagon::A4_boundscheck: {
+ MCOperand &Rs = Inst.getOperand(1);
+ assert (Rs.isReg() && "Expected register and none was found");
+ unsigned Reg = RI->getEncodingValue(Rs.getReg());
+ if (Reg & 1) // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2
+ MappedInst.setOpcode(Hexagon::A4_boundscheck_hi);
+ else // raw:lo
+ MappedInst.setOpcode(Hexagon::A4_boundscheck_lo);
+ Rs.setReg(getHexagonRegisterPair(Rs.getReg(), RI));
+ return;
+ }
+ case Hexagon::S5_asrhub_rnd_sat_goodsyntax: {
+ MCOperand &MO = MappedInst.getOperand(2);
+ int64_t Imm;
+ MCExpr const *Expr = MO.getExpr();
+ bool Success = Expr->evaluateAsAbsolute(Imm);
+ assert (Success && "Expected immediate and none was found");(void)Success;
+ MCInst TmpInst;
+ if (Imm == 0) {
+ TmpInst.setOpcode(Hexagon::S2_vsathub);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ TmpInst.addOperand(MappedInst.getOperand(1));
+ MappedInst = TmpInst;
+ return;
+ }
+ TmpInst.setOpcode(Hexagon::S5_asrhub_rnd_sat);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ TmpInst.addOperand(MappedInst.getOperand(1));
+ const MCExpr *One = MCConstantExpr::create(1, OutContext);
+ const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(Sub));
+ MappedInst = TmpInst;
+ return;
+ }
+ case Hexagon::S5_vasrhrnd_goodsyntax:
+ case Hexagon::S2_asr_i_p_rnd_goodsyntax: {
+ MCOperand &MO2 = MappedInst.getOperand(2);
+ MCExpr const *Expr = MO2.getExpr();
+ int64_t Imm;
+ bool Success = Expr->evaluateAsAbsolute(Imm);
+ assert (Success && "Expected immediate and none was found");(void)Success;
+ MCInst TmpInst;
+ if (Imm == 0) {
+ TmpInst.setOpcode(Hexagon::A2_combinew);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ MCOperand &MO1 = MappedInst.getOperand(1);
+ unsigned High = RI->getSubReg(MO1.getReg(), Hexagon::subreg_hireg);
+ unsigned Low = RI->getSubReg(MO1.getReg(), Hexagon::subreg_loreg);
+ // Add a new operand for the second register in the pair.
+ TmpInst.addOperand(MCOperand::createReg(High));
+ TmpInst.addOperand(MCOperand::createReg(Low));
+ MappedInst = TmpInst;
+ return;
+ }
+
+ if (Inst.getOpcode() == Hexagon::S2_asr_i_p_rnd_goodsyntax)
+ TmpInst.setOpcode(Hexagon::S2_asr_i_p_rnd);
+ else
+ TmpInst.setOpcode(Hexagon::S5_vasrhrnd);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ TmpInst.addOperand(MappedInst.getOperand(1));
+ const MCExpr *One = MCConstantExpr::create(1, OutContext);
+ const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(Sub));
+ MappedInst = TmpInst;
+ return;
+ }
+ // if ("#u5==0") Assembler mapped to: "Rd=Rs"; else Rd=asr(Rs,#u5-1):rnd
+ case Hexagon::S2_asr_i_r_rnd_goodsyntax: {
+ MCOperand &MO = Inst.getOperand(2);
+ MCExpr const *Expr = MO.getExpr();
+ int64_t Imm;
+ bool Success = Expr->evaluateAsAbsolute(Imm);
+ assert (Success && "Expected immediate and none was found");(void)Success;
+ MCInst TmpInst;
+ if (Imm == 0) {
+ TmpInst.setOpcode(Hexagon::A2_tfr);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ TmpInst.addOperand(MappedInst.getOperand(1));
+ MappedInst = TmpInst;
+ return;
+ }
+ TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ TmpInst.addOperand(MappedInst.getOperand(1));
+ const MCExpr *One = MCConstantExpr::create(1, OutContext);
+ const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(Sub));
+ MappedInst = TmpInst;
+ return;
+ }
+ case Hexagon::TFRI_f:
+ MappedInst.setOpcode(Hexagon::A2_tfrsi);
+ return;
+ case Hexagon::TFRI_cPt_f:
+ MappedInst.setOpcode(Hexagon::C2_cmoveit);
+ return;
+ case Hexagon::TFRI_cNotPt_f:
+ MappedInst.setOpcode(Hexagon::C2_cmoveif);
+ return;
+ case Hexagon::MUX_ri_f:
+ MappedInst.setOpcode(Hexagon::C2_muxri);
+ return;
+ case Hexagon::MUX_ir_f:
+ MappedInst.setOpcode(Hexagon::C2_muxir);
+ return;
+
+ // Translate a "$Rdd = #imm" to "$Rdd = combine(#[-1,0], #imm)"
+ case Hexagon::A2_tfrpi: {
+ MCInst TmpInst;
+ MCOperand &Rdd = MappedInst.getOperand(0);
+ MCOperand &MO = MappedInst.getOperand(1);
+
+ TmpInst.setOpcode(Hexagon::A2_combineii);
+ TmpInst.addOperand(Rdd);
+ int64_t Imm;
+ bool Success = MO.getExpr()->evaluateAsAbsolute(Imm);
+ if (Success && Imm < 0) {
+ const MCExpr *MOne = MCConstantExpr::create(-1, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(MOne));
+ } else {
+ const MCExpr *Zero = MCConstantExpr::create(0, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(Zero));
+ }
+ TmpInst.addOperand(MO);
+ MappedInst = TmpInst;
+ return;
+ }
+ // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)"
+ case Hexagon::A2_tfrp: {
+ MCOperand &MO = MappedInst.getOperand(1);
+ unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg);
+ unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg);
+ MO.setReg(High);
+ // Add a new operand for the second register in the pair.
+ MappedInst.addOperand(MCOperand::createReg(Low));
+ MappedInst.setOpcode(Hexagon::A2_combinew);
+ return;
+ }
+
+ case Hexagon::A2_tfrpt:
+ case Hexagon::A2_tfrpf: {
+ MCOperand &MO = MappedInst.getOperand(2);
+ unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg);
+ unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg);
+ MO.setReg(High);
+ // Add a new operand for the second register in the pair.
+ MappedInst.addOperand(MCOperand::createReg(Low));
+ MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt)
+ ? Hexagon::C2_ccombinewt
+ : Hexagon::C2_ccombinewf);
+ return;
+ }
+ case Hexagon::A2_tfrptnew:
+ case Hexagon::A2_tfrpfnew: {
+ MCOperand &MO = MappedInst.getOperand(2);
+ unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg);
+ unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg);
+ MO.setReg(High);
+ // Add a new operand for the second register in the pair.
+ MappedInst.addOperand(MCOperand::createReg(Low));
+ MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew)
+ ? Hexagon::C2_ccombinewnewt
+ : Hexagon::C2_ccombinewnewf);
+ return;
+ }
+
+ case Hexagon::M2_mpysmi: {
+ MCOperand &Imm = MappedInst.getOperand(2);
+ MCExpr const *Expr = Imm.getExpr();
+ int64_t Value;
+ bool Success = Expr->evaluateAsAbsolute(Value);
+ assert(Success);(void)Success;
+ if (Value < 0 && Value > -256) {
+ MappedInst.setOpcode(Hexagon::M2_mpysin);
+ Imm.setExpr(MCUnaryExpr::createMinus(Expr, OutContext));
+ }
+ else
+ MappedInst.setOpcode(Hexagon::M2_mpysip);
+ return;
+ }
+
+ case Hexagon::A2_addsp: {
+ MCOperand &Rt = Inst.getOperand(1);
+ assert (Rt.isReg() && "Expected register and none was found");
+ unsigned Reg = RI->getEncodingValue(Rt.getReg());
+ if (Reg & 1)
+ MappedInst.setOpcode(Hexagon::A2_addsph);
+ else
+ MappedInst.setOpcode(Hexagon::A2_addspl);
+ Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI));
+ return;
+ }
+ case Hexagon::HEXAGON_V6_vd0_pseudo:
+ case Hexagon::HEXAGON_V6_vd0_pseudo_128B: {
+ MCInst TmpInst;
+ assert (Inst.getOperand(0).isReg() &&
+ "Expected register and none was found");
+
+ TmpInst.setOpcode(Hexagon::V6_vxor);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ MappedInst = TmpInst;
+ return;
+ }
+
+ }
+}
+
+
+/// EmitInstruction - Print out a single Hexagon MI (or MI bundle) to the
+/// current output stream.
+///
+void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MCInst MCB = HexagonMCInstrInfo::createBundle();
+ const MCInstrInfo &MCII = *Subtarget->getInstrInfo();
+
+ if (MI->isBundle()) {
+ const MachineBasicBlock* MBB = MI->getParent();
+ MachineBasicBlock::const_instr_iterator MII = MI->getIterator();
+ unsigned IgnoreCount = 0;
+
+ for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
+ if (MII->getOpcode() == TargetOpcode::DBG_VALUE ||
+ MII->getOpcode() == TargetOpcode::IMPLICIT_DEF)
+ ++IgnoreCount;
+ else
+ HexagonLowerToMC(MCII, &*MII, MCB, *this);
+ }
+ else
+ HexagonLowerToMC(MCII, MI, MCB, *this);
+
+ bool Ok = HexagonMCInstrInfo::canonicalizePacket(
+ MCII, *Subtarget, OutStreamer->getContext(), MCB, nullptr);
+ assert(Ok);
+ (void)Ok;
+ if(HexagonMCInstrInfo::bundleSize(MCB) == 0)
+ return;
+ OutStreamer->EmitInstruction(MCB, getSubtargetInfo());
+}
+
+extern "C" void LLVMInitializeHexagonAsmPrinter() {
+ RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
new file mode 100644
index 0000000..a78d97e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -0,0 +1,62 @@
+//===-- HexagonAsmPrinter.h - Print machine code to a Hexagon .s file -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Hexagon Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONASMPRINTER_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONASMPRINTER_H
+
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+ class HexagonAsmPrinter : public AsmPrinter {
+ const HexagonSubtarget *Subtarget;
+
+ public:
+ explicit HexagonAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer);
+
+ bool runOnMachineFunction(MachineFunction &Fn) override {
+ Subtarget = &Fn.getSubtarget<HexagonSubtarget>();
+ return AsmPrinter::runOnMachineFunction(Fn);
+ }
+
+ const char *getPassName() const override {
+ return "Hexagon Assembly Printer";
+ }
+
+ bool isBlockOnlyReachableByFallthrough(
+ const MachineBasicBlock *MBB) const override;
+
+ void EmitInstruction(const MachineInstr *MI) override;
+
+    void HexagonProcessInstruction(MCInst &Inst,
+                                   const MachineInstr &MI);
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) override;
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) override;
+
+ static const char *getRegisterName(unsigned RegNo);
+ };
+
+} // end of llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
new file mode 100644
index 0000000..77907b0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -0,0 +1,2778 @@
+//===--- HexagonBitSimplify.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexbit"
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonBitTracker.h"
+
+using namespace llvm;
+
+namespace llvm {
+ void initializeHexagonBitSimplifyPass(PassRegistry& Registry);
+ FunctionPass *createHexagonBitSimplify();
+}
+
+namespace {
+ // Set of virtual registers, based on BitVector.
+ struct RegisterSet : private BitVector {
+ RegisterSet() : BitVector() {}
+ explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
+ RegisterSet(const RegisterSet &RS) : BitVector(RS) {}
+
+ using BitVector::clear;
+ using BitVector::count;
+
+ unsigned find_first() const {
+ int First = BitVector::find_first();
+ if (First < 0)
+ return 0;
+ return x2v(First);
+ }
+
+ unsigned find_next(unsigned Prev) const {
+ int Next = BitVector::find_next(v2x(Prev));
+ if (Next < 0)
+ return 0;
+ return x2v(Next);
+ }
+
+ RegisterSet &insert(unsigned R) {
+ unsigned Idx = v2x(R);
+ ensure(Idx);
+ return static_cast<RegisterSet&>(BitVector::set(Idx));
+ }
+ RegisterSet &remove(unsigned R) {
+ unsigned Idx = v2x(R);
+ if (Idx >= size())
+ return *this;
+ return static_cast<RegisterSet&>(BitVector::reset(Idx));
+ }
+
+ RegisterSet &insert(const RegisterSet &Rs) {
+ return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
+ }
+ RegisterSet &remove(const RegisterSet &Rs) {
+ return static_cast<RegisterSet&>(BitVector::reset(Rs));
+ }
+
+ reference operator[](unsigned R) {
+ unsigned Idx = v2x(R);
+ ensure(Idx);
+ return BitVector::operator[](Idx);
+ }
+ bool operator[](unsigned R) const {
+ unsigned Idx = v2x(R);
+ assert(Idx < size());
+ return BitVector::operator[](Idx);
+ }
+ bool has(unsigned R) const {
+ unsigned Idx = v2x(R);
+ if (Idx >= size())
+ return false;
+ return BitVector::test(Idx);
+ }
+
+ bool empty() const {
+ return !BitVector::any();
+ }
+ bool includes(const RegisterSet &Rs) const {
+ // A.BitVector::test(B) <=> A-B != {}
+ return !Rs.BitVector::test(*this);
+ }
+ bool intersects(const RegisterSet &Rs) const {
+ return BitVector::anyCommon(Rs);
+ }
+
+ private:
+ void ensure(unsigned Idx) {
+ if (size() <= Idx)
+ resize(std::max(Idx+1, 32U));
+ }
+ static inline unsigned v2x(unsigned v) {
+ return TargetRegisterInfo::virtReg2Index(v);
+ }
+ static inline unsigned x2v(unsigned x) {
+ return TargetRegisterInfo::index2VirtReg(x);
+ }
+ };
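+
+  // Illustrative note (not part of this patch): virtual register numbers
+  // have bit 31 set, so v2x/x2v simply strip and restore that bit.  Assuming
+  // the standard TargetRegisterInfo encoding:
+  //
+  //   unsigned V0 = TargetRegisterInfo::index2VirtReg(0);  // 0x80000000
+  //   RegisterSet RS;
+  //   RS.insert(V0);               // stored as bit 0 of the BitVector
+  //   assert(RS.has(V0));
+  //
+  // find_first()/find_next() return 0, which is never a valid virtual
+  // register, to signal "no more elements".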
+
+
+ struct PrintRegSet {
+ PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
+ : RS(S), TRI(RI) {}
+ friend raw_ostream &operator<< (raw_ostream &OS,
+ const PrintRegSet &P);
+ private:
+ const RegisterSet &RS;
+ const TargetRegisterInfo *TRI;
+ };
+
+ raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
+ LLVM_ATTRIBUTE_UNUSED;
+ raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
+ OS << '{';
+ for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
+ OS << ' ' << PrintReg(R, P.TRI);
+ OS << " }";
+ return OS;
+ }
+}
+
+
+namespace {
+ class Transformation;
+
+ class HexagonBitSimplify : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonBitSimplify() : MachineFunctionPass(ID), MDT(0) {
+ initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+ virtual const char *getPassName() const {
+ return "Hexagon bit simplification";
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs);
+ static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses);
+ static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1,
+ const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W);
+ static bool isConst(const BitTracker::RegisterCell &RC, uint16_t B,
+ uint16_t W);
+ static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B,
+ uint16_t W);
+ static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B,
+ uint16_t W, uint64_t &U);
+ static bool replaceReg(unsigned OldR, unsigned NewR,
+ MachineRegisterInfo &MRI);
+ static bool getSubregMask(const BitTracker::RegisterRef &RR,
+ unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI);
+ static bool replaceRegWithSub(unsigned OldR, unsigned NewR,
+ unsigned NewSR, MachineRegisterInfo &MRI);
+ static bool replaceSubWithSub(unsigned OldR, unsigned OldSR,
+ unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI);
+ static bool parseRegSequence(const MachineInstr &I,
+ BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH);
+
+ static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits,
+ uint16_t Begin);
+ static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits,
+ uint16_t Begin, const HexagonInstrInfo &HII);
+
+ static const TargetRegisterClass *getFinalVRegClass(
+ const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI);
+ static bool isTransparentCopy(const BitTracker::RegisterRef &RD,
+ const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI);
+
+ private:
+ MachineDominatorTree *MDT;
+
+ bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs);
+ };
+
+ char HexagonBitSimplify::ID = 0;
+ typedef HexagonBitSimplify HBS;
+
+
+ // The purpose of this class is to provide a common facility to traverse
+ // the function top-down or bottom-up via the dominator tree, and keep
+ // track of the available registers.
+ class Transformation {
+ public:
+ bool TopDown;
+ Transformation(bool TD) : TopDown(TD) {}
+ virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0;
+ virtual ~Transformation() {}
+ };
+}
+
+INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit",
+ "Hexagon bit simplification", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit",
+ "Hexagon bit simplification", false, false)
+
+
+bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
+ RegisterSet &AVs) {
+ MachineDomTreeNode *N = MDT->getNode(&B);
+ typedef GraphTraits<MachineDomTreeNode*> GTN;
+ bool Changed = false;
+
+ if (T.TopDown)
+ Changed = T.processBlock(B, AVs);
+
+ RegisterSet Defs;
+ for (auto &I : B)
+ getInstrDefs(I, Defs);
+ RegisterSet NewAVs = AVs;
+ NewAVs.insert(Defs);
+
+ for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) {
+ MachineBasicBlock *SB = (*I)->getBlock();
+ Changed |= visitBlock(*SB, T, NewAVs);
+ }
+ if (!T.TopDown)
+ Changed |= T.processBlock(B, AVs);
+
+ return Changed;
+}
+
+//
+// Utility functions:
+//
+void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,
+ RegisterSet &Defs) {
+ for (auto &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ Defs.insert(R);
+ }
+}
+
+void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,
+ RegisterSet &Uses) {
+ for (auto &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ Uses.insert(R);
+ }
+}
+
+// Check if the bits in the range [B1,B1+W) in cell RC1 are equal to the
+// bits in the range [B2,B2+W) in cell RC2.
+bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1,
+ uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2,
+ uint16_t W) {
+ for (uint16_t i = 0; i < W; ++i) {
+ // If RC1[i] is "bottom", it cannot be proven equal to RC2[i].
+ if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0)
+ return false;
+ // Same for RC2[i].
+ if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0)
+ return false;
+ if (RC1[B1+i] != RC2[B2+i])
+ return false;
+ }
+ return true;
+}
+
+
+bool HexagonBitSimplify::isConst(const BitTracker::RegisterCell &RC,
+ uint16_t B, uint16_t W) {
+ assert(B < RC.width() && B+W <= RC.width());
+ for (uint16_t i = B; i < B+W; ++i)
+ if (!RC[i].num())
+ return false;
+ return true;
+}
+
+
+bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC,
+ uint16_t B, uint16_t W) {
+ assert(B < RC.width() && B+W <= RC.width());
+ for (uint16_t i = B; i < B+W; ++i)
+ if (!RC[i].is(0))
+ return false;
+ return true;
+}
+
+
+bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,
+ uint16_t B, uint16_t W, uint64_t &U) {
+ assert(B < RC.width() && B+W <= RC.width());
+ int64_t T = 0;
+ for (uint16_t i = B+W; i > B; --i) {
+ const BitTracker::BitValue &BV = RC[i-1];
+ T <<= 1;
+ if (BV.is(1))
+ T |= 1;
+ else if (!BV.is(0))
+ return false;
+ }
+ U = T;
+ return true;
+}
+
+
+bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR,
+ MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
+ !TargetRegisterInfo::isVirtualRegister(NewR))
+ return false;
+ auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
+ decltype(End) NextI;
+ for (auto I = Begin; I != End; I = NextI) {
+ NextI = std::next(I);
+ I->setReg(NewR);
+ }
+ return Begin != End;
+}
+
+
+bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR,
+ unsigned NewSR, MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
+ !TargetRegisterInfo::isVirtualRegister(NewR))
+ return false;
+ auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
+ decltype(End) NextI;
+ for (auto I = Begin; I != End; I = NextI) {
+ NextI = std::next(I);
+ I->setReg(NewR);
+ I->setSubReg(NewSR);
+ }
+ return Begin != End;
+}
+
+
+bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR,
+ unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
+ !TargetRegisterInfo::isVirtualRegister(NewR))
+ return false;
+ auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
+ decltype(End) NextI;
+ for (auto I = Begin; I != End; I = NextI) {
+ NextI = std::next(I);
+ if (I->getSubReg() != OldSR)
+ continue;
+ I->setReg(NewR);
+ I->setSubReg(NewSR);
+ }
+ return Begin != End;
+}
+
+
+// For a register ref (pair Reg:Sub), set Begin to the position of the LSB
+ // of Sub in Reg, and set Width to the size of Sub in bits. Return true
+ // if this succeeded, otherwise return false.
+bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR,
+ unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) {
+ const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg);
+ if (RC == &Hexagon::IntRegsRegClass) {
+ assert(RR.Sub == 0);
+ Begin = 0;
+ Width = 32;
+ return true;
+ }
+ if (RC == &Hexagon::DoubleRegsRegClass) {
+ if (RR.Sub == 0) {
+ Begin = 0;
+ Width = 64;
+ return true;
+ }
+ assert(RR.Sub == Hexagon::subreg_loreg || RR.Sub == Hexagon::subreg_hireg);
+ Width = 32;
+ Begin = (RR.Sub == Hexagon::subreg_loreg ? 0 : 32);
+ return true;
+ }
+ return false;
+}
+
+
+// For a REG_SEQUENCE, set SL to the low subregister and SH to the high
+// subregister.
+bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I,
+ BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH) {
+ assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE);
+ unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm();
+ assert(Sub1 != Sub2);
+ if (Sub1 == Hexagon::subreg_loreg && Sub2 == Hexagon::subreg_hireg) {
+ SL = I.getOperand(1);
+ SH = I.getOperand(3);
+ return true;
+ }
+ if (Sub1 == Hexagon::subreg_hireg && Sub2 == Hexagon::subreg_loreg) {
+ SH = I.getOperand(1);
+ SL = I.getOperand(3);
+ return true;
+ }
+ return false;
+}
+
+
+// All stores (except 64-bit stores) take a 32-bit register as the source
+// of the value to be stored. If the instruction stores into a location
+// that is shorter than 32 bits, some bits of the source register are not
+// used. For each store instruction, calculate the set of used bits in
+// the source register, and set appropriate bits in Bits. Return true if
+// the bits are calculated, false otherwise.
+bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits,
+ uint16_t Begin) {
+ using namespace Hexagon;
+
+ switch (Opc) {
+ // Store byte
+ case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32
+ case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new
+ case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32
+ case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32
+ case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32
+ case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32
+ case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new
+ case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new
+ case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new
+ case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new
+ case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32
+ case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new
+ case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32
+ case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32
+ case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32
+ case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32
+ case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new
+ case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new
+ case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new
+ case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new
+ case S4_storerb_ap: // memb(Re32=#U6)=Rt32
+ case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new
+ case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32
+ case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new
+ case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32
+ case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new
+ case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32
+ case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new
+ case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32
+ case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new
+ case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32
+ case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new
+ case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32
+ case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerbfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerbnewf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
+ case S2_storerbgp: // memb(gp+#u16:0)=Rt32
+ case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new
+ case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32
+ case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32
+ case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32
+ case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32
+ case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new
+ case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new
+ case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new
+ case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new
+ Bits.set(Begin, Begin+8);
+ return true;
+
+ // Store low half
+ case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32
+ case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new
+ case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32
+ case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32
+ case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32
+ case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32
+ case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new
+ case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new
+ case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new
+ case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new
+ case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32
+ case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new
+ case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32
+ case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32
+ case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32
+ case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32
+ case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new
+ case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new
+ case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new
+ case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new
+ case S4_storerh_ap: // memh(Re32=#U6)=Rt32
+ case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new
+ case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32
+ case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new
+ case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32
+ case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new
+ case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32
+ case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new
+ case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32
+ case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new
+ case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32
+ case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new
+ case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
+ case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
+ case S2_storerhgp: // memh(gp+#u16:1)=Rt32
+ case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new
+ case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32
+ case S4_pstorerhf_abs: // if (!Pv4) memh(#u6)=Rt32
+ case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32
+ case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32
+ case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new
+ case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new
+ case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new
+ case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new
+ Bits.set(Begin, Begin+16);
+ return true;
+
+ // Store high half
+ case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32
+ case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32
+ case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32
+ case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32
+ case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32
+ case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32
+ case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32
+ case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32
+ case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32
+ case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32
+ case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32
+ case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32
+ case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32
+ case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32
+ case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32
+ case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32
+ case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32
+ case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
+ case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
+ case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
+ case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
+ case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32
+ case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32
+ case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32
+ case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32
+ case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32
+ Bits.set(Begin+16, Begin+32);
+ return true;
+ }
+
+ return false;
+}
+
+
+// For an instruction with opcode Opc, calculate the set of bits that it
+ // uses in a register in operand OpN. This only handles cases where the set
+ // of used bits does not depend on any other operand (unlike shifts, where
+ // the used bits depend on the shift amount). For concrete instructions from a program, the
+// operand may be a subregister of a larger register, while Bits would
+// correspond to the larger register in its entirety. Because of that,
+// the parameter Begin can be used to indicate which bit of Bits should be
+ // considered the LSB of the operand.
+bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,
+ BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) {
+ using namespace Hexagon;
+
+ const MCInstrDesc &D = HII.get(Opc);
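+ // Stores are handled separately: the code below assumes the stored value is
+ // the last explicit operand, and its used bits depend only on the store width.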
+ if (D.mayStore()) {
+ if (OpN == D.getNumOperands()-1)
+ return getUsedBitsInStore(Opc, Bits, Begin);
+ return false;
+ }
+
+ switch (Opc) {
+ // One register source. Used bits: R1[0-7].
+ case A2_sxtb:
+ case A2_zxtb:
+ case A4_cmpbeqi:
+ case A4_cmpbgti:
+ case A4_cmpbgtui:
+ if (OpN == 1) {
+ Bits.set(Begin, Begin+8);
+ return true;
+ }
+ break;
+
+ // One register source. Used bits: R1[0-15].
+ case A2_aslh:
+ case A2_sxth:
+ case A2_zxth:
+ case A4_cmpheqi:
+ case A4_cmphgti:
+ case A4_cmphgtui:
+ if (OpN == 1) {
+ Bits.set(Begin, Begin+16);
+ return true;
+ }
+ break;
+
+ // One register source. Used bits: R1[16-31].
+ case A2_asrh:
+ if (OpN == 1) {
+ Bits.set(Begin+16, Begin+32);
+ return true;
+ }
+ break;
+
+ // Two register sources. Used bits: R1[0-7], R2[0-7].
+ case A4_cmpbeq:
+ case A4_cmpbgt:
+ case A4_cmpbgtu:
+ if (OpN == 1) {
+ Bits.set(Begin, Begin+8);
+ return true;
+ }
+ break;
+
+ // Two register sources. Used bits: R1[0-15], R2[0-15].
+ case A4_cmpheq:
+ case A4_cmphgt:
+ case A4_cmphgtu:
+ case A2_addh_h16_ll:
+ case A2_addh_h16_sat_ll:
+ case A2_addh_l16_ll:
+ case A2_addh_l16_sat_ll:
+ case A2_combine_ll:
+ case A2_subh_h16_ll:
+ case A2_subh_h16_sat_ll:
+ case A2_subh_l16_ll:
+ case A2_subh_l16_sat_ll:
+ case M2_mpy_acc_ll_s0:
+ case M2_mpy_acc_ll_s1:
+ case M2_mpy_acc_sat_ll_s0:
+ case M2_mpy_acc_sat_ll_s1:
+ case M2_mpy_ll_s0:
+ case M2_mpy_ll_s1:
+ case M2_mpy_nac_ll_s0:
+ case M2_mpy_nac_ll_s1:
+ case M2_mpy_nac_sat_ll_s0:
+ case M2_mpy_nac_sat_ll_s1:
+ case M2_mpy_rnd_ll_s0:
+ case M2_mpy_rnd_ll_s1:
+ case M2_mpy_sat_ll_s0:
+ case M2_mpy_sat_ll_s1:
+ case M2_mpy_sat_rnd_ll_s0:
+ case M2_mpy_sat_rnd_ll_s1:
+ case M2_mpyd_acc_ll_s0:
+ case M2_mpyd_acc_ll_s1:
+ case M2_mpyd_ll_s0:
+ case M2_mpyd_ll_s1:
+ case M2_mpyd_nac_ll_s0:
+ case M2_mpyd_nac_ll_s1:
+ case M2_mpyd_rnd_ll_s0:
+ case M2_mpyd_rnd_ll_s1:
+ case M2_mpyu_acc_ll_s0:
+ case M2_mpyu_acc_ll_s1:
+ case M2_mpyu_ll_s0:
+ case M2_mpyu_ll_s1:
+ case M2_mpyu_nac_ll_s0:
+ case M2_mpyu_nac_ll_s1:
+ case M2_mpyud_acc_ll_s0:
+ case M2_mpyud_acc_ll_s1:
+ case M2_mpyud_ll_s0:
+ case M2_mpyud_ll_s1:
+ case M2_mpyud_nac_ll_s0:
+ case M2_mpyud_nac_ll_s1:
+ if (OpN == 1 || OpN == 2) {
+ Bits.set(Begin, Begin+16);
+ return true;
+ }
+ break;
+
+ // Two register sources. Used bits: R1[0-15], R2[16-31].
+ case A2_addh_h16_lh:
+ case A2_addh_h16_sat_lh:
+ case A2_combine_lh:
+ case A2_subh_h16_lh:
+ case A2_subh_h16_sat_lh:
+ case M2_mpy_acc_lh_s0:
+ case M2_mpy_acc_lh_s1:
+ case M2_mpy_acc_sat_lh_s0:
+ case M2_mpy_acc_sat_lh_s1:
+ case M2_mpy_lh_s0:
+ case M2_mpy_lh_s1:
+ case M2_mpy_nac_lh_s0:
+ case M2_mpy_nac_lh_s1:
+ case M2_mpy_nac_sat_lh_s0:
+ case M2_mpy_nac_sat_lh_s1:
+ case M2_mpy_rnd_lh_s0:
+ case M2_mpy_rnd_lh_s1:
+ case M2_mpy_sat_lh_s0:
+ case M2_mpy_sat_lh_s1:
+ case M2_mpy_sat_rnd_lh_s0:
+ case M2_mpy_sat_rnd_lh_s1:
+ case M2_mpyd_acc_lh_s0:
+ case M2_mpyd_acc_lh_s1:
+ case M2_mpyd_lh_s0:
+ case M2_mpyd_lh_s1:
+ case M2_mpyd_nac_lh_s0:
+ case M2_mpyd_nac_lh_s1:
+ case M2_mpyd_rnd_lh_s0:
+ case M2_mpyd_rnd_lh_s1:
+ case M2_mpyu_acc_lh_s0:
+ case M2_mpyu_acc_lh_s1:
+ case M2_mpyu_lh_s0:
+ case M2_mpyu_lh_s1:
+ case M2_mpyu_nac_lh_s0:
+ case M2_mpyu_nac_lh_s1:
+ case M2_mpyud_acc_lh_s0:
+ case M2_mpyud_acc_lh_s1:
+ case M2_mpyud_lh_s0:
+ case M2_mpyud_lh_s1:
+ case M2_mpyud_nac_lh_s0:
+ case M2_mpyud_nac_lh_s1:
+ // These four are actually LH.
+ case A2_addh_l16_hl:
+ case A2_addh_l16_sat_hl:
+ case A2_subh_l16_hl:
+ case A2_subh_l16_sat_hl:
+ if (OpN == 1) {
+ Bits.set(Begin, Begin+16);
+ return true;
+ }
+ if (OpN == 2) {
+ Bits.set(Begin+16, Begin+32);
+ return true;
+ }
+ break;
+
+ // Two register sources, used bits: R1[16-31], R2[0-15].
+ case A2_addh_h16_hl:
+ case A2_addh_h16_sat_hl:
+ case A2_combine_hl:
+ case A2_subh_h16_hl:
+ case A2_subh_h16_sat_hl:
+ case M2_mpy_acc_hl_s0:
+ case M2_mpy_acc_hl_s1:
+ case M2_mpy_acc_sat_hl_s0:
+ case M2_mpy_acc_sat_hl_s1:
+ case M2_mpy_hl_s0:
+ case M2_mpy_hl_s1:
+ case M2_mpy_nac_hl_s0:
+ case M2_mpy_nac_hl_s1:
+ case M2_mpy_nac_sat_hl_s0:
+ case M2_mpy_nac_sat_hl_s1:
+ case M2_mpy_rnd_hl_s0:
+ case M2_mpy_rnd_hl_s1:
+ case M2_mpy_sat_hl_s0:
+ case M2_mpy_sat_hl_s1:
+ case M2_mpy_sat_rnd_hl_s0:
+ case M2_mpy_sat_rnd_hl_s1:
+ case M2_mpyd_acc_hl_s0:
+ case M2_mpyd_acc_hl_s1:
+ case M2_mpyd_hl_s0:
+ case M2_mpyd_hl_s1:
+ case M2_mpyd_nac_hl_s0:
+ case M2_mpyd_nac_hl_s1:
+ case M2_mpyd_rnd_hl_s0:
+ case M2_mpyd_rnd_hl_s1:
+ case M2_mpyu_acc_hl_s0:
+ case M2_mpyu_acc_hl_s1:
+ case M2_mpyu_hl_s0:
+ case M2_mpyu_hl_s1:
+ case M2_mpyu_nac_hl_s0:
+ case M2_mpyu_nac_hl_s1:
+ case M2_mpyud_acc_hl_s0:
+ case M2_mpyud_acc_hl_s1:
+ case M2_mpyud_hl_s0:
+ case M2_mpyud_hl_s1:
+ case M2_mpyud_nac_hl_s0:
+ case M2_mpyud_nac_hl_s1:
+ if (OpN == 1) {
+ Bits.set(Begin+16, Begin+32);
+ return true;
+ }
+ if (OpN == 2) {
+ Bits.set(Begin, Begin+16);
+ return true;
+ }
+ break;
+
+ // Two register sources, used bits: R1[16-31], R2[16-31].
+ case A2_addh_h16_hh:
+ case A2_addh_h16_sat_hh:
+ case A2_combine_hh:
+ case A2_subh_h16_hh:
+ case A2_subh_h16_sat_hh:
+ case M2_mpy_acc_hh_s0:
+ case M2_mpy_acc_hh_s1:
+ case M2_mpy_acc_sat_hh_s0:
+ case M2_mpy_acc_sat_hh_s1:
+ case M2_mpy_hh_s0:
+ case M2_mpy_hh_s1:
+ case M2_mpy_nac_hh_s0:
+ case M2_mpy_nac_hh_s1:
+ case M2_mpy_nac_sat_hh_s0:
+ case M2_mpy_nac_sat_hh_s1:
+ case M2_mpy_rnd_hh_s0:
+ case M2_mpy_rnd_hh_s1:
+ case M2_mpy_sat_hh_s0:
+ case M2_mpy_sat_hh_s1:
+ case M2_mpy_sat_rnd_hh_s0:
+ case M2_mpy_sat_rnd_hh_s1:
+ case M2_mpyd_acc_hh_s0:
+ case M2_mpyd_acc_hh_s1:
+ case M2_mpyd_hh_s0:
+ case M2_mpyd_hh_s1:
+ case M2_mpyd_nac_hh_s0:
+ case M2_mpyd_nac_hh_s1:
+ case M2_mpyd_rnd_hh_s0:
+ case M2_mpyd_rnd_hh_s1:
+ case M2_mpyu_acc_hh_s0:
+ case M2_mpyu_acc_hh_s1:
+ case M2_mpyu_hh_s0:
+ case M2_mpyu_hh_s1:
+ case M2_mpyu_nac_hh_s0:
+ case M2_mpyu_nac_hh_s1:
+ case M2_mpyud_acc_hh_s0:
+ case M2_mpyud_acc_hh_s1:
+ case M2_mpyud_hh_s0:
+ case M2_mpyud_hh_s1:
+ case M2_mpyud_nac_hh_s0:
+ case M2_mpyud_nac_hh_s1:
+ if (OpN == 1 || OpN == 2) {
+ Bits.set(Begin+16, Begin+32);
+ return true;
+ }
+ break;
+ }
+
+ return false;
+}
+
+
+// Calculate the register class that matches Reg:Sub. For example, if
+ // vreg1 is a double register, then vreg1:subreg_hireg would match the "int"
+ // register class.
+const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
+ const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return nullptr;
+ auto *RC = MRI.getRegClass(RR.Reg);
+ if (RR.Sub == 0)
+ return RC;
+
+ auto VerifySR = [] (unsigned Sub) -> void {
+ assert(Sub == Hexagon::subreg_hireg || Sub == Hexagon::subreg_loreg);
+ };
+
+ switch (RC->getID()) {
+ case Hexagon::DoubleRegsRegClassID:
+ VerifySR(RR.Sub);
+ return &Hexagon::IntRegsRegClass;
+ }
+ return nullptr;
+}
+
+
+// Check if RD could be replaced with RS at any possible use of RD.
+ // For example, a predicate register cannot be replaced with an integer
+// register, but a 64-bit register with a subregister can be replaced
+// with a 32-bit register.
+bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
+ const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(RD.Reg) ||
+ !TargetRegisterInfo::isVirtualRegister(RS.Reg))
+ return false;
+ // Return false if one (or both) classes are nullptr.
+ auto *DRC = getFinalVRegClass(RD, MRI);
+ if (!DRC)
+ return false;
+
+ return DRC == getFinalVRegClass(RS, MRI);
+}
+
+
+//
+// Dead code elimination
+//
+namespace {
+ class DeadCodeElimination {
+ public:
+ DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)
+ : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()),
+ MDT(mdt), MRI(mf.getRegInfo()) {}
+
+ bool run() {
+ return runOnNode(MDT.getRootNode());
+ }
+
+ private:
+ bool isDead(unsigned R) const;
+ bool runOnNode(MachineDomTreeNode *N);
+
+ MachineFunction &MF;
+ const HexagonInstrInfo &HII;
+ MachineDominatorTree &MDT;
+ MachineRegisterInfo &MRI;
+ };
+}
+
+
+bool DeadCodeElimination::isDead(unsigned R) const {
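+ // R is dead if its only uses are debug values, or PHIs whose result is R
+ // itself (a self-referencing PHI does not keep the register alive).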
+ for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
+ MachineInstr *UseI = I->getParent();
+ if (UseI->isDebugValue())
+ continue;
+ if (UseI->isPHI()) {
+ assert(!UseI->getOperand(0).getSubReg());
+ unsigned DR = UseI->getOperand(0).getReg();
+ if (DR == R)
+ continue;
+ }
+ return false;
+ }
+ return true;
+}
+
+
+bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
+ bool Changed = false;
+ typedef GraphTraits<MachineDomTreeNode*> GTN;
+ for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
+ Changed |= runOnNode(*I);
+
+ MachineBasicBlock *B = N->getBlock();
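+ // Collect the instructions bottom-up, so that a user is visited (and possibly
+ // deleted) before the instructions feeding it, and so that erasing does not
+ // invalidate the iteration over the block.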
+ std::vector<MachineInstr*> Instrs;
+ for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
+ Instrs.push_back(&*I);
+
+ for (auto MI : Instrs) {
+ unsigned Opc = MI->getOpcode();
+ // Do not touch lifetime markers. This is why the target-independent DCE
+ // cannot be used.
+ if (Opc == TargetOpcode::LIFETIME_START ||
+ Opc == TargetOpcode::LIFETIME_END)
+ continue;
+ bool Store = false;
+ if (MI->isInlineAsm())
+ continue;
+ // Delete PHIs if possible.
+ if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store))
+ continue;
+
+ bool AllDead = true;
+ SmallVector<unsigned,2> Regs;
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R) || !isDead(R)) {
+ AllDead = false;
+ break;
+ }
+ Regs.push_back(R);
+ }
+ if (!AllDead)
+ continue;
+
+ B->erase(MI);
+ for (unsigned i = 0, n = Regs.size(); i != n; ++i)
+ MRI.markUsesInDebugValueAsUndef(Regs[i]);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+
+//
+// Eliminate redundant instructions
+//
+// This transformation will identify instructions where the output register
+// is the same as one of its input registers. This only works on instructions
+// that define a single register (unlike post-increment loads, for example).
+// The equality check is actually more detailed: the code calculates which
+// bits of the output are used, and only compares these bits with the input
+// registers.
+// If the output matches an input, the instruction is replaced with COPY.
+// The copies will be removed by another transformation.
+namespace {
+ class RedundantInstrElimination : public Transformation {
+ public:
+ RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii,
+ MachineRegisterInfo &mri)
+ : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ private:
+ bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN,
+ unsigned &LostB, unsigned &LostE);
+ bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN,
+ unsigned &LostB, unsigned &LostE);
+ bool computeUsedBits(unsigned Reg, BitVector &Bits);
+ bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits,
+ uint16_t Begin);
+ bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS);
+
+ const HexagonInstrInfo &HII;
+ MachineRegisterInfo &MRI;
+ BitTracker &BT;
+ };
+}
+
+
+// Check if the instruction is a lossy shift left, where the input being
+// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
+// of bit indices that are lost.
+bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI,
+ unsigned OpN, unsigned &LostB, unsigned &LostE) {
+ using namespace Hexagon;
+ unsigned Opc = MI.getOpcode();
+ unsigned ImN, RegN, Width;
+ switch (Opc) {
+ case S2_asl_i_p:
+ ImN = 2;
+ RegN = 1;
+ Width = 64;
+ break;
+ case S2_asl_i_p_acc:
+ case S2_asl_i_p_and:
+ case S2_asl_i_p_nac:
+ case S2_asl_i_p_or:
+ case S2_asl_i_p_xacc:
+ ImN = 3;
+ RegN = 2;
+ Width = 64;
+ break;
+ case S2_asl_i_r:
+ ImN = 2;
+ RegN = 1;
+ Width = 32;
+ break;
+ case S2_addasl_rrri:
+ case S4_andi_asl_ri:
+ case S4_ori_asl_ri:
+ case S4_addi_asl_ri:
+ case S4_subi_asl_ri:
+ case S2_asl_i_r_acc:
+ case S2_asl_i_r_and:
+ case S2_asl_i_r_nac:
+ case S2_asl_i_r_or:
+ case S2_asl_i_r_sat:
+ case S2_asl_i_r_xacc:
+ ImN = 3;
+ RegN = 2;
+ Width = 32;
+ break;
+ default:
+ return false;
+ }
+
+ if (RegN != OpN)
+ return false;
+
+ assert(MI.getOperand(ImN).isImm());
+ unsigned S = MI.getOperand(ImN).getImm();
+ if (S == 0)
+ return false;
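+ // Shifting left by S discards the top S bits of the Width-bit source.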
+ LostB = Width-S;
+ LostE = Width;
+ return true;
+}
+
+
+// Check if the instruction is a lossy shift right, where the input being
+// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
+// of bit indices that are lost.
+bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI,
+ unsigned OpN, unsigned &LostB, unsigned &LostE) {
+ using namespace Hexagon;
+ unsigned Opc = MI.getOpcode();
+ unsigned ImN, RegN;
+ switch (Opc) {
+ case S2_asr_i_p:
+ case S2_lsr_i_p:
+ ImN = 2;
+ RegN = 1;
+ break;
+ case S2_asr_i_p_acc:
+ case S2_asr_i_p_and:
+ case S2_asr_i_p_nac:
+ case S2_asr_i_p_or:
+ case S2_lsr_i_p_acc:
+ case S2_lsr_i_p_and:
+ case S2_lsr_i_p_nac:
+ case S2_lsr_i_p_or:
+ case S2_lsr_i_p_xacc:
+ ImN = 3;
+ RegN = 2;
+ break;
+ case S2_asr_i_r:
+ case S2_lsr_i_r:
+ ImN = 2;
+ RegN = 1;
+ break;
+ case S4_andi_lsr_ri:
+ case S4_ori_lsr_ri:
+ case S4_addi_lsr_ri:
+ case S4_subi_lsr_ri:
+ case S2_asr_i_r_acc:
+ case S2_asr_i_r_and:
+ case S2_asr_i_r_nac:
+ case S2_asr_i_r_or:
+ case S2_lsr_i_r_acc:
+ case S2_lsr_i_r_and:
+ case S2_lsr_i_r_nac:
+ case S2_lsr_i_r_or:
+ case S2_lsr_i_r_xacc:
+ ImN = 3;
+ RegN = 2;
+ break;
+
+ default:
+ return false;
+ }
+
+ if (RegN != OpN)
+ return false;
+
+ assert(MI.getOperand(ImN).isImm());
+ unsigned S = MI.getOperand(ImN).getImm();
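+ // Shifting right by S discards the low S bits of the source.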
+ LostB = 0;
+ LostE = S;
+ return true;
+}
+
+
+// Calculate the bit vector that corresponds to the used bits of register Reg.
+ // The vector Bits has the same size as Reg (in bits). If the calculation
+ // fails (i.e. the used bits are unknown), return false; otherwise return
+ // true and set the corresponding bits in Bits.
+bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
+ BitVector Used(Bits.size());
+ RegisterSet Visited;
+ std::vector<unsigned> Pending;
+ Pending.push_back(Reg);
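+ // Worklist traversal of the use web: uses inside PHIs and copies are
+ // followed transitively through the defined register; any other user is
+ // analyzed below for the bits it actually reads.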
+
+ for (unsigned i = 0; i < Pending.size(); ++i) {
+ unsigned R = Pending[i];
+ if (Visited.has(R))
+ continue;
+ Visited.insert(R);
+ for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
+ BitTracker::RegisterRef UR = *I;
+ unsigned B, W;
+ if (!HBS::getSubregMask(UR, B, W, MRI))
+ return false;
+ MachineInstr &UseI = *I->getParent();
+ if (UseI.isPHI() || UseI.isCopy()) {
+ unsigned DefR = UseI.getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DefR))
+ return false;
+ Pending.push_back(DefR);
+ } else {
+ if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
+ return false;
+ }
+ }
+ }
+ Bits |= Used;
+ return true;
+}
+
+
+// Calculate the bits used by instruction MI in a register in operand OpN.
+ // Return true/false if the calculation succeeds/fails. If it succeeds, set
+// used bits in Bits. This function does not reset any bits in Bits, so
+// subsequent calls over different instructions will result in the union
+// of the used bits in all these instructions.
+// The register in question may be used with a sub-register, whereas Bits
+// holds the bits for the entire register. To keep track of that, the
+ // argument Begin indicates which bit of Bits is the least significant bit
+// of the register used in operand OpN. For example, in instruction:
+// vreg1 = S2_lsr_i_r vreg2:subreg_hireg, 10
+// the operand 1 is a 32-bit register, which happens to be a subregister
+// of the 64-bit register vreg2, and that subregister starts at position 32.
+ // In this case Begin=32, since Bits[32] would be the least significant bit
+// of vreg2:subreg_hireg.
+bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
+ unsigned OpN, BitVector &Bits, uint16_t Begin) {
+ unsigned Opc = MI.getOpcode();
+ BitVector T(Bits.size());
+ bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
+ // Even if we don't have bits yet, we could still provide some information
+ // if the instruction is a lossy shift: the lost bits will be marked as
+ // not used.
+ unsigned LB, LE;
+ if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
+ assert(MI.getOperand(OpN).isReg());
+ BitTracker::RegisterRef RR = MI.getOperand(OpN);
+ const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
+ uint16_t Width = RC->getSize()*8;
+
+ if (!GotBits)
+ T.set(Begin, Begin+Width);
+ assert(LB <= LE && LB < Width && LE <= Width);
+ T.reset(Begin+LB, Begin+LE);
+ GotBits = true;
+ }
+ if (GotBits)
+ Bits |= T;
+ return GotBits;
+}
+
+
+// Calculates the used bits in RD ("defined register"), and checks if these
+// bits in RS ("used register") and RD are identical.
+bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,
+ BitTracker::RegisterRef RS) {
+ const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
+ const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
+
+ unsigned DB, DW;
+ if (!HBS::getSubregMask(RD, DB, DW, MRI))
+ return false;
+ unsigned SB, SW;
+ if (!HBS::getSubregMask(RS, SB, SW, MRI))
+ return false;
+ if (SW != DW)
+ return false;
+
+ BitVector Used(DC.width());
+ if (!computeUsedBits(RD.Reg, Used))
+ return false;
+
+ for (unsigned i = 0; i != DW; ++i)
+ if (Used[i+DB] && DC[DB+i] != SC[SB+i])
+ return false;
+ return true;
+}
+
+
+bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
+ const RegisterSet&) {
+ bool Changed = false;
+
+ for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) {
+ NextI = std::next(I);
+ MachineInstr *MI = &*I;
+
+ if (MI->getOpcode() == TargetOpcode::COPY)
+ continue;
+ if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
+ continue;
+ unsigned NumD = MI->getDesc().getNumDefs();
+ if (NumD != 1)
+ continue;
+
+ BitTracker::RegisterRef RD = MI->getOperand(0);
+ if (!BT.has(RD.Reg))
+ continue;
+ const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
+
+ // Find a source operand that is equal to the result.
+ for (auto &Op : MI->uses()) {
+ if (!Op.isReg())
+ continue;
+ BitTracker::RegisterRef RS = Op;
+ if (!BT.has(RS.Reg))
+ continue;
+ if (!HBS::isTransparentCopy(RD, RS, MRI))
+ continue;
+
+ unsigned BN, BW;
+ if (!HBS::getSubregMask(RS, BN, BW, MRI))
+ continue;
+
+ const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
+ if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW))
+ continue;
+
+ // If found, replace the instruction with a COPY.
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ BuildMI(B, I, DL, HII.get(TargetOpcode::COPY), NewR)
+ .addReg(RS.Reg, 0, RS.Sub);
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), SC);
+ Changed = true;
+ break;
+ }
+ }
+
+ return Changed;
+}
+
+
+//
+// Const generation
+//
+// Recognize instructions that produce constant values known at compile-time.
+// Replace them with register definitions that load these constants directly.
+namespace {
+ class ConstGeneration : public Transformation {
+ public:
+ ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
+ MachineRegisterInfo &mri)
+ : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ private:
+ bool isTfrConst(const MachineInstr *MI) const;
+ bool isConst(unsigned R, int64_t &V) const;
+ unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C,
+ MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL);
+
+ const HexagonInstrInfo &HII;
+ MachineRegisterInfo &MRI;
+ BitTracker &BT;
+ };
+}
+
+bool ConstGeneration::isConst(unsigned R, int64_t &C) const {
+ if (!BT.has(R))
+ return false;
+ const BitTracker::RegisterCell &RC = BT.lookup(R);
+ int64_t T = 0;
+ for (unsigned i = RC.width(); i > 0; --i) {
+ const BitTracker::BitValue &V = RC[i-1];
+ T <<= 1;
+ if (V.is(1))
+ T |= 1;
+ else if (!V.is(0))
+ return false;
+ }
+ C = T;
+ return true;
+}
+
+
+bool ConstGeneration::isTfrConst(const MachineInstr *MI) const {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineii:
+ case Hexagon::A2_tfrsi:
+ case Hexagon::A2_tfrpi:
+ case Hexagon::TFR_PdTrue:
+ case Hexagon::TFR_PdFalse:
+ case Hexagon::CONST32_Int_Real:
+ case Hexagon::CONST64_Int_Real:
+ return true;
+ }
+ return false;
+}
+
+
+// Generate a transfer-immediate instruction that is appropriate for the
+// register class and the actual value being transferred.
+unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
+ MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) {
+ unsigned Reg = MRI.createVirtualRegister(RC);
+ if (RC == &Hexagon::IntRegsRegClass) {
+ BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg)
+ .addImm(int32_t(C));
+ return Reg;
+ }
+
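+ // 64-bit constants: use the short tfrpi form for small values, a combine of
+ // the two 32-bit halves when either half fits in 8 bits, and otherwise the
+ // CONST64 pseudo.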
+ if (RC == &Hexagon::DoubleRegsRegClass) {
+ if (isInt<8>(C)) {
+ BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg)
+ .addImm(C);
+ return Reg;
+ }
+
+ unsigned Lo = Lo_32(C), Hi = Hi_32(C);
+ if (isInt<8>(Lo) || isInt<8>(Hi)) {
+ unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii
+ : Hexagon::A4_combineii;
+ BuildMI(B, At, DL, HII.get(Opc), Reg)
+ .addImm(int32_t(Hi))
+ .addImm(int32_t(Lo));
+ return Reg;
+ }
+
+ BuildMI(B, At, DL, HII.get(Hexagon::CONST64_Int_Real), Reg)
+ .addImm(C);
+ return Reg;
+ }
+
+ if (RC == &Hexagon::PredRegsRegClass) {
+ unsigned Opc;
+ if (C == 0)
+ Opc = Hexagon::TFR_PdFalse;
+ else if ((C & 0xFF) == 0xFF)
+ Opc = Hexagon::TFR_PdTrue;
+ else
+ return 0;
+ BuildMI(B, At, DL, HII.get(Opc), Reg);
+ return Reg;
+ }
+
+ return 0;
+}
+
+
+bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
+ bool Changed = false;
+ RegisterSet Defs;
+
+ for (auto I = B.begin(), E = B.end(); I != E; ++I) {
+ if (isTfrConst(I))
+ continue;
+ Defs.clear();
+ HBS::getInstrDefs(*I, Defs);
+ if (Defs.count() != 1)
+ continue;
+ unsigned DR = Defs.find_first();
+ if (!TargetRegisterInfo::isVirtualRegister(DR))
+ continue;
+ int64_t C;
+ if (isConst(DR, C)) {
+ DebugLoc DL = I->getDebugLoc();
+ auto At = I->isPHI() ? B.getFirstNonPHI() : I;
+ unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
+ if (ImmReg) {
+ HBS::replaceReg(DR, ImmReg, MRI);
+ BT.put(ImmReg, BT.lookup(DR));
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+
+//
+// Copy generation
+//
+// Identify pairs of available registers which hold identical values.
+// In such cases, only one of them needs to be calculated, the other one
+// will be defined as a copy of the first.
+//
+// Copy propagation
+//
+ // Eliminate register copies RD = RS by replacing the uses of RD with
+ // uses of RS.
+namespace {
+ class CopyGeneration : public Transformation {
+ public:
+ CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
+ MachineRegisterInfo &mri)
+ : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ private:
+ bool findMatch(const BitTracker::RegisterRef &Inp,
+ BitTracker::RegisterRef &Out, const RegisterSet &AVs);
+
+ const HexagonInstrInfo &HII;
+ MachineRegisterInfo &MRI;
+ BitTracker &BT;
+ };
+
+ class CopyPropagation : public Transformation {
+ public:
+ CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
+ : Transformation(false), MRI(mri) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ static bool isCopyReg(unsigned Opc);
+ private:
+ bool propagateRegCopy(MachineInstr &MI);
+
+ MachineRegisterInfo &MRI;
+ };
+
+}
+
+
+/// Check if there is a register in AVs that is identical to Inp. If so,
+/// set Out to the found register. The output may be a pair Reg:Sub.
+bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
+ BitTracker::RegisterRef &Out, const RegisterSet &AVs) {
+ if (!BT.has(Inp.Reg))
+ return false;
+ const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
+ unsigned B, W;
+ if (!HBS::getSubregMask(Inp, B, W, MRI))
+ return false;
+
+ for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) {
+ if (!BT.has(R) || !HBS::isTransparentCopy(R, Inp, MRI))
+ continue;
+ const BitTracker::RegisterCell &RC = BT.lookup(R);
+ unsigned RW = RC.width();
+ if (W == RW) {
+ if (MRI.getRegClass(Inp.Reg) != MRI.getRegClass(R))
+ continue;
+ if (!HBS::isEqual(InpRC, B, RC, 0, W))
+ continue;
+ Out.Reg = R;
+ Out.Sub = 0;
+ return true;
+ }
+ // Check if there is a super-register whose part (accessed through a
+ // subregister) is equal to the input.
+ // Only do double registers for now.
+ if (W*2 != RW)
+ continue;
+ if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)
+ continue;
+
+ if (HBS::isEqual(InpRC, B, RC, 0, W))
+ Out.Sub = Hexagon::subreg_loreg;
+ else if (HBS::isEqual(InpRC, B, RC, W, W))
+ Out.Sub = Hexagon::subreg_hireg;
+ else
+ continue;
+ Out.Reg = R;
+ return true;
+ }
+ return false;
+}
+
+
+bool CopyGeneration::processBlock(MachineBasicBlock &B,
+ const RegisterSet &AVs) {
+ RegisterSet AVB(AVs);
+ bool Changed = false;
+ RegisterSet Defs;
+
+ for (auto I = B.begin(), E = B.end(), NextI = I; I != E;
+ ++I, AVB.insert(Defs)) {
+ NextI = std::next(I);
+ Defs.clear();
+ HBS::getInstrDefs(*I, Defs);
+
+ unsigned Opc = I->getOpcode();
+ if (CopyPropagation::isCopyReg(Opc))
+ continue;
+
+ for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) {
+ BitTracker::RegisterRef MR;
+ if (!findMatch(R, MR, AVB))
+ continue;
+ DebugLoc DL = I->getDebugLoc();
+ auto *FRC = HBS::getFinalVRegClass(MR, MRI);
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ auto At = I->isPHI() ? B.getFirstNonPHI() : I;
+ BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
+ .addReg(MR.Reg, 0, MR.Sub);
+ BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
+ }
+ }
+
+ return Changed;
+}
+
+
+bool CopyPropagation::isCopyReg(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::COPY:
+ case TargetOpcode::REG_SEQUENCE:
+ case Hexagon::A2_tfr:
+ case Hexagon::A2_tfrp:
+ case Hexagon::A2_combinew:
+ case Hexagon::A4_combineir:
+ case Hexagon::A4_combineri:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+
+bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
+ bool Changed = false;
+ unsigned Opc = MI.getOpcode();
+ BitTracker::RegisterRef RD = MI.getOperand(0);
+ assert(MI.getOperand(0).getSubReg() == 0);
+
+ switch (Opc) {
+ case TargetOpcode::COPY:
+ case Hexagon::A2_tfr:
+ case Hexagon::A2_tfrp: {
+ BitTracker::RegisterRef RS = MI.getOperand(1);
+ if (!HBS::isTransparentCopy(RD, RS, MRI))
+ break;
+ if (RS.Sub != 0)
+ Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI);
+ else
+ Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI);
+ break;
+ }
+ case TargetOpcode::REG_SEQUENCE: {
+ BitTracker::RegisterRef SL, SH;
+ if (HBS::parseRegSequence(MI, SL, SH)) {
+ Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
+ SL.Reg, SL.Sub, MRI);
+ Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
+ SH.Reg, SH.Sub, MRI);
+ }
+ break;
+ }
+ case Hexagon::A2_combinew: {
+ BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2);
+ Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
+ RL.Reg, RL.Sub, MRI);
+ Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
+ RH.Reg, RH.Sub, MRI);
+ break;
+ }
+ case Hexagon::A4_combineir:
+ case Hexagon::A4_combineri: {
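+ // A4_combineir is combine(#i, Rs) and A4_combineri is combine(Rs, #i), so
+ // the register operand and the halfword it supplies differ between the two.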
+ unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
+ unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::subreg_loreg
+ : Hexagon::subreg_hireg;
+ BitTracker::RegisterRef RS = MI.getOperand(SrcX);
+ Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI);
+ break;
+ }
+ }
+ return Changed;
+}
+
+
+bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
+ std::vector<MachineInstr*> Instrs;
+ for (auto I = B.rbegin(), E = B.rend(); I != E; ++I)
+ Instrs.push_back(&*I);
+
+ bool Changed = false;
+ for (auto I : Instrs) {
+ unsigned Opc = I->getOpcode();
+ if (!CopyPropagation::isCopyReg(Opc))
+ continue;
+ Changed |= propagateRegCopy(*I);
+ }
+
+ return Changed;
+}
+
+
+//
+// Bit simplification
+//
+// Recognize patterns that can be simplified and replace them with the
+// simpler forms.
+ // This is by no means complete.
+namespace {
+ class BitSimplification : public Transformation {
+ public:
+ BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii,
+ MachineRegisterInfo &mri)
+ : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ private:
+ struct RegHalf : public BitTracker::RegisterRef {
+ bool Low; // Low/High halfword.
+ };
+
+ bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC,
+ unsigned B, RegHalf &RH);
+
+ bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC,
+ BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt);
+ unsigned getCombineOpcode(bool HLow, bool LLow);
+
+ bool genStoreUpperHalf(MachineInstr *MI);
+ bool genStoreImmediate(MachineInstr *MI);
+ bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC);
+ bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC);
+ bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC);
+ bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC);
+ bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC);
+
+ const HexagonInstrInfo &HII;
+ MachineRegisterInfo &MRI;
+ BitTracker &BT;
+ };
+}
+
+
+// Check if the bits [B..B+16) in register cell RC form a valid halfword,
+// i.e. [0..16), [16..32), etc. of some register. If so, return true and
+// set the information about the found register in RH.
+bool BitSimplification::matchHalf(unsigned SelfR,
+ const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) {
+ // XXX This could be searching in the set of available registers, in case
+ // the match is not exact.
+
+ // Match 16-bit chunks, where the RC[B..B+15] references exactly one
+ // register and all the bits B..B+15 match between RC and the register.
+ // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... },
+ // and RC = { [0]:0 [1-15]:v1[1-15]... }.
+ bool Low = false;
+ unsigned I = B;
+ while (I < B+16 && RC[I].num())
+ I++;
+ if (I == B+16)
+ return false;
+
+ unsigned Reg = RC[I].RefI.Reg;
+ unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B.
+ if (P < I-B)
+ return false;
+ unsigned Pos = P - (I-B);
+
+ if (Reg == 0 || Reg == SelfR) // Don't match "self".
+ return false;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (!BT.has(Reg))
+ return false;
+
+ const BitTracker::RegisterCell &SC = BT.lookup(Reg);
+ if (Pos+16 > SC.width())
+ return false;
+
+ for (unsigned i = 0; i < 16; ++i) {
+ const BitTracker::BitValue &RV = RC[i+B];
+ if (RV.Type == BitTracker::BitValue::Ref) {
+ if (RV.RefI.Reg != Reg)
+ return false;
+ if (RV.RefI.Pos != i+Pos)
+ return false;
+ continue;
+ }
+ if (RC[i+B] != SC[i+Pos])
+ return false;
+ }
+
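+ // Only halfword-aligned positions (0, 16, 32, 48) can produce a match; map
+ // the position to a low/high subregister and a low/high halfword flag.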
+ unsigned Sub = 0;
+ switch (Pos) {
+ case 0:
+ Sub = Hexagon::subreg_loreg;
+ Low = true;
+ break;
+ case 16:
+ Sub = Hexagon::subreg_loreg;
+ Low = false;
+ break;
+ case 32:
+ Sub = Hexagon::subreg_hireg;
+ Low = true;
+ break;
+ case 48:
+ Sub = Hexagon::subreg_hireg;
+ Low = false;
+ break;
+ default:
+ return false;
+ }
+
+ RH.Reg = Reg;
+ RH.Sub = Sub;
+ RH.Low = Low;
+ // If the subregister is not valid with the register, set it to 0.
+ if (!HBS::getFinalVRegClass(RH, MRI))
+ RH.Sub = 0;
+
+ return true;
+}
+
+
+// Check if RC matches the pattern of a S2_packhl. If so, return true and
+// set the inputs Rs and Rt.
+bool BitSimplification::matchPackhl(unsigned SelfR,
+ const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs,
+ BitTracker::RegisterRef &Rt) {
+ RegHalf L1, H1, L2, H2;
+
+ if (!matchHalf(SelfR, RC, 0, L2) || !matchHalf(SelfR, RC, 16, L1))
+ return false;
+ if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))
+ return false;
+
+ // Rs = H1.L1, Rt = H2.L2
+ if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
+ return false;
+ if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)
+ return false;
+
+ Rs = H1;
+ Rt = H2;
+ return true;
+}
+
+
+unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
+ return HLow ? LLow ? Hexagon::A2_combine_ll
+ : Hexagon::A2_combine_lh
+ : LLow ? Hexagon::A2_combine_hl
+ : Hexagon::A2_combine_hh;
+}
+
+
+// If MI stores the upper halfword of a register (potentially obtained via
+// shifts or extracts), replace it with a storerf instruction. This could
+// cause the "extraction" code to become dead.
+bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc != Hexagon::S2_storerh_io)
+ return false;
+
+ MachineOperand &ValOp = MI->getOperand(2);
+ BitTracker::RegisterRef RS = ValOp;
+ if (!BT.has(RS.Reg))
+ return false;
+ const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
+ RegHalf H;
+ if (!matchHalf(0, RC, 0, H))
+ return false;
+ if (H.Low)
+ return false;
+ MI->setDesc(HII.get(Hexagon::S2_storerf_io));
+ ValOp.setReg(H.Reg);
+ ValOp.setSubReg(H.Sub);
+ return true;
+}
+
+
+// If MI stores a value known at compile-time, and the value is within a range
+// that avoids using constant-extenders, replace it with a store-immediate.
+bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ unsigned Align = 0;
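+ // The switch falls through intentionally: Align ends up as log2 of the
+ // access size (0 for byte, 1 for halfword, 2 for word stores).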
+ switch (Opc) {
+ case Hexagon::S2_storeri_io:
+ Align++;
+ case Hexagon::S2_storerh_io:
+ Align++;
+ case Hexagon::S2_storerb_io:
+ break;
+ default:
+ return false;
+ }
+
+ // Avoid stores to frame-indices (due to an unknown offset).
+ if (!MI->getOperand(0).isReg())
+ return false;
+ MachineOperand &OffOp = MI->getOperand(1);
+ if (!OffOp.isImm())
+ return false;
+
+ int64_t Off = OffOp.getImm();
+ // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).
+ if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))
+ return false;
+ // Source register:
+ BitTracker::RegisterRef RS = MI->getOperand(2);
+ if (!BT.has(RS.Reg))
+ return false;
+ const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
+ uint64_t U;
+ if (!HBS::getConst(RC, 0, RC.width(), U))
+ return false;
+
+ // Only consider 8-bit values to avoid constant-extenders.
+ int V;
+ switch (Opc) {
+ case Hexagon::S2_storerb_io:
+ V = int8_t(U);
+ break;
+ case Hexagon::S2_storerh_io:
+ V = int16_t(U);
+ break;
+ case Hexagon::S2_storeri_io:
+ V = int32_t(U);
+ break;
+ }
+ if (!isInt<8>(V))
+ return false;
+
+ MI->RemoveOperand(2);
+ switch (Opc) {
+ case Hexagon::S2_storerb_io:
+ MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
+ break;
+ case Hexagon::S2_storerh_io:
+ MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
+ break;
+ case Hexagon::S2_storeri_io:
+ MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
+ break;
+ }
+ MI->addOperand(MachineOperand::CreateImm(V));
+ return true;
+}
+
+
+ // If MI is equivalent to S2_packhl, generate the S2_packhl. MI could be the
+// last instruction in a sequence that results in something equivalent to
+// the pack-halfwords. The intent is to cause the entire sequence to become
+// dead.
+bool BitSimplification::genPackhl(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == Hexagon::S2_packhl)
+ return false;
+ BitTracker::RegisterRef Rs, Rt;
+ if (!matchPackhl(RD.Reg, RC, Rs, Rt))
+ return false;
+
+ MachineBasicBlock &B = *MI->getParent();
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(B, MI, DL, HII.get(Hexagon::S2_packhl), NewR)
+ .addReg(Rs.Reg, 0, Rs.Sub)
+ .addReg(Rt.Reg, 0, Rt.Sub);
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ return true;
+}
+
+
+ // If MI produces a halfword of the input in the low half of the output,
+// replace it with zero-extend or extractu.
+bool BitSimplification::genExtractHalf(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ RegHalf L;
+ // Check for halfword in low 16 bits, zeros elsewhere.
+ if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Prefer zxth, since zxth can go in any slot, while extractu only in
+ // slots 2 and 3.
+ unsigned NewR = 0;
+ if (L.Low && Opc != Hexagon::A2_zxth) {
+ NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(B, MI, DL, HII.get(Hexagon::A2_zxth), NewR)
+ .addReg(L.Reg, 0, L.Sub);
+ } else if (!L.Low && Opc != Hexagon::S2_extractu) {
+ NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(B, MI, DL, HII.get(Hexagon::S2_extractu), NewR)
+ .addReg(L.Reg, 0, L.Sub)
+ .addImm(16)
+ .addImm(16);
+ }
+ if (NewR == 0)
+ return false;
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ return true;
+}
+
+
+ // If MI is equivalent to a combine(.L/.H, .L/.H), replace it with the
+// combine.
+bool BitSimplification::genCombineHalf(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ RegHalf L, H;
+ // Check for combine h/l
+ if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
+ return false;
+ // Do nothing if this is just a reg copy.
+ if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ unsigned COpc = getCombineOpcode(H.Low, L.Low);
+ if (COpc == Opc)
+ return false;
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(B, MI, DL, HII.get(COpc), NewR)
+ .addReg(H.Reg, 0, H.Sub)
+ .addReg(L.Reg, 0, L.Sub);
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ return true;
+}
+
+
+// If MI resets high bits of a register and keeps the lower ones, replace it
+// with zero-extend byte/half, and-immediate, or extractu, as appropriate.
+bool BitSimplification::genExtractLow(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::A2_zxtb:
+ case Hexagon::A2_zxth:
+ case Hexagon::S2_extractu:
+ return false;
+ }
+ if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) {
+ int32_t Imm = MI->getOperand(2).getImm();
+ if (isInt<10>(Imm))
+ return false;
+ }
+
+ if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
+ return false;
+ unsigned W = RC.width();
+ while (W > 0 && RC[W-1].is(0))
+ W--;
+ if (W == 0 || W == RC.width())
+ return false;
+ unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb
+ : (W == 16) ? Hexagon::A2_zxth
+ : (W < 10) ? Hexagon::A2_andir
+ : Hexagon::S2_extractu;
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ for (auto &Op : MI->uses()) {
+ if (!Op.isReg())
+ continue;
+ BitTracker::RegisterRef RS = Op;
+ if (!BT.has(RS.Reg))
+ continue;
+ const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
+ unsigned BN, BW;
+ if (!HBS::getSubregMask(RS, BN, BW, MRI))
+ continue;
+ if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W))
+ continue;
+
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ auto MIB = BuildMI(B, MI, DL, HII.get(NewOpc), NewR)
+ .addReg(RS.Reg, 0, RS.Sub);
+ if (NewOpc == Hexagon::A2_andir)
+ MIB.addImm((1 << W) - 1);
+ else if (NewOpc == Hexagon::S2_extractu)
+ MIB.addImm(W).addImm(0);
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ return true;
+ }
+ return false;
+}
+
+
+// Check for tstbit simplification opportunity, where the bit being checked
+// can be tracked back to another register. For example:
+// vreg2 = S2_lsr_i_r vreg1, 5
+// vreg3 = S2_tstbit_i vreg2, 0
+// =>
+// vreg3 = S2_tstbit_i vreg1, 5
+bool BitSimplification::simplifyTstbit(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc != Hexagon::S2_tstbit_i)
+ return false;
+
+ unsigned BN = MI->getOperand(2).getImm();
+ BitTracker::RegisterRef RS = MI->getOperand(1);
+ unsigned F, W;
+ DebugLoc DL = MI->getDebugLoc();
+ if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI))
+ return false;
+ MachineBasicBlock &B = *MI->getParent();
+
+ const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
+ const BitTracker::BitValue &V = SC[F+BN];
+ if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) {
+ const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg);
+ // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is
+ // a double register, need to use a subregister and adjust bit
+ // number.
+ unsigned P = UINT_MAX;
+ BitTracker::RegisterRef RR(V.RefI.Reg, 0);
+ if (TC == &Hexagon::DoubleRegsRegClass) {
+ P = V.RefI.Pos;
+ RR.Sub = Hexagon::subreg_loreg;
+ if (P >= 32) {
+ P -= 32;
+ RR.Sub = Hexagon::subreg_hireg;
+ }
+ } else if (TC == &Hexagon::IntRegsRegClass) {
+ P = V.RefI.Pos;
+ }
+ if (P != UINT_MAX) {
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
+ BuildMI(B, MI, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
+ .addReg(RR.Reg, 0, RR.Sub)
+ .addImm(P);
+ HBS::replaceReg(RD.Reg, NewR, MRI);
+ BT.put(NewR, RC);
+ return true;
+ }
+ } else if (V.is(0) || V.is(1)) {
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
+ unsigned NewOpc = V.is(0) ? Hexagon::TFR_PdFalse : Hexagon::TFR_PdTrue;
+ BuildMI(B, MI, DL, HII.get(NewOpc), NewR);
+ HBS::replaceReg(RD.Reg, NewR, MRI);
+ return true;
+ }
+
+ return false;
+}
+
+
+bool BitSimplification::processBlock(MachineBasicBlock &B,
+ const RegisterSet &AVs) {
+ bool Changed = false;
+ RegisterSet AVB = AVs;
+ RegisterSet Defs;
+
+ for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
+ MachineInstr *MI = &*I;
+ Defs.clear();
+ HBS::getInstrDefs(*MI, Defs);
+
+ unsigned Opc = MI->getOpcode();
+ if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
+ continue;
+
+ if (MI->mayStore()) {
+ bool T = genStoreUpperHalf(MI);
+ T = T || genStoreImmediate(MI);
+ Changed |= T;
+ continue;
+ }
+
+ if (Defs.count() != 1)
+ continue;
+ const MachineOperand &Op0 = MI->getOperand(0);
+ if (!Op0.isReg() || !Op0.isDef())
+ continue;
+ BitTracker::RegisterRef RD = Op0;
+ if (!BT.has(RD.Reg))
+ continue;
+ const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
+ const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);
+
+ if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
+ bool T = genPackhl(MI, RD, RC);
+ Changed |= T;
+ continue;
+ }
+
+ if (FRC->getID() == Hexagon::IntRegsRegClassID) {
+ bool T = genExtractHalf(MI, RD, RC);
+ T = T || genCombineHalf(MI, RD, RC);
+ T = T || genExtractLow(MI, RD, RC);
+ Changed |= T;
+ continue;
+ }
+
+ if (FRC->getID() == Hexagon::PredRegsRegClassID) {
+ bool T = simplifyTstbit(MI, RD, RC);
+ Changed |= T;
+ continue;
+ }
+ }
+ return Changed;
+}
+
+
+bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ auto &HRI = *HST.getRegisterInfo();
+ auto &HII = *HST.getInstrInfo();
+
+ MDT = &getAnalysis<MachineDominatorTree>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool Changed;
+
+ Changed = DeadCodeElimination(MF, *MDT).run();
+
+ const HexagonEvaluator HE(HRI, MRI, HII, MF);
+ BitTracker BT(HE, MF);
+ DEBUG(BT.trace(true));
+ BT.run();
+
+ MachineBasicBlock &Entry = MF.front();
+
+ RegisterSet AIG; // Available registers for IG.
+ ConstGeneration ImmG(BT, HII, MRI);
+ Changed |= visitBlock(Entry, ImmG, AIG);
+
+ RegisterSet ARE; // Available registers for RIE.
+ RedundantInstrElimination RIE(BT, HII, MRI);
+ Changed |= visitBlock(Entry, RIE, ARE);
+
+ RegisterSet ACG; // Available registers for CG.
+ CopyGeneration CopyG(BT, HII, MRI);
+ Changed |= visitBlock(Entry, CopyG, ACG);
+
+ RegisterSet ACP; // Available registers for CP.
+ CopyPropagation CopyP(HRI, MRI);
+ Changed |= visitBlock(Entry, CopyP, ACP);
+
+ Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
+
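+ // Rerun the bit tracker before bit simplification: the passes above created
+ // new virtual registers and rewrote uses of the old ones.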
+ BT.run();
+ RegisterSet ABS; // Available registers for BS.
+ BitSimplification BitS(BT, HII, MRI);
+ Changed |= visitBlock(Entry, BitS, ABS);
+
+ Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
+
+ if (Changed) {
+ for (auto &B : MF)
+ for (auto &I : B)
+ I.clearKillInfo();
+ DeadCodeElimination(MF, *MDT).run();
+ }
+ return Changed;
+}
+
+
+// Recognize loops where the code at the end of the loop matches the code
+ // before the entry of the loop, and the matching code is such that it can
+ // be simplified. This pass relies on the bit simplification above and only
+ // prepares code in a way that can be handled by the bit simplification.
+//
+// This is the motivating testcase (and explanation):
+//
+// {
+// loop0(.LBB0_2, r1) // %for.body.preheader
+// r5:4 = memd(r0++#8)
+// }
+// {
+// r3 = lsr(r4, #16)
+// r7:6 = combine(r5, r5)
+// }
+// {
+// r3 = insert(r5, #16, #16)
+// r7:6 = vlsrw(r7:6, #16)
+// }
+// .LBB0_2:
+// {
+// memh(r2+#4) = r5
+// memh(r2+#6) = r6 # R6 is really R5.H
+// }
+// {
+// r2 = add(r2, #8)
+// memh(r2+#0) = r4
+// memh(r2+#2) = r3 # R3 is really R4.H
+// }
+// {
+// r5:4 = memd(r0++#8)
+// }
+// { # "Shuffling" code that sets up R3 and R6
+// r3 = lsr(r4, #16) # so that their halves can be stored in the
+// r7:6 = combine(r5, r5) # next iteration. This could be folded into
+// } # the stores if the code was at the beginning
+// { # of the loop iteration. Since the same code
+// r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved
+// r7:6 = vlsrw(r7:6, #16) # there.
+// }:endloop0
+//
+//
+// The outcome:
+//
+// {
+// loop0(.LBB0_2, r1)
+// r5:4 = memd(r0++#8)
+// }
+// .LBB0_2:
+// {
+// memh(r2+#4) = r5
+// memh(r2+#6) = r5.h
+// }
+// {
+// r2 = add(r2, #8)
+// memh(r2+#0) = r4
+// memh(r2+#2) = r4.h
+// }
+// {
+// r5:4 = memd(r0++#8)
+// }:endloop0
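+//
+// Informally, the code below does this in four steps: collect the loop
+// header phis whose values feed bit-shuffling or store instructions, gather
+// the dependent shuffling instructions at the bottom of the loop into
+// groups, use the bit tracker to verify that a group computes its output
+// from its input in the same way as the value arriving from the preheader,
+// and finally clone the group at the top of the loop (moveGroup), replacing
+// the uses of the original phi value with the group's new output.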
+
+namespace llvm {
+ FunctionPass *createHexagonLoopRescheduling();
+ void initializeHexagonLoopReschedulingPass(PassRegistry&);
+}
+
+namespace {
+ class HexagonLoopRescheduling : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonLoopRescheduling() : MachineFunctionPass(ID),
+ HII(0), HRI(0), MRI(0), BTP(0) {
+ initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
+ MachineRegisterInfo *MRI;
+ BitTracker *BTP;
+
+ struct LoopCand {
+ LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb,
+ MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {}
+ MachineBasicBlock *LB, *PB, *EB;
+ };
+ typedef std::vector<MachineInstr*> InstrList;
+ struct InstrGroup {
+ BitTracker::RegisterRef Inp, Out;
+ InstrList Ins;
+ };
+ struct PhiInfo {
+ PhiInfo(MachineInstr &P, MachineBasicBlock &B);
+ unsigned DefR;
+ BitTracker::RegisterRef LR, PR;
+ MachineBasicBlock *LB, *PB;
+ };
+
+ static unsigned getDefReg(const MachineInstr *MI);
+ bool isConst(unsigned Reg) const;
+ bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const;
+ bool isStoreInput(const MachineInstr *MI, unsigned DefR) const;
+ bool isShuffleOf(unsigned OutR, unsigned InpR) const;
+ bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2,
+ unsigned &InpR2) const;
+ void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB,
+ MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);
+ bool processLoop(LoopCand &C);
+ };
+}
+
+char HexagonLoopRescheduling::ID = 0;
+
+INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched",
+ "Hexagon Loop Rescheduling", false, false)
+
+
+HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,
+ MachineBasicBlock &B) {
+ DefR = HexagonLoopRescheduling::getDefReg(&P);
+ LB = &B;
+ PB = nullptr;
+ for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) {
+ const MachineOperand &OpB = P.getOperand(i+1);
+ if (OpB.getMBB() == &B) {
+ LR = P.getOperand(i);
+ continue;
+ }
+ PB = OpB.getMBB();
+ PR = P.getOperand(i);
+ }
+}
+
+
+unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {
+ RegisterSet Defs;
+ HBS::getInstrDefs(*MI, Defs);
+ if (Defs.count() != 1)
+ return 0;
+ return Defs.find_first();
+}
+
+
+bool HexagonLoopRescheduling::isConst(unsigned Reg) const {
+ if (!BTP->has(Reg))
+ return false;
+ const BitTracker::RegisterCell &RC = BTP->lookup(Reg);
+ for (unsigned i = 0, w = RC.width(); i < w; ++i) {
+ const BitTracker::BitValue &V = RC[i];
+ if (!V.is(0) && !V.is(1))
+ return false;
+ }
+ return true;
+}
+
+
+bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,
+ unsigned DefR) const {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case TargetOpcode::COPY:
+ case Hexagon::S2_lsr_i_r:
+ case Hexagon::S2_asr_i_r:
+ case Hexagon::S2_asl_i_r:
+ case Hexagon::S2_lsr_i_p:
+ case Hexagon::S2_asr_i_p:
+ case Hexagon::S2_asl_i_p:
+ case Hexagon::S2_insert:
+ case Hexagon::A2_or:
+ case Hexagon::A2_orp:
+ case Hexagon::A2_and:
+ case Hexagon::A2_andp:
+ case Hexagon::A2_combinew:
+ case Hexagon::A4_combineri:
+ case Hexagon::A4_combineir:
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineii:
+ case Hexagon::A2_combine_ll:
+ case Hexagon::A2_combine_lh:
+ case Hexagon::A2_combine_hl:
+ case Hexagon::A2_combine_hh:
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,
+ unsigned InpR) const {
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg())
+ continue;
+ if (Op.getReg() == InpR)
+ return i == n-1;
+ }
+ return false;
+}
+
+
+bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {
+ if (!BTP->has(OutR) || !BTP->has(InpR))
+ return false;
+ const BitTracker::RegisterCell &OutC = BTP->lookup(OutR);
+ for (unsigned i = 0, w = OutC.width(); i < w; ++i) {
+ const BitTracker::BitValue &V = OutC[i];
+ if (V.Type != BitTracker::BitValue::Ref)
+ continue;
+ if (V.RefI.Reg != InpR)
+ return false;
+ }
+ return true;
+}
+
+
+bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1,
+ unsigned OutR2, unsigned &InpR2) const {
+ if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))
+ return false;
+ const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1);
+ const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2);
+ unsigned W = OutC1.width();
+ unsigned MatchR = 0;
+ if (W != OutC2.width())
+ return false;
+ for (unsigned i = 0; i < W; ++i) {
+ const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i];
+ if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One)
+ return false;
+ if (V1.Type != BitTracker::BitValue::Ref)
+ continue;
+ if (V1.RefI.Pos != V2.RefI.Pos)
+ return false;
+ if (V1.RefI.Reg != InpR1)
+ return false;
+ if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2)
+ return false;
+ if (!MatchR)
+ MatchR = V2.RefI.Reg;
+ else if (V2.RefI.Reg != MatchR)
+ return false;
+ }
+ InpR2 = MatchR;
+ return true;
+}
+
+
+void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
+ MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,
+ unsigned NewPredR) {
+ DenseMap<unsigned,unsigned> RegMap;
+
+ const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);
+ unsigned PhiR = MRI->createVirtualRegister(PhiRC);
+ BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
+ .addReg(NewPredR)
+ .addMBB(&PB)
+ .addReg(G.Inp.Reg)
+ .addMBB(&LB);
+ RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));
+
+ for (unsigned i = G.Ins.size(); i > 0; --i) {
+ const MachineInstr *SI = G.Ins[i-1];
+ unsigned DR = getDefReg(SI);
+ const TargetRegisterClass *RC = MRI->getRegClass(DR);
+ unsigned NewDR = MRI->createVirtualRegister(RC);
+ DebugLoc DL = SI->getDebugLoc();
+
+ auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
+ for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) {
+ const MachineOperand &Op = SI->getOperand(j);
+ if (!Op.isReg()) {
+ MIB.addOperand(Op);
+ continue;
+ }
+ if (!Op.isUse())
+ continue;
+ unsigned UseR = RegMap[Op.getReg()];
+ MIB.addReg(UseR, 0, Op.getSubReg());
+ }
+ RegMap.insert(std::make_pair(DR, NewDR));
+ }
+
+ HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
+}
+
+
+bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
+ DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n");
+ std::vector<PhiInfo> Phis;
+ for (auto &I : *C.LB) {
+ if (!I.isPHI())
+ break;
+ unsigned PR = getDefReg(&I);
+ if (isConst(PR))
+ continue;
+ bool BadUse = false, GoodUse = false;
+ for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseI = UI->getParent();
+ if (UseI->getParent() != C.LB) {
+ BadUse = true;
+ break;
+ }
+ if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
+ GoodUse = true;
+ }
+ if (BadUse || !GoodUse)
+ continue;
+
+ Phis.push_back(PhiInfo(I, *C.LB));
+ }
+
+ DEBUG({
+ dbgs() << "Phis: {";
+ for (auto &I : Phis) {
+ dbgs() << ' ' << PrintReg(I.DefR, HRI) << "=phi("
+ << PrintReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber()
+ << ',' << PrintReg(I.LR.Reg, HRI, I.LR.Sub) << ":b"
+ << I.LB->getNumber() << ')';
+ }
+ dbgs() << " }\n";
+ });
+
+ if (Phis.empty())
+ return false;
+
+ bool Changed = false;
+ InstrList ShufIns;
+
+ // Go backwards in the block: for each bit shuffling instruction, check
+ // if that instruction could potentially be moved to the front of the loop:
+ // the output of the loop cannot be used in a non-shuffling instruction
+ // in this loop.
+ for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E; ++I) {
+ if (I->isTerminator())
+ continue;
+ if (I->isPHI())
+ break;
+
+ RegisterSet Defs;
+ HBS::getInstrDefs(*I, Defs);
+ if (Defs.count() != 1)
+ continue;
+ unsigned DefR = Defs.find_first();
+ if (!TargetRegisterInfo::isVirtualRegister(DefR))
+ continue;
+ if (!isBitShuffle(&*I, DefR))
+ continue;
+
+ bool BadUse = false;
+ for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseI = UI->getParent();
+ if (UseI->getParent() == C.LB) {
+ if (UseI->isPHI()) {
+ // If the use is in a phi node in this loop, then it should be
+ // the value corresponding to the back edge.
+ unsigned Idx = UI.getOperandNo();
+ if (UseI->getOperand(Idx+1).getMBB() != C.LB)
+ BadUse = true;
+ } else {
+ auto F = std::find(ShufIns.begin(), ShufIns.end(), UseI);
+ if (F == ShufIns.end())
+ BadUse = true;
+ }
+ } else {
+        // This use is outside of the loop. It is only acceptable if there
+        // is an epilog block suitable for a copy-out.
+ if (C.EB == nullptr)
+ BadUse = true;
+ }
+ if (BadUse)
+ break;
+ }
+
+ if (BadUse)
+ continue;
+ ShufIns.push_back(&*I);
+ }
+
+ // Partition the list of shuffling instructions into instruction groups,
+ // where each group has to be moved as a whole (i.e. a group is a chain of
+ // dependent instructions). A group produces a single live output register,
+ // which is meant to be the input of the loop phi node (although this is
+ // not checked here yet). It also uses a single register as its input,
+ // which is some value produced in the loop body. After moving the group
+ // to the beginning of the loop, that input register would need to be
+ // the loop-carried register (through a phi node) instead of the (currently
+ // loop-carried) output register.
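+  //
+  // In the motivating example above, the lsr/insert pair computing r3 at
+  // the bottom of the loop would form one such group: its single output is
+  // the value of r3 consumed by the loop phi, and its single input is the
+  // register pair loaded into r5:4.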
+ typedef std::vector<InstrGroup> InstrGroupList;
+ InstrGroupList Groups;
+
+ for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) {
+ MachineInstr *SI = ShufIns[i];
+ if (SI == nullptr)
+ continue;
+
+ InstrGroup G;
+ G.Ins.push_back(SI);
+ G.Out.Reg = getDefReg(SI);
+ RegisterSet Inputs;
+ HBS::getInstrUses(*SI, Inputs);
+
+ for (unsigned j = i+1; j < n; ++j) {
+ MachineInstr *MI = ShufIns[j];
+ if (MI == nullptr)
+ continue;
+ RegisterSet Defs;
+ HBS::getInstrDefs(*MI, Defs);
+ // If this instruction does not define any pending inputs, skip it.
+ if (!Defs.intersects(Inputs))
+ continue;
+ // Otherwise, add it to the current group and remove the inputs that
+ // are defined by MI.
+ G.Ins.push_back(MI);
+ Inputs.remove(Defs);
+ // Then add all registers used by MI.
+ HBS::getInstrUses(*MI, Inputs);
+ ShufIns[j] = nullptr;
+ }
+
+ // Only add a group if it requires at most one register.
+ if (Inputs.count() > 1)
+ continue;
+ auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
+ return G.Out.Reg == P.LR.Reg;
+ };
+ if (std::find_if(Phis.begin(), Phis.end(), LoopInpEq) == Phis.end())
+ continue;
+
+ G.Inp.Reg = Inputs.find_first();
+ Groups.push_back(G);
+ }
+
+ DEBUG({
+ for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
+ InstrGroup &G = Groups[i];
+ dbgs() << "Group[" << i << "] inp: "
+ << PrintReg(G.Inp.Reg, HRI, G.Inp.Sub)
+ << " out: " << PrintReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
+ for (unsigned j = 0, m = G.Ins.size(); j < m; ++j)
+ dbgs() << " " << *G.Ins[j];
+ }
+ });
+
+ for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
+ InstrGroup &G = Groups[i];
+ if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
+ continue;
+ auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
+ return G.Out.Reg == P.LR.Reg;
+ };
+ auto F = std::find_if(Phis.begin(), Phis.end(), LoopInpEq);
+ if (F == Phis.end())
+ continue;
+ unsigned PredR = 0;
+ if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PredR)) {
+ const MachineInstr *DefPredR = MRI->getVRegDef(F->PR.Reg);
+ unsigned Opc = DefPredR->getOpcode();
+ if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
+ continue;
+ if (!DefPredR->getOperand(1).isImm())
+ continue;
+ if (DefPredR->getOperand(1).getImm() != 0)
+ continue;
+ const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
+ if (RC != MRI->getRegClass(F->PR.Reg)) {
+ PredR = MRI->createVirtualRegister(RC);
+ unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
+ : Hexagon::A2_tfrpi;
+ auto T = C.PB->getFirstTerminator();
+ DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
+ BuildMI(*C.PB, T, DL, HII->get(TfrI), PredR)
+ .addImm(0);
+ } else {
+ PredR = F->PR.Reg;
+ }
+ }
+ assert(MRI->getRegClass(PredR) == MRI->getRegClass(G.Inp.Reg));
+ moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PredR);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+
+bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ HII = HST.getInstrInfo();
+ HRI = HST.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
+ BitTracker BT(HE, MF);
+ DEBUG(BT.trace(true));
+ BT.run();
+ BTP = &BT;
+
+ std::vector<LoopCand> Cand;
+
+ for (auto &B : MF) {
+ if (B.pred_size() != 2 || B.succ_size() != 2)
+ continue;
+ MachineBasicBlock *PB = nullptr;
+ bool IsLoop = false;
+ for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) {
+ if (*PI != &B)
+ PB = *PI;
+ else
+ IsLoop = true;
+ }
+ if (!IsLoop)
+ continue;
+
+ MachineBasicBlock *EB = nullptr;
+ for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) {
+ if (*SI == &B)
+ continue;
+      // Set EB to the epilog block, if it has only 1 predecessor (i.e. the
+      // edge from B to EB is non-critical).
+ if ((*SI)->pred_size() == 1)
+ EB = *SI;
+ break;
+ }
+
+ Cand.push_back(LoopCand(&B, PB, EB));
+ }
+
+ bool Changed = false;
+ for (auto &C : Cand)
+ Changed |= processLoop(C);
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonLoopRescheduling() {
+ return new HexagonLoopRescheduling();
+}
+
+FunctionPass *llvm::createHexagonBitSimplify() {
+ return new HexagonBitSimplify();
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
new file mode 100644
index 0000000..d5848dc
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -0,0 +1,1175 @@
+//===--- HexagonBitTracker.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonBitTracker.h"
+
+using namespace llvm;
+
+typedef BitTracker BT;
+
+HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri,
+ MachineRegisterInfo &mri,
+ const HexagonInstrInfo &tii,
+ MachineFunction &mf)
+ : MachineEvaluator(tri, mri), MF(mf), MFI(*mf.getFrameInfo()), TII(tii) {
+ // Populate the VRX map (VR to extension-type).
+ // Go over all the formal parameters of the function. If a given parameter
+ // P is sign- or zero-extended, locate the virtual register holding that
+  // parameter and create an entry in the VRX map indicating the type of
+  // extension (and the source type).
+  // This is a bit complicated to do accurately, since the memory layout
+  // information is necessary to precisely determine whether an aggregate
+  // parameter will be passed in a register or in memory. What is given in
+  // MRI is the association between the physical register that is live-in
+  // (i.e. holds an argument), and the virtual register that this value will
+  // be copied into. This, by itself, is not sufficient to map back the
+  // virtual register to a formal parameter from Function (since consecutive
+  // live-ins from MRI may not correspond to consecutive formal parameters
+  // from Function). To avoid the complications with in-memory arguments,
+  // only consider the initial sequence of formal parameters that are known
+  // to be passed via registers.
+ unsigned AttrIdx = 0;
+ unsigned InVirtReg, InPhysReg = 0;
+ const Function &F = *MF.getFunction();
+ typedef Function::const_arg_iterator arg_iterator;
+ for (arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
+ AttrIdx++;
+ const Argument &Arg = *I;
+ Type *ATy = Arg.getType();
+ unsigned Width = 0;
+ if (ATy->isIntegerTy())
+ Width = ATy->getIntegerBitWidth();
+ else if (ATy->isPointerTy())
+ Width = 32;
+ // If pointer size is not set through target data, it will default to
+ // Module::AnyPointerSize.
+ if (Width == 0 || Width > 64)
+ break;
+ InPhysReg = getNextPhysReg(InPhysReg, Width);
+ if (!InPhysReg)
+ break;
+ InVirtReg = getVirtRegFor(InPhysReg);
+ if (!InVirtReg)
+ continue;
+ AttributeSet Attrs = F.getAttributes();
+ if (Attrs.hasAttribute(AttrIdx, Attribute::SExt))
+ VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::SExt, Width)));
+ else if (Attrs.hasAttribute(AttrIdx, Attribute::ZExt))
+ VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::ZExt, Width)));
+ }
+}
+
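+// Informal example of the mapping built by the constructor above (for
+// illustration only): given a function such as
+//   i32 @f(i16 signext %a, i8 zeroext %b, i64 %c)
+// the parameters would arrive in R0, R1 and D1, and the virtual registers
+// that are live-in copies of R0 and R1 would get the entries
+// ExtType(SExt, 16) and ExtType(ZExt, 8) respectively; %c, being neither
+// sign- nor zero-extended, would get no entry in VRX.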
+
+BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
+ if (Sub == 0)
+ return MachineEvaluator::mask(Reg, 0);
+ using namespace Hexagon;
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ unsigned ID = RC->getID();
+ uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub));
+ switch (ID) {
+ case DoubleRegsRegClassID:
+ case VecDblRegsRegClassID:
+ case VecDblRegs128BRegClassID:
+ return (Sub == subreg_loreg) ? BT::BitMask(0, RW-1)
+ : BT::BitMask(RW, 2*RW-1);
+ default:
+ break;
+ }
+#ifndef NDEBUG
+ dbgs() << PrintReg(Reg, &TRI, Sub) << '\n';
+#endif
+ llvm_unreachable("Unexpected register/subregister");
+}
+
+namespace {
+class RegisterRefs {
+ std::vector<BT::RegisterRef> Vector;
+
+public:
+ RegisterRefs(const MachineInstr *MI) : Vector(MI->getNumOperands()) {
+ for (unsigned i = 0, n = Vector.size(); i < n; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg())
+ Vector[i] = BT::RegisterRef(MO);
+ // For indices that don't correspond to registers, the entry will
+ // remain constructed via the default constructor.
+ }
+ }
+
+ size_t size() const { return Vector.size(); }
+ const BT::RegisterRef &operator[](unsigned n) const {
+ // The main purpose of this operator is to assert with bad argument.
+ assert(n < Vector.size());
+ return Vector[n];
+ }
+};
+}
+
+bool HexagonEvaluator::evaluate(const MachineInstr *MI,
+ const CellMapType &Inputs, CellMapType &Outputs) const {
+ unsigned NumDefs = 0;
+
+ // Sanity verification: there should not be any defs with subregisters.
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ NumDefs++;
+ assert(MO.getSubReg() == 0);
+ }
+
+ if (NumDefs == 0)
+ return false;
+
+ if (MI->mayLoad())
+ return evaluateLoad(MI, Inputs, Outputs);
+
+ // Check COPY instructions that copy formal parameters into virtual
+ // registers. Such parameters can be sign- or zero-extended at the
+ // call site, and we should take advantage of this knowledge. The MRI
+ // keeps a list of pairs of live-in physical and virtual registers,
+ // which provides information about which virtual registers will hold
+ // the argument values. The function will still contain instructions
+ // defining those virtual registers, and in practice those are COPY
+ // instructions from a physical to a virtual register. In such cases,
+ // applying the argument extension to the virtual register can be seen
+ // as simply mirroring the extension that had already been applied to
+ // the physical register at the call site. If the defining instruction
+ // was not a COPY, it would not be clear how to mirror that extension
+ // on the callee's side. For that reason, only check COPY instructions
+ // for potential extensions.
+ if (MI->isCopy()) {
+ if (evaluateFormalCopy(MI, Inputs, Outputs))
+ return true;
+ }
+
+ // Beyond this point, if any operand is a global, skip that instruction.
+ // The reason is that certain instructions that can take an immediate
+ // operand can also have a global symbol in that operand. To avoid
+ // checking what kind of operand a given instruction has individually
+  // for each instruction, do it here. Global symbols as operands generally
+  // do not provide any useful information.
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isGlobal() || MO.isBlockAddress() || MO.isSymbol() || MO.isJTI() ||
+ MO.isCPI())
+ return false;
+ }
+
+ RegisterRefs Reg(MI);
+ unsigned Opc = MI->getOpcode();
+ using namespace Hexagon;
+ #define op(i) MI->getOperand(i)
+ #define rc(i) RegisterCell::ref(getCell(Reg[i],Inputs))
+ #define im(i) MI->getOperand(i).getImm()
+
+ // If the instruction has no register operands, skip it.
+ if (Reg.size() == 0)
+ return false;
+
+ // Record result for register in operand 0.
+ auto rr0 = [this,Reg] (const BT::RegisterCell &Val, CellMapType &Outputs)
+ -> bool {
+ putCell(Reg[0], Val, Outputs);
+ return true;
+ };
+ // Get the cell corresponding to the N-th operand.
+ auto cop = [this,&Reg,&MI,&Inputs] (unsigned N, uint16_t W)
+ -> BT::RegisterCell {
+ const MachineOperand &Op = MI->getOperand(N);
+ if (Op.isImm())
+ return eIMM(Op.getImm(), W);
+ if (!Op.isReg())
+ return RegisterCell::self(0, W);
+ assert(getRegBitWidth(Reg[N]) == W && "Register width mismatch");
+ return rc(N);
+ };
+ // Extract RW low bits of the cell.
+ auto lo = [this] (const BT::RegisterCell &RC, uint16_t RW)
+ -> BT::RegisterCell {
+ assert(RW <= RC.width());
+ return eXTR(RC, 0, RW);
+ };
+ // Extract RW high bits of the cell.
+ auto hi = [this] (const BT::RegisterCell &RC, uint16_t RW)
+ -> BT::RegisterCell {
+ uint16_t W = RC.width();
+ assert(RW <= W);
+ return eXTR(RC, W-RW, W);
+ };
+ // Extract N-th halfword (counting from the least significant position).
+ auto half = [this] (const BT::RegisterCell &RC, unsigned N)
+ -> BT::RegisterCell {
+ assert(N*16+16 <= RC.width());
+ return eXTR(RC, N*16, N*16+16);
+ };
+ // Shuffle bits (pick even/odd from cells and merge into result).
+ auto shuffle = [this] (const BT::RegisterCell &Rs, const BT::RegisterCell &Rt,
+ uint16_t BW, bool Odd) -> BT::RegisterCell {
+ uint16_t I = Odd, Ws = Rs.width();
+ assert(Ws == Rt.width());
+ RegisterCell RC = eXTR(Rt, I*BW, I*BW+BW).cat(eXTR(Rs, I*BW, I*BW+BW));
+ I += 2;
+ while (I*BW < Ws) {
+ RC.cat(eXTR(Rt, I*BW, I*BW+BW)).cat(eXTR(Rs, I*BW, I*BW+BW));
+ I += 2;
+ }
+ return RC;
+ };
+
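+  // Informal example of the helpers above (for illustration only): with
+  // 32-bit cells Rs = s31..s0 and Rt = t31..t0,
+  //   half(Rs, 1) yields s31..s16,
+  //   lo(Rs, 16) yields s15..s0,
+  //   shuffle(Rs, Rt, 8, false) picks the even bytes of both cells and,
+  //   from high to low, produces { s23..s16, t23..t16, s7..s0, t7..t0 }.
+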
+ // The bitwidth of the 0th operand. In most (if not all) of the
+ // instructions below, the 0th operand is the defined register.
+  // Pre-compute the bitwidth here, because it is needed in many cases
+  // below.
+ uint16_t W0 = (Reg[0].Reg != 0) ? getRegBitWidth(Reg[0]) : 0;
+
+ switch (Opc) {
+ // Transfer immediate:
+
+ case A2_tfrsi:
+ case A2_tfrpi:
+ case CONST32:
+ case CONST32_Float_Real:
+ case CONST32_Int_Real:
+ case CONST64_Float_Real:
+ case CONST64_Int_Real:
+ return rr0(eIMM(im(1), W0), Outputs);
+ case TFR_PdFalse:
+ return rr0(RegisterCell(W0).fill(0, W0, BT::BitValue::Zero), Outputs);
+ case TFR_PdTrue:
+ return rr0(RegisterCell(W0).fill(0, W0, BT::BitValue::One), Outputs);
+ case TFR_FI: {
+ int FI = op(1).getIndex();
+ int Off = op(2).getImm();
+ unsigned A = MFI.getObjectAlignment(FI) + std::abs(Off);
+ unsigned L = Log2_32(A);
+ RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0);
+ RC.fill(0, L, BT::BitValue::Zero);
+ return rr0(RC, Outputs);
+ }
+
+ // Transfer register:
+
+ case A2_tfr:
+ case A2_tfrp:
+ case C2_pxfer_map:
+ return rr0(rc(1), Outputs);
+ case C2_tfrpr: {
+ uint16_t RW = W0;
+ uint16_t PW = 8; // XXX Pred size: getRegBitWidth(Reg[1]);
+ assert(PW <= RW);
+ RegisterCell PC = eXTR(rc(1), 0, PW);
+ RegisterCell RC = RegisterCell(RW).insert(PC, BT::BitMask(0, PW-1));
+ RC.fill(PW, RW, BT::BitValue::Zero);
+ return rr0(RC, Outputs);
+ }
+ case C2_tfrrp: {
+ RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0);
+ W0 = 8; // XXX Pred size
+ return rr0(eINS(RC, eXTR(rc(1), 0, W0), 0), Outputs);
+ }
+
+ // Arithmetic:
+
+ case A2_abs:
+ case A2_absp:
+ // TODO
+ break;
+
+ case A2_addsp: {
+ uint16_t W1 = getRegBitWidth(Reg[1]);
+ assert(W0 == 64 && W1 == 32);
+ RegisterCell CW = RegisterCell(W0).insert(rc(1), BT::BitMask(0, W1-1));
+ RegisterCell RC = eADD(eSXT(CW, W1), rc(2));
+ return rr0(RC, Outputs);
+ }
+ case A2_add:
+ case A2_addp:
+ return rr0(eADD(rc(1), rc(2)), Outputs);
+ case A2_addi:
+ return rr0(eADD(rc(1), eIMM(im(2), W0)), Outputs);
+ case S4_addi_asl_ri: {
+ RegisterCell RC = eADD(eIMM(im(1), W0), eASL(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case S4_addi_lsr_ri: {
+ RegisterCell RC = eADD(eIMM(im(1), W0), eLSR(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case S4_addaddi: {
+ RegisterCell RC = eADD(rc(1), eADD(rc(2), eIMM(im(3), W0)));
+ return rr0(RC, Outputs);
+ }
+ case M4_mpyri_addi: {
+ RegisterCell M = eMLS(rc(2), eIMM(im(3), W0));
+ RegisterCell RC = eADD(eIMM(im(1), W0), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
+ case M4_mpyrr_addi: {
+ RegisterCell M = eMLS(rc(2), rc(3));
+ RegisterCell RC = eADD(eIMM(im(1), W0), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
+ case M4_mpyri_addr_u2: {
+ RegisterCell M = eMLS(eIMM(im(2), W0), rc(3));
+ RegisterCell RC = eADD(rc(1), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
+ case M4_mpyri_addr: {
+ RegisterCell M = eMLS(rc(2), eIMM(im(3), W0));
+ RegisterCell RC = eADD(rc(1), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
+ case M4_mpyrr_addr: {
+ RegisterCell M = eMLS(rc(2), rc(3));
+ RegisterCell RC = eADD(rc(1), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
+ case S4_subaddi: {
+ RegisterCell RC = eADD(rc(1), eSUB(eIMM(im(2), W0), rc(3)));
+ return rr0(RC, Outputs);
+ }
+ case M2_accii: {
+ RegisterCell RC = eADD(rc(1), eADD(rc(2), eIMM(im(3), W0)));
+ return rr0(RC, Outputs);
+ }
+ case M2_acci: {
+ RegisterCell RC = eADD(rc(1), eADD(rc(2), rc(3)));
+ return rr0(RC, Outputs);
+ }
+ case M2_subacc: {
+ RegisterCell RC = eADD(rc(1), eSUB(rc(2), rc(3)));
+ return rr0(RC, Outputs);
+ }
+ case S2_addasl_rrri: {
+ RegisterCell RC = eADD(rc(1), eASL(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case C4_addipc: {
+ RegisterCell RPC = RegisterCell::self(Reg[0].Reg, W0);
+ RPC.fill(0, 2, BT::BitValue::Zero);
+ return rr0(eADD(RPC, eIMM(im(2), W0)), Outputs);
+ }
+ case A2_sub:
+ case A2_subp:
+ return rr0(eSUB(rc(1), rc(2)), Outputs);
+ case A2_subri:
+ return rr0(eSUB(eIMM(im(1), W0), rc(2)), Outputs);
+ case S4_subi_asl_ri: {
+ RegisterCell RC = eSUB(eIMM(im(1), W0), eASL(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case S4_subi_lsr_ri: {
+ RegisterCell RC = eSUB(eIMM(im(1), W0), eLSR(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case M2_naccii: {
+ RegisterCell RC = eSUB(rc(1), eADD(rc(2), eIMM(im(3), W0)));
+ return rr0(RC, Outputs);
+ }
+ case M2_nacci: {
+ RegisterCell RC = eSUB(rc(1), eADD(rc(2), rc(3)));
+ return rr0(RC, Outputs);
+ }
+ // 32-bit negation is done by "Rd = A2_subri 0, Rs"
+ case A2_negp:
+ return rr0(eSUB(eIMM(0, W0), rc(1)), Outputs);
+
+ case M2_mpy_up: {
+ RegisterCell M = eMLS(rc(1), rc(2));
+ return rr0(hi(M, W0), Outputs);
+ }
+ case M2_dpmpyss_s0:
+ return rr0(eMLS(rc(1), rc(2)), Outputs);
+ case M2_dpmpyss_acc_s0:
+ return rr0(eADD(rc(1), eMLS(rc(2), rc(3))), Outputs);
+ case M2_dpmpyss_nac_s0:
+ return rr0(eSUB(rc(1), eMLS(rc(2), rc(3))), Outputs);
+ case M2_mpyi: {
+ RegisterCell M = eMLS(rc(1), rc(2));
+ return rr0(lo(M, W0), Outputs);
+ }
+ case M2_macsip: {
+ RegisterCell M = eMLS(rc(2), eIMM(im(3), W0));
+ RegisterCell RC = eADD(rc(1), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
+ case M2_macsin: {
+ RegisterCell M = eMLS(rc(2), eIMM(im(3), W0));
+ RegisterCell RC = eSUB(rc(1), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
+ case M2_maci: {
+ RegisterCell M = eMLS(rc(2), rc(3));
+ RegisterCell RC = eADD(rc(1), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
+ case M2_mpysmi: {
+ RegisterCell M = eMLS(rc(1), eIMM(im(2), W0));
+ return rr0(lo(M, 32), Outputs);
+ }
+ case M2_mpysin: {
+ RegisterCell M = eMLS(rc(1), eIMM(-im(2), W0));
+ return rr0(lo(M, 32), Outputs);
+ }
+ case M2_mpysip: {
+ RegisterCell M = eMLS(rc(1), eIMM(im(2), W0));
+ return rr0(lo(M, 32), Outputs);
+ }
+ case M2_mpyu_up: {
+ RegisterCell M = eMLU(rc(1), rc(2));
+ return rr0(hi(M, W0), Outputs);
+ }
+ case M2_dpmpyuu_s0:
+ return rr0(eMLU(rc(1), rc(2)), Outputs);
+ case M2_dpmpyuu_acc_s0:
+ return rr0(eADD(rc(1), eMLU(rc(2), rc(3))), Outputs);
+ case M2_dpmpyuu_nac_s0:
+ return rr0(eSUB(rc(1), eMLU(rc(2), rc(3))), Outputs);
+ //case M2_mpysu_up:
+
+ // Logical/bitwise:
+
+ case A2_andir:
+ return rr0(eAND(rc(1), eIMM(im(2), W0)), Outputs);
+ case A2_and:
+ case A2_andp:
+ return rr0(eAND(rc(1), rc(2)), Outputs);
+ case A4_andn:
+ case A4_andnp:
+ return rr0(eAND(rc(1), eNOT(rc(2))), Outputs);
+ case S4_andi_asl_ri: {
+ RegisterCell RC = eAND(eIMM(im(1), W0), eASL(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case S4_andi_lsr_ri: {
+ RegisterCell RC = eAND(eIMM(im(1), W0), eLSR(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case M4_and_and:
+ return rr0(eAND(rc(1), eAND(rc(2), rc(3))), Outputs);
+ case M4_and_andn:
+ return rr0(eAND(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs);
+ case M4_and_or:
+ return rr0(eAND(rc(1), eORL(rc(2), rc(3))), Outputs);
+ case M4_and_xor:
+ return rr0(eAND(rc(1), eXOR(rc(2), rc(3))), Outputs);
+ case A2_orir:
+ return rr0(eORL(rc(1), eIMM(im(2), W0)), Outputs);
+ case A2_or:
+ case A2_orp:
+ return rr0(eORL(rc(1), rc(2)), Outputs);
+ case A4_orn:
+ case A4_ornp:
+ return rr0(eORL(rc(1), eNOT(rc(2))), Outputs);
+ case S4_ori_asl_ri: {
+ RegisterCell RC = eORL(eIMM(im(1), W0), eASL(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case S4_ori_lsr_ri: {
+ RegisterCell RC = eORL(eIMM(im(1), W0), eLSR(rc(2), im(3)));
+ return rr0(RC, Outputs);
+ }
+ case M4_or_and:
+ return rr0(eORL(rc(1), eAND(rc(2), rc(3))), Outputs);
+ case M4_or_andn:
+ return rr0(eORL(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs);
+ case S4_or_andi:
+ case S4_or_andix: {
+ RegisterCell RC = eORL(rc(1), eAND(rc(2), eIMM(im(3), W0)));
+ return rr0(RC, Outputs);
+ }
+ case S4_or_ori: {
+ RegisterCell RC = eORL(rc(1), eORL(rc(2), eIMM(im(3), W0)));
+ return rr0(RC, Outputs);
+ }
+ case M4_or_or:
+ return rr0(eORL(rc(1), eORL(rc(2), rc(3))), Outputs);
+ case M4_or_xor:
+ return rr0(eORL(rc(1), eXOR(rc(2), rc(3))), Outputs);
+ case A2_xor:
+ case A2_xorp:
+ return rr0(eXOR(rc(1), rc(2)), Outputs);
+ case M4_xor_and:
+ return rr0(eXOR(rc(1), eAND(rc(2), rc(3))), Outputs);
+ case M4_xor_andn:
+ return rr0(eXOR(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs);
+ case M4_xor_or:
+ return rr0(eXOR(rc(1), eORL(rc(2), rc(3))), Outputs);
+ case M4_xor_xacc:
+ return rr0(eXOR(rc(1), eXOR(rc(2), rc(3))), Outputs);
+ case A2_not:
+ case A2_notp:
+ return rr0(eNOT(rc(1)), Outputs);
+
+ case S2_asl_i_r:
+ case S2_asl_i_p:
+ return rr0(eASL(rc(1), im(2)), Outputs);
+ case A2_aslh:
+ return rr0(eASL(rc(1), 16), Outputs);
+ case S2_asl_i_r_acc:
+ case S2_asl_i_p_acc:
+ return rr0(eADD(rc(1), eASL(rc(2), im(3))), Outputs);
+ case S2_asl_i_r_nac:
+ case S2_asl_i_p_nac:
+ return rr0(eSUB(rc(1), eASL(rc(2), im(3))), Outputs);
+ case S2_asl_i_r_and:
+ case S2_asl_i_p_and:
+ return rr0(eAND(rc(1), eASL(rc(2), im(3))), Outputs);
+ case S2_asl_i_r_or:
+ case S2_asl_i_p_or:
+ return rr0(eORL(rc(1), eASL(rc(2), im(3))), Outputs);
+ case S2_asl_i_r_xacc:
+ case S2_asl_i_p_xacc:
+ return rr0(eXOR(rc(1), eASL(rc(2), im(3))), Outputs);
+ case S2_asl_i_vh:
+ case S2_asl_i_vw:
+ // TODO
+ break;
+
+ case S2_asr_i_r:
+ case S2_asr_i_p:
+ return rr0(eASR(rc(1), im(2)), Outputs);
+ case A2_asrh:
+ return rr0(eASR(rc(1), 16), Outputs);
+ case S2_asr_i_r_acc:
+ case S2_asr_i_p_acc:
+ return rr0(eADD(rc(1), eASR(rc(2), im(3))), Outputs);
+ case S2_asr_i_r_nac:
+ case S2_asr_i_p_nac:
+ return rr0(eSUB(rc(1), eASR(rc(2), im(3))), Outputs);
+ case S2_asr_i_r_and:
+ case S2_asr_i_p_and:
+ return rr0(eAND(rc(1), eASR(rc(2), im(3))), Outputs);
+ case S2_asr_i_r_or:
+ case S2_asr_i_p_or:
+ return rr0(eORL(rc(1), eASR(rc(2), im(3))), Outputs);
+ case S2_asr_i_r_rnd: {
+ // The input is first sign-extended to 64 bits, then the output
+ // is truncated back to 32 bits.
+ assert(W0 == 32);
+ RegisterCell XC = eSXT(rc(1).cat(eIMM(0, W0)), W0);
+ RegisterCell RC = eASR(eADD(eASR(XC, im(2)), eIMM(1, 2*W0)), 1);
+ return rr0(eXTR(RC, 0, W0), Outputs);
+ }
+ case S2_asr_i_r_rnd_goodsyntax: {
+ int64_t S = im(2);
+ if (S == 0)
+ return rr0(rc(1), Outputs);
+ // Result: S2_asr_i_r_rnd Rs, u5-1
+ RegisterCell XC = eSXT(rc(1).cat(eIMM(0, W0)), W0);
+ RegisterCell RC = eLSR(eADD(eASR(XC, S-1), eIMM(1, 2*W0)), 1);
+ return rr0(eXTR(RC, 0, W0), Outputs);
+ }
+ case S2_asr_r_vh:
+ case S2_asr_i_vw:
+ case S2_asr_i_svw_trun:
+ // TODO
+ break;
+
+ case S2_lsr_i_r:
+ case S2_lsr_i_p:
+ return rr0(eLSR(rc(1), im(2)), Outputs);
+ case S2_lsr_i_r_acc:
+ case S2_lsr_i_p_acc:
+ return rr0(eADD(rc(1), eLSR(rc(2), im(3))), Outputs);
+ case S2_lsr_i_r_nac:
+ case S2_lsr_i_p_nac:
+ return rr0(eSUB(rc(1), eLSR(rc(2), im(3))), Outputs);
+ case S2_lsr_i_r_and:
+ case S2_lsr_i_p_and:
+ return rr0(eAND(rc(1), eLSR(rc(2), im(3))), Outputs);
+ case S2_lsr_i_r_or:
+ case S2_lsr_i_p_or:
+ return rr0(eORL(rc(1), eLSR(rc(2), im(3))), Outputs);
+ case S2_lsr_i_r_xacc:
+ case S2_lsr_i_p_xacc:
+ return rr0(eXOR(rc(1), eLSR(rc(2), im(3))), Outputs);
+
+ case S2_clrbit_i: {
+ RegisterCell RC = rc(1);
+ RC[im(2)] = BT::BitValue::Zero;
+ return rr0(RC, Outputs);
+ }
+ case S2_setbit_i: {
+ RegisterCell RC = rc(1);
+ RC[im(2)] = BT::BitValue::One;
+ return rr0(RC, Outputs);
+ }
+ case S2_togglebit_i: {
+ RegisterCell RC = rc(1);
+ uint16_t BX = im(2);
+ RC[BX] = RC[BX].is(0) ? BT::BitValue::One
+ : RC[BX].is(1) ? BT::BitValue::Zero
+ : BT::BitValue::self();
+ return rr0(RC, Outputs);
+ }
+
+ case A4_bitspliti: {
+ uint16_t W1 = getRegBitWidth(Reg[1]);
+ uint16_t BX = im(2);
+ // Res.uw[1] = Rs[bx+1:], Res.uw[0] = Rs[0:bx]
+ const BT::BitValue Zero = BT::BitValue::Zero;
+ RegisterCell RZ = RegisterCell(W0).fill(BX, W1, Zero)
+ .fill(W1+(W1-BX), W0, Zero);
+ RegisterCell BF1 = eXTR(rc(1), 0, BX), BF2 = eXTR(rc(1), BX, W1);
+ RegisterCell RC = eINS(eINS(RZ, BF1, 0), BF2, W1);
+ return rr0(RC, Outputs);
+ }
+ case S4_extract:
+ case S4_extractp:
+ case S2_extractu:
+ case S2_extractup: {
+ uint16_t Wd = im(2), Of = im(3);
+ assert(Wd <= W0);
+ if (Wd == 0)
+ return rr0(eIMM(0, W0), Outputs);
+ // If the width extends beyond the register size, pad the register
+ // with 0 bits.
+ RegisterCell Pad = (Wd+Of > W0) ? rc(1).cat(eIMM(0, Wd+Of-W0)) : rc(1);
+ RegisterCell Ext = eXTR(Pad, Of, Wd+Of);
+ // Ext is short, need to extend it with 0s or sign bit.
+ RegisterCell RC = RegisterCell(W0).insert(Ext, BT::BitMask(0, Wd-1));
+ if (Opc == S2_extractu || Opc == S2_extractup)
+ return rr0(eZXT(RC, Wd), Outputs);
+ return rr0(eSXT(RC, Wd), Outputs);
+ }
+ case S2_insert:
+ case S2_insertp: {
+ uint16_t Wd = im(3), Of = im(4);
+ assert(Wd < W0 && Of < W0);
+ // If Wd+Of exceeds W0, the inserted bits are truncated.
+ if (Wd+Of > W0)
+ Wd = W0-Of;
+ if (Wd == 0)
+ return rr0(rc(1), Outputs);
+ return rr0(eINS(rc(1), eXTR(rc(2), 0, Wd), Of), Outputs);
+ }
+
+ // Bit permutations:
+
+ case A2_combineii:
+ case A4_combineii:
+ case A4_combineir:
+ case A4_combineri:
+ case A2_combinew:
+ assert(W0 % 2 == 0);
+ return rr0(cop(2, W0/2).cat(cop(1, W0/2)), Outputs);
+ case A2_combine_ll:
+ case A2_combine_lh:
+ case A2_combine_hl:
+ case A2_combine_hh: {
+ assert(W0 == 32);
+ assert(getRegBitWidth(Reg[1]) == 32 && getRegBitWidth(Reg[2]) == 32);
+ // Low half in the output is 0 for _ll and _hl, 1 otherwise:
+ unsigned LoH = !(Opc == A2_combine_ll || Opc == A2_combine_hl);
+ // High half in the output is 0 for _ll and _lh, 1 otherwise:
+ unsigned HiH = !(Opc == A2_combine_ll || Opc == A2_combine_lh);
+ RegisterCell R1 = rc(1);
+ RegisterCell R2 = rc(2);
+ RegisterCell RC = half(R2, LoH).cat(half(R1, HiH));
+ return rr0(RC, Outputs);
+ }
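+  // Informal note for the combine cases above: for A2_combine_lh, the low
+  // half of the result refers to the high halfword of operand 2 and the
+  // high half refers to the low halfword of operand 1; the other variants
+  // follow the same pattern, with LoH/HiH selecting the halves.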
+ case S2_packhl: {
+ assert(W0 == 64);
+ assert(getRegBitWidth(Reg[1]) == 32 && getRegBitWidth(Reg[2]) == 32);
+ RegisterCell R1 = rc(1);
+ RegisterCell R2 = rc(2);
+ RegisterCell RC = half(R2, 0).cat(half(R1, 0)).cat(half(R2, 1))
+ .cat(half(R1, 1));
+ return rr0(RC, Outputs);
+ }
+ case S2_shuffeb: {
+ RegisterCell RC = shuffle(rc(1), rc(2), 8, false);
+ return rr0(RC, Outputs);
+ }
+ case S2_shuffeh: {
+ RegisterCell RC = shuffle(rc(1), rc(2), 16, false);
+ return rr0(RC, Outputs);
+ }
+ case S2_shuffob: {
+ RegisterCell RC = shuffle(rc(1), rc(2), 8, true);
+ return rr0(RC, Outputs);
+ }
+ case S2_shuffoh: {
+ RegisterCell RC = shuffle(rc(1), rc(2), 16, true);
+ return rr0(RC, Outputs);
+ }
+ case C2_mask: {
+ uint16_t WR = W0;
+ uint16_t WP = 8; // XXX Pred size: getRegBitWidth(Reg[1]);
+ assert(WR == 64 && WP == 8);
+ RegisterCell R1 = rc(1);
+ RegisterCell RC(WR);
+ for (uint16_t i = 0; i < WP; ++i) {
+ const BT::BitValue &V = R1[i];
+ BT::BitValue F = (V.is(0) || V.is(1)) ? V : BT::BitValue::self();
+ RC.fill(i*8, i*8+8, F);
+ }
+ return rr0(RC, Outputs);
+ }
+
+ // Mux:
+
+ case C2_muxii:
+ case C2_muxir:
+ case C2_muxri:
+ case C2_mux: {
+ BT::BitValue PC0 = rc(1)[0];
+ RegisterCell R2 = cop(2, W0);
+ RegisterCell R3 = cop(3, W0);
+ if (PC0.is(0) || PC0.is(1))
+ return rr0(RegisterCell::ref(PC0 ? R2 : R3), Outputs);
+ R2.meet(R3, Reg[0].Reg);
+ return rr0(R2, Outputs);
+ }
+ case C2_vmux:
+ // TODO
+ break;
+
+ // Sign- and zero-extension:
+
+ case A2_sxtb:
+ return rr0(eSXT(rc(1), 8), Outputs);
+ case A2_sxth:
+ return rr0(eSXT(rc(1), 16), Outputs);
+ case A2_sxtw: {
+ uint16_t W1 = getRegBitWidth(Reg[1]);
+ assert(W0 == 64 && W1 == 32);
+ RegisterCell RC = eSXT(rc(1).cat(eIMM(0, W1)), W1);
+ return rr0(RC, Outputs);
+ }
+ case A2_zxtb:
+ return rr0(eZXT(rc(1), 8), Outputs);
+ case A2_zxth:
+ return rr0(eZXT(rc(1), 16), Outputs);
+
+ // Bit count:
+
+ case S2_cl0:
+ case S2_cl0p:
+ // Always produce a 32-bit result.
+ return rr0(eCLB(rc(1), 0/*bit*/, 32), Outputs);
+ case S2_cl1:
+ case S2_cl1p:
+ return rr0(eCLB(rc(1), 1/*bit*/, 32), Outputs);
+ case S2_clb:
+ case S2_clbp: {
+ uint16_t W1 = getRegBitWidth(Reg[1]);
+ RegisterCell R1 = rc(1);
+ BT::BitValue TV = R1[W1-1];
+ if (TV.is(0) || TV.is(1))
+ return rr0(eCLB(R1, TV, 32), Outputs);
+ break;
+ }
+ case S2_ct0:
+ case S2_ct0p:
+ return rr0(eCTB(rc(1), 0/*bit*/, 32), Outputs);
+ case S2_ct1:
+ case S2_ct1p:
+ return rr0(eCTB(rc(1), 1/*bit*/, 32), Outputs);
+ case S5_popcountp:
+ // TODO
+ break;
+
+ case C2_all8: {
+ RegisterCell P1 = rc(1);
+ bool Has0 = false, All1 = true;
+ for (uint16_t i = 0; i < 8/*XXX*/; ++i) {
+ if (!P1[i].is(1))
+ All1 = false;
+ if (!P1[i].is(0))
+ continue;
+ Has0 = true;
+ break;
+ }
+ if (!Has0 && !All1)
+ break;
+ RegisterCell RC(W0);
+ RC.fill(0, W0, (All1 ? BT::BitValue::One : BT::BitValue::Zero));
+ return rr0(RC, Outputs);
+ }
+ case C2_any8: {
+ RegisterCell P1 = rc(1);
+ bool Has1 = false, All0 = true;
+ for (uint16_t i = 0; i < 8/*XXX*/; ++i) {
+ if (!P1[i].is(0))
+ All0 = false;
+ if (!P1[i].is(1))
+ continue;
+ Has1 = true;
+ break;
+ }
+ if (!Has1 && !All0)
+ break;
+ RegisterCell RC(W0);
+ RC.fill(0, W0, (Has1 ? BT::BitValue::One : BT::BitValue::Zero));
+ return rr0(RC, Outputs);
+ }
+ case C2_and:
+ return rr0(eAND(rc(1), rc(2)), Outputs);
+ case C2_andn:
+ return rr0(eAND(rc(1), eNOT(rc(2))), Outputs);
+ case C2_not:
+ return rr0(eNOT(rc(1)), Outputs);
+ case C2_or:
+ return rr0(eORL(rc(1), rc(2)), Outputs);
+ case C2_orn:
+ return rr0(eORL(rc(1), eNOT(rc(2))), Outputs);
+ case C2_xor:
+ return rr0(eXOR(rc(1), rc(2)), Outputs);
+ case C4_and_and:
+ return rr0(eAND(rc(1), eAND(rc(2), rc(3))), Outputs);
+ case C4_and_andn:
+ return rr0(eAND(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs);
+ case C4_and_or:
+ return rr0(eAND(rc(1), eORL(rc(2), rc(3))), Outputs);
+ case C4_and_orn:
+ return rr0(eAND(rc(1), eORL(rc(2), eNOT(rc(3)))), Outputs);
+ case C4_or_and:
+ return rr0(eORL(rc(1), eAND(rc(2), rc(3))), Outputs);
+ case C4_or_andn:
+ return rr0(eORL(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs);
+ case C4_or_or:
+ return rr0(eORL(rc(1), eORL(rc(2), rc(3))), Outputs);
+ case C4_or_orn:
+ return rr0(eORL(rc(1), eORL(rc(2), eNOT(rc(3)))), Outputs);
+ case C2_bitsclr:
+ case C2_bitsclri:
+ case C2_bitsset:
+ case C4_nbitsclr:
+ case C4_nbitsclri:
+ case C4_nbitsset:
+ // TODO
+ break;
+ case S2_tstbit_i:
+ case S4_ntstbit_i: {
+ BT::BitValue V = rc(1)[im(2)];
+ if (V.is(0) || V.is(1)) {
+ // If instruction is S2_tstbit_i, test for 1, otherwise test for 0.
+ bool TV = (Opc == S2_tstbit_i);
+ BT::BitValue F = V.is(TV) ? BT::BitValue::One : BT::BitValue::Zero;
+ return rr0(RegisterCell(W0).fill(0, W0, F), Outputs);
+ }
+ break;
+ }
+
+ default:
+ return MachineEvaluator::evaluate(MI, Inputs, Outputs);
+ }
+ #undef im
+ #undef rc
+ #undef op
+ return false;
+}
+
+
+bool HexagonEvaluator::evaluate(const MachineInstr *BI,
+ const CellMapType &Inputs, BranchTargetList &Targets,
+ bool &FallsThru) const {
+ // We need to evaluate one branch at a time. TII::AnalyzeBranch checks
+ // all the branches in a basic block at once, so we cannot use it.
+ unsigned Opc = BI->getOpcode();
+ bool SimpleBranch = false;
+ bool Negated = false;
+ switch (Opc) {
+ case Hexagon::J2_jumpf:
+ case Hexagon::J2_jumpfnew:
+ case Hexagon::J2_jumpfnewpt:
+ Negated = true;
+ case Hexagon::J2_jumpt:
+ case Hexagon::J2_jumptnew:
+ case Hexagon::J2_jumptnewpt:
+ // Simple branch: if([!]Pn) jump ...
+ // i.e. Op0 = predicate, Op1 = branch target.
+ SimpleBranch = true;
+ break;
+ case Hexagon::J2_jump:
+ Targets.insert(BI->getOperand(0).getMBB());
+ FallsThru = false;
+ return true;
+ default:
+ // If the branch is of unknown type, assume that all successors are
+ // executable.
+ return false;
+ }
+
+ if (!SimpleBranch)
+ return false;
+
+ // BI is a conditional branch if we got here.
+ RegisterRef PR = BI->getOperand(0);
+ RegisterCell PC = getCell(PR, Inputs);
+ const BT::BitValue &Test = PC[0];
+
+ // If the condition is neither true nor false, then it's unknown.
+ if (!Test.is(0) && !Test.is(1))
+ return false;
+
+ // "Test.is(!Negated)" means "branch condition is true".
+ if (!Test.is(!Negated)) {
+ // Condition known to be false.
+ FallsThru = true;
+ return true;
+ }
+
+ Targets.insert(BI->getOperand(1).getMBB());
+ FallsThru = false;
+ return true;
+}
+
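+// Informal example of the branch evaluation above: for "if (!p0) jump"
+// (J2_jumpf and friends) with the tracker knowing that bit 0 of the
+// predicate is 1, the test concludes that the branch is not taken, so no
+// target is added and FallsThru is set to true.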
+
+bool HexagonEvaluator::evaluateLoad(const MachineInstr *MI,
+ const CellMapType &Inputs, CellMapType &Outputs) const {
+ if (TII.isPredicated(MI))
+ return false;
+ assert(MI->mayLoad() && "A load that mayn't?");
+ unsigned Opc = MI->getOpcode();
+
+ uint16_t BitNum;
+ bool SignEx;
+ using namespace Hexagon;
+
+ switch (Opc) {
+ default:
+ return false;
+
+#if 0
+ // memb_fifo
+ case L2_loadalignb_pbr:
+ case L2_loadalignb_pcr:
+ case L2_loadalignb_pi:
+ // memh_fifo
+ case L2_loadalignh_pbr:
+ case L2_loadalignh_pcr:
+ case L2_loadalignh_pi:
+ // membh
+ case L2_loadbsw2_pbr:
+ case L2_loadbsw2_pci:
+ case L2_loadbsw2_pcr:
+ case L2_loadbsw2_pi:
+ case L2_loadbsw4_pbr:
+ case L2_loadbsw4_pci:
+ case L2_loadbsw4_pcr:
+ case L2_loadbsw4_pi:
+ // memubh
+ case L2_loadbzw2_pbr:
+ case L2_loadbzw2_pci:
+ case L2_loadbzw2_pcr:
+ case L2_loadbzw2_pi:
+ case L2_loadbzw4_pbr:
+ case L2_loadbzw4_pci:
+ case L2_loadbzw4_pcr:
+ case L2_loadbzw4_pi:
+#endif
+
+ case L2_loadrbgp:
+ case L2_loadrb_io:
+ case L2_loadrb_pbr:
+ case L2_loadrb_pci:
+ case L2_loadrb_pcr:
+ case L2_loadrb_pi:
+ case L4_loadrb_abs:
+ case L4_loadrb_ap:
+ case L4_loadrb_rr:
+ case L4_loadrb_ur:
+ BitNum = 8;
+ SignEx = true;
+ break;
+
+ case L2_loadrubgp:
+ case L2_loadrub_io:
+ case L2_loadrub_pbr:
+ case L2_loadrub_pci:
+ case L2_loadrub_pcr:
+ case L2_loadrub_pi:
+ case L4_loadrub_abs:
+ case L4_loadrub_ap:
+ case L4_loadrub_rr:
+ case L4_loadrub_ur:
+ BitNum = 8;
+ SignEx = false;
+ break;
+
+ case L2_loadrhgp:
+ case L2_loadrh_io:
+ case L2_loadrh_pbr:
+ case L2_loadrh_pci:
+ case L2_loadrh_pcr:
+ case L2_loadrh_pi:
+ case L4_loadrh_abs:
+ case L4_loadrh_ap:
+ case L4_loadrh_rr:
+ case L4_loadrh_ur:
+ BitNum = 16;
+ SignEx = true;
+ break;
+
+ case L2_loadruhgp:
+ case L2_loadruh_io:
+ case L2_loadruh_pbr:
+ case L2_loadruh_pci:
+ case L2_loadruh_pcr:
+ case L2_loadruh_pi:
+ case L4_loadruh_rr:
+ case L4_loadruh_abs:
+ case L4_loadruh_ap:
+ case L4_loadruh_ur:
+ BitNum = 16;
+ SignEx = false;
+ break;
+
+ case L2_loadrigp:
+ case L2_loadri_io:
+ case L2_loadri_pbr:
+ case L2_loadri_pci:
+ case L2_loadri_pcr:
+ case L2_loadri_pi:
+ case L2_loadw_locked:
+ case L4_loadri_abs:
+ case L4_loadri_ap:
+ case L4_loadri_rr:
+ case L4_loadri_ur:
+ case LDriw_pred:
+ BitNum = 32;
+ SignEx = true;
+ break;
+
+ case L2_loadrdgp:
+ case L2_loadrd_io:
+ case L2_loadrd_pbr:
+ case L2_loadrd_pci:
+ case L2_loadrd_pcr:
+ case L2_loadrd_pi:
+ case L4_loadd_locked:
+ case L4_loadrd_abs:
+ case L4_loadrd_ap:
+ case L4_loadrd_rr:
+ case L4_loadrd_ur:
+ BitNum = 64;
+ SignEx = true;
+ break;
+ }
+
+ const MachineOperand &MD = MI->getOperand(0);
+ assert(MD.isReg() && MD.isDef());
+ RegisterRef RD = MD;
+
+ uint16_t W = getRegBitWidth(RD);
+ assert(W >= BitNum && BitNum > 0);
+ RegisterCell Res(W);
+
+ for (uint16_t i = 0; i < BitNum; ++i)
+ Res[i] = BT::BitValue::self(BT::BitRef(RD.Reg, i));
+
+ if (SignEx) {
+ const BT::BitValue &Sign = Res[BitNum-1];
+ for (uint16_t i = BitNum; i < W; ++i)
+ Res[i] = BT::BitValue::ref(Sign);
+ } else {
+ for (uint16_t i = BitNum; i < W; ++i)
+ Res[i] = BT::BitValue::Zero;
+ }
+
+ putCell(RD, Res, Outputs);
+ return true;
+}
+
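+// Informal example of the load evaluation above: for a sign-extending byte
+// load such as L2_loadrb_io into a 32-bit register, BitNum is 8, so bits
+// 0..7 of the result cell refer to themselves (the unknown loaded data) and
+// bits 8..31 become references to bit 7, encoding the sign extension.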
+
+bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr *MI,
+ const CellMapType &Inputs, CellMapType &Outputs) const {
+ // If MI defines a formal parameter, but is not a copy (loads are handled
+ // in evaluateLoad), then it's not clear what to do.
+ assert(MI->isCopy());
+
+ RegisterRef RD = MI->getOperand(0);
+ RegisterRef RS = MI->getOperand(1);
+ assert(RD.Sub == 0);
+ if (!TargetRegisterInfo::isPhysicalRegister(RS.Reg))
+ return false;
+ RegExtMap::const_iterator F = VRX.find(RD.Reg);
+ if (F == VRX.end())
+ return false;
+
+ uint16_t EW = F->second.Width;
+ // Store RD's cell into the map. This will associate the cell with a virtual
+  // register, and make zero-/sign-extends possible (otherwise we would be
+  // extending "self" bit values, which will have no effect, since "self"
+  // values cannot be references to anything).
+ putCell(RD, getCell(RS, Inputs), Outputs);
+
+ RegisterCell Res;
+ // Read RD's cell from the outputs instead of RS's cell from the inputs:
+ if (F->second.Type == ExtType::SExt)
+ Res = eSXT(getCell(RD, Outputs), EW);
+ else if (F->second.Type == ExtType::ZExt)
+ Res = eZXT(getCell(RD, Outputs), EW);
+
+ putCell(RD, Res, Outputs);
+ return true;
+}
+
+
+unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const {
+ using namespace Hexagon;
+ bool Is64 = DoubleRegsRegClass.contains(PReg);
+ assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg));
+
+ static const unsigned Phys32[] = { R0, R1, R2, R3, R4, R5 };
+ static const unsigned Phys64[] = { D0, D1, D2 };
+ const unsigned Num32 = sizeof(Phys32)/sizeof(unsigned);
+ const unsigned Num64 = sizeof(Phys64)/sizeof(unsigned);
+
+ // Return the first parameter register of the required width.
+ if (PReg == 0)
+ return (Width <= 32) ? Phys32[0] : Phys64[0];
+
+ // Set Idx32, Idx64 in such a way that Idx+1 would give the index of the
+ // next register.
+ unsigned Idx32 = 0, Idx64 = 0;
+ if (!Is64) {
+ while (Idx32 < Num32) {
+ if (Phys32[Idx32] == PReg)
+ break;
+ Idx32++;
+ }
+ Idx64 = Idx32/2;
+ } else {
+ while (Idx64 < Num64) {
+ if (Phys64[Idx64] == PReg)
+ break;
+ Idx64++;
+ }
+ Idx32 = Idx64*2+1;
+ }
+
+ if (Width <= 32)
+ return (Idx32+1 < Num32) ? Phys32[Idx32+1] : 0;
+ return (Idx64+1 < Num64) ? Phys64[Idx64+1] : 0;
+}
+
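+// Informal example of the sequence produced above (assuming the argument
+// registers R0..R5 and D0..D2 listed in Phys32/Phys64):
+//   getNextPhysReg(0, 32)  -> R0
+//   getNextPhysReg(R0, 64) -> D1   (a 64-bit value after R0 skips R1)
+//   getNextPhysReg(D1, 32) -> R4
+//   getNextPhysReg(R5, 32) -> 0    (no parameter registers left)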
+
+unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const {
+ typedef MachineRegisterInfo::livein_iterator iterator;
+ for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) {
+ if (I->first == PReg)
+ return I->second;
+ }
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h
new file mode 100644
index 0000000..897af2d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h
@@ -0,0 +1,64 @@
+//===--- HexagonBitTracker.h ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONBITTRACKER_H
+#define HEXAGONBITTRACKER_H
+
+#include "BitTracker.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+ class HexagonInstrInfo;
+ class HexagonRegisterInfo;
+
+struct HexagonEvaluator : public BitTracker::MachineEvaluator {
+ typedef BitTracker::CellMapType CellMapType;
+ typedef BitTracker::RegisterRef RegisterRef;
+ typedef BitTracker::RegisterCell RegisterCell;
+ typedef BitTracker::BranchTargetList BranchTargetList;
+
+ HexagonEvaluator(const HexagonRegisterInfo &tri, MachineRegisterInfo &mri,
+ const HexagonInstrInfo &tii, MachineFunction &mf);
+
+ bool evaluate(const MachineInstr *MI, const CellMapType &Inputs,
+ CellMapType &Outputs) const override;
+ bool evaluate(const MachineInstr *BI, const CellMapType &Inputs,
+ BranchTargetList &Targets, bool &FallsThru) const override;
+
+ BitTracker::BitMask mask(unsigned Reg, unsigned Sub) const override;
+
+ MachineFunction &MF;
+ MachineFrameInfo &MFI;
+ const HexagonInstrInfo &TII;
+
+private:
+ bool evaluateLoad(const MachineInstr *MI, const CellMapType &Inputs,
+ CellMapType &Outputs) const;
+ bool evaluateFormalCopy(const MachineInstr *MI, const CellMapType &Inputs,
+ CellMapType &Outputs) const;
+
+ unsigned getNextPhysReg(unsigned PReg, unsigned Width) const;
+ unsigned getVirtRegFor(unsigned PReg) const;
+
+ // Type of formal parameter extension.
+ struct ExtType {
+ enum { SExt, ZExt };
+ char Type;
+ uint16_t Width;
+ ExtType() : Type(0), Width(0) {}
+ ExtType(char t, uint16_t w) : Type(t), Width(w) {}
+ };
+ // Map VR -> extension type.
+ typedef DenseMap<unsigned, ExtType> RegExtMap;
+ RegExtMap VRX;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
new file mode 100644
index 0000000..efafdd0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -0,0 +1,247 @@
+//===-- HexagonCFGOptimizer.cpp - CFG optimizations -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon_cfg"
+
+namespace llvm {
+ FunctionPass *createHexagonCFGOptimizer();
+ void initializeHexagonCFGOptimizerPass(PassRegistry&);
+}
+
+
+namespace {
+
+class HexagonCFGOptimizer : public MachineFunctionPass {
+
+private:
+ void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*);
+
+ public:
+ static char ID;
+ HexagonCFGOptimizer() : MachineFunctionPass(ID) {
+ initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ const char *getPassName() const override {
+ return "Hexagon CFG Optimizer";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+};
+
+
+char HexagonCFGOptimizer::ID = 0;
+
+static bool IsConditionalBranch(int Opc) {
+ return (Opc == Hexagon::J2_jumpt) || (Opc == Hexagon::J2_jumpf)
+ || (Opc == Hexagon::J2_jumptnewpt) || (Opc == Hexagon::J2_jumpfnewpt);
+}
+
+
+static bool IsUnconditionalJump(int Opc) {
+ return (Opc == Hexagon::J2_jump);
+}
+
+
+void
+HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI,
+ MachineBasicBlock* NewTarget) {
+ const TargetInstrInfo *TII =
+ MI->getParent()->getParent()->getSubtarget().getInstrInfo();
+ int NewOpcode = 0;
+ switch(MI->getOpcode()) {
+ case Hexagon::J2_jumpt:
+ NewOpcode = Hexagon::J2_jumpf;
+ break;
+
+ case Hexagon::J2_jumpf:
+ NewOpcode = Hexagon::J2_jumpt;
+ break;
+
+ case Hexagon::J2_jumptnewpt:
+ NewOpcode = Hexagon::J2_jumpfnewpt;
+ break;
+
+ case Hexagon::J2_jumpfnewpt:
+ NewOpcode = Hexagon::J2_jumptnewpt;
+ break;
+
+ default:
+ llvm_unreachable("Cannot handle this case");
+ }
+
+ MI->setDesc(TII->get(NewOpcode));
+ MI->getOperand(1).setMBB(NewTarget);
+}
+
+
+bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock *MBB = &*MBBb;
+
+ // Traverse the basic block.
+ MachineBasicBlock::iterator MII = MBB->getFirstTerminator();
+ if (MII != MBB->end()) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (IsConditionalBranch(Opc)) {
+
+ //
+ // (Case 1) Transform the code if the following condition occurs:
+ // BB1: if (p0) jump BB3
+ // ...falls-through to BB2 ...
+ // BB2: jump BB4
+ // ...next block in layout is BB3...
+ // BB3: ...
+ //
+ // Transform this to:
+ // BB1: if (!p0) jump BB4
+ // Remove BB2
+ // BB3: ...
+ //
+ // (Case 2) A variation occurs when BB3 contains a JMP to BB4:
+ // BB1: if (p0) jump BB3
+ // ...falls-through to BB2 ...
+ // BB2: jump BB4
+ // ...other basic blocks ...
+ // BB4:
+ // ...not a fall-thru
+ // BB3: ...
+ // jump BB4
+ //
+ // Transform this to:
+ // BB1: if (!p0) jump BB4
+ // Remove BB2
+ // BB3: ...
+ // BB4: ...
+ //
+ unsigned NumSuccs = MBB->succ_size();
+ MachineBasicBlock::succ_iterator SI = MBB->succ_begin();
+ MachineBasicBlock* FirstSucc = *SI;
+ MachineBasicBlock* SecondSucc = *(++SI);
+ MachineBasicBlock* LayoutSucc = nullptr;
+ MachineBasicBlock* JumpAroundTarget = nullptr;
+
+ if (MBB->isLayoutSuccessor(FirstSucc)) {
+ LayoutSucc = FirstSucc;
+ JumpAroundTarget = SecondSucc;
+ } else if (MBB->isLayoutSuccessor(SecondSucc)) {
+ LayoutSucc = SecondSucc;
+ JumpAroundTarget = FirstSucc;
+ } else {
+ // Odd case...cannot handle.
+ }
+
+          // The target of the conditional branch must be JumpAroundTarget.
+          // TODO: If not, we should not invert the conditional branch.
+ MachineBasicBlock* CondBranchTarget = nullptr;
+ if ((MI->getOpcode() == Hexagon::J2_jumpt) ||
+ (MI->getOpcode() == Hexagon::J2_jumpf)) {
+ CondBranchTarget = MI->getOperand(1).getMBB();
+ }
+
+ if (!LayoutSucc || (CondBranchTarget != JumpAroundTarget)) {
+ continue;
+ }
+
+ if ((NumSuccs == 2) && LayoutSucc && (LayoutSucc->pred_size() == 1)) {
+
+ // Ensure that BB2 has one instruction -- an unconditional jump.
+ if ((LayoutSucc->size() == 1) &&
+ IsUnconditionalJump(LayoutSucc->front().getOpcode())) {
+ MachineBasicBlock* UncondTarget =
+ LayoutSucc->front().getOperand(0).getMBB();
+ // Check if the layout successor of BB2 is BB3.
+ bool case1 = LayoutSucc->isLayoutSuccessor(JumpAroundTarget);
+ bool case2 = JumpAroundTarget->isSuccessor(UncondTarget) &&
+ JumpAroundTarget->size() >= 1 &&
+ IsUnconditionalJump(JumpAroundTarget->back().getOpcode()) &&
+ JumpAroundTarget->pred_size() == 1 &&
+ JumpAroundTarget->succ_size() == 1;
+
+ if (case1 || case2) {
+ InvertAndChangeJumpTarget(MI, UncondTarget);
+ MBB->replaceSuccessor(JumpAroundTarget, UncondTarget);
+
+ // Remove the unconditional branch in LayoutSucc.
+ LayoutSucc->erase(LayoutSucc->begin());
+ LayoutSucc->replaceSuccessor(UncondTarget, JumpAroundTarget);
+
+ // This code performs the conversion for case 2, which moves
+ // the block to the fall-thru case (BB3 in the code above).
+ if (case2 && !case1) {
+ JumpAroundTarget->moveAfter(LayoutSucc);
+              // Only move a block if it doesn't have a fall-thru; otherwise
+              // the CFG will be incorrect.
+ if (!UncondTarget->canFallThrough()) {
+ UncondTarget->moveAfter(JumpAroundTarget);
+ }
+ }
+
+ //
+ // Correct the live-in information, which is used by the post-RA
+ // scheduler. The live-ins to LayoutSucc are now all values that are
+ // live-in to JumpAroundTarget.
+ //
+ std::vector<MachineBasicBlock::RegisterMaskPair> OrigLiveIn(
+ LayoutSucc->livein_begin(), LayoutSucc->livein_end());
+ std::vector<MachineBasicBlock::RegisterMaskPair> NewLiveIn(
+ JumpAroundTarget->livein_begin(),
+ JumpAroundTarget->livein_end());
+ for (const auto &OrigLI : OrigLiveIn)
+ LayoutSucc->removeLiveIn(OrigLI.PhysReg);
+ for (const auto &NewLI : NewLiveIn)
+ LayoutSucc->addLiveIn(NewLI);
+ }
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+static void initializePassOnce(PassRegistry &Registry) {
+ PassInfo *PI = new PassInfo("Hexagon CFG Optimizer", "hexagon-cfg",
+ &HexagonCFGOptimizer::ID, nullptr, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonCFGOptimizerPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+FunctionPass *llvm::createHexagonCFGOptimizer() {
+ return new HexagonCFGOptimizer();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td
new file mode 100644
index 0000000..e61b2a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td
@@ -0,0 +1,35 @@
+//===- HexagonCallingConv.td - Calling Conventions Hexagon -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Hexagon architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Hexagon 32-bit C return-value convention.
+def RetCC_Hexagon32 : CallingConv<[
+ CCIfType<[i32, f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
+ CCIfType<[i64, f64], CCAssignToReg<[D0, D1, D2]>>,
+
+ // Alternatively, they are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
+
+// Hexagon 32-bit C Calling convention.
+def CC_Hexagon32 : CallingConv<[
+ // All arguments get passed in integer registers if there is space.
+ CCIfType<[f32, i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
+ CCIfType<[f64, i64], CCAssignToReg<[D0, D1, D2]>>,
+
+ // Alternatively, they are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
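+
+// A rough illustration (under these simple tables, ignoring any overlap
+// between R and D registers): the first i32 or f32 return value is assigned
+// to R0 and the first i64 or f64 return value to D0; arguments are drawn
+// from the corresponding lists in order, and once a list is exhausted the
+// remaining values are passed in 4-byte aligned stack slots.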
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
new file mode 100644
index 0000000..931db66
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -0,0 +1,1310 @@
+//===--- HexagonCommonGEP.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "commgep"
+
+#include "llvm/Pass.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<bool> OptSpeculate("commgep-speculate", cl::init(true),
+ cl::Hidden, cl::ZeroOrMore);
+
+static cl::opt<bool> OptEnableInv("commgep-inv", cl::init(true), cl::Hidden,
+ cl::ZeroOrMore);
+
+static cl::opt<bool> OptEnableConst("commgep-const", cl::init(true),
+ cl::Hidden, cl::ZeroOrMore);
+
+namespace llvm {
+ void initializeHexagonCommonGEPPass(PassRegistry&);
+}
+
+namespace {
+ struct GepNode;
+ typedef std::set<GepNode*> NodeSet;
+ typedef std::map<GepNode*,Value*> NodeToValueMap;
+ typedef std::vector<GepNode*> NodeVect;
+ typedef std::map<GepNode*,NodeVect> NodeChildrenMap;
+ typedef std::set<Use*> UseSet;
+ typedef std::map<GepNode*,UseSet> NodeToUsesMap;
+
+ // Numbering map for GEP nodes, used to keep track of their relative
+ // ordering.
+ struct NodeOrdering {
+ NodeOrdering() : LastNum(0) {}
+
+ void insert(const GepNode *N) { Map.insert(std::make_pair(N, ++LastNum)); }
+ void clear() { Map.clear(); }
+
+ bool operator()(const GepNode *N1, const GepNode *N2) const {
+ auto F1 = Map.find(N1), F2 = Map.find(N2);
+ assert(F1 != Map.end() && F2 != Map.end());
+ return F1->second < F2->second;
+ }
+
+ private:
+ std::map<const GepNode *, unsigned> Map;
+ unsigned LastNum;
+ };
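+
+ // NodeOrdering gives every node a monotonically increasing number at
+ // insertion time and compares nodes by those numbers, so it can serve as
+ // a strict-weak-ordering comparator (e.g. for std::min_element) whenever
+ // a deterministic "earliest created" node has to be picked.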
+
+ class HexagonCommonGEP : public FunctionPass {
+ public:
+ static char ID;
+ HexagonCommonGEP() : FunctionPass(ID) {
+ initializeHexagonCommonGEPPass(*PassRegistry::getPassRegistry());
+ }
+ virtual bool runOnFunction(Function &F);
+ virtual const char *getPassName() const {
+ return "Hexagon Common GEP";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTree>();
+ AU.addPreserved<PostDominatorTree>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ typedef std::map<Value*,GepNode*> ValueToNodeMap;
+ typedef std::vector<Value*> ValueVect;
+ typedef std::map<GepNode*,ValueVect> NodeToValuesMap;
+
+ void getBlockTraversalOrder(BasicBlock *Root, ValueVect &Order);
+ bool isHandledGepForm(GetElementPtrInst *GepI);
+ void processGepInst(GetElementPtrInst *GepI, ValueToNodeMap &NM);
+ void collect();
+ void common();
+
+ BasicBlock *recalculatePlacement(GepNode *Node, NodeChildrenMap &NCM,
+ NodeToValueMap &Loc);
+ BasicBlock *recalculatePlacementRec(GepNode *Node, NodeChildrenMap &NCM,
+ NodeToValueMap &Loc);
+ bool isInvariantIn(Value *Val, Loop *L);
+ bool isInvariantIn(GepNode *Node, Loop *L);
+ bool isInMainPath(BasicBlock *B, Loop *L);
+ BasicBlock *adjustForInvariance(GepNode *Node, NodeChildrenMap &NCM,
+ NodeToValueMap &Loc);
+ void separateChainForNode(GepNode *Node, Use *U, NodeToValueMap &Loc);
+ void separateConstantChains(GepNode *Node, NodeChildrenMap &NCM,
+ NodeToValueMap &Loc);
+ void computeNodePlacement(NodeToValueMap &Loc);
+
+ Value *fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
+ BasicBlock *LocB);
+ void getAllUsersForNode(GepNode *Node, ValueVect &Values,
+ NodeChildrenMap &NCM);
+ void materialize(NodeToValueMap &Loc);
+
+ void removeDeadCode();
+
+ NodeVect Nodes;
+ NodeToUsesMap Uses;
+ NodeOrdering NodeOrder; // Node ordering, for deterministic behavior.
+ SpecificBumpPtrAllocator<GepNode> *Mem;
+ LLVMContext *Ctx;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ PostDominatorTree *PDT;
+ Function *Fn;
+ };
+}
+
+
+char HexagonCommonGEP::ID = 0;
+INITIALIZE_PASS_BEGIN(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP",
+ false, false)
+
+namespace {
+ struct GepNode {
+ enum {
+ None = 0,
+ Root = 0x01,
+ Internal = 0x02,
+ Used = 0x04
+ };
+
+ uint32_t Flags;
+ union {
+ GepNode *Parent;
+ Value *BaseVal;
+ };
+ Value *Idx;
+ Type *PTy; // Type of the pointer operand.
+
+ GepNode() : Flags(0), Parent(0), Idx(0), PTy(0) {}
+ GepNode(const GepNode *N) : Flags(N->Flags), Idx(N->Idx), PTy(N->PTy) {
+ if (Flags & Root)
+ BaseVal = N->BaseVal;
+ else
+ Parent = N->Parent;
+ }
+ friend raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN);
+ };
+
+
+ Type *next_type(Type *Ty, Value *Idx) {
+ // Advance the type.
+ if (!Ty->isStructTy()) {
+ Type *NexTy = cast<SequentialType>(Ty)->getElementType();
+ return NexTy;
+ }
+ // Otherwise it is a struct type.
+ ConstantInt *CI = dyn_cast<ConstantInt>(Idx);
+ assert(CI && "Struct type with non-constant index");
+ int64_t i = CI->getValue().getSExtValue();
+ Type *NextTy = cast<StructType>(Ty)->getElementType(i);
+ return NextTy;
+ }
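+
+ // For example, with a (hypothetical) type %struct.S = type { i32, [4 x i16] },
+ // next_type(%struct.S, 1) yields [4 x i16], and applying next_type to that
+ // array type with any index yields i16.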
+
+
+ raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN) {
+ OS << "{ {";
+ bool Comma = false;
+ if (GN.Flags & GepNode::Root) {
+ OS << "root";
+ Comma = true;
+ }
+ if (GN.Flags & GepNode::Internal) {
+ if (Comma)
+ OS << ',';
+ OS << "internal";
+ Comma = true;
+ }
+ if (GN.Flags & GepNode::Used) {
+ if (Comma)
+ OS << ',';
+ OS << "used";
+ Comma = true;
+ }
+ OS << "} ";
+ if (GN.Flags & GepNode::Root)
+ OS << "BaseVal:" << GN.BaseVal->getName() << '(' << GN.BaseVal << ')';
+ else
+ OS << "Parent:" << GN.Parent;
+
+ OS << " Idx:";
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GN.Idx))
+ OS << CI->getValue().getSExtValue();
+ else if (GN.Idx->hasName())
+ OS << GN.Idx->getName();
+ else
+ OS << "<anon> =" << *GN.Idx;
+
+ OS << " PTy:";
+ if (GN.PTy->isStructTy()) {
+ StructType *STy = cast<StructType>(GN.PTy);
+ if (!STy->isLiteral())
+ OS << GN.PTy->getStructName();
+ else
+ OS << "<anon-struct>:" << *STy;
+ }
+ else
+ OS << *GN.PTy;
+ OS << " }";
+ return OS;
+ }
+
+
+ template <typename NodeContainer>
+ void dump_node_container(raw_ostream &OS, const NodeContainer &S) {
+ typedef typename NodeContainer::const_iterator const_iterator;
+ for (const_iterator I = S.begin(), E = S.end(); I != E; ++I)
+ OS << *I << ' ' << **I << '\n';
+ }
+
+ raw_ostream &operator<< (raw_ostream &OS,
+ const NodeVect &S) LLVM_ATTRIBUTE_UNUSED;
+ raw_ostream &operator<< (raw_ostream &OS, const NodeVect &S) {
+ dump_node_container(OS, S);
+ return OS;
+ }
+
+
+ raw_ostream &operator<< (raw_ostream &OS,
+ const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED;
+ raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){
+ typedef NodeToUsesMap::const_iterator const_iterator;
+ for (const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ const UseSet &Us = I->second;
+ OS << I->first << " -> #" << Us.size() << '{';
+ for (UseSet::const_iterator J = Us.begin(), F = Us.end(); J != F; ++J) {
+ User *R = (*J)->getUser();
+ if (R->hasName())
+ OS << ' ' << R->getName();
+ else
+ OS << " <?>(" << *R << ')';
+ }
+ OS << " }\n";
+ }
+ return OS;
+ }
+
+
+ struct in_set {
+ in_set(const NodeSet &S) : NS(S) {}
+ bool operator() (GepNode *N) const {
+ return NS.find(N) != NS.end();
+ }
+ private:
+ const NodeSet &NS;
+ };
+}
+
+
+inline void *operator new(size_t, SpecificBumpPtrAllocator<GepNode> &A) {
+ return A.Allocate();
+}
+
+
+void HexagonCommonGEP::getBlockTraversalOrder(BasicBlock *Root,
+ ValueVect &Order) {
+ // Compute block ordering for a typical DT-based traversal of the flow
+ // graph: "before visiting a block, all of its dominators must have been
+ // visited".
+
+ Order.push_back(Root);
+ DomTreeNode *DTN = DT->getNode(Root);
+ typedef GraphTraits<DomTreeNode*> GTN;
+ typedef GTN::ChildIteratorType Iter;
+ for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I)
+ getBlockTraversalOrder((*I)->getBlock(), Order);
+}
+
+
+bool HexagonCommonGEP::isHandledGepForm(GetElementPtrInst *GepI) {
+ // No vector GEPs.
+ if (!GepI->getType()->isPointerTy())
+ return false;
+ // No GEPs without any indices. (Is this possible?)
+ if (GepI->idx_begin() == GepI->idx_end())
+ return false;
+ return true;
+}
+
+
+void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
+ ValueToNodeMap &NM) {
+ DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n');
+ GepNode *N = new (*Mem) GepNode;
+ Value *PtrOp = GepI->getPointerOperand();
+ ValueToNodeMap::iterator F = NM.find(PtrOp);
+ if (F == NM.end()) {
+ N->BaseVal = PtrOp;
+ N->Flags |= GepNode::Root;
+ } else {
+ // If PtrOp was a GEP instruction, it must have already been processed.
+ // The ValueToNodeMap entry for it is the last gep node in the generated
+ // chain. Link to it here.
+ N->Parent = F->second;
+ }
+ N->PTy = PtrOp->getType();
+ N->Idx = *GepI->idx_begin();
+
+ // Collect the list of users of this GEP instruction. Will add it to the
+ // last node created for it.
+ UseSet Us;
+ for (Value::user_iterator UI = GepI->user_begin(), UE = GepI->user_end();
+ UI != UE; ++UI) {
+ // Check if this gep is used by anything other than other geps that
+ // we will process.
+ if (isa<GetElementPtrInst>(*UI)) {
+ GetElementPtrInst *UserG = cast<GetElementPtrInst>(*UI);
+ if (isHandledGepForm(UserG))
+ continue;
+ }
+ Us.insert(&UI.getUse());
+ }
+ Nodes.push_back(N);
+ NodeOrder.insert(N);
+
+ // Skip the first index operand, since we only handle 0. This dereferences
+ // the pointer operand.
+ GepNode *PN = N;
+ Type *PtrTy = cast<PointerType>(PtrOp->getType())->getElementType();
+ for (User::op_iterator OI = GepI->idx_begin()+1, OE = GepI->idx_end();
+ OI != OE; ++OI) {
+ Value *Op = *OI;
+ GepNode *Nx = new (*Mem) GepNode;
+ Nx->Parent = PN; // Link Nx to the previous node.
+ Nx->Flags |= GepNode::Internal;
+ Nx->PTy = PtrTy;
+ Nx->Idx = Op;
+ Nodes.push_back(Nx);
+ NodeOrder.insert(Nx);
+ PN = Nx;
+
+ PtrTy = next_type(PtrTy, Op);
+ }
+
+ // After last node has been created, update the use information.
+ if (!Us.empty()) {
+ PN->Flags |= GepNode::Used;
+ Uses[PN].insert(Us.begin(), Us.end());
+ }
+
+ // Link the last node with the originating GEP instruction. This is to
+ // help with linking chained GEP instructions.
+ NM.insert(std::make_pair(GepI, PN));
+}
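+
+// As a rough sketch of the chain built above: for a (hypothetical) GEP
+//   %a = getelementptr %struct.S, %struct.S* %p, i32 0, i32 1, i32 %i
+// a Root node {BaseVal: %p, Idx: 0} is created first, followed by Internal
+// nodes for the indices 1 and %i; if %a has non-GEP users, the last node is
+// marked Used and records those uses.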
+
+
+void HexagonCommonGEP::collect() {
+ // Establish depth-first traversal order of the dominator tree.
+ ValueVect BO;
+ getBlockTraversalOrder(&Fn->front(), BO);
+
+ // The creation of gep nodes requires DT-traversal. When processing a GEP
+ // instruction that uses another GEP instruction as the base pointer, the
+ // gep node for the base pointer should already exist.
+ ValueToNodeMap NM;
+ for (ValueVect::iterator I = BO.begin(), E = BO.end(); I != E; ++I) {
+ BasicBlock *B = cast<BasicBlock>(*I);
+ for (BasicBlock::iterator J = B->begin(), F = B->end(); J != F; ++J) {
+ if (!isa<GetElementPtrInst>(J))
+ continue;
+ GetElementPtrInst *GepI = cast<GetElementPtrInst>(J);
+ if (isHandledGepForm(GepI))
+ processGepInst(GepI, NM);
+ }
+ }
+
+ DEBUG(dbgs() << "Gep nodes after initial collection:\n" << Nodes);
+}
+
+
+namespace {
+ void invert_find_roots(const NodeVect &Nodes, NodeChildrenMap &NCM,
+ NodeVect &Roots) {
+ typedef NodeVect::const_iterator const_iterator;
+ for (const_iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+ GepNode *N = *I;
+ if (N->Flags & GepNode::Root) {
+ Roots.push_back(N);
+ continue;
+ }
+ GepNode *PN = N->Parent;
+ NCM[PN].push_back(N);
+ }
+ }
+
+ void nodes_for_root(GepNode *Root, NodeChildrenMap &NCM, NodeSet &Nodes) {
+ NodeVect Work;
+ Work.push_back(Root);
+ Nodes.insert(Root);
+
+ while (!Work.empty()) {
+ NodeVect::iterator First = Work.begin();
+ GepNode *N = *First;
+ Work.erase(First);
+ NodeChildrenMap::iterator CF = NCM.find(N);
+ if (CF != NCM.end()) {
+ Work.insert(Work.end(), CF->second.begin(), CF->second.end());
+ Nodes.insert(CF->second.begin(), CF->second.end());
+ }
+ }
+ }
+}
+
+
+namespace {
+ typedef std::set<NodeSet> NodeSymRel;
+ typedef std::pair<GepNode*,GepNode*> NodePair;
+ typedef std::set<NodePair> NodePairSet;
+
+ const NodeSet *node_class(GepNode *N, NodeSymRel &Rel) {
+ for (NodeSymRel::iterator I = Rel.begin(), E = Rel.end(); I != E; ++I)
+ if (I->count(N))
+ return &*I;
+ return 0;
+ }
+
+ // Create an ordered pair of GepNode pointers. The pair will be used in
+ // determining equality. The only purpose of the ordering is to eliminate
+ // duplication due to the commutativity of equality/non-equality.
+ NodePair node_pair(GepNode *N1, GepNode *N2) {
+ uintptr_t P1 = uintptr_t(N1), P2 = uintptr_t(N2);
+ if (P1 <= P2)
+ return std::make_pair(N1, N2);
+ return std::make_pair(N2, N1);
+ }
+
+ unsigned node_hash(GepNode *N) {
+ // Include everything except flags and parent.
+ FoldingSetNodeID ID;
+ ID.AddPointer(N->Idx);
+ ID.AddPointer(N->PTy);
+ return ID.ComputeHash();
+ }
+
+ bool node_eq(GepNode *N1, GepNode *N2, NodePairSet &Eq, NodePairSet &Ne) {
+ // Don't cache the result for nodes with different hashes. The hash
+ // comparison is fast enough.
+ if (node_hash(N1) != node_hash(N2))
+ return false;
+
+ NodePair NP = node_pair(N1, N2);
+ NodePairSet::iterator FEq = Eq.find(NP);
+ if (FEq != Eq.end())
+ return true;
+ NodePairSet::iterator FNe = Ne.find(NP);
+ if (FNe != Ne.end())
+ return false;
+ // Not previously compared.
+ bool Root1 = N1->Flags & GepNode::Root;
+ bool Root2 = N2->Flags & GepNode::Root;
+ NodePair P = node_pair(N1, N2);
+ // If the Root flag has different values, the nodes are different.
+ // If both nodes are root nodes, but their base pointers differ,
+ // they are different.
+ if (Root1 != Root2 || (Root1 && N1->BaseVal != N2->BaseVal)) {
+ Ne.insert(P);
+ return false;
+ }
+ // Here the root flags are identical, and for root nodes the
+ // base pointers are equal, so the root nodes are equal.
+ // For non-root nodes, compare their parent nodes.
+ if (Root1 || node_eq(N1->Parent, N2->Parent, Eq, Ne)) {
+ Eq.insert(P);
+ return true;
+ }
+ return false;
+ }
+}
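+
+// A minimal illustration of the relation being computed: two node chains
+// built for
+//   %x = getelementptr i32, i32* %p, i32 %i
+//   %y = getelementptr i32, i32* %p, i32 %i
+// share the same Idx and PTy (so node_hash agrees) and the same BaseVal (so
+// node_eq agrees), hence they land in one equivalence class and are later
+// merged by common().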
+
+
+void HexagonCommonGEP::common() {
+ // The essence of this commoning is finding gep nodes that are equal.
+ // To do this we need to compare all pairs of nodes. To save time,
+ // first, partition the set of all nodes into sets of potentially equal
+ // nodes, and then compare pairs from within each partition.
+ typedef std::map<unsigned,NodeSet> NodeSetMap;
+ NodeSetMap MaybeEq;
+
+ for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+ GepNode *N = *I;
+ unsigned H = node_hash(N);
+ MaybeEq[H].insert(N);
+ }
+
+ // Compute the equivalence relation for the gep nodes. Use two caches,
+ // one for equality and the other for non-equality.
+ NodeSymRel EqRel; // Equality relation (as set of equivalence classes).
+ NodePairSet Eq, Ne; // Caches.
+ for (NodeSetMap::iterator I = MaybeEq.begin(), E = MaybeEq.end();
+ I != E; ++I) {
+ NodeSet &S = I->second;
+ for (NodeSet::iterator NI = S.begin(), NE = S.end(); NI != NE; ++NI) {
+ GepNode *N = *NI;
+ // If node already has a class, then the class must have been created
+ // in a prior iteration of this loop. Since equality is transitive,
+ // nothing more will be added to that class, so skip it.
+ if (node_class(N, EqRel))
+ continue;
+
+ // Create a new class candidate now.
+ NodeSet C;
+ for (NodeSet::iterator NJ = std::next(NI); NJ != NE; ++NJ)
+ if (node_eq(N, *NJ, Eq, Ne))
+ C.insert(*NJ);
+ // If C is empty, N would be the only element in it. Don't bother
+ // creating a class for it then.
+ if (!C.empty()) {
+ C.insert(N); // Finalize the set before adding it to the relation.
+ std::pair<NodeSymRel::iterator, bool> Ins = EqRel.insert(C);
+ (void)Ins;
+ assert(Ins.second && "Cannot add a class");
+ }
+ }
+ }
+
+ DEBUG({
+ dbgs() << "Gep node equality:\n";
+ for (NodePairSet::iterator I = Eq.begin(), E = Eq.end(); I != E; ++I)
+ dbgs() << "{ " << I->first << ", " << I->second << " }\n";
+
+ dbgs() << "Gep equivalence classes:\n";
+ for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) {
+ dbgs() << '{';
+ const NodeSet &S = *I;
+ for (NodeSet::const_iterator J = S.begin(), F = S.end(); J != F; ++J) {
+ if (J != S.begin())
+ dbgs() << ',';
+ dbgs() << ' ' << *J;
+ }
+ dbgs() << " }\n";
+ }
+ });
+
+
+ // Create a projection from a NodeSet to the minimal element in it.
+ typedef std::map<const NodeSet*,GepNode*> ProjMap;
+ ProjMap PM;
+ for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) {
+ const NodeSet &S = *I;
+ GepNode *Min = *std::min_element(S.begin(), S.end(), NodeOrder);
+ std::pair<ProjMap::iterator,bool> Ins = PM.insert(std::make_pair(&S, Min));
+ (void)Ins;
+ assert(Ins.second && "Cannot add minimal element");
+
+ // Update the min element's flags, and user list.
+ uint32_t Flags = 0;
+ UseSet &MinUs = Uses[Min];
+ for (NodeSet::iterator J = S.begin(), F = S.end(); J != F; ++J) {
+ GepNode *N = *J;
+ uint32_t NF = N->Flags;
+ // If N is used, append all original values of N to the list of
+ // original values of Min.
+ if (NF & GepNode::Used)
+ MinUs.insert(Uses[N].begin(), Uses[N].end());
+ Flags |= NF;
+ }
+ if (MinUs.empty())
+ Uses.erase(Min);
+
+ // The collected flags should include all the flags from the min element.
+ assert((Min->Flags & Flags) == Min->Flags);
+ Min->Flags = Flags;
+ }
+
+ // Commoning: for each non-root gep node, replace "Parent" with the
+ // selected (minimum) node from the corresponding equivalence class.
+ // If a given parent does not have an equivalence class, leave it
+ // unchanged (it means that it's the only element in its class).
+ for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+ GepNode *N = *I;
+ if (N->Flags & GepNode::Root)
+ continue;
+ const NodeSet *PC = node_class(N->Parent, EqRel);
+ if (!PC)
+ continue;
+ ProjMap::iterator F = PM.find(PC);
+ if (F == PM.end())
+ continue;
+ // Found a replacement, use it.
+ GepNode *Rep = F->second;
+ N->Parent = Rep;
+ }
+
+ DEBUG(dbgs() << "Gep nodes after commoning:\n" << Nodes);
+
+ // Finally, erase the nodes that are no longer used.
+ NodeSet Erase;
+ for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+ GepNode *N = *I;
+ const NodeSet *PC = node_class(N, EqRel);
+ if (!PC)
+ continue;
+ ProjMap::iterator F = PM.find(PC);
+ if (F == PM.end())
+ continue;
+ if (N == F->second)
+ continue;
+ // Node for removal.
+ Erase.insert(*I);
+ }
+ NodeVect::iterator NewE = std::remove_if(Nodes.begin(), Nodes.end(),
+ in_set(Erase));
+ Nodes.resize(std::distance(Nodes.begin(), NewE));
+
+ DEBUG(dbgs() << "Gep nodes after post-commoning cleanup:\n" << Nodes);
+}
+
+
+namespace {
+ template <typename T>
+ BasicBlock *nearest_common_dominator(DominatorTree *DT, T &Blocks) {
+ DEBUG({
+ dbgs() << "NCD of {";
+ for (typename T::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I) {
+ if (!*I)
+ continue;
+ BasicBlock *B = cast<BasicBlock>(*I);
+ dbgs() << ' ' << B->getName();
+ }
+ dbgs() << " }\n";
+ });
+
+ // Allow null basic blocks in Blocks. In such cases, return 0.
+ typename T::iterator I = Blocks.begin(), E = Blocks.end();
+ if (I == E || !*I)
+ return 0;
+ BasicBlock *Dom = cast<BasicBlock>(*I);
+ while (++I != E) {
+ BasicBlock *B = cast_or_null<BasicBlock>(*I);
+ Dom = B ? DT->findNearestCommonDominator(Dom, B) : 0;
+ if (!Dom)
+ return 0;
+ }
+ DEBUG(dbgs() << "computed:" << Dom->getName() << '\n');
+ return Dom;
+ }
+
+ template <typename T>
+ BasicBlock *nearest_common_dominatee(DominatorTree *DT, T &Blocks) {
+ // If two blocks, A and B, dominate a block C, then A dominates B,
+ // or B dominates A.
+ typename T::iterator I = Blocks.begin(), E = Blocks.end();
+ // Find the first non-null block.
+ while (I != E && !*I)
+ ++I;
+ if (I == E)
+ return DT->getRoot();
+ BasicBlock *DomB = cast<BasicBlock>(*I);
+ while (++I != E) {
+ if (!*I)
+ continue;
+ BasicBlock *B = cast<BasicBlock>(*I);
+ if (DT->dominates(B, DomB))
+ continue;
+ if (!DT->dominates(DomB, B))
+ return 0;
+ DomB = B;
+ }
+ return DomB;
+ }
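+
+ // In other words, this returns the lowest block on a single dominance
+ // chain covering all the non-null blocks, or 0 if the blocks are not
+ // totally ordered by dominance.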
+
+ // Find the first use in B of any value from Values. If no such use,
+ // return B->end().
+ template <typename T>
+ BasicBlock::iterator first_use_of_in_block(T &Values, BasicBlock *B) {
+ BasicBlock::iterator FirstUse = B->end(), BEnd = B->end();
+ typedef typename T::iterator iterator;
+ for (iterator I = Values.begin(), E = Values.end(); I != E; ++I) {
+ Value *V = *I;
+ // If V is used in a PHI node, the use belongs to the incoming block,
+ // not the block with the PHI node. In the incoming block, the use
+ // would be considered as being at the end of it, so it cannot
+ // influence the position of the first use (which is assumed to be
+ // at the end to start with).
+ if (isa<PHINode>(V))
+ continue;
+ if (!isa<Instruction>(V))
+ continue;
+ Instruction *In = cast<Instruction>(V);
+ if (In->getParent() != B)
+ continue;
+ BasicBlock::iterator It = In->getIterator();
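+ // A greater distance to the end of the block means an earlier position,
+ // so this keeps the earliest qualifying instruction seen so far.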
+ if (std::distance(FirstUse, BEnd) < std::distance(It, BEnd))
+ FirstUse = It;
+ }
+ return FirstUse;
+ }
+
+ bool is_empty(const BasicBlock *B) {
+ return B->empty() || (&*B->begin() == B->getTerminator());
+ }
+}
+
+
+BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node,
+ NodeChildrenMap &NCM, NodeToValueMap &Loc) {
+ DEBUG(dbgs() << "Loc for node:" << Node << '\n');
+ // Recalculate the placement for Node, assuming that the locations of
+ // its children in Loc are valid.
+ // Return 0 if there is no valid placement for Node (for example, it
+ // uses an index value that is not available at the location required
+ // to dominate all children, etc.).
+
+ // Find the nearest common dominator for:
+ // - all users, if the node is used, and
+ // - all children.
+ ValueVect Bs;
+ if (Node->Flags & GepNode::Used) {
+ // Append all blocks with uses of the original values to the
+ // block vector Bs.
+ NodeToUsesMap::iterator UF = Uses.find(Node);
+ assert(UF != Uses.end() && "Used node with no use information");
+ UseSet &Us = UF->second;
+ for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) {
+ Use *U = *I;
+ User *R = U->getUser();
+ if (!isa<Instruction>(R))
+ continue;
+ BasicBlock *PB = isa<PHINode>(R)
+ ? cast<PHINode>(R)->getIncomingBlock(*U)
+ : cast<Instruction>(R)->getParent();
+ Bs.push_back(PB);
+ }
+ }
+ // Append the location of each child.
+ NodeChildrenMap::iterator CF = NCM.find(Node);
+ if (CF != NCM.end()) {
+ NodeVect &Cs = CF->second;
+ for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) {
+ GepNode *CN = *I;
+ NodeToValueMap::iterator LF = Loc.find(CN);
+ // If the child is only used in GEP instructions (i.e. is not used in
+ // non-GEP instructions), the nearest dominator computed for it may
+ // have been null. In such a case it won't have a location available.
+ if (LF == Loc.end())
+ continue;
+ Bs.push_back(LF->second);
+ }
+ }
+
+ BasicBlock *DomB = nearest_common_dominator(DT, Bs);
+ if (!DomB)
+ return 0;
+ // Check if the index used by Node dominates the computed dominator.
+ Instruction *IdxI = dyn_cast<Instruction>(Node->Idx);
+ if (IdxI && !DT->dominates(IdxI->getParent(), DomB))
+ return 0;
+
+ // Avoid putting nodes into empty blocks.
+ while (is_empty(DomB)) {
+ DomTreeNode *N = (*DT)[DomB]->getIDom();
+ if (!N)
+ break;
+ DomB = N->getBlock();
+ }
+
+ // Otherwise, DomB is fine. Update the location map.
+ Loc[Node] = DomB;
+ return DomB;
+}
+
+
+BasicBlock *HexagonCommonGEP::recalculatePlacementRec(GepNode *Node,
+ NodeChildrenMap &NCM, NodeToValueMap &Loc) {
+ DEBUG(dbgs() << "LocRec begin for node:" << Node << '\n');
+ // Recalculate the placement of Node, after recursively recalculating the
+ // placements of all its children.
+ NodeChildrenMap::iterator CF = NCM.find(Node);
+ if (CF != NCM.end()) {
+ NodeVect &Cs = CF->second;
+ for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I)
+ recalculatePlacementRec(*I, NCM, Loc);
+ }
+ BasicBlock *LB = recalculatePlacement(Node, NCM, Loc);
+ DEBUG(dbgs() << "LocRec end for node:" << Node << '\n');
+ return LB;
+}
+
+
+bool HexagonCommonGEP::isInvariantIn(Value *Val, Loop *L) {
+ if (isa<Constant>(Val) || isa<Argument>(Val))
+ return true;
+ Instruction *In = dyn_cast<Instruction>(Val);
+ if (!In)
+ return false;
+ BasicBlock *HdrB = L->getHeader(), *DefB = In->getParent();
+ return DT->properlyDominates(DefB, HdrB);
+}
+
+
+bool HexagonCommonGEP::isInvariantIn(GepNode *Node, Loop *L) {
+ if (Node->Flags & GepNode::Root)
+ if (!isInvariantIn(Node->BaseVal, L))
+ return false;
+ return isInvariantIn(Node->Idx, L);
+}
+
+
+bool HexagonCommonGEP::isInMainPath(BasicBlock *B, Loop *L) {
+ BasicBlock *HB = L->getHeader();
+ BasicBlock *LB = L->getLoopLatch();
+ // B must post-dominate the loop header or dominate the loop latch.
+ if (PDT->dominates(B, HB))
+ return true;
+ if (LB && DT->dominates(B, LB))
+ return true;
+ return false;
+}
+
+
+namespace {
+ BasicBlock *preheader(DominatorTree *DT, Loop *L) {
+ if (BasicBlock *PH = L->getLoopPreheader())
+ return PH;
+ if (!OptSpeculate)
+ return 0;
+ DomTreeNode *DN = DT->getNode(L->getHeader());
+ if (!DN)
+ return 0;
+ return DN->getIDom()->getBlock();
+ }
+}
+
+
+BasicBlock *HexagonCommonGEP::adjustForInvariance(GepNode *Node,
+ NodeChildrenMap &NCM, NodeToValueMap &Loc) {
+ // Find the "topmost" location for Node: it must be dominated by both,
+ // its parent (or the BaseVal, if it's a root node), and by the index
+ // value.
+ ValueVect Bs;
+ if (Node->Flags & GepNode::Root) {
+ if (Instruction *PIn = dyn_cast<Instruction>(Node->BaseVal))
+ Bs.push_back(PIn->getParent());
+ } else {
+ Bs.push_back(Loc[Node->Parent]);
+ }
+ if (Instruction *IIn = dyn_cast<Instruction>(Node->Idx))
+ Bs.push_back(IIn->getParent());
+ BasicBlock *TopB = nearest_common_dominatee(DT, Bs);
+
+ // Traverse the loop nest upwards until we find a loop in which Node
+ // is no longer invariant, or until we get to the upper limit of Node's
+ // placement. The traversal will also stop when a suitable "preheader"
+ // cannot be found for a given loop. The "preheader" may actually be
+ // a regular block outside of the loop (i.e. not guarded), in which case
+ // the Node will be speculated.
+ // For nodes that are not in the main path of the containing loop (i.e.
+ // are not executed in each iteration), do not move them out of the loop.
+ BasicBlock *LocB = cast_or_null<BasicBlock>(Loc[Node]);
+ if (LocB) {
+ Loop *Lp = LI->getLoopFor(LocB);
+ while (Lp) {
+ if (!isInvariantIn(Node, Lp) || !isInMainPath(LocB, Lp))
+ break;
+ BasicBlock *NewLoc = preheader(DT, Lp);
+ if (!NewLoc || !DT->dominates(TopB, NewLoc))
+ break;
+ Lp = Lp->getParentLoop();
+ LocB = NewLoc;
+ }
+ }
+ Loc[Node] = LocB;
+
+ // Recursively compute the locations of all children nodes.
+ NodeChildrenMap::iterator CF = NCM.find(Node);
+ if (CF != NCM.end()) {
+ NodeVect &Cs = CF->second;
+ for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I)
+ adjustForInvariance(*I, NCM, Loc);
+ }
+ return LocB;
+}
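+
+// Sketch of the effect: if a node computes an address inside a loop but both
+// its base and index are defined before the loop, its location is hoisted to
+// the loop preheader (or, when -commgep-speculate is in effect and there is
+// no preheader, to the immediate dominator of the header), as long as the
+// new block is still dominated by the node's upper placement limit.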
+
+
+namespace {
+ struct LocationAsBlock {
+ LocationAsBlock(const NodeToValueMap &L) : Map(L) {}
+ const NodeToValueMap &Map;
+ };
+
+ raw_ostream &operator<< (raw_ostream &OS,
+ const LocationAsBlock &Loc) LLVM_ATTRIBUTE_UNUSED ;
+ raw_ostream &operator<< (raw_ostream &OS, const LocationAsBlock &Loc) {
+ for (NodeToValueMap::const_iterator I = Loc.Map.begin(), E = Loc.Map.end();
+ I != E; ++I) {
+ OS << I->first << " -> ";
+ BasicBlock *B = cast<BasicBlock>(I->second);
+ OS << B->getName() << '(' << B << ')';
+ OS << '\n';
+ }
+ return OS;
+ }
+
+ inline bool is_constant(GepNode *N) {
+ return isa<ConstantInt>(N->Idx);
+ }
+}
+
+
+void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U,
+ NodeToValueMap &Loc) {
+ User *R = U->getUser();
+ DEBUG(dbgs() << "Separating chain for node (" << Node << ") user: "
+ << *R << '\n');
+ BasicBlock *PB = cast<Instruction>(R)->getParent();
+
+ GepNode *N = Node;
+ GepNode *C = 0, *NewNode = 0;
+ while (is_constant(N) && !(N->Flags & GepNode::Root)) {
+ // XXX if (single-use) dont-replicate;
+ GepNode *NewN = new (*Mem) GepNode(N);
+ Nodes.push_back(NewN);
+ Loc[NewN] = PB;
+
+ if (N == Node)
+ NewNode = NewN;
+ NewN->Flags &= ~GepNode::Used;
+ if (C)
+ C->Parent = NewN;
+ C = NewN;
+ N = N->Parent;
+ }
+ if (!NewNode)
+ return;
+
+ // Move over all uses that share the same user as U from Node to NewNode.
+ NodeToUsesMap::iterator UF = Uses.find(Node);
+ assert(UF != Uses.end());
+ UseSet &Us = UF->second;
+ UseSet NewUs;
+ for (UseSet::iterator I = Us.begin(); I != Us.end(); ) {
+ User *S = (*I)->getUser();
+ UseSet::iterator Nx = std::next(I);
+ if (S == R) {
+ NewUs.insert(*I);
+ Us.erase(I);
+ }
+ I = Nx;
+ }
+ if (Us.empty()) {
+ Node->Flags &= ~GepNode::Used;
+ Uses.erase(UF);
+ }
+
+ // Should at least have U in NewUs.
+ NewNode->Flags |= GepNode::Used;
+ DEBUG(dbgs() << "new node: " << NewNode << " " << *NewNode << '\n');
+ assert(!NewUs.empty());
+ Uses[NewNode] = NewUs;
+}
+
+
+void HexagonCommonGEP::separateConstantChains(GepNode *Node,
+ NodeChildrenMap &NCM, NodeToValueMap &Loc) {
+ // First approximation: extract all chains.
+ NodeSet Ns;
+ nodes_for_root(Node, NCM, Ns);
+
+ DEBUG(dbgs() << "Separating constant chains for node: " << Node << '\n');
+ // Collect all used nodes together with the uses from loads and stores,
+ // where the GEP node could be folded into the load/store instruction.
+ NodeToUsesMap FNs; // Foldable nodes.
+ for (NodeSet::iterator I = Ns.begin(), E = Ns.end(); I != E; ++I) {
+ GepNode *N = *I;
+ if (!(N->Flags & GepNode::Used))
+ continue;
+ NodeToUsesMap::iterator UF = Uses.find(N);
+ assert(UF != Uses.end());
+ UseSet &Us = UF->second;
+ // Loads/stores that use the node N.
+ UseSet LSs;
+ for (UseSet::iterator J = Us.begin(), F = Us.end(); J != F; ++J) {
+ Use *U = *J;
+ User *R = U->getUser();
+ // We're interested in uses that provide the address. It can happen
+ // that the value may also be provided via GEP, but we won't handle
+ // those cases here for now.
+ if (LoadInst *Ld = dyn_cast<LoadInst>(R)) {
+ unsigned PtrX = LoadInst::getPointerOperandIndex();
+ if (&Ld->getOperandUse(PtrX) == U)
+ LSs.insert(U);
+ } else if (StoreInst *St = dyn_cast<StoreInst>(R)) {
+ unsigned PtrX = StoreInst::getPointerOperandIndex();
+ if (&St->getOperandUse(PtrX) == U)
+ LSs.insert(U);
+ }
+ }
+ // Even if the total use count is 1, separating the chain may still be
+ // beneficial, since the constant chain may be longer than the GEP alone
+ // would be (e.g. if the parent node has a constant index and also has
+ // other children).
+ if (!LSs.empty())
+ FNs.insert(std::make_pair(N, LSs));
+ }
+
+ DEBUG(dbgs() << "Nodes with foldable users:\n" << FNs);
+
+ for (NodeToUsesMap::iterator I = FNs.begin(), E = FNs.end(); I != E; ++I) {
+ GepNode *N = I->first;
+ UseSet &Us = I->second;
+ for (UseSet::iterator J = Us.begin(), F = Us.end(); J != F; ++J)
+ separateChainForNode(N, *J, Loc);
+ }
+}
+
+
+void HexagonCommonGEP::computeNodePlacement(NodeToValueMap &Loc) {
+ // Compute the inverse of the Node.Parent links. Also, collect the set
+ // of root nodes.
+ NodeChildrenMap NCM;
+ NodeVect Roots;
+ invert_find_roots(Nodes, NCM, Roots);
+
+ // Compute the initial placement determined by the users' locations, and
+ // the locations of the child nodes.
+ for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I)
+ recalculatePlacementRec(*I, NCM, Loc);
+
+ DEBUG(dbgs() << "Initial node placement:\n" << LocationAsBlock(Loc));
+
+ if (OptEnableInv) {
+ for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I)
+ adjustForInvariance(*I, NCM, Loc);
+
+ DEBUG(dbgs() << "Node placement after adjustment for invariance:\n"
+ << LocationAsBlock(Loc));
+ }
+ if (OptEnableConst) {
+ for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I)
+ separateConstantChains(*I, NCM, Loc);
+ }
+ DEBUG(dbgs() << "Node use information:\n" << Uses);
+
+ // At the moment, there is no further refinement of the initial placement.
+ // Such a refinement could include splitting the nodes if they are placed
+ // too far from some of its users.
+
+ DEBUG(dbgs() << "Final node placement:\n" << LocationAsBlock(Loc));
+}
+
+
+Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
+ BasicBlock *LocB) {
+ DEBUG(dbgs() << "Fabricating GEP in " << LocB->getName()
+ << " for nodes:\n" << NA);
+ unsigned Num = NA.size();
+ GepNode *RN = NA[0];
+ assert((RN->Flags & GepNode::Root) && "Creating GEP for non-root");
+
+ Value *NewInst = 0;
+ Value *Input = RN->BaseVal;
+ Value **IdxList = new Value*[Num+1];
+ unsigned nax = 0;
+ do {
+ unsigned IdxC = 0;
+ // If the type of the input of the first node is not a pointer,
+ // we need to add an artificial i32 0 to the indices (because the
+ // actual input in the IR will be a pointer).
+ if (!NA[nax]->PTy->isPointerTy()) {
+ Type *Int32Ty = Type::getInt32Ty(*Ctx);
+ IdxList[IdxC++] = ConstantInt::get(Int32Ty, 0);
+ }
+
+ // Keep adding indices from NA until we have to stop and generate
+ // an "intermediate" GEP.
+ while (++nax <= Num) {
+ GepNode *N = NA[nax-1];
+ IdxList[IdxC++] = N->Idx;
+ if (nax < Num) {
+ // We have to stop if the expected type of the output of this node
+ // is not the same as the input type of the next node.
+ Type *NextTy = next_type(N->PTy, N->Idx);
+ if (NextTy != NA[nax]->PTy)
+ break;
+ }
+ }
+ ArrayRef<Value*> A(IdxList, IdxC);
+ Type *InpTy = Input->getType();
+ Type *ElTy = cast<PointerType>(InpTy->getScalarType())->getElementType();
+ NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", &*At);
+ DEBUG(dbgs() << "new GEP: " << *NewInst << '\n');
+ Input = NewInst;
+ } while (nax <= Num);
+
+ delete[] IdxList;
+ return NewInst;
+}
+
+
+void HexagonCommonGEP::getAllUsersForNode(GepNode *Node, ValueVect &Values,
+ NodeChildrenMap &NCM) {
+ NodeVect Work;
+ Work.push_back(Node);
+
+ while (!Work.empty()) {
+ NodeVect::iterator First = Work.begin();
+ GepNode *N = *First;
+ Work.erase(First);
+ if (N->Flags & GepNode::Used) {
+ NodeToUsesMap::iterator UF = Uses.find(N);
+ assert(UF != Uses.end() && "No use information for used node");
+ UseSet &Us = UF->second;
+ for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I)
+ Values.push_back((*I)->getUser());
+ }
+ NodeChildrenMap::iterator CF = NCM.find(N);
+ if (CF != NCM.end()) {
+ NodeVect &Cs = CF->second;
+ Work.insert(Work.end(), Cs.begin(), Cs.end());
+ }
+ }
+}
+
+
+void HexagonCommonGEP::materialize(NodeToValueMap &Loc) {
+ DEBUG(dbgs() << "Nodes before materialization:\n" << Nodes << '\n');
+ NodeChildrenMap NCM;
+ NodeVect Roots;
+ // Compute the inversion again, since computing placement could alter
+ // "parent" relation between nodes.
+ invert_find_roots(Nodes, NCM, Roots);
+
+ while (!Roots.empty()) {
+ NodeVect::iterator First = Roots.begin();
+ GepNode *Root = *First, *Last = *First;
+ Roots.erase(First);
+
+ NodeVect NA; // Nodes to assemble.
+ // Append to NA all child nodes up to (and including) the first child
+ // that:
+ // (1) has more than 1 child, or
+ // (2) is used, or
+ // (3) has a child located in a different block.
+ bool LastUsed = false;
+ unsigned LastCN = 0;
+ // The location may be null if the computation failed (it can legitimately
+ // happen for nodes created from dead GEPs).
+ Value *LocV = Loc[Last];
+ if (!LocV)
+ continue;
+ BasicBlock *LastB = cast<BasicBlock>(LocV);
+ do {
+ NA.push_back(Last);
+ LastUsed = (Last->Flags & GepNode::Used);
+ if (LastUsed)
+ break;
+ NodeChildrenMap::iterator CF = NCM.find(Last);
+ LastCN = (CF != NCM.end()) ? CF->second.size() : 0;
+ if (LastCN != 1)
+ break;
+ GepNode *Child = CF->second.front();
+ BasicBlock *ChildB = cast_or_null<BasicBlock>(Loc[Child]);
+ if (ChildB != 0 && LastB != ChildB)
+ break;
+ Last = Child;
+ } while (true);
+
+ BasicBlock::iterator InsertAt = LastB->getTerminator()->getIterator();
+ if (LastUsed || LastCN > 0) {
+ ValueVect Urs;
+ getAllUsersForNode(Root, Urs, NCM);
+ BasicBlock::iterator FirstUse = first_use_of_in_block(Urs, LastB);
+ if (FirstUse != LastB->end())
+ InsertAt = FirstUse;
+ }
+
+ // Generate a new instruction for NA.
+ Value *NewInst = fabricateGEP(NA, InsertAt, LastB);
+
+ // Convert all the children of Last node into roots, and append them
+ // to the Roots list.
+ if (LastCN > 0) {
+ NodeVect &Cs = NCM[Last];
+ for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) {
+ GepNode *CN = *I;
+ CN->Flags &= ~GepNode::Internal;
+ CN->Flags |= GepNode::Root;
+ CN->BaseVal = NewInst;
+ Roots.push_back(CN);
+ }
+ }
+
+ // Lastly, if the Last node was used, replace all uses with the new GEP.
+ // The uses reference the original GEP values.
+ if (LastUsed) {
+ NodeToUsesMap::iterator UF = Uses.find(Last);
+ assert(UF != Uses.end() && "No use information found");
+ UseSet &Us = UF->second;
+ for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) {
+ Use *U = *I;
+ U->set(NewInst);
+ }
+ }
+ }
+}
+
+
+void HexagonCommonGEP::removeDeadCode() {
+ ValueVect BO;
+ BO.push_back(&Fn->front());
+
+ for (unsigned i = 0; i < BO.size(); ++i) {
+ BasicBlock *B = cast<BasicBlock>(BO[i]);
+ DomTreeNode *N = DT->getNode(B);
+ typedef GraphTraits<DomTreeNode*> GTN;
+ typedef GTN::ChildIteratorType Iter;
+ for (Iter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
+ BO.push_back((*I)->getBlock());
+ }
+
+ for (unsigned i = BO.size(); i > 0; --i) {
+ BasicBlock *B = cast<BasicBlock>(BO[i-1]);
+ BasicBlock::InstListType &IL = B->getInstList();
+ typedef BasicBlock::InstListType::reverse_iterator reverse_iterator;
+ ValueVect Ins;
+ for (reverse_iterator I = IL.rbegin(), E = IL.rend(); I != E; ++I)
+ Ins.push_back(&*I);
+ for (ValueVect::iterator I = Ins.begin(), E = Ins.end(); I != E; ++I) {
+ Instruction *In = cast<Instruction>(*I);
+ if (isInstructionTriviallyDead(In))
+ In->eraseFromParent();
+ }
+ }
+}
+
+
+bool HexagonCommonGEP::runOnFunction(Function &F) {
+ // For now bail out on C++ exception handling.
+ for (Function::iterator A = F.begin(), Z = F.end(); A != Z; ++A)
+ for (BasicBlock::iterator I = A->begin(), E = A->end(); I != E; ++I)
+ if (isa<InvokeInst>(I) || isa<LandingPadInst>(I))
+ return false;
+
+ Fn = &F;
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ PDT = &getAnalysis<PostDominatorTree>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ Ctx = &F.getContext();
+
+ Nodes.clear();
+ Uses.clear();
+ NodeOrder.clear();
+
+ SpecificBumpPtrAllocator<GepNode> Allocator;
+ Mem = &Allocator;
+
+ collect();
+ common();
+
+ NodeToValueMap Loc;
+ computeNodePlacement(Loc);
+ materialize(Loc);
+ removeDeadCode();
+
+#ifdef XDEBUG
+ // Run this only when expensive checks are enabled.
+ verifyFunction(F);
+#endif
+ return true;
+}
+
+
+namespace llvm {
+ FunctionPass *createHexagonCommonGEP() {
+ return new HexagonCommonGEP();
+ }
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
new file mode 100644
index 0000000..9fd863f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -0,0 +1,754 @@
+//===------- HexagonCopyToCombine.cpp - Hexagon Copy-To-Combine Pass ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass replaces transfer instructions with combine instructions.
+// We walk along a basic block looking for two combinable instructions and try
+// to move them together. If we can move them next to each other, we do so and
+// replace them with a combine instruction.
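+//
+// As a rough example, a pair of transfers such as
+//   r1 = #0
+//   r0 = r5
+// into an even/odd register pair can be replaced by
+//   r1:r0 = combine(#0, r5)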
+//===----------------------------------------------------------------------===//
+#include "llvm/PassSupport.h"
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-copy-combine"
+
+static
+cl::opt<bool> IsCombinesDisabled("disable-merge-into-combines",
+ cl::Hidden, cl::ZeroOrMore,
+ cl::init(false),
+ cl::desc("Disable merging into combines"));
+static
+cl::opt<unsigned>
+MaxNumOfInstsBetweenNewValueStoreAndTFR("max-num-inst-between-tfr-and-nv-store",
+ cl::Hidden, cl::init(4),
+ cl::desc("Maximum distance between a tfr feeding a store we "
+ "consider the store still to be newifiable"));
+
+namespace llvm {
+ FunctionPass *createHexagonCopyToCombine();
+ void initializeHexagonCopyToCombinePass(PassRegistry&);
+}
+
+
+namespace {
+
+class HexagonCopyToCombine : public MachineFunctionPass {
+ const HexagonInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ bool ShouldCombineAggressively;
+
+ DenseSet<MachineInstr *> PotentiallyNewifiableTFR;
+public:
+ static char ID;
+
+ HexagonCopyToCombine() : MachineFunctionPass(ID) {
+ initializeHexagonCopyToCombinePass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const override {
+ return "Hexagon Copy-To-Combine Pass";
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+private:
+ MachineInstr *findPairable(MachineInstr *I1, bool &DoInsertAtI1);
+
+ void findPotentialNewifiableTFRs(MachineBasicBlock &);
+
+ void combine(MachineInstr *I1, MachineInstr *I2,
+ MachineBasicBlock::iterator &MI, bool DoInsertAtI1);
+
+ bool isSafeToMoveTogether(MachineInstr *I1, MachineInstr *I2,
+ unsigned I1DestReg, unsigned I2DestReg,
+ bool &DoInsertAtI1);
+
+ void emitCombineRR(MachineBasicBlock::iterator &Before, unsigned DestReg,
+ MachineOperand &HiOperand, MachineOperand &LoOperand);
+
+ void emitCombineRI(MachineBasicBlock::iterator &Before, unsigned DestReg,
+ MachineOperand &HiOperand, MachineOperand &LoOperand);
+
+ void emitCombineIR(MachineBasicBlock::iterator &Before, unsigned DestReg,
+ MachineOperand &HiOperand, MachineOperand &LoOperand);
+
+ void emitCombineII(MachineBasicBlock::iterator &Before, unsigned DestReg,
+ MachineOperand &HiOperand, MachineOperand &LoOperand);
+};
+
+} // End anonymous namespace.
+
+char HexagonCopyToCombine::ID = 0;
+
+INITIALIZE_PASS(HexagonCopyToCombine, "hexagon-copy-combine",
+ "Hexagon Copy-To-Combine Pass", false, false)
+
+static bool isCombinableInstType(MachineInstr *MI,
+ const HexagonInstrInfo *TII,
+ bool ShouldCombineAggressively) {
+ switch(MI->getOpcode()) {
+ case Hexagon::A2_tfr: {
+ // A COPY instruction can be combined if its arguments are IntRegs (32bit).
+ const MachineOperand &Op0 = MI->getOperand(0);
+ const MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg() && Op1.isReg());
+
+ unsigned DestReg = Op0.getReg();
+ unsigned SrcReg = Op1.getReg();
+ return Hexagon::IntRegsRegClass.contains(DestReg) &&
+ Hexagon::IntRegsRegClass.contains(SrcReg);
+ }
+
+ case Hexagon::A2_tfrsi: {
+ // A transfer-immediate can be combined if its argument is a signed 8bit
+ // value.
+ const MachineOperand &Op0 = MI->getOperand(0);
+ const MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg());
+
+ unsigned DestReg = Op0.getReg();
+ // Ensure that TargetFlags are MO_NO_FLAG for a global. This is a
+ // workaround for an ABI bug that prevents GOT relocations on combine
+ // instructions
+ if (!Op1.isImm() && Op1.getTargetFlags() != HexagonII::MO_NO_FLAG)
+ return false;
+
+ // Only combine constant extended A2_tfrsi if we are in aggressive mode.
+ bool NotExt = Op1.isImm() && isInt<8>(Op1.getImm());
+ return Hexagon::IntRegsRegClass.contains(DestReg) &&
+ (ShouldCombineAggressively || NotExt);
+ }
+
+ default:
+ break;
+ }
+
+ return false;
+}
+
+template <unsigned N>
+static bool isGreaterThanNBitTFRI(const MachineInstr *I) {
+ if (I->getOpcode() == Hexagon::TFRI64_V4 ||
+ I->getOpcode() == Hexagon::A2_tfrsi) {
+ const MachineOperand &Op = I->getOperand(1);
+ return !Op.isImm() || !isInt<N>(Op.getImm());
+ }
+ return false;
+}
+
+/// areCombinableOperations - Returns true if the two instructions can be
+/// merged into a combine (ignoring register constraints).
+static bool areCombinableOperations(const TargetRegisterInfo *TRI,
+ MachineInstr *HighRegInst,
+ MachineInstr *LowRegInst) {
+ unsigned HiOpc = HighRegInst->getOpcode();
+ unsigned LoOpc = LowRegInst->getOpcode();
+ (void)HiOpc; // Fix compiler warning
+ (void)LoOpc; // Fix compiler warning
+ assert((HiOpc == Hexagon::A2_tfr || HiOpc == Hexagon::A2_tfrsi) &&
+ (LoOpc == Hexagon::A2_tfr || LoOpc == Hexagon::A2_tfrsi) &&
+ "Assume individual instructions are of a combinable type");
+
+ // There is no combine of two constant extended values.
+ if (isGreaterThanNBitTFRI<8>(HighRegInst) &&
+ isGreaterThanNBitTFRI<6>(LowRegInst))
+ return false;
+
+ return true;
+}
+
+static bool isEvenReg(unsigned Reg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ Hexagon::IntRegsRegClass.contains(Reg));
+ return (Reg - Hexagon::R0) % 2 == 0;
+}
+
+static void removeKillInfo(MachineInstr *MI, unsigned RegNotKilled) {
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ MachineOperand &Op = MI->getOperand(I);
+ if (!Op.isReg() || Op.getReg() != RegNotKilled || !Op.isKill())
+ continue;
+ Op.setIsKill(false);
+ }
+}
+
+/// isUnsafeToMoveAcross - Returns true if it is unsafe to move a copy
+/// instruction from \p UseReg to \p DestReg over the instruction \p I.
+static bool isUnsafeToMoveAcross(MachineInstr *I, unsigned UseReg,
+ unsigned DestReg,
+ const TargetRegisterInfo *TRI) {
+ return (UseReg && (I->modifiesRegister(UseReg, TRI))) ||
+ I->modifiesRegister(DestReg, TRI) ||
+ I->readsRegister(DestReg, TRI) ||
+ I->hasUnmodeledSideEffects() ||
+ I->isInlineAsm() || I->isDebugValue();
+}
+
+static unsigned UseReg(const MachineOperand& MO) {
+ return MO.isReg() ? MO.getReg() : 0;
+}
+
+/// isSafeToMoveTogether - Returns true if it is safe to move I1 next to I2 such
+/// that the two instructions can be paired in a combine.
+bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1,
+ MachineInstr *I2,
+ unsigned I1DestReg,
+ unsigned I2DestReg,
+ bool &DoInsertAtI1) {
+ unsigned I2UseReg = UseReg(I2->getOperand(1));
+
+ // It is not safe to move I1 and I2 into one combine if I2 has a true
+ // dependence on I1.
+ if (I2UseReg && I1->modifiesRegister(I2UseReg, TRI))
+ return false;
+
+ bool isSafe = true;
+
+ // First try to move I2 towards I1.
+ {
+ // A reverse_iterator instantiated as below starts just before I2 and I1,
+ // respectively.
+ // Look at instructions I in between I2 and (excluding) I1.
+ MachineBasicBlock::reverse_iterator I(I2),
+ End = --(MachineBasicBlock::reverse_iterator(I1));
+ // At O3 we got better results (dhrystone) by being more conservative.
+ if (!ShouldCombineAggressively)
+ End = MachineBasicBlock::reverse_iterator(I1);
+ // If I2 kills its operand and we move I2 over an instruction that also
+ // uses I2's use reg we need to modify that (first) instruction to now kill
+ // this reg.
+ unsigned KilledOperand = 0;
+ if (I2->killsRegister(I2UseReg))
+ KilledOperand = I2UseReg;
+ MachineInstr *KillingInstr = nullptr;
+
+ for (; I != End; ++I) {
+ // If the intervening instruction I:
+ // * modifies I2's use reg
+ // * modifies I2's def reg
+ // * reads I2's def reg
+ // * or has unmodelled side effects
+ // we can't move I2 across it.
+ if (isUnsafeToMoveAcross(&*I, I2UseReg, I2DestReg, TRI)) {
+ isSafe = false;
+ break;
+ }
+
+ // Update first use of the killed operand.
+ if (!KillingInstr && KilledOperand &&
+ I->readsRegister(KilledOperand, TRI))
+ KillingInstr = &*I;
+ }
+ if (isSafe) {
+ // Update the intermediate instruction with the kill flag.
+ if (KillingInstr) {
+ bool Added = KillingInstr->addRegisterKilled(KilledOperand, TRI, true);
+ (void)Added; // suppress compiler warning
+ assert(Added && "Must successfully update kill flag");
+ removeKillInfo(I2, KilledOperand);
+ }
+ DoInsertAtI1 = true;
+ return true;
+ }
+ }
+
+ // Try to move I1 towards I2.
+ {
+ // Look at instructions I in between I1 and (excluding) I2.
+ MachineBasicBlock::iterator I(I1), End(I2);
+ // At O3 we got better results (dhrystone) by being more conservative here.
+ if (!ShouldCombineAggressively)
+ End = std::next(MachineBasicBlock::iterator(I2));
+ unsigned I1UseReg = UseReg(I1->getOperand(1));
+ // Track killed operands. If we move across an instruction that kills our
+ // operand, we need to update the kill information on the moved I1. It kills
+ // the operand now.
+ MachineInstr *KillingInstr = nullptr;
+ unsigned KilledOperand = 0;
+
+ while(++I != End) {
+ // If the intervening instruction I:
+ // * modifies I1's use reg
+ // * modifies I1's def reg
+ // * reads I1's def reg
+ // * or has unmodelled side effects
+ // * or has a killed aliased register use of I1's use reg
+ //     %D4<def> = TFRI64 16
+ //     %R6<def> = TFR %R9
+ //     %R8<def> = KILL %R8, %D4<imp-use,kill>
+ //   If we want to move R6 = across the KILL instruction we would have
+ //   to remove the %D4<imp-use,kill> operand. We introduce this special
+ //   case because LLVM has no API to remove a kill flag for a register
+ //   (a removeRegisterKilled() analogous to addRegisterKilled) that
+ //   handles aliased registers correctly. For now, we are conservative
+ //   and disallow the move.
+ // we can't move I1 across it.
+ if (isUnsafeToMoveAcross(I, I1UseReg, I1DestReg, TRI) ||
+ // Check for an aliased register kill. Bail out if we see one.
+ (!I->killsRegister(I1UseReg) && I->killsRegister(I1UseReg, TRI)))
+ return false;
+
+ // Check for an exact kill (registers match).
+ if (I1UseReg && I->killsRegister(I1UseReg)) {
+ assert(!KillingInstr && "Should only see one killing instruction");
+ KilledOperand = I1UseReg;
+ KillingInstr = &*I;
+ }
+ }
+ if (KillingInstr) {
+ removeKillInfo(KillingInstr, KilledOperand);
+ // Update I1 to set the kill flag. This flag will later be picked up by
+ // the new COMBINE instruction.
+ bool Added = I1->addRegisterKilled(KilledOperand, TRI);
+ (void)Added; // suppress compiler warning
+ assert(Added && "Must successfully update kill flag");
+ }
+ DoInsertAtI1 = false;
+ }
+
+ return true;
+}
+
+/// findPotentialNewifiableTFRs - Finds transfers that feed stores that could
+/// be newified. (A use of a 64-bit register define cannot be newified.)
+void
+HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) {
+ DenseMap<unsigned, MachineInstr *> LastDef;
+ for (MachineBasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
+ MachineInstr *MI = I;
+ // Mark TFRs that feed a potential new value store as such.
+ if(TII->mayBeNewStore(MI)) {
+ // Look for uses of TFR instructions.
+ for (unsigned OpdIdx = 0, OpdE = MI->getNumOperands(); OpdIdx != OpdE;
+ ++OpdIdx) {
+ MachineOperand &Op = MI->getOperand(OpdIdx);
+
+ // Skip over anything except register uses.
+ if (!Op.isReg() || !Op.isUse() || !Op.getReg())
+ continue;
+
+ // Look for the defining instruction.
+ unsigned Reg = Op.getReg();
+ MachineInstr *DefInst = LastDef[Reg];
+ if (!DefInst)
+ continue;
+ if (!isCombinableInstType(DefInst, TII, ShouldCombineAggressively))
+ continue;
+
+ // Only newifiable stores that are close by should influence the decision.
+ MachineBasicBlock::iterator It(DefInst);
+ unsigned NumInstsToDef = 0;
+ while (&*It++ != MI)
+ ++NumInstsToDef;
+
+ if (NumInstsToDef > MaxNumOfInstsBetweenNewValueStoreAndTFR)
+ continue;
+
+ PotentiallyNewifiableTFR.insert(DefInst);
+ }
+ // Skip to next instruction.
+ continue;
+ }
+
+ // Put instructions that last defined integer or double registers into the
+ // map.
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ MachineOperand &Op = MI->getOperand(I);
+ if (!Op.isReg() || !Op.isDef() || !Op.getReg())
+ continue;
+ unsigned Reg = Op.getReg();
+ if (Hexagon::DoubleRegsRegClass.contains(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ LastDef[*SubRegs] = MI;
+ }
+ } else if (Hexagon::IntRegsRegClass.contains(Reg))
+ LastDef[Reg] = MI;
+ }
+ }
+}
+
+bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
+
+ if (IsCombinesDisabled) return false;
+
+ bool HasChanged = false;
+
+ // Get target info.
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+
+ // Combine aggressively (for code size)
+ ShouldCombineAggressively =
+ MF.getTarget().getOptLevel() <= CodeGenOpt::Default;
+
+ // Traverse basic blocks.
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
+ ++BI) {
+ PotentiallyNewifiableTFR.clear();
+ findPotentialNewifiableTFRs(*BI);
+
+ // Traverse instructions in basic block.
+ for(MachineBasicBlock::iterator MI = BI->begin(), End = BI->end();
+ MI != End;) {
+ MachineInstr *I1 = MI++;
+ // Don't combine a TFR whose user could be newified (instructions that
+ // define double registers cannot be newified - Programmer's Ref Manual
+ // 5.4.2 New-value stores).
+ if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(I1))
+ continue;
+
+ // Ignore instructions that are not combinable.
+ if (!isCombinableInstType(I1, TII, ShouldCombineAggressively))
+ continue;
+
+ // Find a second instruction that can be merged into a combine
+ // instruction.
+ bool DoInsertAtI1 = false;
+ MachineInstr *I2 = findPairable(I1, DoInsertAtI1);
+ if (I2) {
+ HasChanged = true;
+ combine(I1, I2, MI, DoInsertAtI1);
+ }
+ }
+ }
+
+ return HasChanged;
+}
+
+/// findPairable - Returns an instruction that can be merged with \p I1 into a
+/// COMBINE instruction or nullptr if no such instruction can be found.
+/// Returns true in \p DoInsertAtI1 if the combine must be inserted at
+/// instruction \p I1, and false if it must be inserted at the returned
+/// instruction.
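+/// For example (illustrative registers), the pair
+///   %R0<def> = TFRI 4
+///   %R1<def> = TFR %R5
+/// can be merged into
+///   %D0<def> = A4_combineri %R5, 4
+/// where the odd register (%R1) supplies the high word of %D0 and the even
+/// register (%R0) the low word.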
+MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr *I1,
+ bool &DoInsertAtI1) {
+ MachineBasicBlock::iterator I2 = std::next(MachineBasicBlock::iterator(I1));
+ unsigned I1DestReg = I1->getOperand(0).getReg();
+
+ for (MachineBasicBlock::iterator End = I1->getParent()->end(); I2 != End;
+ ++I2) {
+ // Bail out early if we see a second definition of I1DestReg.
+ if (I2->modifiesRegister(I1DestReg, TRI))
+ break;
+
+ // Ignore non-combinable instructions.
+ if (!isCombinableInstType(I2, TII, ShouldCombineAggressively))
+ continue;
+
+ // Don't combine a TFR whose user could be newified.
+ if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(I2))
+ continue;
+
+ unsigned I2DestReg = I2->getOperand(0).getReg();
+
+ // Check that the destination registers are adjacent and that the lower
+ // of the two is even.
+ bool IsI1LowReg = (I2DestReg - I1DestReg) == 1;
+ bool IsI2LowReg = (I1DestReg - I2DestReg) == 1;
+ unsigned FirstRegIndex = IsI1LowReg ? I1DestReg : I2DestReg;
+ if ((!IsI1LowReg && !IsI2LowReg) || !isEvenReg(FirstRegIndex))
+ continue;
+
+ // Check that the two instructions are combinable. V4 allows more
+ // instructions to be merged into a combine.
+ // The order matters because for a TFRI we can encode an int8 as the hi
+ // operand, but only a uint6 as the lo operand.
+ if ((IsI2LowReg && !areCombinableOperations(TRI, I1, I2)) ||
+ (IsI1LowReg && !areCombinableOperations(TRI, I2, I1)))
+ break;
+
+ if (isSafeToMoveTogether(I1, I2, I1DestReg, I2DestReg,
+ DoInsertAtI1))
+ return I2;
+
+ // Not safe. Stop searching.
+ break;
+ }
+ return nullptr;
+}
+
+void HexagonCopyToCombine::combine(MachineInstr *I1, MachineInstr *I2,
+ MachineBasicBlock::iterator &MI,
+ bool DoInsertAtI1) {
+ // We are going to delete I2. If MI points to I2 advance it to the next
+ // instruction.
+ if ((MachineInstr *)MI == I2) ++MI;
+
+ // Figure out whether I1 or I2 goes into the lowreg part.
+ unsigned I1DestReg = I1->getOperand(0).getReg();
+ unsigned I2DestReg = I2->getOperand(0).getReg();
+ bool IsI1Loreg = (I2DestReg - I1DestReg) == 1;
+ unsigned LoRegDef = IsI1Loreg ? I1DestReg : I2DestReg;
+
+ // Get the double word register.
+ unsigned DoubleRegDest =
+ TRI->getMatchingSuperReg(LoRegDef, Hexagon::subreg_loreg,
+ &Hexagon::DoubleRegsRegClass);
+ assert(DoubleRegDest != 0 && "Expect a valid register");
+
+
+ // Setup source operands.
+ MachineOperand &LoOperand = IsI1Loreg ? I1->getOperand(1) :
+ I2->getOperand(1);
+ MachineOperand &HiOperand = IsI1Loreg ? I2->getOperand(1) :
+ I1->getOperand(1);
+
+ // Figure out which source is a register and which a constant.
+ bool IsHiReg = HiOperand.isReg();
+ bool IsLoReg = LoOperand.isReg();
+
+ MachineBasicBlock::iterator InsertPt(DoInsertAtI1 ? I1 : I2);
+ // Emit combine.
+ if (IsHiReg && IsLoReg)
+ emitCombineRR(InsertPt, DoubleRegDest, HiOperand, LoOperand);
+ else if (IsHiReg)
+ emitCombineRI(InsertPt, DoubleRegDest, HiOperand, LoOperand);
+ else if (IsLoReg)
+ emitCombineIR(InsertPt, DoubleRegDest, HiOperand, LoOperand);
+ else
+ emitCombineII(InsertPt, DoubleRegDest, HiOperand, LoOperand);
+
+ I1->eraseFromParent();
+ I2->eraseFromParent();
+}
+
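+// Emit a combine of two immediate-like operands (plain immediates, globals,
+// block addresses, jump-table or constant-pool indices), choosing between
+// A2_combineii and A4_combineii depending on which operand has to be
+// constant-extended.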
+void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt,
+ unsigned DoubleDestReg,
+ MachineOperand &HiOperand,
+ MachineOperand &LoOperand) {
+ DebugLoc DL = InsertPt->getDebugLoc();
+ MachineBasicBlock *BB = InsertPt->getParent();
+
+ // Handle globals.
+ if (HiOperand.isGlobal()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
+ .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(),
+ HiOperand.getTargetFlags())
+ .addImm(LoOperand.getImm());
+ return;
+ }
+ if (LoOperand.isGlobal()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
+ .addImm(HiOperand.getImm())
+ .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(),
+ LoOperand.getTargetFlags());
+ return;
+ }
+
+ // Handle block addresses.
+ if (HiOperand.isBlockAddress()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
+ .addBlockAddress(HiOperand.getBlockAddress(), HiOperand.getOffset(),
+ HiOperand.getTargetFlags())
+ .addImm(LoOperand.getImm());
+ return;
+ }
+ if (LoOperand.isBlockAddress()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
+ .addImm(HiOperand.getImm())
+ .addBlockAddress(LoOperand.getBlockAddress(), LoOperand.getOffset(),
+ LoOperand.getTargetFlags());
+ return;
+ }
+
+ // Handle jump tables.
+ if (HiOperand.isJTI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
+ .addJumpTableIndex(HiOperand.getIndex(), HiOperand.getTargetFlags())
+ .addImm(LoOperand.getImm());
+ return;
+ }
+ if (LoOperand.isJTI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
+ .addImm(HiOperand.getImm())
+ .addJumpTableIndex(LoOperand.getIndex(), LoOperand.getTargetFlags());
+ return;
+ }
+
+ // Handle constant pools.
+ if (HiOperand.isCPI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
+ .addConstantPoolIndex(HiOperand.getIndex(), HiOperand.getOffset(),
+ HiOperand.getTargetFlags())
+ .addImm(LoOperand.getImm());
+ return;
+ }
+ if (LoOperand.isCPI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
+ .addImm(HiOperand.getImm())
+ .addConstantPoolIndex(LoOperand.getIndex(), LoOperand.getOffset(),
+ LoOperand.getTargetFlags());
+ return;
+ }
+
+ // Give first preference to the Hexagon::A2_combineii instruction, as it
+ // can also cover the U6 case handled by Hexagon::A4_combineii.
+ // In this instruction, HiOperand is constant-extended, if required.
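+ // For example (illustrative values), combine(#100000, #3) can use
+ // A2_combineii with the hi operand constant-extended, whereas
+ // combine(#2, #100000) must use A4_combineii so that the lo operand can
+ // be constant-extended instead.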
+ if (isInt<8>(LoOperand.getImm())) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
+ .addImm(HiOperand.getImm())
+ .addImm(LoOperand.getImm());
+ return;
+ }
+
+ // In this instruction, LoOperand is const extended, if required.
+ if (isInt<8>(HiOperand.getImm())) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg)
+ .addImm(HiOperand.getImm())
+ .addImm(LoOperand.getImm());
+ return;
+ }
+
+ // Insert new combine instruction.
+ // DoubleRegDest = combine #HiImm, #LoImm
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg)
+ .addImm(HiOperand.getImm())
+ .addImm(LoOperand.getImm());
+}
+
+void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt,
+ unsigned DoubleDestReg,
+ MachineOperand &HiOperand,
+ MachineOperand &LoOperand) {
+ unsigned LoReg = LoOperand.getReg();
+ unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill());
+
+ DebugLoc DL = InsertPt->getDebugLoc();
+ MachineBasicBlock *BB = InsertPt->getParent();
+
+ // Handle globals.
+ if (HiOperand.isGlobal()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
+ .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(),
+ HiOperand.getTargetFlags())
+ .addReg(LoReg, LoRegKillFlag);
+ return;
+ }
+ // Handle block addresses.
+ if (HiOperand.isBlockAddress()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
+ .addBlockAddress(HiOperand.getBlockAddress(), HiOperand.getOffset(),
+ HiOperand.getTargetFlags())
+ .addReg(LoReg, LoRegKillFlag);
+ return;
+ }
+ // Handle jump tables.
+ if (HiOperand.isJTI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
+ .addJumpTableIndex(HiOperand.getIndex(), HiOperand.getTargetFlags())
+ .addReg(LoReg, LoRegKillFlag);
+ return;
+ }
+ // Handle constant pools.
+ if (HiOperand.isCPI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
+ .addConstantPoolIndex(HiOperand.getIndex(), HiOperand.getOffset(),
+ HiOperand.getTargetFlags())
+ .addReg(LoReg, LoRegKillFlag);
+ return;
+ }
+ // Insert new combine instruction.
+ // DoubleRegDest = combine #HiImm, LoReg
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg)
+ .addImm(HiOperand.getImm())
+ .addReg(LoReg, LoRegKillFlag);
+}
+
+void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt,
+ unsigned DoubleDestReg,
+ MachineOperand &HiOperand,
+ MachineOperand &LoOperand) {
+ unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill());
+ unsigned HiReg = HiOperand.getReg();
+
+ DebugLoc DL = InsertPt->getDebugLoc();
+ MachineBasicBlock *BB = InsertPt->getParent();
+
+ // Handle global.
+ if (LoOperand.isGlobal()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
+ .addReg(HiReg, HiRegKillFlag)
+ .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(),
+ LoOperand.getTargetFlags());
+ return;
+ }
+ // Handle block addresses.
+ if (LoOperand.isBlockAddress()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
+ .addReg(HiReg, HiRegKillFlag)
+ .addBlockAddress(LoOperand.getBlockAddress(), LoOperand.getOffset(),
+ LoOperand.getTargetFlags());
+ return;
+ }
+ // Handle jump tables.
+ if (LoOperand.isJTI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
+ .addReg(HiOperand.getReg(), HiRegKillFlag)
+ .addJumpTableIndex(LoOperand.getIndex(), LoOperand.getTargetFlags());
+ return;
+ }
+ // Handle constant pools.
+ if (LoOperand.isCPI()) {
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
+ .addReg(HiOperand.getReg(), HiRegKillFlag)
+ .addConstantPoolIndex(LoOperand.getIndex(), LoOperand.getOffset(),
+ LoOperand.getTargetFlags());
+ return;
+ }
+
+ // Insert new combine instruction.
+ // DoubleRegDest = combine HiReg, #LoImm
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg)
+ .addReg(HiReg, HiRegKillFlag)
+ .addImm(LoOperand.getImm());
+}
+
+void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt,
+ unsigned DoubleDestReg,
+ MachineOperand &HiOperand,
+ MachineOperand &LoOperand) {
+ unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill());
+ unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill());
+ unsigned LoReg = LoOperand.getReg();
+ unsigned HiReg = HiOperand.getReg();
+
+ DebugLoc DL = InsertPt->getDebugLoc();
+ MachineBasicBlock *BB = InsertPt->getParent();
+
+ // Insert new combine instruction.
+ // DoubleRegDest = combine HiReg, LoReg
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combinew), DoubleDestReg)
+ .addReg(HiReg, HiRegKillFlag)
+ .addReg(LoReg, LoRegKillFlag);
+}
+
+FunctionPass *llvm::createHexagonCopyToCombine() {
+ return new HexagonCopyToCombine();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
new file mode 100644
index 0000000..ee0c318
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -0,0 +1,1063 @@
+//===--- HexagonEarlyIfConv.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a Hexagon-specific if-conversion pass that runs on the
+// SSA form.
+// In SSA it is not straightforward to represent instructions that
+// conditionally define registers, since a conditionally-defined register
+// may only be used under the same condition on which the definition was
+// based. To avoid complications of this nature, this pass will only
+// generate predicated stores, and speculate other instructions from the
+// "if-converted" block.
+// The code will recognize CFG patterns where a block with a conditional
+// branch "splits" into a "true block" and a "false block". Either of these
+// could be omitted (in case of a triangle, for example).
+// If, after conversion of the side block(s), the CFG allows it, the
+// resulting blocks may be merged. If the "join" block contained PHI nodes,
+// they will be replaced with MUX (or MUX-like) instructions to maintain
+// the semantics of the PHI.
+//
+// Example:
+//
+// %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1
+// %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0
+// J2_jumpt %vreg41<kill>, <BB#5>, %PC<imp-def,dead>
+// J2_jump <BB#4>, %PC<imp-def,dead>
+// Successors according to CFG: BB#4(62) BB#5(62)
+//
+// BB#4: derived from LLVM BB %if.then
+// Predecessors according to CFG: BB#3
+// %vreg11<def> = A2_addp %vreg6, %vreg10
+// S2_storerd_io %vreg32, 16, %vreg11
+// Successors according to CFG: BB#5
+//
+// BB#5: derived from LLVM BB %if.end
+// Predecessors according to CFG: BB#3 BB#4
+// %vreg12<def> = PHI %vreg6, <BB#3>, %vreg11, <BB#4>
+// %vreg13<def> = A2_addp %vreg7, %vreg12
+// %vreg42<def> = C2_cmpeqi %vreg9, 10
+// J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead>
+// J2_jump <BB#6>, %PC<imp-def,dead>
+// Successors according to CFG: BB#6(4) BB#3(124)
+//
+// would become:
+//
+// %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1
+// %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0
+// spec-> %vreg11<def> = A2_addp %vreg6, %vreg10
+// pred-> S2_pstorerdf_io %vreg41, %vreg32, 16, %vreg11
+// %vreg46<def> = MUX64_rr %vreg41, %vreg6, %vreg11
+// %vreg13<def> = A2_addp %vreg7, %vreg46
+// %vreg42<def> = C2_cmpeqi %vreg9, 10
+// J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead>
+// J2_jump <BB#6>, %PC<imp-def,dead>
+// Successors according to CFG: BB#6 BB#3
+
+#define DEBUG_TYPE "hexagon-eif"
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "HexagonTargetMachine.h"
+
+#include <functional>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+namespace llvm {
+ FunctionPass *createHexagonEarlyIfConversion();
+ void initializeHexagonEarlyIfConversionPass(PassRegistry& Registry);
+}
+
+namespace {
+ cl::opt<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden,
+ cl::init(false), cl::desc("Enable branch probability info"));
+ cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden,
+ cl::desc("Size limit in Hexagon early if-conversion"));
+
+ struct PrintMB {
+ PrintMB(const MachineBasicBlock *B) : MB(B) {}
+ const MachineBasicBlock *MB;
+ };
+ raw_ostream &operator<< (raw_ostream &OS, const PrintMB &P) {
+ if (!P.MB)
+ return OS << "<none>";
+ return OS << '#' << P.MB->getNumber();
+ }
+
+ struct FlowPattern {
+ FlowPattern() : SplitB(0), TrueB(0), FalseB(0), JoinB(0), PredR(0) {}
+ FlowPattern(MachineBasicBlock *B, unsigned PR, MachineBasicBlock *TB,
+ MachineBasicBlock *FB, MachineBasicBlock *JB)
+ : SplitB(B), TrueB(TB), FalseB(FB), JoinB(JB), PredR(PR) {}
+
+ MachineBasicBlock *SplitB;
+ MachineBasicBlock *TrueB, *FalseB, *JoinB;
+ unsigned PredR;
+ };
+ struct PrintFP {
+ PrintFP(const FlowPattern &P, const TargetRegisterInfo &T)
+ : FP(P), TRI(T) {}
+ const FlowPattern &FP;
+ const TargetRegisterInfo &TRI;
+ friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P);
+ };
+ raw_ostream &operator<<(raw_ostream &OS,
+ const PrintFP &P) LLVM_ATTRIBUTE_UNUSED;
+ raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) {
+ OS << "{ SplitB:" << PrintMB(P.FP.SplitB)
+ << ", PredR:" << PrintReg(P.FP.PredR, &P.TRI)
+ << ", TrueB:" << PrintMB(P.FP.TrueB) << ", FalseB:"
+ << PrintMB(P.FP.FalseB)
+ << ", JoinB:" << PrintMB(P.FP.JoinB) << " }";
+ return OS;
+ }
+
+ class HexagonEarlyIfConversion : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonEarlyIfConversion() : MachineFunctionPass(ID),
+ TII(0), TRI(0), MFN(0), MRI(0), MDT(0), MLI(0) {
+ initializeHexagonEarlyIfConversionPass(*PassRegistry::getPassRegistry());
+ }
+ const char *getPassName() const override {
+ return "Hexagon early if conversion";
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ typedef DenseSet<MachineBasicBlock*> BlockSetType;
+
+ bool isPreheader(const MachineBasicBlock *B) const;
+ bool matchFlowPattern(MachineBasicBlock *B, MachineLoop *L,
+ FlowPattern &FP);
+ bool visitBlock(MachineBasicBlock *B, MachineLoop *L);
+ bool visitLoop(MachineLoop *L);
+
+ bool hasEHLabel(const MachineBasicBlock *B) const;
+ bool hasUncondBranch(const MachineBasicBlock *B) const;
+ bool isValidCandidate(const MachineBasicBlock *B) const;
+ bool usesUndefVReg(const MachineInstr *MI) const;
+ bool isValid(const FlowPattern &FP) const;
+ unsigned countPredicateDefs(const MachineBasicBlock *B) const;
+ unsigned computePhiCost(MachineBasicBlock *B) const;
+ bool isProfitable(const FlowPattern &FP) const;
+ bool isPredicableStore(const MachineInstr *MI) const;
+ bool isSafeToSpeculate(const MachineInstr *MI) const;
+
+ unsigned getCondStoreOpcode(unsigned Opc, bool IfTrue) const;
+ void predicateInstr(MachineBasicBlock *ToB, MachineBasicBlock::iterator At,
+ MachineInstr *MI, unsigned PredR, bool IfTrue);
+ void predicateBlockNB(MachineBasicBlock *ToB,
+ MachineBasicBlock::iterator At, MachineBasicBlock *FromB,
+ unsigned PredR, bool IfTrue);
+
+ void updatePhiNodes(MachineBasicBlock *WhereB, const FlowPattern &FP);
+ void convert(const FlowPattern &FP);
+
+ void removeBlock(MachineBasicBlock *B);
+ void eliminatePhis(MachineBasicBlock *B);
+ void replacePhiEdges(MachineBasicBlock *OldB, MachineBasicBlock *NewB);
+ void mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB);
+ void simplifyFlowGraph(const FlowPattern &FP);
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineFunction *MFN;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *MDT;
+ MachineLoopInfo *MLI;
+ BlockSetType Deleted;
+ const MachineBranchProbabilityInfo *MBPI;
+ };
+
+ char HexagonEarlyIfConversion::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonEarlyIfConversion, "hexagon-eif",
+ "Hexagon early if conversion", false, false)
+
+bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const {
+ if (B->succ_size() != 1)
+ return false;
+ MachineBasicBlock *SB = *B->succ_begin();
+ MachineLoop *L = MLI->getLoopFor(SB);
+ return L && SB == L->getHeader();
+}
+
+
+bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
+ MachineLoop *L, FlowPattern &FP) {
+ DEBUG(dbgs() << "Checking flow pattern at BB#" << B->getNumber() << "\n");
+
+ // Interested only in conditional branches, no .new, no new-value, etc.
+ // Check the terminators directly, it's easier than handling all responses
+ // from AnalyzeBranch.
+ MachineBasicBlock *TB = 0, *FB = 0;
+ MachineBasicBlock::const_iterator T1I = B->getFirstTerminator();
+ if (T1I == B->end())
+ return false;
+ unsigned Opc = T1I->getOpcode();
+ if (Opc != Hexagon::J2_jumpt && Opc != Hexagon::J2_jumpf)
+ return false;
+ unsigned PredR = T1I->getOperand(0).getReg();
+
+ // Get the layout successor, or 0 if B does not have one.
+ MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B));
+ MachineBasicBlock *NextB = (NextBI != MFN->end()) ? &*NextBI : 0;
+
+ MachineBasicBlock *T1B = T1I->getOperand(1).getMBB();
+ MachineBasicBlock::const_iterator T2I = std::next(T1I);
+ // The second terminator should be an unconditional branch.
+ assert(T2I == B->end() || T2I->getOpcode() == Hexagon::J2_jump);
+ MachineBasicBlock *T2B = (T2I == B->end()) ? NextB
+ : T2I->getOperand(0).getMBB();
+ if (T1B == T2B) {
+ // XXX merge if T1B == NextB, or convert branch to unconditional.
+ // mark as diamond with both sides equal?
+ return false;
+ }
+ // Loop could be null for both.
+ if (MLI->getLoopFor(T1B) != L || MLI->getLoopFor(T2B) != L)
+ return false;
+
+ // Record the true/false blocks in such a way that "true" means "if (PredR)",
+ // and "false" means "if (!PredR)".
+ if (Opc == Hexagon::J2_jumpt)
+ TB = T1B, FB = T2B;
+ else
+ TB = T2B, FB = T1B;
+
+ if (!MDT->properlyDominates(B, TB) || !MDT->properlyDominates(B, FB))
+ return false;
+
+ // Detect triangle first. In case of a triangle, one of the blocks TB/FB
+ // can fall through into the other, in other words, it will be executed
+ // in both cases. We only want to predicate the block that is executed
+ // conditionally.
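+ // Illustration (JB is the join block detected below):
+ //
+ //   Diamond:            Triangle (TB falls into FB):
+ //        B                       B
+ //       / \                     / \
+ //     TB   FB                 TB   |
+ //       \ /                     \  |
+ //        JB                      FB (== JB)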
+ unsigned TNP = TB->pred_size(), FNP = FB->pred_size();
+ unsigned TNS = TB->succ_size(), FNS = FB->succ_size();
+
+ // A block is predicable if it has one predecessor (it must be B), and
+ // it has a single successor. In fact, the block has to end either with
+ // an unconditional branch (which can be predicated), or with a fall-
+ // through.
+ bool TOk = (TNP == 1) && (TNS == 1);
+ bool FOk = (FNP == 1) && (FNS == 1);
+
+ // If neither is predicable, there is nothing interesting.
+ if (!TOk && !FOk)
+ return false;
+
+ MachineBasicBlock *TSB = (TNS > 0) ? *TB->succ_begin() : 0;
+ MachineBasicBlock *FSB = (FNS > 0) ? *FB->succ_begin() : 0;
+ MachineBasicBlock *JB = 0;
+
+ if (TOk) {
+ if (FOk) {
+ if (TSB == FSB)
+ JB = TSB;
+ // Diamond: "if (P) then TB; else FB;".
+ } else {
+ // TOk && !FOk
+ if (TSB == FB) {
+ JB = FB;
+ FB = 0;
+ }
+ }
+ } else {
+ // !TOk && FOk (at least one must be true by now).
+ if (FSB == TB) {
+ JB = TB;
+ TB = 0;
+ }
+ }
+ // Don't try to predicate loop preheaders.
+ if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) {
+ DEBUG(dbgs() << "One of blocks " << PrintMB(TB) << ", " << PrintMB(FB)
+ << " is a loop preheader. Skipping.\n");
+ return false;
+ }
+
+ FP = FlowPattern(B, PredR, TB, FB, JB);
+ DEBUG(dbgs() << "Detected " << PrintFP(FP, *TRI) << "\n");
+ return true;
+}
+
+
+// KLUDGE: HexagonInstrInfo::AnalyzeBranch won't work on a block that
+// contains EH_LABEL.
+bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const {
+ for (auto &I : *B)
+ if (I.isEHLabel())
+ return true;
+ return false;
+}
+
+
+// KLUDGE: HexagonInstrInfo::AnalyzeBranch may be unable to recognize
+// that a block can never fall-through.
+bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B)
+ const {
+ MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end();
+ while (I != E) {
+ if (I->isBarrier())
+ return true;
+ ++I;
+ }
+ return false;
+}
+
+
+bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
+ const {
+ if (!B)
+ return true;
+ if (B->isEHPad() || B->hasAddressTaken())
+ return false;
+ if (B->succ_size() == 0)
+ return false;
+
+ for (auto &MI : *B) {
+ if (MI.isDebugValue())
+ continue;
+ if (MI.isConditionalBranch())
+ return false;
+ unsigned Opc = MI.getOpcode();
+ bool IsJMP = (Opc == Hexagon::J2_jump);
+ if (!isPredicableStore(&MI) && !IsJMP && !isSafeToSpeculate(&MI))
+ return false;
+ // Look for predicate registers defined by this instruction. It's ok
+ // to speculate such an instruction, but the predicate register cannot
+ // be used outside of this block (or else it won't be possible to
+ // update the use of it after predication). PHI uses will be updated
+ // to use a result of a MUX, and a MUX cannot be created for predicate
+ // registers.
+ for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef())
+ continue;
+ unsigned R = MO->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ if (MRI->getRegClass(R) != &Hexagon::PredRegsRegClass)
+ continue;
+ for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U)
+ if (U->getParent()->isPHI())
+ return false;
+ }
+ }
+ return true;
+}
+
+
+bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const {
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isUse())
+ continue;
+ unsigned R = MO->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ const MachineInstr *DefI = MRI->getVRegDef(R);
+ // "Undefined" virtual registers are actually defined via IMPLICIT_DEF.
+ assert(DefI && "Expecting a reaching def in MRI");
+ if (DefI->isImplicitDef())
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const {
+ if (hasEHLabel(FP.SplitB)) // KLUDGE: see function definition
+ return false;
+ if (FP.TrueB && !isValidCandidate(FP.TrueB))
+ return false;
+ if (FP.FalseB && !isValidCandidate(FP.FalseB))
+ return false;
+ // Check the PHIs in the join block. If any of them use a register
+ // that is defined as IMPLICIT_DEF, do not convert this. This can
+ // legitimately happen if one side of the split never executes, but
+ // the compiler is unable to prove it. That side may then seem to
+ // provide an "undef" value to the join block, however it will never
+ // execute at run-time. If we convert this case, the "undef" will
+ // be used in a MUX instruction, and that may seem like actually
+ // using an undefined value to other optimizations. This could lead
+ // to trouble further down the optimization stream, cause assertions
+ // to fail, etc.
+ if (FP.JoinB) {
+ const MachineBasicBlock &B = *FP.JoinB;
+ for (auto &MI : B) {
+ if (!MI.isPHI())
+ break;
+ if (usesUndefVReg(&MI))
+ return false;
+ unsigned DefR = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefR);
+ if (RC == &Hexagon::PredRegsRegClass)
+ return false;
+ }
+ }
+ return true;
+}
+
+
+unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const {
+ assert(B->pred_size() <= 2);
+ if (B->pred_size() < 2)
+ return 0;
+
+ unsigned Cost = 0;
+ MachineBasicBlock::const_iterator I, E = B->getFirstNonPHI();
+ for (I = B->begin(); I != E; ++I) {
+ const MachineOperand &RO1 = I->getOperand(1);
+ const MachineOperand &RO3 = I->getOperand(3);
+ assert(RO1.isReg() && RO3.isReg());
+ // Must have a MUX if the phi uses a subregister.
+ if (RO1.getSubReg() != 0 || RO3.getSubReg() != 0) {
+ Cost++;
+ continue;
+ }
+ MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg());
+ MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg());
+ if (!TII->isPredicable(Def1) || !TII->isPredicable(Def3))
+ Cost++;
+ }
+ return Cost;
+}
+
+
+unsigned HexagonEarlyIfConversion::countPredicateDefs(
+ const MachineBasicBlock *B) const {
+ unsigned PredDefs = 0;
+ for (auto &MI : *B) {
+ for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef())
+ continue;
+ unsigned R = MO->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ if (MRI->getRegClass(R) == &Hexagon::PredRegsRegClass)
+ PredDefs++;
+ }
+ }
+ return PredDefs;
+}
+
+
+bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
+ if (FP.TrueB && FP.FalseB) {
+
+ // Do not if-convert if the branch is one-sided.
+ if (MBPI) {
+ BranchProbability Prob(9, 10);
+ if (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob)
+ return false;
+ if (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob)
+ return false;
+ }
+
+ // If both sides are predicable, convert them if they join, and the
+ // join block has no other predecessors.
+ MachineBasicBlock *TSB = *FP.TrueB->succ_begin();
+ MachineBasicBlock *FSB = *FP.FalseB->succ_begin();
+ if (TSB != FSB)
+ return false;
+ if (TSB->pred_size() != 2)
+ return false;
+ }
+
+ // Calculate the total size of the predicated blocks.
+ // Use the instruction count, excluding branches, as an approximation of
+ // the code size. If the predicated blocks are smaller than a packet
+ // size, approximate the spare room in the packet that could be filled
+ // with the predicated/speculated instructions.
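+ // For example (assuming HEXAGON_PACKET_SIZE is 4 and the default
+ // eif-limit of 6), a triangle whose single side block holds 3
+ // instructions gives TotalIn = 3 and Spare = 1, so it passes the size
+ // check below (3 < 6 + 1).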
+ unsigned TS = 0, FS = 0, Spare = 0;
+ if (FP.TrueB) {
+ TS = std::distance(FP.TrueB->begin(), FP.TrueB->getFirstTerminator());
+ if (TS < HEXAGON_PACKET_SIZE)
+ Spare += HEXAGON_PACKET_SIZE-TS;
+ }
+ if (FP.FalseB) {
+ FS = std::distance(FP.FalseB->begin(), FP.FalseB->getFirstTerminator());
+ if (FS < HEXAGON_PACKET_SIZE)
+ Spare += HEXAGON_PACKET_SIZE-FS;
+ }
+ unsigned TotalIn = TS+FS;
+ DEBUG(dbgs() << "Total number of instructions to be predicated/speculated: "
+ << TotalIn << ", spare room: " << Spare << "\n");
+ if (TotalIn >= SizeLimit+Spare)
+ return false;
+
+ // Count the number of PHI nodes that will need to be updated (converted
+ // to MUX). Those can be later converted to predicated instructions, so
+ // they aren't always adding extra cost.
+ // KLUDGE: Also, count the number of predicate register definitions in
+ // each block. The scheduler may increase the pressure of these and cause
+ // expensive spills (e.g. bitmnp01).
+ unsigned TotalPh = 0;
+ unsigned PredDefs = countPredicateDefs(FP.SplitB);
+ if (FP.JoinB) {
+ TotalPh = computePhiCost(FP.JoinB);
+ PredDefs += countPredicateDefs(FP.JoinB);
+ } else {
+ if (FP.TrueB && FP.TrueB->succ_size() > 0) {
+ MachineBasicBlock *SB = *FP.TrueB->succ_begin();
+ TotalPh += computePhiCost(SB);
+ PredDefs += countPredicateDefs(SB);
+ }
+ if (FP.FalseB && FP.FalseB->succ_size() > 0) {
+ MachineBasicBlock *SB = *FP.FalseB->succ_begin();
+ TotalPh += computePhiCost(SB);
+ PredDefs += countPredicateDefs(SB);
+ }
+ }
+ DEBUG(dbgs() << "Total number of extra muxes from converted phis: "
+ << TotalPh << "\n");
+ if (TotalIn+TotalPh >= SizeLimit+Spare)
+ return false;
+
+ DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs << "\n");
+ if (PredDefs > 4)
+ return false;
+
+ return true;
+}
+
+
+bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B,
+ MachineLoop *L) {
+ bool Changed = false;
+
+ // Visit all dominated blocks from the same loop first, then process B.
+ MachineDomTreeNode *N = MDT->getNode(B);
+ typedef GraphTraits<MachineDomTreeNode*> GTN;
+ // We will change CFG/DT during this traversal, so take precautions to
+ // avoid problems related to invalidated iterators. In fact, processing
+ // a child C of B cannot cause another child to be removed, but it can
+ // cause a new child to be added (which was a child of C before C itself
+ // was removed). This new child, however, would have been processed
+ // prior to processing B, so there is no need to process it again.
+ // Simply keep a list of children of B, and traverse that list.
+ typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType;
+ DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N));
+ for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) {
+ MachineBasicBlock *SB = (*I)->getBlock();
+ if (!Deleted.count(SB))
+ Changed |= visitBlock(SB, L);
+ }
+ // When walking down the dominator tree, we want to traverse through
+ // blocks from nested (other) loops, because they can dominate blocks
+ // that are in L. Skip the non-L blocks only after the tree traversal.
+ if (MLI->getLoopFor(B) != L)
+ return Changed;
+
+ FlowPattern FP;
+ if (!matchFlowPattern(B, L, FP))
+ return Changed;
+
+ if (!isValid(FP)) {
+ DEBUG(dbgs() << "Conversion is not valid\n");
+ return Changed;
+ }
+ if (!isProfitable(FP)) {
+ DEBUG(dbgs() << "Conversion is not profitable\n");
+ return Changed;
+ }
+
+ convert(FP);
+ simplifyFlowGraph(FP);
+ return true;
+}
+
+
+bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) {
+ MachineBasicBlock *HB = L ? L->getHeader() : 0;
+ DEBUG((L ? dbgs() << "Visiting loop H:" << PrintMB(HB)
+ : dbgs() << "Visiting function") << "\n");
+ bool Changed = false;
+ if (L) {
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ Changed |= visitLoop(*I);
+ }
+
+ MachineBasicBlock *EntryB = GraphTraits<MachineFunction*>::getEntryNode(MFN);
+ Changed |= visitBlock(L ? HB : EntryB, L);
+ return Changed;
+}
+
+
+bool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr *MI)
+ const {
+ // Exclude post-increment stores. Those return a value, so we cannot
+ // predicate them.
+ unsigned Opc = MI->getOpcode();
+ using namespace Hexagon;
+ switch (Opc) {
+ // Store byte:
+ case S2_storerb_io: case S4_storerb_rr:
+ case S2_storerbabs: case S4_storeirb_io: case S2_storerbgp:
+ // Store halfword:
+ case S2_storerh_io: case S4_storerh_rr:
+ case S2_storerhabs: case S4_storeirh_io: case S2_storerhgp:
+ // Store upper halfword:
+ case S2_storerf_io: case S4_storerf_rr:
+ case S2_storerfabs: case S2_storerfgp:
+ // Store word:
+ case S2_storeri_io: case S4_storeri_rr:
+ case S2_storeriabs: case S4_storeiri_io: case S2_storerigp:
+ // Store doubleword:
+ case S2_storerd_io: case S4_storerd_rr:
+ case S2_storerdabs: case S2_storerdgp:
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI)
+ const {
+ if (MI->mayLoad() || MI->mayStore())
+ return false;
+ if (MI->isCall() || MI->isBarrier() || MI->isBranch())
+ return false;
+ if (MI->hasUnmodeledSideEffects())
+ return false;
+
+ return true;
+}
+
+
+unsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc,
+ bool IfTrue) const {
+ // Exclude post-increment stores.
+ using namespace Hexagon;
+ switch (Opc) {
+ case S2_storerb_io:
+ return IfTrue ? S2_pstorerbt_io : S2_pstorerbf_io;
+ case S4_storerb_rr:
+ return IfTrue ? S4_pstorerbt_rr : S4_pstorerbf_rr;
+ case S2_storerbabs:
+ case S2_storerbgp:
+ return IfTrue ? S4_pstorerbt_abs : S4_pstorerbf_abs;
+ case S4_storeirb_io:
+ return IfTrue ? S4_storeirbt_io : S4_storeirbf_io;
+ case S2_storerh_io:
+ return IfTrue ? S2_pstorerht_io : S2_pstorerhf_io;
+ case S4_storerh_rr:
+ return IfTrue ? S4_pstorerht_rr : S4_pstorerhf_rr;
+ case S2_storerhabs:
+ case S2_storerhgp:
+ return IfTrue ? S4_pstorerht_abs : S4_pstorerhf_abs;
+ case S2_storerf_io:
+ return IfTrue ? S2_pstorerft_io : S2_pstorerff_io;
+ case S4_storerf_rr:
+ return IfTrue ? S4_pstorerft_rr : S4_pstorerff_rr;
+ case S2_storerfabs:
+ case S2_storerfgp:
+ return IfTrue ? S4_pstorerft_abs : S4_pstorerff_abs;
+ case S4_storeirh_io:
+ return IfTrue ? S4_storeirht_io : S4_storeirhf_io;
+ case S2_storeri_io:
+ return IfTrue ? S2_pstorerit_io : S2_pstorerif_io;
+ case S4_storeri_rr:
+ return IfTrue ? S4_pstorerit_rr : S4_pstorerif_rr;
+ case S2_storeriabs:
+ case S2_storerigp:
+ return IfTrue ? S4_pstorerit_abs : S4_pstorerif_abs;
+ case S4_storeiri_io:
+ return IfTrue ? S4_storeirit_io : S4_storeirif_io;
+ case S2_storerd_io:
+ return IfTrue ? S2_pstorerdt_io : S2_pstorerdf_io;
+ case S4_storerd_rr:
+ return IfTrue ? S4_pstorerdt_rr : S4_pstorerdf_rr;
+ case S2_storerdabs:
+ case S2_storerdgp:
+ return IfTrue ? S4_pstorerdt_abs : S4_pstorerdf_abs;
+ }
+ llvm_unreachable("Unexpected opcode");
+ return 0;
+}
+
+
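+// Predicate a single instruction from a side block: a predicable store is
+// replaced with its conditional form, and an unconditional jump becomes a
+// conditional jump. For example (illustrative operands), predicating
+//   S2_storeri_io %R2, 0, %R3
+// on %P0 with IfTrue yields
+//   S2_pstorerit_io %P0, %R2, 0, %R3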
+void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB,
+ MachineBasicBlock::iterator At, MachineInstr *MI,
+ unsigned PredR, bool IfTrue) {
+ DebugLoc DL;
+ if (At != ToB->end())
+ DL = At->getDebugLoc();
+ else if (!ToB->empty())
+ DL = ToB->back().getDebugLoc();
+
+ unsigned Opc = MI->getOpcode();
+
+ if (isPredicableStore(MI)) {
+ unsigned COpc = getCondStoreOpcode(Opc, IfTrue);
+ assert(COpc);
+ MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, TII->get(COpc))
+ .addReg(PredR);
+ for (MIOperands MO(MI); MO.isValid(); ++MO)
+ MIB.addOperand(*MO);
+
+ // Set memory references.
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ MI->eraseFromParent();
+ return;
+ }
+
+ if (Opc == Hexagon::J2_jump) {
+ MachineBasicBlock *TB = MI->getOperand(0).getMBB();
+ const MCInstrDesc &D = TII->get(IfTrue ? Hexagon::J2_jumpt
+ : Hexagon::J2_jumpf);
+ BuildMI(*ToB, At, DL, D)
+ .addReg(PredR)
+ .addMBB(TB);
+ MI->eraseFromParent();
+ return;
+ }
+
+ // Print the offending instruction unconditionally as we are about to
+ // abort.
+ dbgs() << *MI;
+ llvm_unreachable("Unexpected instruction");
+}
+
+
+// Predicate/speculate non-branch instructions from FromB into block ToB.
+// Leave the branches alone, they will be handled later. Btw, at this point
+// FromB should have at most one branch, and it should be unconditional.
+void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB,
+ MachineBasicBlock::iterator At, MachineBasicBlock *FromB,
+ unsigned PredR, bool IfTrue) {
+ DEBUG(dbgs() << "Predicating block " << PrintMB(FromB) << "\n");
+ MachineBasicBlock::iterator End = FromB->getFirstTerminator();
+ MachineBasicBlock::iterator I, NextI;
+
+ for (I = FromB->begin(); I != End; I = NextI) {
+ assert(!I->isPHI());
+ NextI = std::next(I);
+ if (isSafeToSpeculate(&*I))
+ ToB->splice(At, FromB, I);
+ else
+ predicateInstr(ToB, At, &*I, PredR, IfTrue);
+ }
+}
+
+
+void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB,
+ const FlowPattern &FP) {
+ // Visit all PHI nodes in the WhereB block and generate MUX instructions
+ // in the split block. Update the PHI nodes with the values of the MUX.
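+ // For example (illustrative vregs), with PredR = %vreg9 a join-block PHI
+ //   %vreg3<def> = PHI %vreg1, <SplitB>, %vreg2, <TrueB>
+ // gets a MUX inserted before the terminator of the split block,
+ //   %vreg4<def> = C2_mux %vreg9, %vreg2, %vreg1
+ // and the PHI is rewritten to take %vreg4 from SplitB.
+ // (MUX64_rr is used instead of C2_mux for double registers.)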
+ auto NonPHI = WhereB->getFirstNonPHI();
+ for (auto I = WhereB->begin(); I != NonPHI; ++I) {
+ MachineInstr *PN = &*I;
+ // Registers and subregisters corresponding to TrueB, FalseB and SplitB.
+ unsigned TR = 0, TSR = 0, FR = 0, FSR = 0, SR = 0, SSR = 0;
+ for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
+ const MachineOperand &RO = PN->getOperand(i), &BO = PN->getOperand(i+1);
+ if (BO.getMBB() == FP.SplitB)
+ SR = RO.getReg(), SSR = RO.getSubReg();
+ else if (BO.getMBB() == FP.TrueB)
+ TR = RO.getReg(), TSR = RO.getSubReg();
+ else if (BO.getMBB() == FP.FalseB)
+ FR = RO.getReg(), FSR = RO.getSubReg();
+ else
+ continue;
+ PN->RemoveOperand(i+1);
+ PN->RemoveOperand(i);
+ }
+ if (TR == 0)
+ TR = SR, TSR = SSR;
+ else if (FR == 0)
+ FR = SR, FSR = SSR;
+ assert(TR && FR);
+
+ using namespace Hexagon;
+ unsigned DR = PN->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DR);
+ const MCInstrDesc &D = RC == &IntRegsRegClass ? TII->get(C2_mux)
+ : TII->get(MUX64_rr);
+
+ MachineBasicBlock::iterator MuxAt = FP.SplitB->getFirstTerminator();
+ DebugLoc DL;
+ if (MuxAt != FP.SplitB->end())
+ DL = MuxAt->getDebugLoc();
+ unsigned MuxR = MRI->createVirtualRegister(RC);
+ BuildMI(*FP.SplitB, MuxAt, DL, D, MuxR)
+ .addReg(FP.PredR)
+ .addReg(TR, 0, TSR)
+ .addReg(FR, 0, FSR);
+
+ PN->addOperand(MachineOperand::CreateReg(MuxR, false));
+ PN->addOperand(MachineOperand::CreateMBB(FP.SplitB));
+ }
+}
+
+
+void HexagonEarlyIfConversion::convert(const FlowPattern &FP) {
+ MachineBasicBlock *TSB = 0, *FSB = 0;
+ MachineBasicBlock::iterator OldTI = FP.SplitB->getFirstTerminator();
+ assert(OldTI != FP.SplitB->end());
+ DebugLoc DL = OldTI->getDebugLoc();
+
+ if (FP.TrueB) {
+ TSB = *FP.TrueB->succ_begin();
+ predicateBlockNB(FP.SplitB, OldTI, FP.TrueB, FP.PredR, true);
+ }
+ if (FP.FalseB) {
+ FSB = *FP.FalseB->succ_begin();
+ MachineBasicBlock::iterator At = FP.SplitB->getFirstTerminator();
+ predicateBlockNB(FP.SplitB, At, FP.FalseB, FP.PredR, false);
+ }
+
+ // Regenerate new terminators in the split block and update the successors.
+ // First, remember any information that may be needed later and remove the
+ // existing terminators/successors from the split block.
+ MachineBasicBlock *SSB = 0;
+ FP.SplitB->erase(OldTI, FP.SplitB->end());
+ while (FP.SplitB->succ_size() > 0) {
+ MachineBasicBlock *T = *FP.SplitB->succ_begin();
+ // It's possible that the split block had a successor that is not a
+ // predicated block. This could only happen if there was only one block to
+ // be predicated. Example:
+ // split_b:
+ // if (p) jump true_b
+ // jump unrelated2_b
+ // unrelated1_b:
+ // ...
+ // unrelated2_b: ; can have other predecessors, so it's not "false_b"
+ // jump other_b
+ // true_b: ; only reachable from split_b, can be predicated
+ // ...
+ //
+ // Find this successor (SSB) if it exists.
+ if (T != FP.TrueB && T != FP.FalseB) {
+ assert(!SSB);
+ SSB = T;
+ }
+ FP.SplitB->removeSuccessor(FP.SplitB->succ_begin());
+ }
+
+ // Insert new branches and update the successors of the split block. This
+ // may create unconditional branches to the layout successor, etc., but
+ // that will be cleaned up later. For now, make sure that correct code is
+ // generated.
+ if (FP.JoinB) {
+ assert(!SSB || SSB == FP.JoinB);
+ BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump))
+ .addMBB(FP.JoinB);
+ FP.SplitB->addSuccessor(FP.JoinB);
+ } else {
+ bool HasBranch = false;
+ if (TSB) {
+ BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jumpt))
+ .addReg(FP.PredR)
+ .addMBB(TSB);
+ FP.SplitB->addSuccessor(TSB);
+ HasBranch = true;
+ }
+ if (FSB) {
+ const MCInstrDesc &D = HasBranch ? TII->get(Hexagon::J2_jump)
+ : TII->get(Hexagon::J2_jumpf);
+ MachineInstrBuilder MIB = BuildMI(*FP.SplitB, FP.SplitB->end(), DL, D);
+ if (!HasBranch)
+ MIB.addReg(FP.PredR);
+ MIB.addMBB(FSB);
+ FP.SplitB->addSuccessor(FSB);
+ }
+ if (SSB) {
+ // This cannot happen if both TSB and FSB are set. [TF]SB are the
+ // successor blocks of the TrueB and FalseB (or null if the TrueB
+ // or FalseB block is null). SSB is the potential successor block
+ // of the SplitB that is neither TrueB nor FalseB.
+ BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump))
+ .addMBB(SSB);
+ FP.SplitB->addSuccessor(SSB);
+ }
+ }
+
+ // What is left to do is to update the PHI nodes that could have entries
+ // referring to predicated blocks.
+ if (FP.JoinB) {
+ updatePhiNodes(FP.JoinB, FP);
+ } else {
+ if (TSB)
+ updatePhiNodes(TSB, FP);
+ if (FSB)
+ updatePhiNodes(FSB, FP);
+ // Nothing to update in SSB, since SSB's predecessors haven't changed.
+ }
+}
+
+
+void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) {
+ DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n");
+
+ // Transfer the immediate dominator information from B to its descendants.
+ MachineDomTreeNode *N = MDT->getNode(B);
+ MachineDomTreeNode *IDN = N->getIDom();
+ if (IDN) {
+ MachineBasicBlock *IDB = IDN->getBlock();
+ typedef GraphTraits<MachineDomTreeNode*> GTN;
+ typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType;
+ DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N));
+ for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) {
+ MachineBasicBlock *SB = (*I)->getBlock();
+ MDT->changeImmediateDominator(SB, IDB);
+ }
+ }
+
+ while (B->succ_size() > 0)
+ B->removeSuccessor(B->succ_begin());
+
+ for (auto I = B->pred_begin(), E = B->pred_end(); I != E; ++I)
+ (*I)->removeSuccessor(B, true);
+
+ Deleted.insert(B);
+ MDT->eraseNode(B);
+ MFN->erase(B->getIterator());
+}
+
+
+void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) {
+ DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n");
+ MachineBasicBlock::iterator I, NextI, NonPHI = B->getFirstNonPHI();
+ for (I = B->begin(); I != NonPHI; I = NextI) {
+ NextI = std::next(I);
+ MachineInstr *PN = &*I;
+ assert(PN->getNumOperands() == 3 && "Invalid phi node");
+ MachineOperand &UO = PN->getOperand(1);
+ unsigned UseR = UO.getReg(), UseSR = UO.getSubReg();
+ unsigned DefR = PN->getOperand(0).getReg();
+ unsigned NewR = UseR;
+ if (UseSR) {
+ // MRI.replaceVregUsesWith does not allow updating the subregister,
+ // so instead of doing the use-iteration here, create a copy into a
+ // "non-subregistered" register.
+ DebugLoc DL = PN->getDebugLoc();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefR);
+ NewR = MRI->createVirtualRegister(RC);
+ NonPHI = BuildMI(*B, NonPHI, DL, TII->get(TargetOpcode::COPY), NewR)
+ .addReg(UseR, 0, UseSR);
+ }
+ MRI->replaceRegWith(DefR, NewR);
+ B->erase(I);
+ }
+}
+
+
+void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB,
+ MachineBasicBlock *NewB) {
+ for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) {
+ MachineBasicBlock *SB = *I;
+ MachineBasicBlock::iterator P, N = SB->getFirstNonPHI();
+ for (P = SB->begin(); P != N; ++P) {
+ MachineInstr *PN = &*P;
+ for (MIOperands MO(PN); MO.isValid(); ++MO)
+ if (MO->isMBB() && MO->getMBB() == OldB)
+ MO->setMBB(NewB);
+ }
+ }
+}
+
+
+void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB,
+ MachineBasicBlock *SuccB) {
+ DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and "
+ << PrintMB(SuccB) << "\n");
+ bool TermOk = hasUncondBranch(SuccB);
+ eliminatePhis(SuccB);
+ TII->RemoveBranch(*PredB);
+ PredB->removeSuccessor(SuccB);
+ PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end());
+ MachineBasicBlock::succ_iterator I, E = SuccB->succ_end();
+ for (I = SuccB->succ_begin(); I != E; ++I)
+ PredB->addSuccessor(*I);
+ PredB->normalizeSuccProbs();
+ replacePhiEdges(SuccB, PredB);
+ removeBlock(SuccB);
+ if (!TermOk)
+ PredB->updateTerminator();
+}
+
+
+void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) {
+ if (FP.TrueB)
+ removeBlock(FP.TrueB);
+ if (FP.FalseB)
+ removeBlock(FP.FalseB);
+
+ FP.SplitB->updateTerminator();
+ if (FP.SplitB->succ_size() != 1)
+ return;
+
+ MachineBasicBlock *SB = *FP.SplitB->succ_begin();
+ if (SB->pred_size() != 1)
+ return;
+
+ // By now, the split block has only one successor (SB), and SB has only
+ // one predecessor. We can try to merge them. We will need to update the
+ // terminators in FP.SplitB and SB, and that requires a working
+ // AnalyzeBranch, which fails on Hexagon for blocks that have EH_LABELs.
+ // However, if SB ends with an unconditional branch, we won't need to
+ // touch the terminators.
+ if (!hasEHLabel(SB) || hasUncondBranch(SB))
+ mergeBlocks(FP.SplitB, SB);
+}
+
+
+bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) {
+ auto &ST = MF.getSubtarget();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
+ MFN = &MF;
+ MRI = &MF.getRegInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MBPI = EnableHexagonBP ? &getAnalysis<MachineBranchProbabilityInfo>() :
+ nullptr;
+
+ Deleted.clear();
+ bool Changed = false;
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I)
+ Changed |= visitLoop(*I);
+ Changed |= visitLoop(0);
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+FunctionPass *llvm::createHexagonEarlyIfConversion() {
+ return new HexagonEarlyIfConversion();
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
new file mode 100644
index 0000000..ce10aea
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -0,0 +1,1357 @@
+//===--- HexagonExpandCondsets.cpp ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// This pass replaces mux instructions with the corresponding legal
+// instructions. It is meant to work post-SSA, but still on virtual
+// registers. It was originally placed between register coalescing and the
+// machine instruction scheduler.
+// At this point in the optimization sequence, live interval analysis has
+// already been performed, and the live intervals should be preserved. A
+// large part of the code deals with preserving the liveness information.
+//
+// Liveness tracking aside, the main functionality of this pass is divided
+// into two steps. The first step is to replace an instruction
+// vreg0 = C2_mux vreg0, vreg1, vreg2
+// with a pair of conditional transfers
+// vreg0 = A2_tfrt vreg0, vreg1
+// vreg0 = A2_tfrf vreg0, vreg2
+// It is the intention that the execution of this pass could be terminated
+// after this step, and the code generated would be functionally correct.
+//
+// If the uses of the source values vreg1 and vreg2 are kills, and their
+// definitions are predicable, then in the second step the conditional
+// transfers will be rewritten as predicated instructions. E.g.
+// vreg0 = A2_or vreg1, vreg2
+// vreg3 = A2_tfrt vreg99, vreg0<kill>
+// will be rewritten as
+// vreg3 = A2_port vreg99, vreg1, vreg2
+//
+// This replacement has two variants: "up" and "down". Consider this case:
+// vreg0 = A2_or vreg1, vreg2
+// ... [intervening instructions] ...
+// vreg3 = A2_tfrt vreg99, vreg0<kill>
+// variant "up":
+// vreg3 = A2_port vreg99, vreg1, vreg2
+// ... [intervening instructions, vreg0->vreg3] ...
+// [deleted]
+// variant "down":
+// [deleted]
+// ... [intervening instructions] ...
+// vreg3 = A2_port vreg99, vreg1, vreg2
+//
+// Both, one or none of these variants may be valid, and checks are made
+// to rule out inapplicable variants.
+//
+// As an additional optimization, before either of the two steps above is
+// executed, the pass attempts to coalesce the target register with one of
+// the source registers, e.g. given an instruction
+// vreg3 = C2_mux vreg0, vreg1, vreg2
+// vreg3 will be coalesced with either vreg1 or vreg2. If this succeeds,
+// the instruction would then be (for example)
+// vreg3 = C2_mux vreg0, vreg3, vreg2
+// and, under certain circumstances, this could result in only one predicated
+// instruction:
+// vreg3 = A2_tfrf vreg0, vreg2
+//
+
+#define DEBUG_TYPE "expand-condsets"
+#include "HexagonTargetMachine.h"
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit",
+ cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions"));
+static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit",
+ cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings"));
+
+namespace llvm {
+ void initializeHexagonExpandCondsetsPass(PassRegistry&);
+ FunctionPass *createHexagonExpandCondsets();
+}
+
+namespace {
+ class HexagonExpandCondsets : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonExpandCondsets() :
+ MachineFunctionPass(ID), HII(0), TRI(0), MRI(0),
+ LIS(0), CoaLimitActive(false),
+ TfrLimitActive(false), CoaCounter(0), TfrCounter(0) {
+ if (OptCoaLimit.getPosition())
+ CoaLimitActive = true, CoaLimit = OptCoaLimit;
+ if (OptTfrLimit.getPosition())
+ TfrLimitActive = true, TfrLimit = OptTfrLimit;
+ initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char *getPassName() const {
+ return "Hexagon Expand Condsets";
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ const HexagonInstrInfo *HII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
+
+ bool CoaLimitActive, TfrLimitActive;
+ unsigned CoaLimit, TfrLimit, CoaCounter, TfrCounter;
+
+ struct RegisterRef {
+ RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()),
+ Sub(Op.getSubReg()) {}
+ RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {}
+ bool operator== (RegisterRef RR) const {
+ return Reg == RR.Reg && Sub == RR.Sub;
+ }
+ bool operator!= (RegisterRef RR) const { return !operator==(RR); }
+ unsigned Reg, Sub;
+ };
+
+ typedef DenseMap<unsigned,unsigned> ReferenceMap;
+ enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) };
+ enum { Exec_Then = 0x10, Exec_Else = 0x20 };
+ unsigned getMaskForSub(unsigned Sub);
+ bool isCondset(const MachineInstr *MI);
+
+ void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec);
+ bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec);
+
+ LiveInterval::iterator nextSegment(LiveInterval &LI, SlotIndex S);
+ LiveInterval::iterator prevSegment(LiveInterval &LI, SlotIndex S);
+ void makeDefined(unsigned Reg, SlotIndex S, bool SetDef);
+ void makeUndead(unsigned Reg, SlotIndex S);
+ void shrinkToUses(unsigned Reg, LiveInterval &LI);
+ void updateKillFlags(unsigned Reg, LiveInterval &LI);
+ void terminateSegment(LiveInterval::iterator LT, SlotIndex S,
+ LiveInterval &LI);
+ void addInstrToLiveness(MachineInstr *MI);
+ void removeInstrFromLiveness(MachineInstr *MI);
+
+ unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond);
+ MachineInstr *genTfrFor(MachineOperand &SrcOp, unsigned DstR,
+ unsigned DstSR, const MachineOperand &PredOp, bool Cond);
+ bool split(MachineInstr *MI);
+ bool splitInBlock(MachineBasicBlock &B);
+
+ bool isPredicable(MachineInstr *MI);
+ MachineInstr *getReachingDefForPred(RegisterRef RD,
+ MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond);
+ bool canMoveOver(MachineInstr *MI, ReferenceMap &Defs, ReferenceMap &Uses);
+ bool canMoveMemTo(MachineInstr *MI, MachineInstr *ToI, bool IsDown);
+ void predicateAt(RegisterRef RD, MachineInstr *MI,
+ MachineBasicBlock::iterator Where, unsigned PredR, bool Cond);
+ void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR,
+ bool Cond, MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Last);
+ bool predicate(MachineInstr *TfrI, bool Cond);
+ bool predicateInBlock(MachineBasicBlock &B);
+
+ void postprocessUndefImplicitUses(MachineBasicBlock &B);
+ void removeImplicitUses(MachineInstr *MI);
+ void removeImplicitUses(MachineBasicBlock &B);
+
+ bool isIntReg(RegisterRef RR, unsigned &BW);
+ bool isIntraBlocks(LiveInterval &LI);
+ bool coalesceRegisters(RegisterRef R1, RegisterRef R2);
+ bool coalesceSegments(MachineFunction &MF);
+ };
+}
+
+char HexagonExpandCondsets::ID = 0;
+
+
+unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) {
+ switch (Sub) {
+ case Hexagon::subreg_loreg:
+ return Sub_Low;
+ case Hexagon::subreg_hireg:
+ return Sub_High;
+ case Hexagon::NoSubRegister:
+ return Sub_None;
+ }
+ llvm_unreachable("Invalid subregister");
+}
+
+
+bool HexagonExpandCondsets::isCondset(const MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::C2_mux:
+ case Hexagon::C2_muxii:
+ case Hexagon::C2_muxir:
+ case Hexagon::C2_muxri:
+ case Hexagon::MUX64_rr:
+ return true;
+ }
+ return false;
+}
+
+
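+// Record in Map that register RR.Reg, with its subregister part encoded as a
+// mask, is referenced under the execution condition Exec (Exec_Then and/or
+// Exec_Else).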
+void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map,
+ unsigned Exec) {
+ unsigned Mask = getMaskForSub(RR.Sub) | Exec;
+ ReferenceMap::iterator F = Map.find(RR.Reg);
+ if (F == Map.end())
+ Map.insert(std::make_pair(RR.Reg, Mask));
+ else
+ F->second |= Mask;
+}
+
+
+bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map,
+ unsigned Exec) {
+ ReferenceMap::iterator F = Map.find(RR.Reg);
+ if (F == Map.end())
+ return false;
+ unsigned Mask = getMaskForSub(RR.Sub) | Exec;
+ if (Mask & F->second)
+ return true;
+ return false;
+}
+
+
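+// Return an iterator to the first segment of LI that starts at or after S,
+// or LI.end() if there is no such segment.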
+LiveInterval::iterator HexagonExpandCondsets::nextSegment(LiveInterval &LI,
+ SlotIndex S) {
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (I->start >= S)
+ return I;
+ }
+ return LI.end();
+}
+
+
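+// Return an iterator to the last segment of LI that ends at or before S,
+// or LI.end() if there is no such segment.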
+LiveInterval::iterator HexagonExpandCondsets::prevSegment(LiveInterval &LI,
+ SlotIndex S) {
+ LiveInterval::iterator P = LI.end();
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (I->end > S)
+ return P;
+ P = I;
+ }
+ return P;
+}
+
+
+/// Find the implicit use of register Reg in slot index S, and make sure
+/// that the "defined" flag is set to SetDef. While the mux expansion is
+/// going on, predicated instructions will have implicit uses of the
+/// registers that are being defined. This is to keep any preceding
+/// definitions live. If there is no preceding definition, the implicit
+/// use will be marked as "undef", otherwise it will be "defined". This
+/// function is used to update the flag.
+void HexagonExpandCondsets::makeDefined(unsigned Reg, SlotIndex S,
+ bool SetDef) {
+ if (!S.isRegister())
+ return;
+ MachineInstr *MI = LIS->getInstructionFromIndex(S);
+ assert(MI && "Expecting instruction");
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg)
+ continue;
+ bool IsDef = !Op.isUndef();
+ if (Op.isImplicit() && IsDef != SetDef)
+ Op.setIsUndef(!SetDef);
+ }
+}
+
+
+void HexagonExpandCondsets::makeUndead(unsigned Reg, SlotIndex S) {
+ // If S is a block boundary, then there can still be a dead def reaching
+ // this point. Instead of traversing the CFG, queue start points of all
+ // live segments that begin with a register, and end at a block boundary.
+ // This may "resurrect" some truly dead definitions, but doing so is
+ // harmless.
+ SmallVector<MachineInstr*,8> Defs;
+ if (S.isBlock()) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (!I->start.isRegister() || !I->end.isBlock())
+ continue;
+ MachineInstr *MI = LIS->getInstructionFromIndex(I->start);
+ Defs.push_back(MI);
+ }
+ } else if (S.isRegister()) {
+ MachineInstr *MI = LIS->getInstructionFromIndex(S);
+ Defs.push_back(MI);
+ }
+
+ for (unsigned i = 0, n = Defs.size(); i < n; ++i) {
+ MachineInstr *MI = Defs[i];
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
+ continue;
+ Op.setIsDead(false);
+ }
+ }
+}
+
+
+/// Shrink the segments in the live interval for a given register to the last
+/// use before each subsequent def. Unlike LiveIntervals::shrinkToUses, this
+/// function will not mark any definitions of Reg as dead. The reason for this
+/// is that this function is used while a MUX instruction is being expanded,
+/// or while a conditional copy is undergoing predication. During these
+/// processes, there may be defs present in the instruction sequence that have
+/// not yet been removed, or there may be missing uses that have not yet been
+/// added. We want to utilize LiveIntervals::shrinkToUses as much as possible,
+/// but since it does not extend any intervals that are too short, we need to
+/// pre-emptively extend them here in anticipation of further changes.
+void HexagonExpandCondsets::shrinkToUses(unsigned Reg, LiveInterval &LI) {
+ SmallVector<MachineInstr*,4> Deads;
+ LIS->shrinkToUses(&LI, &Deads);
+ // Need to undo the deadification made by "shrinkToUses". It's easier to
+ // do it here, since we have a list of all instructions that were just
+ // marked as dead.
+ for (unsigned i = 0, n = Deads.size(); i < n; ++i) {
+ MachineInstr *MI = Deads[i];
+ // Clear the "dead" flag.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
+ continue;
+ Op.setIsDead(false);
+ }
+ // Extend the live segment to the beginning of the next one.
+ LiveInterval::iterator End = LI.end();
+ SlotIndex S = LIS->getInstructionIndex(MI).getRegSlot();
+ LiveInterval::iterator T = LI.FindSegmentContaining(S);
+ assert(T != End);
+ LiveInterval::iterator N = std::next(T);
+ if (N != End)
+ T->end = N->start;
+ else
+ T->end = LIS->getMBBEndIdx(MI->getParent());
+ }
+ updateKillFlags(Reg, LI);
+}
+
+
+/// Given an updated live interval LI for register Reg, update the kill flags
+/// in instructions using Reg to reflect the liveness changes.
+void HexagonExpandCondsets::updateKillFlags(unsigned Reg, LiveInterval &LI) {
+ MRI->clearKillFlags(Reg);
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ SlotIndex EX = I->end;
+ if (!EX.isRegister())
+ continue;
+ MachineInstr *MI = LIS->getInstructionFromIndex(EX);
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg)
+ continue;
+ // Only set the kill flag on the first encountered use of Reg in this
+ // instruction.
+ Op.setIsKill(true);
+ break;
+ }
+ }
+}
+
+
+/// When adding a new instruction to liveness, the newly added definition
+/// will start a new live segment. This may happen at a position that falls
+/// within an existing live segment. In such a case, that live segment needs to
+/// be truncated to make room for the new segment. Ultimately, the truncation
+/// will occur at the last use, but for now the segment can be terminated
+/// right at the place where the new segment will start. The segments will be
+/// shrunk-to-uses later.
+void HexagonExpandCondsets::terminateSegment(LiveInterval::iterator LT,
+ SlotIndex S, LiveInterval &LI) {
+ // Terminate the live segment pointed to by LT within a live interval LI.
+ if (LT == LI.end())
+ return;
+
+ VNInfo *OldVN = LT->valno;
+ SlotIndex EX = LT->end;
+ LT->end = S;
+ // If LT does not end at a block boundary, the termination is done.
+ if (!EX.isBlock())
+ return;
+
+ // If LT ended at a block boundary, it's possible that its value number
+ // is picked up at the beginning of other blocks. Create a new value number
+ // and change such blocks to use it instead.
+ VNInfo *NewVN = 0;
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (!I->start.isBlock() || I->valno != OldVN)
+ continue;
+ // Generate on-demand a new value number that is defined by the
+ // block beginning (i.e. a phi).
+ if (!NewVN)
+ NewVN = LI.getNextValue(I->start, LIS->getVNInfoAllocator());
+ I->valno = NewVN;
+ }
+}
+
+
+/// Add the specified instruction to live intervals. Neither the expansion
+/// of a MUX nor the predication of a copy is atomic, and this function is
+/// used to keep the live intervals up to date while those transformations
+/// are being done.
+void HexagonExpandCondsets::addInstrToLiveness(MachineInstr *MI) {
+ SlotIndex MX = LIS->isNotInMIMap(MI) ? LIS->InsertMachineInstrInMaps(MI)
+ : LIS->getInstructionIndex(MI);
+ DEBUG(dbgs() << "adding liveness info for instr\n " << MX << " " << *MI);
+
+ MX = MX.getRegSlot();
+ bool Predicated = HII->isPredicated(MI);
+ MachineBasicBlock *MB = MI->getParent();
+
+ // Strip all implicit uses from predicated instructions. They will be
+ // added again, according to the updated information.
+ if (Predicated)
+ removeImplicitUses(MI);
+
+ // For each def in MI we need to insert a new live segment starting at MX
+ // into the interval. If there already exists a live segment in the interval
+ // that contains MX, we need to terminate it at MX.
+ SmallVector<RegisterRef,2> Defs;
+ for (auto &Op : MI->operands())
+ if (Op.isReg() && Op.isDef())
+ Defs.push_back(RegisterRef(Op));
+
+ for (unsigned i = 0, n = Defs.size(); i < n; ++i) {
+ unsigned DefR = Defs[i].Reg;
+ LiveInterval &LID = LIS->getInterval(DefR);
+ DEBUG(dbgs() << "adding def " << PrintReg(DefR, TRI)
+ << " with interval\n " << LID << "\n");
+ // If MX falls inside of an existing live segment, terminate it.
+ LiveInterval::iterator LT = LID.FindSegmentContaining(MX);
+ if (LT != LID.end())
+ terminateSegment(LT, MX, LID);
+ DEBUG(dbgs() << "after terminating segment\n " << LID << "\n");
+
+ // Create a new segment starting from MX.
+ LiveInterval::iterator P = prevSegment(LID, MX), N = nextSegment(LID, MX);
+ SlotIndex EX;
+ VNInfo *VN = LID.getNextValue(MX, LIS->getVNInfoAllocator());
+ if (N == LID.end()) {
+ // There is no live segment after MX. End this segment at the end of
+ // the block.
+ EX = LIS->getMBBEndIdx(MB);
+ } else {
+ // If the next segment starts at the block boundary, end the new segment
+ // at the boundary of the preceding block (i.e. the previous index).
+ // Otherwise, end the segment at the beginning of the next segment. In
+ // either case it will be "shrunk-to-uses" later.
+ EX = N->start.isBlock() ? N->start.getPrevIndex() : N->start;
+ }
+ if (Predicated) {
+ // A predicated instruction will have an implicit use of the defined
+ // register. This is necessary so that this definition will not make
+ // any previous definitions dead. If there are no previous live
+ // segments, still add the implicit use, but make it "undef".
+ // Because of the implicit use, the preceding definition is not
+ // dead. Mark it as such (if necessary).
+ MachineOperand ImpUse = MachineOperand::CreateReg(DefR, false, true);
+ ImpUse.setSubReg(Defs[i].Sub);
+ bool Undef = false;
+ if (P == LID.end())
+ Undef = true;
+ else {
+ // If the previous segment extends to the end of the previous block,
+ // the end index may actually be the beginning of this block. If
+ // the previous segment ends at a block boundary, move it back by one,
+ // to get the proper block for it.
+ SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end;
+ MachineBasicBlock *PB = LIS->getMBBFromIndex(PE);
+ if (PB != MB && !LIS->isLiveInToMBB(LID, MB))
+ Undef = true;
+ }
+ if (!Undef) {
+ makeUndead(DefR, P->valno->def);
+ // We are adding a live use, so extend the previous segment to
+ // include it.
+ P->end = MX;
+ } else {
+ ImpUse.setIsUndef(true);
+ }
+
+ if (!MI->readsRegister(DefR))
+ MI->addOperand(ImpUse);
+ if (N != LID.end())
+ makeDefined(DefR, N->start, true);
+ }
+ LiveRange::Segment NR = LiveRange::Segment(MX, EX, VN);
+ LID.addSegment(NR);
+ DEBUG(dbgs() << "added a new segment " << NR << "\n " << LID << "\n");
+ shrinkToUses(DefR, LID);
+ DEBUG(dbgs() << "updated imp-uses: " << *MI);
+ LID.verify();
+ }
+
+ // For each use in MI:
+ // - If there is no live segment that contains MX for the used register,
+ // extend the previous one. Ignore implicit uses.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || Op.isImplicit() || Op.isUndef())
+ continue;
+ unsigned UseR = Op.getReg();
+ LiveInterval &LIU = LIS->getInterval(UseR);
+ // Find the last segment P that starts before MX.
+ LiveInterval::iterator P = LIU.FindSegmentContaining(MX);
+ if (P == LIU.end())
+ P = prevSegment(LIU, MX);
+
+ assert(P != LIU.end() && "MI uses undefined register?");
+ SlotIndex EX = P->end;
+ // If P contains MX, there is not much to do.
+ if (EX > MX) {
+ Op.setIsKill(false);
+ continue;
+ }
+ // Otherwise, extend P to "next(MX)".
+ P->end = MX.getNextIndex();
+ Op.setIsKill(true);
+ // Get the old "kill" instruction, and remove the kill flag.
+ if (MachineInstr *KI = LIS->getInstructionFromIndex(MX))
+ KI->clearRegisterKills(UseR, nullptr);
+ shrinkToUses(UseR, LIU);
+ LIU.verify();
+ }
+}
+
+
+/// Update the live interval information to reflect the removal of the given
+/// instruction from the program. As with "addInstrToLiveness", this function
+/// is called while the program code is being changed.
+void HexagonExpandCondsets::removeInstrFromLiveness(MachineInstr *MI) {
+ SlotIndex MX = LIS->getInstructionIndex(MI).getRegSlot();
+ DEBUG(dbgs() << "removing instr\n " << MX << " " << *MI);
+
+ // For each def in MI:
+ // If MI starts a live segment, merge this segment with the previous segment.
+ //
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ unsigned DefR = Op.getReg();
+ LiveInterval &LID = LIS->getInterval(DefR);
+ LiveInterval::iterator LT = LID.FindSegmentContaining(MX);
+ assert(LT != LID.end() && "Expecting live segments");
+ DEBUG(dbgs() << "removing def at " << MX << " of " << PrintReg(DefR, TRI)
+ << " with interval\n " << LID << "\n");
+ if (LT->start != MX)
+ continue;
+
+ VNInfo *MVN = LT->valno;
+ if (LT != LID.begin()) {
+ // If the current live segment is not the first, the task is easy. If
+ // the previous segment continues into the current block, extend it to
+ // the end of the current one, and merge the value numbers.
+ // Otherwise, remove the current segment, and make the end of it "undef".
+ LiveInterval::iterator P = std::prev(LT);
+ SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end;
+ MachineBasicBlock *MB = MI->getParent();
+ MachineBasicBlock *PB = LIS->getMBBFromIndex(PE);
+ if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) {
+ makeDefined(DefR, LT->end, false);
+ LID.removeSegment(*LT);
+ } else {
+ // Make the segments adjacent, so that merge-vn can also merge the
+ // segments.
+ P->end = LT->start;
+ makeUndead(DefR, P->valno->def);
+ LID.MergeValueNumberInto(MVN, P->valno);
+ }
+ } else {
+ LiveInterval::iterator N = std::next(LT);
+ LiveInterval::iterator RmB = LT, RmE = N;
+ while (N != LID.end()) {
+ // Iterate until the first register-based definition is found
+ // (i.e. skip all block-boundary entries).
+ LiveInterval::iterator Next = std::next(N);
+ if (N->start.isRegister()) {
+ makeDefined(DefR, N->start, false);
+ break;
+ }
+ if (N->end.isRegister()) {
+ makeDefined(DefR, N->end, false);
+ RmE = Next;
+ break;
+ }
+ RmE = Next;
+ N = Next;
+ }
+ // Erase the segments in one shot to avoid invalidating iterators.
+ LID.segments.erase(RmB, RmE);
+ }
+
+ bool VNUsed = false;
+ for (LiveInterval::iterator I = LID.begin(), E = LID.end(); I != E; ++I) {
+ if (I->valno != MVN)
+ continue;
+ VNUsed = true;
+ break;
+ }
+ if (!VNUsed)
+ MVN->markUnused();
+
+ DEBUG(dbgs() << "new interval: ");
+ if (!LID.empty()) {
+ DEBUG(dbgs() << LID << "\n");
+ LID.verify();
+ } else {
+ DEBUG(dbgs() << "<empty>\n");
+ LIS->removeInterval(DefR);
+ }
+ }
+
+ // For uses there is nothing to do. The intervals will be updated via
+ // shrinkToUses.
+ SmallVector<unsigned,4> Uses;
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ Uses.push_back(R);
+ }
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ for (unsigned i = 0, n = Uses.size(); i < n; ++i) {
+ LiveInterval &LI = LIS->getInterval(Uses[i]);
+ shrinkToUses(Uses[i], LI);
+ }
+}
+
+
+/// Get the opcode for a conditional transfer of the value in SO (source
+/// operand). The condition (true/false) is given in Cond.
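+/// For illustration (directly reflecting the switch below): a 32-bit register
+/// source yields A2_tfrt/A2_tfrf, a 64-bit register source yields
+/// A2_tfrpt/A2_tfrpf, and an immediate source yields C2_cmoveit/C2_cmoveif,
+/// for Cond being true/false respectively.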
+unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
+ bool Cond) {
+ using namespace Hexagon;
+ if (SO.isReg()) {
+ unsigned PhysR;
+ RegisterRef RS = SO;
+ if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) {
+ const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg);
+ assert(VC->begin() != VC->end() && "Empty register class");
+ PhysR = *VC->begin();
+ } else {
+ assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg));
+ PhysR = RS.Reg;
+ }
+ unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS);
+ switch (RC->getSize()) {
+ case 4:
+ return Cond ? A2_tfrt : A2_tfrf;
+ case 8:
+ return Cond ? A2_tfrpt : A2_tfrpf;
+ }
+ llvm_unreachable("Invalid register operand");
+ }
+ if (SO.isImm() || SO.isFPImm())
+ return Cond ? C2_cmoveit : C2_cmoveif;
+ llvm_unreachable("Unexpected source operand");
+}
+
+
+/// Generate a conditional transfer, copying the value SrcOp to the
+/// destination register DstR:DstSR, and using the predicate register from
+/// PredOp. The Cond argument specifies whether the predicate is to be
+/// if(PredOp), or if(!PredOp).
+MachineInstr *HexagonExpandCondsets::genTfrFor(MachineOperand &SrcOp,
+ unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, bool Cond) {
+ MachineInstr *MI = SrcOp.getParent();
+ MachineBasicBlock &B = *MI->getParent();
+ MachineBasicBlock::iterator At = MI;
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Don't avoid identity copies here (i.e. when the source and the
+ // destination are the same register). It is actually better to generate
+ // them here, since this allows the copy to potentially be predicated in
+ // the next step. The predication will remove such a copy if it is unable
+ // to predicate it.
+
+ unsigned Opc = getCondTfrOpcode(SrcOp, Cond);
+ MachineInstr *TfrI = BuildMI(B, At, DL, HII->get(Opc))
+ .addReg(DstR, RegState::Define, DstSR)
+ .addOperand(PredOp)
+ .addOperand(SrcOp);
+ // We don't want any kills yet.
+ TfrI->clearKillInfo();
+ DEBUG(dbgs() << "created an initial copy: " << *TfrI);
+ return TfrI;
+}
+
+
+/// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function
+/// performs all necessary changes to complete the replacement.
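+/// Schematically (virtual register names are arbitrary), a condset such as
+///   vreg0 = C2_mux vreg1, vreg2, vreg3
+/// is rewritten into the pair
+///   vreg0 = A2_tfrt vreg1, vreg2
+///   vreg0 = A2_tfrf vreg1, vreg3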
+bool HexagonExpandCondsets::split(MachineInstr *MI) {
+ if (TfrLimitActive) {
+ if (TfrCounter >= TfrLimit)
+ return false;
+ TfrCounter++;
+ }
+ DEBUG(dbgs() << "\nsplitting BB#" << MI->getParent()->getNumber()
+ << ": " << *MI);
+ MachineOperand &MD = MI->getOperand(0); // Definition
+ MachineOperand &MP = MI->getOperand(1); // Predicate register
+ assert(MD.isDef());
+ unsigned DR = MD.getReg(), DSR = MD.getSubReg();
+
+ // First, create the two individual conditional transfers, and add each
+ // of them to the live intervals information. Do that first and then remove
+ // the old instruction from live intervals.
+ if (MachineInstr *TfrT = genTfrFor(MI->getOperand(2), DR, DSR, MP, true))
+ addInstrToLiveness(TfrT);
+ if (MachineInstr *TfrF = genTfrFor(MI->getOperand(3), DR, DSR, MP, false))
+ addInstrToLiveness(TfrF);
+ removeInstrFromLiveness(MI);
+
+ return true;
+}
+
+
+/// Split all MUX instructions in the given block into pairs of conditional
+/// transfers.
+bool HexagonExpandCondsets::splitInBlock(MachineBasicBlock &B) {
+ bool Changed = false;
+ MachineBasicBlock::iterator I, E, NextI;
+ for (I = B.begin(), E = B.end(); I != E; I = NextI) {
+ NextI = std::next(I);
+ if (isCondset(I))
+ Changed |= split(I);
+ }
+ return Changed;
+}
+
+
+bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) {
+ if (HII->isPredicated(MI) || !HII->isPredicable(MI))
+ return false;
+ if (MI->hasUnmodeledSideEffects() || MI->mayStore())
+ return false;
+ // Reject instructions with multiple defs (e.g. post-increment loads).
+ bool HasDef = false;
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ if (HasDef)
+ return false;
+ HasDef = true;
+ }
+ for (auto &Mo : MI->memoperands())
+ if (Mo->isVolatile())
+ return false;
+ return true;
+}
+
+
+/// Find the reaching definition for a predicated use of RD. RD is used
+/// under the conditions given by PredR and Cond, and this function will ignore
+/// definitions that set RD under the opposite conditions.
+MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD,
+ MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) {
+ MachineBasicBlock &B = *UseIt->getParent();
+ MachineBasicBlock::iterator I = UseIt, S = B.begin();
+ if (I == S)
+ return 0;
+
+ bool PredValid = true;
+ do {
+ --I;
+ MachineInstr *MI = &*I;
+ // Check if this instruction can be ignored, i.e. if it is predicated
+ // on the complementary condition.
+ if (PredValid && HII->isPredicated(MI)) {
+ if (MI->readsRegister(PredR) && (Cond != HII->isPredicatedTrue(MI)))
+ continue;
+ }
+
+ // Check the defs. If the PredR is defined, invalidate it. If RD is
+ // defined, return the instruction or 0, depending on the circumstances.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ RegisterRef RR = Op;
+ if (RR.Reg == PredR) {
+ PredValid = false;
+ continue;
+ }
+ if (RR.Reg != RD.Reg)
+ continue;
+ // If the "Reg" part agrees, there is still the subregister to check.
+ // If we are looking for vreg1:loreg, we can skip vreg1:hireg, but
+ // not vreg1 (w/o subregisters).
+ if (RR.Sub == RD.Sub)
+ return MI;
+ if (RR.Sub == 0 || RD.Sub == 0)
+ return 0;
+ // We have different subregisters, so we can continue looking.
+ }
+ } while (I != S);
+
+ return 0;
+}
+
+
+/// Check if the instruction MI can be safely moved over a set of instructions
+/// whose side-effects (in terms of register defs and uses) are expressed in
+/// the maps Defs and Uses. These maps reflect the conditional defs and uses
+/// that depend on the same predicate register to allow moving instructions
+/// over instructions predicated on the opposite condition.
+bool HexagonExpandCondsets::canMoveOver(MachineInstr *MI, ReferenceMap &Defs,
+ ReferenceMap &Uses) {
+ // In order to be able to safely move MI over instructions that define
+ // "Defs" and use "Uses", no def operand from MI can be defined or used
+ // and no use operand can be defined.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ RegisterRef RR = Op;
+ // For physical register we would need to check register aliases, etc.
+ // and we don't want to bother with that. It would be of little value
+ // before the actual register rewriting (from virtual to physical).
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return false;
+ // No redefs for any operand.
+ if (isRefInMap(RR, Defs, Exec_Then))
+ return false;
+ // For defs, there cannot be uses.
+ if (Op.isDef() && isRefInMap(RR, Uses, Exec_Then))
+ return false;
+ }
+ return true;
+}
+
+
+/// Check if the instruction accessing memory (TheI) can be moved to the
+/// location ToI.
+bool HexagonExpandCondsets::canMoveMemTo(MachineInstr *TheI, MachineInstr *ToI,
+ bool IsDown) {
+ bool IsLoad = TheI->mayLoad(), IsStore = TheI->mayStore();
+ if (!IsLoad && !IsStore)
+ return true;
+ if (HII->areMemAccessesTriviallyDisjoint(TheI, ToI))
+ return true;
+ if (TheI->hasUnmodeledSideEffects())
+ return false;
+
+ MachineBasicBlock::iterator StartI = IsDown ? TheI : ToI;
+ MachineBasicBlock::iterator EndI = IsDown ? ToI : TheI;
+ bool Ordered = TheI->hasOrderedMemoryRef();
+
+ // Search for aliased memory reference in (StartI, EndI).
+ for (MachineBasicBlock::iterator I = std::next(StartI); I != EndI; ++I) {
+ MachineInstr *MI = &*I;
+ if (MI->hasUnmodeledSideEffects())
+ return false;
+ bool L = MI->mayLoad(), S = MI->mayStore();
+ if (!L && !S)
+ continue;
+ if (Ordered && MI->hasOrderedMemoryRef())
+ return false;
+
+ bool Conflict = (L && IsStore) || S;
+ if (Conflict)
+ return false;
+ }
+ return true;
+}
+
+
+/// Generate a predicated version of MI (where the condition is given via
+/// PredR and Cond) at the point indicated by Where.
+void HexagonExpandCondsets::predicateAt(RegisterRef RD, MachineInstr *MI,
+ MachineBasicBlock::iterator Where, unsigned PredR, bool Cond) {
+ // The problem with updating live intervals is that we can move one def
+ // past another def. In particular, this can happen when moving an A2_tfrt
+ // over an A2_tfrf defining the same register. From the point of view of
+ // live intervals, these two instructions are two separate definitions,
+ // and each one starts another live segment. LiveIntervals's "handleMove"
+ // does not allow such moves, so we need to handle it ourselves. To avoid
+ // invalidating liveness data while we are using it, the move will be
+ // implemented in 4 steps: (1) add a clone of the instruction MI at the
+ // target location, (2) update liveness, (3) delete the old instruction,
+ // and (4) update liveness again.
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction.
+ unsigned Opc = MI->getOpcode();
+ unsigned PredOpc = HII->getCondOpcode(Opc, !Cond);
+ MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc));
+ unsigned Ox = 0, NP = MI->getNumOperands();
+ // Skip all defs from MI first.
+ while (Ox < NP) {
+ MachineOperand &MO = MI->getOperand(Ox);
+ if (!MO.isReg() || !MO.isDef())
+ break;
+ Ox++;
+ }
+ // Add the new def, then the predicate register, then the rest of the
+ // operands.
+ MB.addReg(RD.Reg, RegState::Define, RD.Sub);
+ MB.addReg(PredR);
+ while (Ox < NP) {
+ MachineOperand &MO = MI->getOperand(Ox);
+ if (!MO.isReg() || !MO.isImplicit())
+ MB.addOperand(MO);
+ Ox++;
+ }
+
+ MachineFunction &MF = *B.getParent();
+ MachineInstr::mmo_iterator I = MI->memoperands_begin();
+ unsigned NR = std::distance(I, MI->memoperands_end());
+ MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR);
+ for (unsigned i = 0; i < NR; ++i)
+ MemRefs[i] = *I++;
+ MB.setMemRefs(MemRefs, MemRefs+NR);
+
+ MachineInstr *NewI = MB;
+ NewI->clearKillInfo();
+ addInstrToLiveness(NewI);
+}
+
+
+/// In the range [First, Last], rename all references to the "old" register RO
+/// to the "new" register RN, but only in instructions predicated on the given
+/// condition.
+void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN,
+ unsigned PredR, bool Cond, MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Last) {
+ MachineBasicBlock::iterator End = std::next(Last);
+ for (MachineBasicBlock::iterator I = First; I != End; ++I) {
+ MachineInstr *MI = &*I;
+ // Do not touch instructions that are not predicated, or are predicated
+ // on the opposite condition.
+ if (!HII->isPredicated(MI))
+ continue;
+ if (!MI->readsRegister(PredR) || (Cond != HII->isPredicatedTrue(MI)))
+ continue;
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || RO != RegisterRef(Op))
+ continue;
+ Op.setReg(RN.Reg);
+ Op.setSubReg(RN.Sub);
+ // In practice, this isn't supposed to see any defs.
+ assert(!Op.isDef() && "Not expecting a def");
+ }
+ }
+}
+
+
+/// For a given conditional copy, predicate the definition of the source of
+/// the copy under the given condition (using the same predicate register as
+/// the copy).
+bool HexagonExpandCondsets::predicate(MachineInstr *TfrI, bool Cond) {
+ // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi).
+ unsigned Opc = TfrI->getOpcode();
+ (void)Opc;
+ assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf);
+ DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false")
+ << ": " << *TfrI);
+
+ MachineOperand &MD = TfrI->getOperand(0);
+ MachineOperand &MP = TfrI->getOperand(1);
+ MachineOperand &MS = TfrI->getOperand(2);
+ // The source operand should be a <kill>. This is not strictly necessary,
+ // but it makes things a lot simpler. Otherwise, we would need to rename
+ // some registers, which would complicate the transformation considerably.
+ if (!MS.isKill())
+ return false;
+
+ RegisterRef RT(MS);
+ unsigned PredR = MP.getReg();
+ MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond);
+ if (!DefI || !isPredicable(DefI))
+ return false;
+
+ DEBUG(dbgs() << "Source def: " << *DefI);
+
+ // Collect the information about registers defined and used between the
+ // DefI and the TfrI.
+ // Map: reg -> bitmask of subregs
+ ReferenceMap Uses, Defs;
+ MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI;
+
+ // Check if the predicate register is valid between DefI and TfrI.
+ // If it is, we can then ignore instructions predicated on the negated
+ // conditions when collecting def and use information.
+ bool PredValid = true;
+ for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) {
+ if (!I->modifiesRegister(PredR, 0))
+ continue;
+ PredValid = false;
+ break;
+ }
+
+ for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) {
+ MachineInstr *MI = &*I;
+ // If this instruction is predicated on the same register, it could
+ // potentially be ignored.
+ // By default assume that the instruction executes on the same condition
+ // as TfrI (Exec_Then), and also on the opposite one (Exec_Else).
+ unsigned Exec = Exec_Then | Exec_Else;
+ if (PredValid && HII->isPredicated(MI) && MI->readsRegister(PredR))
+ Exec = (Cond == HII->isPredicatedTrue(MI)) ? Exec_Then : Exec_Else;
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ // We don't want to deal with physical registers. The reason is that
+ // they can be aliased with other physical registers. Aliased virtual
+ // registers must share the same register number, and can only differ
+ // in the subregisters, which we are keeping track of. Physical
+ // registers no longer have subregisters---their super- and
+ // subregisters are other physical registers, and we are not checking
+ // that.
+ RegisterRef RR = Op;
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return false;
+
+ ReferenceMap &Map = Op.isDef() ? Defs : Uses;
+ addRefToMap(RR, Map, Exec);
+ }
+ }
+
+ // The situation:
+ // RT = DefI
+ // ...
+ // RD = TfrI ..., RT
+
+ // If the register-in-the-middle (RT) is used or redefined between
+ // DefI and TfrI, we may not be able to proceed with this transformation.
+ // We can ignore a def that will not execute together with TfrI, and a
+ // use that will. If there is such a use (that does execute together with
+ // TfrI), we will not be able to move DefI down. If there is a use that
+ // executes if TfrI's condition is false, then RT must be available
+ // unconditionally (cannot be predicated).
+ // Essentially, we need to be able to rename RT to RD in this segment.
+ if (isRefInMap(RT, Defs, Exec_Then) || isRefInMap(RT, Uses, Exec_Else))
+ return false;
+ RegisterRef RD = MD;
+ // If the predicate register is defined between DefI and TfrI, the only
+ // potential thing to do would be to move the DefI down to TfrI, and then
+ // predicate. The reaching def (DefI) must be movable down to the location
+ // of the TfrI.
+ // If the target register of the TfrI (RD) is not used or defined between
+ // DefI and TfrI, consider moving TfrI up to DefI.
+ bool CanUp = canMoveOver(TfrI, Defs, Uses);
+ bool CanDown = canMoveOver(DefI, Defs, Uses);
+ // The TfrI does not access memory, but DefI could. Check if it's safe
+ // to move DefI down to TfrI.
+ if (DefI->mayLoad() || DefI->mayStore())
+ if (!canMoveMemTo(DefI, TfrI, true))
+ CanDown = false;
+
+ DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no")
+ << ", can move down: " << (CanDown ? "yes\n" : "no\n"));
+ MachineBasicBlock::iterator PastDefIt = std::next(DefIt);
+ if (CanUp)
+ predicateAt(RD, DefI, PastDefIt, PredR, Cond);
+ else if (CanDown)
+ predicateAt(RD, DefI, TfrIt, PredR, Cond);
+ else
+ return false;
+
+ if (RT != RD)
+ renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt);
+
+ // Delete the user of RT first (it should work either way, but this order
+ // of deleting is more natural).
+ removeInstrFromLiveness(TfrI);
+ removeInstrFromLiveness(DefI);
+ return true;
+}
+
+
+/// Predicate all cases of conditional copies in the specified block.
+bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B) {
+ bool Changed = false;
+ MachineBasicBlock::iterator I, E, NextI;
+ for (I = B.begin(), E = B.end(); I != E; I = NextI) {
+ NextI = std::next(I);
+ unsigned Opc = I->getOpcode();
+ if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) {
+ bool Done = predicate(I, (Opc == Hexagon::A2_tfrt));
+ if (!Done) {
+ // If we didn't predicate I, we may need to remove it in case it is
+ // an "identity" copy, e.g. vreg1 = A2_tfrt vreg2, vreg1.
+ if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2)))
+ removeInstrFromLiveness(I);
+ }
+ Changed |= Done;
+ }
+ }
+ return Changed;
+}
+
+
+void HexagonExpandCondsets::removeImplicitUses(MachineInstr *MI) {
+ for (unsigned i = MI->getNumOperands(); i > 0; --i) {
+ MachineOperand &MO = MI->getOperand(i-1);
+ if (MO.isReg() && MO.isUse() && MO.isImplicit())
+ MI->RemoveOperand(i-1);
+ }
+}
+
+
+void HexagonExpandCondsets::removeImplicitUses(MachineBasicBlock &B) {
+ for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ if (HII->isPredicated(MI))
+ removeImplicitUses(MI);
+ }
+}
+
+
+void HexagonExpandCondsets::postprocessUndefImplicitUses(MachineBasicBlock &B) {
+ // Implicit uses that are "undef" are only meaningful (outside of the
+ // internals of this pass) when the instruction defines a subregister,
+ // and the implicit-undef use applies to the defined register. In such
+ // cases, the proper way to record the information in the IR is to mark
+ // the definition as "undef", which will be interpreted as "read-undef".
+ typedef SmallSet<unsigned,2> RegisterSet;
+ for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ RegisterSet Undefs;
+ for (unsigned i = MI->getNumOperands(); i > 0; --i) {
+ MachineOperand &MO = MI->getOperand(i-1);
+ if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.isUndef()) {
+ MI->RemoveOperand(i-1);
+ Undefs.insert(MO.getReg());
+ }
+ }
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef() || !Op.getSubReg())
+ continue;
+ if (Undefs.count(Op.getReg()))
+ Op.setIsUndef(true);
+ }
+ }
+}
+
+
+bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) {
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return false;
+ const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg);
+ if (RC == &Hexagon::IntRegsRegClass) {
+ BW = 32;
+ return true;
+ }
+ if (RC == &Hexagon::DoubleRegsRegClass) {
+ BW = (RR.Sub != 0) ? 32 : 64;
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) {
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ LiveRange::Segment &LR = *I;
+ // Range must start at a register...
+ if (!LR.start.isRegister())
+ return false;
+ // ...and end in a register or in a dead slot.
+ if (!LR.end.isRegister() && !LR.end.isDead())
+ return false;
+ }
+ return true;
+}
+
+
+bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
+ if (CoaLimitActive) {
+ if (CoaCounter >= CoaLimit)
+ return false;
+ CoaCounter++;
+ }
+ unsigned BW1, BW2;
+ if (!isIntReg(R1, BW1) || !isIntReg(R2, BW2) || BW1 != BW2)
+ return false;
+ if (MRI->isLiveIn(R1.Reg))
+ return false;
+ if (MRI->isLiveIn(R2.Reg))
+ return false;
+
+ LiveInterval &L1 = LIS->getInterval(R1.Reg);
+ LiveInterval &L2 = LIS->getInterval(R2.Reg);
+ bool Overlap = L1.overlaps(L2);
+
+ DEBUG(dbgs() << "compatible registers: ("
+ << (Overlap ? "overlap" : "disjoint") << ")\n "
+ << PrintReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n "
+ << PrintReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n");
+ if (R1.Sub || R2.Sub)
+ return false;
+ if (Overlap)
+ return false;
+
+ // Coalescing could have a negative impact on scheduling, so try to limit
+ // it to a reasonable extent. Only consider coalescing segments when one
+ // of them does not cross basic block boundaries.
+ if (!isIntraBlocks(L1) && !isIntraBlocks(L2))
+ return false;
+
+ MRI->replaceRegWith(R2.Reg, R1.Reg);
+
+ // Move all live segments from L2 to L1.
+ typedef DenseMap<VNInfo*,VNInfo*> ValueInfoMap;
+ ValueInfoMap VM;
+ for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E; ++I) {
+ VNInfo *NewVN, *OldVN = I->valno;
+ ValueInfoMap::iterator F = VM.find(OldVN);
+ if (F == VM.end()) {
+ NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator());
+ VM.insert(std::make_pair(OldVN, NewVN));
+ } else {
+ NewVN = F->second;
+ }
+ L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN));
+ }
+ while (L2.begin() != L2.end())
+ L2.removeSegment(*L2.begin());
+
+ updateKillFlags(R1.Reg, L1);
+ DEBUG(dbgs() << "coalesced: " << L1 << "\n");
+ L1.verify();
+
+ return true;
+}
+
+
+/// Attempt to coalesce one of the source registers of a MUX instruction with
+/// its destination register. This could lead to having only one predicated
+/// instruction in the end instead of two.
+bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) {
+ SmallVector<MachineInstr*,16> Condsets;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock &B = *I;
+ for (MachineBasicBlock::iterator J = B.begin(), F = B.end(); J != F; ++J) {
+ MachineInstr *MI = &*J;
+ if (!isCondset(MI))
+ continue;
+ MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3);
+ if (!S1.isReg() && !S2.isReg())
+ continue;
+ Condsets.push_back(MI);
+ }
+ }
+
+ bool Changed = false;
+ for (unsigned i = 0, n = Condsets.size(); i < n; ++i) {
+ MachineInstr *CI = Condsets[i];
+ RegisterRef RD = CI->getOperand(0);
+ RegisterRef RP = CI->getOperand(1);
+ MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3);
+ bool Done = false;
+ // Consider this case:
+ // vreg1 = instr1 ...
+ // vreg2 = instr2 ...
+ // vreg0 = C2_mux ..., vreg1, vreg2
+ // If vreg0 was coalesced with vreg1, we could end up with the following
+ // code:
+ // vreg0 = instr1 ...
+ // vreg2 = instr2 ...
+ // vreg0 = A2_tfrf ..., vreg2
+ // which will later become:
+ // vreg0 = instr1 ...
+ // vreg0 = instr2_cNotPt ...
+ // i.e. there will be an unconditional definition (instr1) of vreg0
+ // followed by a conditional one. The output dependency was there before
+ // and is unavoidable, but if instr1 is predicable, we will no longer be
+ // able to predicate it here.
+ // To avoid this scenario, don't coalesce the destination register with
+ // a source register that is defined by a predicable instruction.
+ if (S1.isReg()) {
+ RegisterRef RS = S1;
+ MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true);
+ if (!RDef || !HII->isPredicable(RDef))
+ Done = coalesceRegisters(RD, RegisterRef(S1));
+ }
+ if (!Done && S2.isReg()) {
+ RegisterRef RS = S2;
+ MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false);
+ if (!RDef || !HII->isPredicable(RDef))
+ Done = coalesceRegisters(RD, RegisterRef(S2));
+ }
+ Changed |= Done;
+ }
+ return Changed;
+}
+
+
+bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
+ HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo());
+ TRI = MF.getSubtarget().getRegisterInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ MRI = &MF.getRegInfo();
+
+ bool Changed = false;
+
+ // Try to coalesce the target of a mux with one of its sources.
+ // This could eliminate a register copy in some circumstances.
+ Changed |= coalesceSegments(MF);
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ // First, simply split all muxes into a pair of conditional transfers
+ // and update the live intervals to reflect the new arrangement.
+ // This is done mainly to make the live interval update simpler than it
+ // would be while trying to predicate instructions at the same time.
+ Changed |= splitInBlock(*I);
+ // Traverse all blocks and collapse predicable instructions feeding
+ // conditional transfers into predicated instructions.
+ // Walk over all the instructions again, so we may catch pre-existing
+ // cases that were not created in the previous step.
+ Changed |= predicateInBlock(*I);
+ }
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ postprocessUndefImplicitUses(*I);
+ return Changed;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "Hexagon Expand Condsets";
+ PassInfo *PI = new PassInfo(Name, "expand-condsets",
+ &HexagonExpandCondsets::ID, 0, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonExpandCondsetsPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+
+FunctionPass *llvm::createHexagonExpandCondsets() {
+ return new HexagonExpandCondsets();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
new file mode 100644
index 0000000..6e2dbc0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -0,0 +1,357 @@
+//===-- HexagonExpandPredSpillCode.cpp - Expand Predicate Spill Code ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// The Hexagon processor has no instructions that load or store predicate
+// registers directly. So, when these registers must be spilled, a general-
+// purpose register must be found and the value copied between it and the
+// predicate register. This code currently does not use the register
+// scavenger mechanism available in the allocator. There are two registers
+// reserved to allow spilling/restoring predicate registers. One is used to
+// hold the predicate value. The other is used when stack frame offsets are
+// too large.
+//
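+// For illustration (schematic, in-range-offset case only), a spill of a
+// predicate register P via STriw_pred is expanded into
+//   HEXAGON_RESERVED_REG_2 = C2_tfrpr P
+//   memw(FP + #offset) = HEXAGON_RESERVED_REG_2      (S2_storeri_io)
+// and the matching restore via LDriw_pred into
+//   HEXAGON_RESERVED_REG_2 = memw(FP + #offset)      (L2_loadri_io)
+//   P = C2_tfrrp HEXAGON_RESERVED_REG_2
+//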
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+
+namespace llvm {
+ FunctionPass *createHexagonExpandPredSpillCode();
+ void initializeHexagonExpandPredSpillCodePass(PassRegistry&);
+}
+
+
+namespace {
+
+class HexagonExpandPredSpillCode : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonExpandPredSpillCode() : MachineFunctionPass(ID) {
+ PassRegistry &Registry = *PassRegistry::getPassRegistry();
+ initializeHexagonExpandPredSpillCodePass(Registry);
+ }
+
+ const char *getPassName() const override {
+ return "Hexagon Expand Predicate Spill Code";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+};
+
+
+char HexagonExpandPredSpillCode::ID = 0;
+
+
+bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
+
+ const HexagonSubtarget &QST = Fn.getSubtarget<HexagonSubtarget>();
+ const HexagonInstrInfo *TII = QST.getInstrInfo();
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock *MBB = &*MBBb;
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (Opc == Hexagon::S2_storerb_pci_pseudo ||
+ Opc == Hexagon::S2_storerh_pci_pseudo ||
+ Opc == Hexagon::S2_storeri_pci_pseudo ||
+ Opc == Hexagon::S2_storerd_pci_pseudo ||
+ Opc == Hexagon::S2_storerf_pci_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::S2_storerd_pci_pseudo)
+ Opcode = Hexagon::S2_storerd_pci;
+ else if (Opc == Hexagon::S2_storeri_pci_pseudo)
+ Opcode = Hexagon::S2_storeri_pci;
+ else if (Opc == Hexagon::S2_storerh_pci_pseudo)
+ Opcode = Hexagon::S2_storerh_pci;
+ else if (Opc == Hexagon::S2_storerf_pci_pseudo)
+ Opcode = Hexagon::S2_storerf_pci;
+ else if (Opc == Hexagon::S2_storerb_pci_pseudo)
+ Opcode = Hexagon::S2_storerb_pci;
+ else
+ llvm_unreachable("wrong Opc");
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op3 = MI->getOperand(3); // Modifier value.
+ MachineOperand &Op4 = MI->getOperand(4);
+ // Emit a "C6 = Rn, C6 is the control register for M0".
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op3);
+ // Replace the pseudo instruction with the real circular store.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(Op4);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ NewMI->addOperand(Op2);
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::L2_loadrd_pci_pseudo ||
+ Opc == Hexagon::L2_loadri_pci_pseudo ||
+ Opc == Hexagon::L2_loadrh_pci_pseudo ||
+ Opc == Hexagon::L2_loadruh_pci_pseudo||
+ Opc == Hexagon::L2_loadrb_pci_pseudo ||
+ Opc == Hexagon::L2_loadrub_pci_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::L2_loadrd_pci_pseudo)
+ Opcode = Hexagon::L2_loadrd_pci;
+ else if (Opc == Hexagon::L2_loadri_pci_pseudo)
+ Opcode = Hexagon::L2_loadri_pci;
+ else if (Opc == Hexagon::L2_loadrh_pci_pseudo)
+ Opcode = Hexagon::L2_loadrh_pci;
+ else if (Opc == Hexagon::L2_loadruh_pci_pseudo)
+ Opcode = Hexagon::L2_loadruh_pci;
+ else if (Opc == Hexagon::L2_loadrb_pci_pseudo)
+ Opcode = Hexagon::L2_loadrb_pci;
+ else if (Opc == Hexagon::L2_loadrub_pci_pseudo)
+ Opcode = Hexagon::L2_loadrub_pci;
+ else
+ llvm_unreachable("wrong Opc");
+
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op4 = MI->getOperand(4); // Modifier value.
+ MachineOperand &Op5 = MI->getOperand(5);
+ // Emit a "C6 = Rn, C6 is the control register for M0".
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op4);
+ // Replace the pseudo instruction with the real circular load.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op2);
+ NewMI->addOperand(Op5);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::L2_loadrd_pbr_pseudo ||
+ Opc == Hexagon::L2_loadri_pbr_pseudo ||
+ Opc == Hexagon::L2_loadrh_pbr_pseudo ||
+ Opc == Hexagon::L2_loadruh_pbr_pseudo||
+ Opc == Hexagon::L2_loadrb_pbr_pseudo ||
+ Opc == Hexagon::L2_loadrub_pbr_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::L2_loadrd_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrd_pbr;
+ else if (Opc == Hexagon::L2_loadri_pbr_pseudo)
+ Opcode = Hexagon::L2_loadri_pbr;
+ else if (Opc == Hexagon::L2_loadrh_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrh_pbr;
+ else if (Opc == Hexagon::L2_loadruh_pbr_pseudo)
+ Opcode = Hexagon::L2_loadruh_pbr;
+ else if (Opc == Hexagon::L2_loadrb_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrb_pbr;
+ else if (Opc == Hexagon::L2_loadrub_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrub_pbr;
+ else
+ llvm_unreachable("wrong Opc");
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op4 = MI->getOperand(4); // Modifier value.
+ // Emit a "C6 = Rn, C6 is the control register for M0".
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op4);
+ // Replace the pseudo instruction with the real bit-reversed load.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op2);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::S2_storerd_pbr_pseudo ||
+ Opc == Hexagon::S2_storeri_pbr_pseudo ||
+ Opc == Hexagon::S2_storerh_pbr_pseudo ||
+ Opc == Hexagon::S2_storerb_pbr_pseudo ||
+ Opc == Hexagon::S2_storerf_pbr_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::S2_storerd_pbr_pseudo)
+ Opcode = Hexagon::S2_storerd_pbr;
+ else if (Opc == Hexagon::S2_storeri_pbr_pseudo)
+ Opcode = Hexagon::S2_storeri_pbr;
+ else if (Opc == Hexagon::S2_storerh_pbr_pseudo)
+ Opcode = Hexagon::S2_storerh_pbr;
+ else if (Opc == Hexagon::S2_storerf_pbr_pseudo)
+ Opcode = Hexagon::S2_storerf_pbr;
+ else if (Opc == Hexagon::S2_storerb_pbr_pseudo)
+ Opcode = Hexagon::S2_storerb_pbr;
+ else
+ llvm_unreachable("wrong Opc");
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op3 = MI->getOperand(3); // Modifier value.
+ // Emit a "C6 = Rn, C6 is the control register for M0".
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op3);
+ // Replace the pseudo instruction with the real bit-reversed store.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ NewMI->addOperand(Op2);
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::STriw_pred) {
+ // STriw_pred [R30], ofst, SrcReg;
+ unsigned FP = MI->getOperand(0).getReg();
+ assert(FP == QST.getRegisterInfo()->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
+ assert(MI->getOperand(1).isImm() && "Not an offset");
+ int Offset = MI->getOperand(1).getImm();
+ int SrcReg = MI->getOperand(2).getReg();
+ assert(Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ "Not a predicate register");
+ if (!TII->isValidOffset(Hexagon::S2_storeri_io, Offset)) {
+ if (!TII->isValidOffset(Hexagon::A2_addi, Offset)) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_add),
+ HEXAGON_RESERVED_REG_1)
+ .addReg(FP).addReg(HEXAGON_RESERVED_REG_1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::S2_storeri_io))
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0).addReg(HEXAGON_RESERVED_REG_2);
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_addi),
+ HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::S2_storeri_io))
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0)
+ .addReg(HEXAGON_RESERVED_REG_2);
+ }
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::S2_storeri_io)).
+ addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::LDriw_pred) {
+ // DstReg = LDriw_pred [R30], ofst.
+ int DstReg = MI->getOperand(0).getReg();
+ assert(Hexagon::PredRegsRegClass.contains(DstReg) &&
+ "Not a predicate register");
+ unsigned FP = MI->getOperand(1).getReg();
+ assert(FP == QST.getRegisterInfo()->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
+ assert(MI->getOperand(2).isImm() && "Not an offset");
+ int Offset = MI->getOperand(2).getImm();
+ if (!TII->isValidOffset(Hexagon::L2_loadri_io, Offset)) {
+ if (!TII->isValidOffset(Hexagon::A2_addi, Offset)) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_add),
+ HEXAGON_RESERVED_REG_1)
+ .addReg(FP)
+ .addReg(HEXAGON_RESERVED_REG_1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io),
+ HEXAGON_RESERVED_REG_2)
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_addi),
+ HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io),
+ HEXAGON_RESERVED_REG_2)
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io),
+ HEXAGON_RESERVED_REG_2).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ }
+ }
+ }
+
+ return true;
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "Hexagon Expand Predicate Spill Code";
+ PassInfo *PI = new PassInfo(Name, "hexagon-spill-pred",
+ &HexagonExpandPredSpillCode::ID,
+ nullptr, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonExpandPredSpillCodePass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+FunctionPass*
+llvm::createHexagonExpandPredSpillCode() {
+ return new HexagonExpandPredSpillCode();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
new file mode 100644
index 0000000..d0c7f9c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -0,0 +1,188 @@
+//===---- HexagonFixupHwLoops.cpp - Fixup HW loops too far from LOOPn. ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// The loop start address in the LOOPn instruction is encoded as a distance
+// from the LOOPn instruction itself. If the start address is too far from
+// the LOOPn instruction, the instruction needs to use a constant extender.
+// This pass will identify and convert such LOOPn instructions to a proper
+// form.
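+//
+// For example (schematically), when the start block of
+//   J2_loop0r <start-bb>, Rn
+// is farther away than the allowed range, it is replaced with the
+// constant-extended form
+//   J2_loop0rext <start-bb>, Rn
+// (and likewise for the J2_loop0i, J2_loop1r and J2_loop1i variants).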
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/ADT/DenseMap.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned> MaxLoopRange(
+ "hexagon-loop-range", cl::Hidden, cl::init(200),
+ cl::desc("Restrict range of loopN instructions (testing only)"));
+
+namespace llvm {
+ FunctionPass *createHexagonFixupHwLoops();
+ void initializeHexagonFixupHwLoopsPass(PassRegistry&);
+}
+
+namespace {
+ struct HexagonFixupHwLoops : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ HexagonFixupHwLoops() : MachineFunctionPass(ID) {
+ initializeHexagonFixupHwLoopsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "Hexagon Hardware Loop Fixup";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// \brief Check the offset between each loop instruction and
+ /// the loop basic block to determine if we can use the LOOP instruction
+ /// or if we need to set the LC/SA registers explicitly.
+ bool fixupLoopInstrs(MachineFunction &MF);
+
+ /// \brief Replace loop instruction with the constant extended
+ /// version if the loop label is too far from the loop instruction.
+ void useExtLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII);
+ };
+
+ char HexagonFixupHwLoops::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonFixupHwLoops, "hwloopsfixup",
+ "Hexagon Hardware Loops Fixup", false, false)
+
+FunctionPass *llvm::createHexagonFixupHwLoops() {
+ return new HexagonFixupHwLoops();
+}
+
+/// \brief Returns true if the instruction is a hardware loop instruction.
+static bool isHardwareLoop(const MachineInstr *MI) {
+ return MI->getOpcode() == Hexagon::J2_loop0r ||
+ MI->getOpcode() == Hexagon::J2_loop0i ||
+ MI->getOpcode() == Hexagon::J2_loop1r ||
+ MI->getOpcode() == Hexagon::J2_loop1i;
+}
+
+bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
+ return fixupLoopInstrs(MF);
+}
+
+/// \brief For Hexagon, if the loop label is too far from the
+/// loop instruction then we need to set the LC0 and SA0 registers
+/// explicitly instead of using LOOP(start,count). This function
+/// checks the distance, and generates register assignments if needed.
+///
+/// This function makes two passes over the basic blocks. The first
+/// pass computes the offset of the basic block from the start.
+/// The second pass checks all the loop instructions.
+bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
+
+ // Offset of the current instruction from the start.
+ unsigned InstOffset = 0;
+ // Map each basic block to the offset of its first instruction.
+ DenseMap<const MachineBasicBlock *, unsigned> BlockToInstOffset;
+
+ const HexagonInstrInfo *HII =
+ static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ // First pass - compute the offset of each basic block.
+ for (const MachineBasicBlock &MBB : MF) {
+ if (MBB.getAlignment()) {
+ // Although we don't know the exact layout of the final code, we need
+ // to account for alignment padding somehow. This heuristic pads each
+ // aligned basic block according to the alignment value.
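+      // For example, with getAlignment() == 4 (i.e. 16-byte alignment) and
+      // InstOffset == 20, the offset is rounded up to 32.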
+ int ByteAlign = (1u << MBB.getAlignment()) - 1;
+ InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign);
+ }
+
+ BlockToInstOffset[&MBB] = InstOffset;
+ for (const MachineInstr &MI : MBB)
+ InstOffset += HII->getSize(&MI);
+ }
+
+ // Second pass - check each loop instruction to see if it needs to be
+ // converted.
+ InstOffset = 0;
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF) {
+ InstOffset = BlockToInstOffset[&MBB];
+
+ // Loop over all the instructions.
+ MachineBasicBlock::iterator MII = MBB.begin();
+ MachineBasicBlock::iterator MIE = MBB.end();
+ while (MII != MIE) {
+ InstOffset += HII->getSize(&*MII);
+ if (MII->isDebugValue()) {
+ ++MII;
+ continue;
+ }
+ if (isHardwareLoop(MII)) {
+ assert(MII->getOperand(0).isMBB() &&
+ "Expect a basic block as loop operand");
+ int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
+ if ((unsigned)abs(diff) > MaxLoopRange) {
+ useExtLoopInstr(MF, MII);
+ MII = MBB.erase(MII);
+ Changed = true;
+ } else {
+ ++MII;
+ }
+ } else {
+ ++MII;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+/// \brief Replace loop instructions with the constant extended version.
+void HexagonFixupHwLoops::useExtLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII) {
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ MachineBasicBlock *MBB = MII->getParent();
+ DebugLoc DL = MII->getDebugLoc();
+ MachineInstrBuilder MIB;
+ unsigned newOp;
+ switch (MII->getOpcode()) {
+ case Hexagon::J2_loop0r:
+ newOp = Hexagon::J2_loop0rext;
+ break;
+ case Hexagon::J2_loop0i:
+ newOp = Hexagon::J2_loop0iext;
+ break;
+ case Hexagon::J2_loop1r:
+ newOp = Hexagon::J2_loop1rext;
+ break;
+ case Hexagon::J2_loop1i:
+ newOp = Hexagon::J2_loop1iext;
+ break;
+ default:
+ llvm_unreachable("Invalid Hardware Loop Instruction.");
+ }
+ MIB = BuildMI(*MBB, MII, DL, TII->get(newOp));
+
+ for (unsigned i = 0; i < MII->getNumOperands(); ++i)
+ MIB.addOperand(MII->getOperand(i));
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
new file mode 100644
index 0000000..7a52a1c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -0,0 +1,1479 @@
+//===-- HexagonFrameLowering.cpp - Define frame lowering ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-pei"
+
+#include "HexagonFrameLowering.h"
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+// Hexagon stack frame layout as defined by the ABI:
+//
+// Incoming arguments
+// passed via stack
+// |
+// |
+// SP during function's FP during function's |
+// +-- runtime (top of stack) runtime (bottom) --+ |
+// | | |
+// --++---------------------+------------------+-----------------++-+-------
+// | parameter area for | variable-size | fixed-size |LR| arg
+// | called functions | local objects | local objects |FP|
+// --+----------------------+------------------+-----------------+--+-------
+// <- size known -> <- size unknown -> <- size known ->
+//
+// Low address High address
+//
+// <--- stack growth
+//
+//
+// - Under all circumstances, the outgoing function arguments are always
+//   accessible using the SP, and the incoming arguments are accessible using
+//   the FP.
+// - If the local objects are not aligned, they can always be accessed using
+//   the FP.
+// - If there are no variable-sized objects, the local objects can always be
+//   accessed using the SP, regardless of whether they are aligned or not.
+//   (The alignment padding will be at the bottom of the stack (highest
+//   address), and so the offset with respect to the SP will be known at
+//   compile time.)
+//
+// The only complication occurs if there are both, local aligned objects, and
+// dynamically allocated (variable-sized) objects. The alignment pad will be
+// placed between the FP and the local objects, thus preventing the use of the
+// FP to access the local objects. At the same time, the variable-sized objects
+// will be between the SP and the local objects, thus introducing an unknown
+// distance from the SP to the locals.
+//
+// To avoid this problem, a new register is created that holds the aligned
+// address of the bottom of the stack, referred to in the sources as AP (aligned
+// pointer). The AP will be equal to "FP-p", where "p" is the smallest pad
+// that aligns AP to the required boundary (a maximum of the alignments of
+// all stack objects, fixed- and variable-sized). All local objects[1] will
+// then use AP as the base pointer.
+// [1] The exception is with "fixed" stack objects. "Fixed" stack objects get
+// their name from being allocated at fixed locations on the stack, relative
+// to the FP. In the presence of dynamic allocation and local alignment, such
+// objects can only be accessed through the FP.
+//
+// Illustration of the AP:
+// FP --+
+// |
+// ---------------+---------------------+-----+-----------------------++-+--
+// Rest of the | Local stack objects | Pad | Fixed stack objects |LR|
+// stack frame | (aligned) | | (CSR, spills, etc.) |FP|
+// ---------------+---------------------+-----+-----------------+-----+--+--
+// |<-- Multiple of the -->|
+// stack alignment +-- AP
+//
+// The AP is set up at the beginning of the function. Since it is not a dedi-
+// cated (reserved) register, it needs to be kept live throughout the function
+// to be available as the base register for local object accesses.
+// Normally, the address of a stack object is obtained by a pseudo-instruction
+// TFR_FI. To access local objects with the AP register present, a different
+// pseudo-instruction needs to be used: TFR_FIA. The TFR_FIA takes one extra
+// argument compared to TFR_FI: the first input register is the AP register.
+// This keeps the register live between its definition and its uses.
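+//
+// Schematically (operands shown in simplified form, not the exact MI
+// encoding):
+//   Rd = TFR_FI fi#N, ...          ; address of a stack object
+//   Rd = TFR_FIA AP, fi#N, ...     ; same, with AP as an extra first input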
+
+// The AP register is originally set up using pseudo-instruction ALIGNA:
+// AP = ALIGNA A
+// where
+// A - required stack alignment
+// The alignment value must be the maximum of all alignments required by
+// any stack object.
+
+// The dynamic allocation uses a pseudo-instruction ALLOCA:
+// Rd = ALLOCA Rs, A
+// where
+// Rd - address of the allocated space
+//   Rs - minimum size (the actual allocated size can be larger to accommodate
+// alignment)
+// A - required alignment
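+//
+// For illustration (hypothetical registers, assuming the default 8-byte stack
+// alignment), a request such as
+//   R1 = ALLOCA R0, #32
+// with an outgoing-argument area of CF bytes would be expanded by
+// expandAlloca() below roughly as:
+//   R1  = sub(r29, R0)       ; allocate at least R0 bytes
+//   r29 = sub(r29, R0)
+//   R1  = and(R1, #-32)      ; realign, since 32 > 8
+//   r29 = and(r29, #-32)
+//   R1  = add(R1, #CF)       ; step over the outgoing-argument area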
+
+
+using namespace llvm;
+
+static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret",
+ cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target"));
+
+
+static cl::opt<int> NumberScavengerSlots("number-scavenger-slots",
+ cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2),
+ cl::ZeroOrMore);
+
+static cl::opt<int> SpillFuncThreshold("spill-func-threshold",
+ cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"),
+ cl::init(6), cl::ZeroOrMore);
+
+static cl::opt<int> SpillFuncThresholdOs("spill-func-threshold-Os",
+ cl::Hidden, cl::desc("Specify Os spill func threshold"),
+ cl::init(1), cl::ZeroOrMore);
+
+static cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame",
+ cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Enable stack frame shrink wrapping"));
+
+static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Max count of stack frame "
+ "shrink-wraps"));
+
+static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true),
+ cl::Hidden, cl::desc("Use allocframe more conservatively"));
+
+
+namespace llvm {
+ void initializeHexagonCallFrameInformationPass(PassRegistry&);
+ FunctionPass *createHexagonCallFrameInformation();
+}
+
+namespace {
+ class HexagonCallFrameInformation : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonCallFrameInformation() : MachineFunctionPass(ID) {
+ PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeHexagonCallFrameInformationPass(PR);
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ };
+
+ char HexagonCallFrameInformation::ID = 0;
+}
+
+bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) {
+ auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
+ bool NeedCFI = MF.getMMI().hasDebugInfo() ||
+ MF.getFunction()->needsUnwindTableEntry();
+
+ if (!NeedCFI)
+ return false;
+ HFI.insertCFIInstructions(MF);
+ return true;
+}
+
+INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi",
+ "Hexagon call frame information", false, false)
+
+FunctionPass *llvm::createHexagonCallFrameInformation() {
+ return new HexagonCallFrameInformation();
+}
+
+
+namespace {
+ /// Map a register pair Reg to the subregister that has the greater "number",
+ /// i.e. D3 (aka R7:6) will be mapped to R7, etc.
+ unsigned getMax32BitSubRegister(unsigned Reg, const TargetRegisterInfo &TRI,
+ bool hireg = true) {
+ if (Reg < Hexagon::D0 || Reg > Hexagon::D15)
+ return Reg;
+
+ unsigned RegNo = 0;
+ for (MCSubRegIterator SubRegs(Reg, &TRI); SubRegs.isValid(); ++SubRegs) {
+ if (hireg) {
+ if (*SubRegs > RegNo)
+ RegNo = *SubRegs;
+ } else {
+ if (!RegNo || *SubRegs < RegNo)
+ RegNo = *SubRegs;
+ }
+ }
+ return RegNo;
+ }
+
+ /// Returns the callee saved register with the largest id in the vector.
+ unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo &TRI) {
+ assert(Hexagon::R1 > 0 &&
+ "Assume physical registers are encoded as positive integers");
+ if (CSI.empty())
+ return 0;
+
+ unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI);
+ for (unsigned I = 1, E = CSI.size(); I < E; ++I) {
+ unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI);
+ if (Reg > Max)
+ Max = Reg;
+ }
+ return Max;
+ }
+
+ /// Checks if the basic block contains any instruction that needs a stack
+ /// frame to be already in place.
+ bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR) {
+ for (auto &I : MBB) {
+ const MachineInstr *MI = &I;
+ if (MI->isCall())
+ return true;
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::ALLOCA:
+ case Hexagon::ALIGNA:
+ return true;
+ default:
+ break;
+ }
+ // Check individual operands.
+ for (const MachineOperand &MO : MI->operands()) {
+ // While the presence of a frame index does not prove that a stack
+ // frame will be required, all frame indexes should be within alloc-
+ // frame/deallocframe. Otherwise, the code that translates a frame
+ // index into an offset would have to be aware of the placement of
+ // the frame creation/destruction instructions.
+ if (MO.isFI())
+ return true;
+ if (!MO.isReg())
+ continue;
+ unsigned R = MO.getReg();
+ // Virtual registers will need scavenging, which then may require
+ // a stack slot.
+ if (TargetRegisterInfo::isVirtualRegister(R))
+ return true;
+ if (CSR[R])
+ return true;
+ }
+ }
+ return false;
+ }
+
+  /// Returns true if MBB contains a machine instruction that indicates a
+  /// tail call in the block.
+ bool hasTailCall(const MachineBasicBlock &MBB) {
+ MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
+ unsigned RetOpc = I->getOpcode();
+ return RetOpc == Hexagon::TCRETURNi || RetOpc == Hexagon::TCRETURNr;
+ }
+
+ /// Returns true if MBB contains an instruction that returns.
+ bool hasReturn(const MachineBasicBlock &MBB) {
+ for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; ++I)
+ if (I->isReturn())
+ return true;
+ return false;
+ }
+}
+
+
+/// Implements shrink-wrapping of the stack frame. By default, the stack frame
+/// is created in the function entry block, and is cleaned up in every block
+/// that returns. This function finds alternate blocks: one for the frame
+/// setup (prolog) and one for the cleanup (epilog).
+void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
+ MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {
+ static unsigned ShrinkCounter = 0;
+
+ if (ShrinkLimit.getPosition()) {
+ if (ShrinkCounter >= ShrinkLimit)
+ return;
+ ShrinkCounter++;
+ }
+
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HRI = *HST.getRegisterInfo();
+
+ MachineDominatorTree MDT;
+ MDT.runOnMachineFunction(MF);
+ MachinePostDominatorTree MPT;
+ MPT.runOnMachineFunction(MF);
+
+ typedef DenseMap<unsigned,unsigned> UnsignedMap;
+ UnsignedMap RPO;
+ typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType;
+ RPOTType RPOT(&MF);
+ unsigned RPON = 0;
+ for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
+ RPO[(*I)->getNumber()] = RPON++;
+
+ // Don't process functions that have loops, at least for now. Placement
+ // of prolog and epilog must take loop structure into account. For simpli-
+ // city don't do it right now.
+ for (auto &I : MF) {
+ unsigned BN = RPO[I.getNumber()];
+ for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE; ++SI) {
+ // If found a back-edge, return.
+ if (RPO[(*SI)->getNumber()] <= BN)
+ return;
+ }
+ }
+
+ // Collect the set of blocks that need a stack frame to execute. Scan
+ // each block for uses/defs of callee-saved registers, calls, etc.
+ SmallVector<MachineBasicBlock*,16> SFBlocks;
+ BitVector CSR(Hexagon::NUM_TARGET_REGS);
+ for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P)
+ CSR[*P] = true;
+
+ for (auto &I : MF)
+ if (needsStackFrame(I, CSR))
+ SFBlocks.push_back(&I);
+
+ DEBUG({
+ dbgs() << "Blocks needing SF: {";
+ for (auto &B : SFBlocks)
+ dbgs() << " BB#" << B->getNumber();
+ dbgs() << " }\n";
+ });
+ // No frame needed?
+ if (SFBlocks.empty())
+ return;
+
+ // Pick a common dominator and a common post-dominator.
+ MachineBasicBlock *DomB = SFBlocks[0];
+ for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
+ DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]);
+ if (!DomB)
+ break;
+ }
+ MachineBasicBlock *PDomB = SFBlocks[0];
+ for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
+ PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]);
+ if (!PDomB)
+ break;
+ }
+ DEBUG({
+ dbgs() << "Computed dom block: BB#";
+ if (DomB) dbgs() << DomB->getNumber();
+ else dbgs() << "<null>";
+ dbgs() << ", computed pdom block: BB#";
+ if (PDomB) dbgs() << PDomB->getNumber();
+ else dbgs() << "<null>";
+ dbgs() << "\n";
+ });
+ if (!DomB || !PDomB)
+ return;
+
+ // Make sure that DomB dominates PDomB and PDomB post-dominates DomB.
+ if (!MDT.dominates(DomB, PDomB)) {
+ DEBUG(dbgs() << "Dom block does not dominate pdom block\n");
+ return;
+ }
+ if (!MPT.dominates(PDomB, DomB)) {
+ DEBUG(dbgs() << "PDom block does not post-dominate dom block\n");
+ return;
+ }
+
+ // Finally, everything seems right.
+ PrologB = DomB;
+ EpilogB = PDomB;
+}
+
+/// Perform most of the PEI work here:
+/// - saving/restoring of the callee-saved registers,
+/// - stack frame creation and destruction.
+/// Normally, this work is distributed among various functions, but doing it
+/// in one place allows shrink-wrapping of the stack frame.
+void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HRI = *HST.getRegisterInfo();
+
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr;
+ if (EnableShrinkWrapping)
+ findShrunkPrologEpilog(MF, PrologB, EpilogB);
+
+ insertCSRSpillsInBlock(*PrologB, CSI, HRI);
+ insertPrologueInBlock(*PrologB);
+
+ if (EpilogB) {
+ insertCSRRestoresInBlock(*EpilogB, CSI, HRI);
+ insertEpilogueInBlock(*EpilogB);
+ } else {
+ for (auto &B : MF)
+ if (B.isReturnBlock())
+ insertCSRRestoresInBlock(B, CSI, HRI);
+
+ for (auto &B : MF)
+ if (B.isReturnBlock())
+ insertEpilogueInBlock(B);
+ }
+}
+
+
+void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ auto &HII = *HST.getInstrInfo();
+ auto &HRI = *HST.getRegisterInfo();
+ DebugLoc dl;
+
+ unsigned MaxAlign = std::max(MFI->getMaxAlignment(), getStackAlignment());
+
+ // Calculate the total stack frame size.
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned FrameSize = MFI->getStackSize();
+ // Round up the max call frame size to the max alignment on the stack.
+ unsigned MaxCFA = RoundUpToAlignment(MFI->getMaxCallFrameSize(), MaxAlign);
+ MFI->setMaxCallFrameSize(MaxCFA);
+
+ FrameSize = MaxCFA + RoundUpToAlignment(FrameSize, MaxAlign);
+ MFI->setStackSize(FrameSize);
+
+ bool AlignStack = (MaxAlign > getStackAlignment());
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned NumBytes = MFI->getStackSize();
+ unsigned SP = HRI.getStackRegister();
+ unsigned MaxCF = MFI->getMaxCallFrameSize();
+ MachineBasicBlock::iterator InsertPt = MBB.begin();
+
+ auto *FuncInfo = MF.getInfo<HexagonMachineFunctionInfo>();
+ auto &AdjustRegs = FuncInfo->getAllocaAdjustInsts();
+
+ for (auto MI : AdjustRegs) {
+ assert((MI->getOpcode() == Hexagon::ALLOCA) && "Expected alloca");
+ expandAlloca(MI, HII, SP, MaxCF);
+ MI->eraseFromParent();
+ }
+
+ if (!hasFP(MF))
+ return;
+
+ // Check for overflow.
+ // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
+ const unsigned int ALLOCFRAME_MAX = 16384;
+
+  // Create a dummy memory operand to prevent allocframe from being treated as
+ // a volatile memory reference.
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
+ 4, 4);
+
+ if (NumBytes >= ALLOCFRAME_MAX) {
+ // Emit allocframe(#0).
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
+ .addImm(0)
+ .addMemOperand(MMO);
+
+    // Subtract the frame size from the stack pointer, using a caller-saved
+    // non-parameter register to hold the value.
+ unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg();
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32_Int_Real),
+ CallerSavedReg).addImm(NumBytes);
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP)
+ .addReg(SP)
+ .addReg(CallerSavedReg);
+ } else {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
+ .addImm(NumBytes)
+ .addMemOperand(MMO);
+ }
+
+ if (AlignStack) {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
+ .addReg(SP)
+ .addImm(-int64_t(MaxAlign));
+ }
+}
+
+void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
+ MachineFunction &MF = *MBB.getParent();
+ if (!hasFP(MF))
+ return;
+
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HII = *HST.getInstrInfo();
+ auto &HRI = *HST.getRegisterInfo();
+ unsigned SP = HRI.getStackRegister();
+
+ MachineInstr *RetI = nullptr;
+ for (auto &I : MBB) {
+ if (!I.isReturn())
+ continue;
+ RetI = &I;
+ break;
+ }
+ unsigned RetOpc = RetI ? RetI->getOpcode() : 0;
+
+ MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
+ DebugLoc DL;
+ if (InsertPt != MBB.end())
+ DL = InsertPt->getDebugLoc();
+ else if (!MBB.empty())
+ DL = std::prev(MBB.end())->getDebugLoc();
+
+ // Handle EH_RETURN.
+ if (RetOpc == Hexagon::EH_RETURN_JMPR) {
+ BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
+ BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::A2_add), SP)
+ .addReg(SP)
+ .addReg(Hexagon::R28);
+ return;
+ }
+
+ // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc-
+ // frame instruction if we encounter it.
+ if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4) {
+ MachineBasicBlock::iterator It = RetI;
+ ++It;
+ // Delete all instructions after the RESTORE (except labels).
+ while (It != MBB.end()) {
+ if (!It->isLabel())
+ It = MBB.erase(It);
+ else
+ ++It;
+ }
+ return;
+ }
+
+ // It is possible that the restoring code is a call to a library function.
+ // All of the restore* functions include "deallocframe", so we need to make
+ // sure that we don't add an extra one.
+ bool NeedsDeallocframe = true;
+ if (!MBB.empty() && InsertPt != MBB.begin()) {
+ MachineBasicBlock::iterator PrevIt = std::prev(InsertPt);
+ unsigned COpc = PrevIt->getOpcode();
+ if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4)
+ NeedsDeallocframe = false;
+ }
+
+ if (!NeedsDeallocframe)
+ return;
+ // If the returning instruction is JMPret, replace it with dealloc_return,
+ // otherwise just add deallocframe. The function could be returning via a
+ // tail call.
+ if (RetOpc != Hexagon::JMPret || DisableDeallocRet) {
+ BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
+ return;
+ }
+ unsigned NewOpc = Hexagon::L4_return;
+ MachineInstr *NewI = BuildMI(MBB, RetI, DL, HII.get(NewOpc));
+ // Transfer the function live-out registers.
+ NewI->copyImplicitOps(MF, RetI);
+ MBB.erase(RetI);
+}
+
+
+namespace {
+ bool IsAllocFrame(MachineBasicBlock::const_iterator It) {
+ if (!It->isBundle())
+ return It->getOpcode() == Hexagon::S2_allocframe;
+ auto End = It->getParent()->instr_end();
+ MachineBasicBlock::const_instr_iterator I = It.getInstrIterator();
+ while (++I != End && I->isBundled())
+ if (I->getOpcode() == Hexagon::S2_allocframe)
+ return true;
+ return false;
+ }
+
+ MachineBasicBlock::iterator FindAllocFrame(MachineBasicBlock &B) {
+ for (auto &I : B)
+ if (IsAllocFrame(I))
+ return I;
+ return B.end();
+ }
+}
+
+
+void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const {
+ for (auto &B : MF) {
+ auto AF = FindAllocFrame(B);
+ if (AF == B.end())
+ continue;
+ insertCFIInstructionsAt(B, ++AF);
+ }
+}
+
+
+void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator At) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ auto &HII = *HST.getInstrInfo();
+ auto &HRI = *HST.getRegisterInfo();
+
+ // If CFI instructions have debug information attached, something goes
+ // wrong with the final assembly generation: the prolog_end is placed
+  // in the wrong location.
+ DebugLoc DL;
+ const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION);
+
+ MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
+
+ if (hasFP(MF)) {
+ unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);
+ unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);
+
+ // Define CFA via an offset from the value of FP.
+ //
+ // -8 -4 0 (SP)
+ // --+----+----+---------------------
+ // | FP | LR | increasing addresses -->
+ // --+----+----+---------------------
+ // | +-- Old SP (before allocframe)
+ // +-- New FP (after allocframe)
+ //
+ // MCCFIInstruction::createDefCfa subtracts the offset from the register.
+ // MCCFIInstruction::createOffset takes the offset without sign change.
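+    //
+    // For illustration, the three directives built below should amount to
+    // (register names shown instead of DWARF numbers):
+    //   .cfi_def_cfa r30, 8
+    //   .cfi_offset r31, -4
+    //   .cfi_offset r30, -8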
+ auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(DefCfa));
+ // R31 (return addr) = CFA - 4
+ auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(OffR31));
+ // R30 (frame ptr) = CFA - 8
+ auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(OffR30));
+ }
+
+ static unsigned int RegsToMove[] = {
+ Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2,
+ Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,
+ Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,
+ Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,
+ Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9,
+ Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13,
+ Hexagon::NoRegister
+ };
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) {
+ unsigned Reg = RegsToMove[i];
+ auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool {
+ return C.getReg() == Reg;
+ };
+ auto F = std::find_if(CSI.begin(), CSI.end(), IfR);
+ if (F == CSI.end())
+ continue;
+
+ // Subtract 8 to make room for R30 and R31, which are added above.
+ unsigned FrameReg;
+ int64_t Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg) - 8;
+
+ if (Reg < Hexagon::D0 || Reg > Hexagon::D15) {
+ unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true);
+ auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg,
+ Offset);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(OffReg));
+ } else {
+ // Split the double regs into subregs, and generate appropriate
+ // cfi_offsets.
+      // The only reason we split double regs is that llvm-mc does not
+      // understand paired registers for cfi_offset.
+      // E.g. .cfi_offset r1:0, -64
+
+ unsigned HiReg = HRI.getSubReg(Reg, Hexagon::subreg_hireg);
+ unsigned LoReg = HRI.getSubReg(Reg, Hexagon::subreg_loreg);
+ unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
+ unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
+ auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,
+ Offset+4);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(OffHi));
+ auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg,
+ Offset);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(OffLo));
+ }
+ }
+}
+
+
+bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
+ auto &MFI = *MF.getFrameInfo();
+ auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+
+ bool HasFixed = MFI.getNumFixedObjects();
+ bool HasPrealloc = const_cast<MachineFrameInfo&>(MFI)
+ .getLocalFrameObjectCount();
+ bool HasExtraAlign = HRI.needsStackRealignment(MF);
+ bool HasAlloca = MFI.hasVarSizedObjects();
+
+  // Insert ALLOCFRAME if we need to, or at -O0 for the debugger. We think
+  // this shouldn't be required, but we do it for now because gcc does and
+  // gdb can't break at the start of the function without it. We will remove
+  // it if this turns out to be a gdb bug.
+ //
+ if (MF.getTarget().getOptLevel() == CodeGenOpt::None)
+ return true;
+
+ // By default we want to use SP (since it's always there). FP requires
+ // some setup (i.e. ALLOCFRAME).
+ // Fixed and preallocated objects need FP if the distance from them to
+  // the SP is unknown (as it is with alloca or aligna).
+ if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign))
+ return true;
+
+ if (MFI.getStackSize() > 0) {
+ if (UseAllocframe)
+ return true;
+ }
+
+ if (MFI.hasCalls() ||
+ MF.getInfo<HexagonMachineFunctionInfo>()->hasClobberLR())
+ return true;
+
+ return false;
+}
+
+
+enum SpillKind {
+ SK_ToMem,
+ SK_FromMem,
+ SK_FromMemTailcall
+};
+
+static const char *
+getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType) {
+ const char * V4SpillToMemoryFunctions[] = {
+ "__save_r16_through_r17",
+ "__save_r16_through_r19",
+ "__save_r16_through_r21",
+ "__save_r16_through_r23",
+ "__save_r16_through_r25",
+ "__save_r16_through_r27" };
+
+ const char * V4SpillFromMemoryFunctions[] = {
+ "__restore_r16_through_r17_and_deallocframe",
+ "__restore_r16_through_r19_and_deallocframe",
+ "__restore_r16_through_r21_and_deallocframe",
+ "__restore_r16_through_r23_and_deallocframe",
+ "__restore_r16_through_r25_and_deallocframe",
+ "__restore_r16_through_r27_and_deallocframe" };
+
+ const char * V4SpillFromMemoryTailcallFunctions[] = {
+ "__restore_r16_through_r17_and_deallocframe_before_tailcall",
+ "__restore_r16_through_r19_and_deallocframe_before_tailcall",
+ "__restore_r16_through_r21_and_deallocframe_before_tailcall",
+ "__restore_r16_through_r23_and_deallocframe_before_tailcall",
+ "__restore_r16_through_r25_and_deallocframe_before_tailcall",
+ "__restore_r16_through_r27_and_deallocframe_before_tailcall"
+ };
+
+ const char **SpillFunc = nullptr;
+
+ switch(SpillType) {
+ case SK_ToMem:
+ SpillFunc = V4SpillToMemoryFunctions;
+ break;
+ case SK_FromMem:
+ SpillFunc = V4SpillFromMemoryFunctions;
+ break;
+ case SK_FromMemTailcall:
+ SpillFunc = V4SpillFromMemoryTailcallFunctions;
+ break;
+ }
+ assert(SpillFunc && "Unknown spill kind");
+
+ // Spill all callee-saved registers up to the highest register used.
+ switch (MaxReg) {
+ case Hexagon::R17:
+ return SpillFunc[0];
+ case Hexagon::R19:
+ return SpillFunc[1];
+ case Hexagon::R21:
+ return SpillFunc[2];
+ case Hexagon::R23:
+ return SpillFunc[3];
+ case Hexagon::R25:
+ return SpillFunc[4];
+ case Hexagon::R27:
+ return SpillFunc[5];
+ default:
+ llvm_unreachable("Unhandled maximum callee save register");
+ }
+ return 0;
+}
+
+/// Adds all callee-saved registers up to MaxReg to the instruction.
+static void addCalleeSaveRegistersAsImpOperand(MachineInstr *Inst,
+ unsigned MaxReg, bool IsDef) {
+  // Add the callee-saved registers as implicit operands (uses, or defs when
+  // IsDef is set).
+ for (unsigned R = Hexagon::R16; R <= MaxReg; ++R) {
+ MachineOperand ImpUse = MachineOperand::CreateReg(R, IsDef, true);
+ Inst->addOperand(ImpUse);
+ }
+}
+
+
+int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg) const {
+ auto &MFI = *MF.getFrameInfo();
+ auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+
+ // Large parts of this code are shared with HRI::eliminateFrameIndex.
+ int Offset = MFI.getObjectOffset(FI);
+ bool HasAlloca = MFI.hasVarSizedObjects();
+ bool HasExtraAlign = HRI.needsStackRealignment(MF);
+ bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;
+
+ unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister();
+ unsigned AP = 0;
+ if (const MachineInstr *AI = getAlignaInstr(MF))
+ AP = AI->getOperand(0).getReg();
+ unsigned FrameSize = MFI.getStackSize();
+
+ bool UseFP = false, UseAP = false; // Default: use SP (except at -O0).
+ // Use FP at -O0, except when there are objects with extra alignment.
+ // That additional alignment requirement may cause a pad to be inserted,
+ // which will make it impossible to use FP to access objects located
+ // past the pad.
+ if (NoOpt && !HasExtraAlign)
+ UseFP = true;
+ if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {
+ // Fixed and preallocated objects will be located before any padding
+ // so FP must be used to access them.
+ UseFP |= (HasAlloca || HasExtraAlign);
+ } else {
+ if (HasAlloca) {
+ if (HasExtraAlign)
+ UseAP = true;
+ else
+ UseFP = true;
+ }
+ }
+
+ // If FP was picked, then there had better be FP.
+ bool HasFP = hasFP(MF);
+ assert((HasFP || !UseFP) && "This function must have frame pointer");
+
+ // Having FP implies allocframe. Allocframe will store extra 8 bytes:
+ // FP/LR. If the base register is used to access an object across these
+ // 8 bytes, then the offset will need to be adjusted by 8.
+ //
+ // After allocframe:
+ // HexagonISelLowering adds 8 to ---+
+ // the offsets of all stack-based |
+ // arguments (*) |
+ // |
+ // getObjectOffset < 0 0 8 getObjectOffset >= 8
+ // ------------------------+-----+------------------------> increasing
+ // <local objects> |FP/LR| <input arguments> addresses
+ // -----------------+------+-----+------------------------>
+ // | |
+ // SP/AP point --+ +-- FP points here (**)
+ // somewhere on
+ // this side of FP/LR
+ //
+ // (*) See LowerFormalArguments. The FP/LR is assumed to be present.
+ // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR.
+
+ // The lowering assumes that FP/LR is present, and so the offsets of
+ // the formal arguments start at 8. If FP/LR is not there we need to
+ // reduce the offset by 8.
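+  //
+  // A worked example with hypothetical numbers: in an optimized build, for a
+  // local (non-fixed) object at getObjectOffset() == -12, with FrameSize ==
+  // 32, no alloca and no extra alignment, SP is chosen as the base register,
+  // and since allocframe is present the returned offset is 32 + (-12) = 20.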
+ if (Offset > 0 && !HasFP)
+ Offset -= 8;
+
+ if (UseFP)
+ FrameReg = FP;
+ else if (UseAP)
+ FrameReg = AP;
+ else
+ FrameReg = SP;
+
+ // Calculate the actual offset in the instruction. If there is no FP
+ // (in other words, no allocframe), then SP will not be adjusted (i.e.
+ // there will be no SP -= FrameSize), so the frame size should not be
+ // added to the calculated offset.
+ int RealOffset = Offset;
+ if (!UseFP && !UseAP && HasFP)
+ RealOffset = FrameSize+Offset;
+ return RealOffset;
+}
+
+
+bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
+ const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
+ if (CSI.empty())
+ return true;
+
+ MachineBasicBlock::iterator MI = MBB.begin();
+ MachineFunction &MF = *MBB.getParent();
+ auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+
+ if (useSpillFunction(MF, CSI)) {
+ unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI);
+ const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem);
+ // Call spill function.
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
+ MachineInstr *SaveRegsCall =
+ BuildMI(MBB, MI, DL, HII.get(Hexagon::SAVE_REGISTERS_CALL_V4))
+ .addExternalSymbol(SpillFun);
+ // Add callee-saved registers as use.
+ addCalleeSaveRegistersAsImpOperand(SaveRegsCall, MaxReg, false);
+ // Add live in registers.
+ for (unsigned I = 0; I < CSI.size(); ++I)
+ MBB.addLiveIn(CSI[I].getReg());
+ return true;
+ }
+
+ for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
+ unsigned Reg = CSI[i].getReg();
+    // Add live in registers. We treat eh_return callee saved registers r0-r3
+ // specially. They are not really callee saved registers as they are not
+ // supposed to be killed.
+ bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
+ int FI = CSI[i].getFrameIdx();
+ const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
+ HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
+ if (IsKill)
+ MBB.addLiveIn(Reg);
+ }
+ return true;
+}
+
+
+bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
+ const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+ MachineFunction &MF = *MBB.getParent();
+ auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+
+ if (useRestoreFunction(MF, CSI)) {
+ bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);
+ unsigned MaxR = getMaxCalleeSavedReg(CSI, HRI);
+ SpillKind Kind = HasTC ? SK_FromMemTailcall : SK_FromMem;
+ const char *RestoreFn = getSpillFunctionFor(MaxR, Kind);
+
+ // Call spill function.
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()
+ : MBB.getLastNonDebugInstr()->getDebugLoc();
+ MachineInstr *DeallocCall = nullptr;
+
+ if (HasTC) {
+ unsigned ROpc = Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
+ DeallocCall = BuildMI(MBB, MI, DL, HII.get(ROpc))
+ .addExternalSymbol(RestoreFn);
+ } else {
+ // The block has a return.
+ MachineBasicBlock::iterator It = MBB.getFirstTerminator();
+ assert(It->isReturn() && std::next(It) == MBB.end());
+ unsigned ROpc = Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
+ DeallocCall = BuildMI(MBB, It, DL, HII.get(ROpc))
+ .addExternalSymbol(RestoreFn);
+ // Transfer the function live-out registers.
+ DeallocCall->copyImplicitOps(MF, It);
+ }
+ addCalleeSaveRegistersAsImpOperand(DeallocCall, MaxR, true);
+ return true;
+ }
+
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
+ int FI = CSI[i].getFrameIdx();
+ HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
+ }
+ return true;
+}
+
+
+void HexagonFrameLowering::eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+ unsigned Opc = MI.getOpcode();
+ (void)Opc; // Silence compiler warning.
+ assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) &&
+ "Cannot handle this call frame pseudo instruction");
+ MBB.erase(I);
+}
+
+
+void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF, RegScavenger *RS) const {
+  // If this function uses an aligned stack and also has variable-sized stack
+ // objects, then we need to map all spill slots to fixed positions, so that
+ // they can be accessed through FP. Otherwise they would have to be accessed
+ // via AP, which may not be available at the particular place in the program.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool HasAlloca = MFI->hasVarSizedObjects();
+ bool NeedsAlign = (MFI->getMaxAlignment() > getStackAlignment());
+
+ if (!HasAlloca || !NeedsAlign)
+ return;
+
+ unsigned LFS = MFI->getLocalFrameSize();
+ int Offset = -LFS;
+ for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (!MFI->isSpillSlotObjectIndex(i) || MFI->isDeadObjectIndex(i))
+ continue;
+ int S = MFI->getObjectSize(i);
+ LFS += S;
+ Offset -= S;
+ MFI->mapLocalFrameObject(i, Offset);
+ }
+
+ MFI->setLocalFrameSize(LFS);
+ unsigned A = MFI->getLocalFrameMaxAlign();
+ assert(A <= 8 && "Unexpected local frame alignment");
+ if (A == 0)
+ MFI->setLocalFrameMaxAlign(8);
+ MFI->setUseLocalStackAllocationBlock(true);
+}
+
+/// Returns true if there are no caller-saved registers available.
+static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
+ const HexagonRegisterInfo &HRI) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MCPhysReg *CallerSavedRegs = HRI.getCallerSavedRegs(&MF);
+ // Check for an unused caller-saved register.
+ for ( ; *CallerSavedRegs; ++CallerSavedRegs) {
+ MCPhysReg FreeReg = *CallerSavedRegs;
+ if (!MRI.reg_nodbg_empty(FreeReg))
+ continue;
+
+ // Check aliased register usage.
+ bool IsCurrentRegUsed = false;
+ for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI)
+ if (!MRI.reg_nodbg_empty(*AI)) {
+ IsCurrentRegUsed = true;
+ break;
+ }
+ if (IsCurrentRegUsed)
+ continue;
+
+ // Neither directly used nor used through an aliased register.
+ return false;
+ }
+ // All caller-saved registers are used.
+ return true;
+}
+
+
+/// Replaces the predicate spill code pseudo instructions by valid instructions.
+bool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF)
+ const {
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HII = *HST.getInstrInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool HasReplacedPseudoInst = false;
+ // Replace predicate spill pseudo instructions by real code.
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock *MBB = &*MBBb;
+ // Traverse the basic block.
+ MachineBasicBlock::iterator NextII;
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ MII = NextII) {
+ MachineInstr *MI = MII;
+ NextII = std::next(MII);
+ int Opc = MI->getOpcode();
+ if (Opc == Hexagon::STriw_pred) {
+ HasReplacedPseudoInst = true;
+ // STriw_pred FI, 0, SrcReg;
+ unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ unsigned SrcReg = MI->getOperand(2).getReg();
+ bool IsOrigSrcRegKilled = MI->getOperand(2).isKill();
+
+ assert(MI->getOperand(0).isFI() && "Expect a frame index");
+ assert(Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ "Not a predicate register");
+
+ // Insert transfer to general purpose register.
+ // VirtReg = C2_tfrpr SrcPredReg
+ BuildMI(*MBB, MII, MI->getDebugLoc(), HII.get(Hexagon::C2_tfrpr),
+ VirtReg).addReg(SrcReg, getKillRegState(IsOrigSrcRegKilled));
+
+ // Change instruction to S2_storeri_io.
+ // S2_storeri_io FI, 0, VirtReg
+ MI->setDesc(HII.get(Hexagon::S2_storeri_io));
+ MI->getOperand(2).setReg(VirtReg);
+ MI->getOperand(2).setIsKill();
+
+ } else if (Opc == Hexagon::LDriw_pred) {
+ // DstReg = LDriw_pred FI, 0
+ MachineOperand &M0 = MI->getOperand(0);
+ if (M0.isDead()) {
+ MBB->erase(MII);
+ continue;
+ }
+
+ unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ unsigned DestReg = MI->getOperand(0).getReg();
+
+ assert(MI->getOperand(1).isFI() && "Expect a frame index");
+ assert(Hexagon::PredRegsRegClass.contains(DestReg) &&
+ "Not a predicate register");
+
+ // Change instruction to L2_loadri_io.
+ // VirtReg = L2_loadri_io FI, 0
+ MI->setDesc(HII.get(Hexagon::L2_loadri_io));
+ MI->getOperand(0).setReg(VirtReg);
+
+ // Insert transfer to general purpose register.
+ // DestReg = C2_tfrrp VirtReg
+ const MCInstrDesc &D = HII.get(Hexagon::C2_tfrrp);
+ BuildMI(*MBB, std::next(MII), MI->getDebugLoc(), D, DestReg)
+ .addReg(VirtReg, getKillRegState(true));
+ HasReplacedPseudoInst = true;
+ }
+ }
+ }
+ return HasReplacedPseudoInst;
+}
+
+
+void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
+ auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HRI = *HST.getRegisterInfo();
+
+ bool HasEHReturn = MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn();
+
+ // If we have a function containing __builtin_eh_return we want to spill and
+ // restore all callee saved registers. Pretend that they are used.
+ if (HasEHReturn) {
+ for (const MCPhysReg *CSRegs = HRI.getCalleeSavedRegs(&MF); *CSRegs;
+ ++CSRegs)
+ SavedRegs.set(*CSRegs);
+ }
+
+ const TargetRegisterClass &RC = Hexagon::IntRegsRegClass;
+
+ // Replace predicate register pseudo spill code.
+ bool HasReplacedPseudoInst = replacePredRegPseudoSpillCode(MF);
+
+  // We need to reserve a spill slot if scavenging could potentially require
+ // spilling a scavenged register.
+ if (HasReplacedPseudoInst && needToReserveScavengingSpillSlots(MF, HRI)) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ for (int i=0; i < NumberScavengerSlots; i++)
+ RS->addScavengingFrameIndex(
+ MFI->CreateSpillStackObject(RC.getSize(), RC.getAlignment()));
+ }
+}
+
+
+#ifndef NDEBUG
+static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) {
+ dbgs() << '{';
+ for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) {
+ unsigned R = x;
+ dbgs() << ' ' << PrintReg(R, &TRI);
+ }
+ dbgs() << " }";
+}
+#endif
+
+
+bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const {
+ DEBUG(dbgs() << LLVM_FUNCTION_NAME << " on "
+ << MF.getFunction()->getName() << '\n');
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector SRegs(Hexagon::NUM_TARGET_REGS);
+
+ // Generate a set of unique, callee-saved registers (SRegs), where each
+ // register in the set is maximal in terms of sub-/super-register relation,
+ // i.e. for each R in SRegs, no proper super-register of R is also in SRegs.
+
+ // (1) For each callee-saved register, add that register and all of its
+ // sub-registers to SRegs.
+ DEBUG(dbgs() << "Initial CS registers: {");
+ for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
+ unsigned R = CSI[i].getReg();
+ DEBUG(dbgs() << ' ' << PrintReg(R, TRI));
+ for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
+ SRegs[*SR] = true;
+ }
+ DEBUG(dbgs() << " }\n");
+ DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
+
+ // (2) For each reserved register, remove that register and all of its
+ // sub- and super-registers from SRegs.
+ BitVector Reserved = TRI->getReservedRegs(MF);
+ for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) {
+ unsigned R = x;
+ for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
+ SRegs[*SR] = false;
+ }
+ DEBUG(dbgs() << "Res: "; dump_registers(Reserved, *TRI); dbgs() << "\n");
+ DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
+
+ // (3) Collect all registers that have at least one sub-register in SRegs,
+ // and also have no sub-registers that are reserved. These will be the can-
+ // didates for saving as a whole instead of their individual sub-registers.
+ // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.)
+ BitVector TmpSup(Hexagon::NUM_TARGET_REGS);
+ for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
+ unsigned R = x;
+ for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR)
+ TmpSup[*SR] = true;
+ }
+ for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) {
+ unsigned R = x;
+ for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) {
+ if (!Reserved[*SR])
+ continue;
+ TmpSup[R] = false;
+ break;
+ }
+ }
+ DEBUG(dbgs() << "TmpSup: "; dump_registers(TmpSup, *TRI); dbgs() << "\n");
+
+ // (4) Include all super-registers found in (3) into SRegs.
+ SRegs |= TmpSup;
+ DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
+
+ // (5) For each register R in SRegs, if any super-register of R is in SRegs,
+ // remove R from SRegs.
+ for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
+ unsigned R = x;
+ for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) {
+ if (!SRegs[*SR])
+ continue;
+ SRegs[R] = false;
+ break;
+ }
+ }
+ DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
+
+ // Now, for each register that has a fixed stack slot, create the stack
+ // object for it.
+ CSI.clear();
+
+ typedef TargetFrameLowering::SpillSlot SpillSlot;
+ unsigned NumFixed;
+ int MinOffset = 0; // CS offsets are negative.
+ const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed);
+ for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) {
+ if (!SRegs[S->Reg])
+ continue;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg);
+ int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), S->Offset);
+ MinOffset = std::min(MinOffset, S->Offset);
+ CSI.push_back(CalleeSavedInfo(S->Reg, FI));
+ SRegs[S->Reg] = false;
+ }
+
+ // There can be some registers that don't have fixed slots. For example,
+ // we need to store R0-R3 in functions with exception handling. For each
+ // such register, create a non-fixed stack object.
+ for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
+ unsigned R = x;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R);
+ int Off = MinOffset - RC->getSize();
+ unsigned Align = std::min(RC->getAlignment(), getStackAlignment());
+ assert(isPowerOf2_32(Align));
+ Off &= -Align;
+ int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), Off);
+ MinOffset = std::min(MinOffset, Off);
+ CSI.push_back(CalleeSavedInfo(R, FI));
+ SRegs[R] = false;
+ }
+
+ DEBUG({
+ dbgs() << "CS information: {";
+ for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
+ int FI = CSI[i].getFrameIdx();
+ int Off = MFI->getObjectOffset(FI);
+ dbgs() << ' ' << PrintReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp";
+ if (Off >= 0)
+ dbgs() << '+';
+ dbgs() << Off;
+ }
+ dbgs() << " }\n";
+ });
+
+#ifndef NDEBUG
+ // Verify that all registers were handled.
+ bool MissedReg = false;
+ for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
+ unsigned R = x;
+ dbgs() << PrintReg(R, TRI) << ' ';
+ MissedReg = true;
+ }
+ if (MissedReg)
+ llvm_unreachable("...there are unhandled callee-saved registers!");
+#endif
+
+ return true;
+}
+
+
+void HexagonFrameLowering::expandAlloca(MachineInstr *AI,
+ const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const {
+ MachineBasicBlock &MB = *AI->getParent();
+ DebugLoc DL = AI->getDebugLoc();
+ unsigned A = AI->getOperand(2).getImm();
+
+ // Have
+ // Rd = alloca Rs, #A
+ //
+ // If Rs and Rd are different registers, use this sequence:
+ // Rd = sub(r29, Rs)
+ // r29 = sub(r29, Rs)
+ // Rd = and(Rd, #-A) ; if necessary
+ // r29 = and(r29, #-A) ; if necessary
+ // Rd = add(Rd, #CF) ; CF size aligned to at most A
+ // otherwise, do
+ // Rd = sub(r29, Rs)
+ // Rd = and(Rd, #-A) ; if necessary
+ // r29 = Rd
+ // Rd = add(Rd, #CF) ; CF size aligned to at most A
+
+ MachineOperand &RdOp = AI->getOperand(0);
+ MachineOperand &RsOp = AI->getOperand(1);
+ unsigned Rd = RdOp.getReg(), Rs = RsOp.getReg();
+
+ // Rd = sub(r29, Rs)
+ BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd)
+ .addReg(SP)
+ .addReg(Rs);
+ if (Rs != Rd) {
+ // r29 = sub(r29, Rs)
+ BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP)
+ .addReg(SP)
+ .addReg(Rs);
+ }
+ if (A > 8) {
+ // Rd = and(Rd, #-A)
+ BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd)
+ .addReg(Rd)
+ .addImm(-int64_t(A));
+ if (Rs != Rd)
+ BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP)
+ .addReg(SP)
+ .addImm(-int64_t(A));
+ }
+ if (Rs == Rd) {
+ // r29 = Rd
+ BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP)
+ .addReg(Rd);
+ }
+ if (CF > 0) {
+ // Rd = add(Rd, #CF)
+ BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd)
+ .addReg(Rd)
+ .addImm(CF);
+ }
+}
+
+
+bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (!MFI->hasVarSizedObjects())
+ return false;
+ unsigned MaxA = MFI->getMaxAlignment();
+ if (MaxA <= getStackAlignment())
+ return false;
+ return true;
+}
+
+
+const MachineInstr *HexagonFrameLowering::getAlignaInstr(
+ const MachineFunction &MF) const {
+ for (auto &B : MF)
+ for (auto &I : B)
+ if (I.getOpcode() == Hexagon::ALIGNA)
+ return &I;
+ return nullptr;
+}
+
+
+// FIXME: Use Function::optForSize().
+inline static bool isOptSize(const MachineFunction &MF) {
+ AttributeSet AF = MF.getFunction()->getAttributes();
+ return AF.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize);
+}
+
+inline static bool isMinSize(const MachineFunction &MF) {
+ return MF.getFunction()->optForMinSize();
+}
+
+
+/// Determine whether the callee-saved register saves and restores should
+/// be generated via inline code. If this function returns "true", inline
+/// code will be generated. If this function returns "false", additional
+/// checks are performed, which may still lead to the inline code.
+bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF,
+ const CSIVect &CSI) const {
+ if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
+ return true;
+ if (!isOptSize(MF) && !isMinSize(MF))
+ if (MF.getTarget().getOptLevel() > CodeGenOpt::Default)
+ return true;
+
+ // Check if CSI only has double registers, and if the registers form
+ // a contiguous block starting from D8.
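+  // For example, a CSI of {D8, D9, D10} keeps the spill-function path open,
+  // while {D8, D10} (a gap) or any non-double register forces inline code.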
+ BitVector Regs(Hexagon::NUM_TARGET_REGS);
+ for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
+ unsigned R = CSI[i].getReg();
+ if (!Hexagon::DoubleRegsRegClass.contains(R))
+ return true;
+ Regs[R] = true;
+ }
+ int F = Regs.find_first();
+ if (F != Hexagon::D8)
+ return true;
+ while (F >= 0) {
+ int N = Regs.find_next(F);
+ if (N >= 0 && N != F+1)
+ return true;
+ F = N;
+ }
+
+ return false;
+}
+
+
+bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF,
+ const CSIVect &CSI) const {
+ if (shouldInlineCSR(MF, CSI))
+ return false;
+ unsigned NumCSI = CSI.size();
+ if (NumCSI <= 1)
+ return false;
+
+ unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs
+ : SpillFuncThreshold;
+ return Threshold < NumCSI;
+}
+
+
+bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF,
+ const CSIVect &CSI) const {
+ if (shouldInlineCSR(MF, CSI))
+ return false;
+ unsigned NumCSI = CSI.size();
+ unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs-1
+ : SpillFuncThreshold;
+ return Threshold < NumCSI;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
new file mode 100644
index 0000000..683b303
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -0,0 +1,109 @@
+//=- HexagonFrameLowering.h - Define frame lowering for Hexagon --*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H
+
+#include "Hexagon.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class HexagonInstrInfo;
+class HexagonRegisterInfo;
+
+class HexagonFrameLowering : public TargetFrameLowering {
+public:
+ explicit HexagonFrameLowering()
+ : TargetFrameLowering(StackGrowsDown, 8, 0, 1, true) {}
+
+ // All of the prolog/epilog functionality, including saving and restoring
+  // callee-saved registers, is handled in emitPrologue. This is to have the
+ // logic for shrink-wrapping in one place.
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
+ override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+ override {}
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override {
+ return true;
+ }
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override {
+ return true;
+ }
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = nullptr) const override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS) const override;
+
+ bool targetHandlesStackFrameRounding() const override {
+ return true;
+ }
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
+ bool hasFP(const MachineFunction &MF) const override;
+
+ const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries)
+ const override {
+ static const SpillSlot Offsets[] = {
+ { Hexagon::R17, -4 }, { Hexagon::R16, -8 }, { Hexagon::D8, -8 },
+ { Hexagon::R19, -12 }, { Hexagon::R18, -16 }, { Hexagon::D9, -16 },
+ { Hexagon::R21, -20 }, { Hexagon::R20, -24 }, { Hexagon::D10, -24 },
+ { Hexagon::R23, -28 }, { Hexagon::R22, -32 }, { Hexagon::D11, -32 },
+ { Hexagon::R25, -36 }, { Hexagon::R24, -40 }, { Hexagon::D12, -40 },
+ { Hexagon::R27, -44 }, { Hexagon::R26, -48 }, { Hexagon::D13, -48 }
+ };
+ NumEntries = array_lengthof(Offsets);
+ return Offsets;
+ }
+
+ bool assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI)
+ const override;
+
+ bool needsAligna(const MachineFunction &MF) const;
+ const MachineInstr *getAlignaInstr(const MachineFunction &MF) const;
+
+ void insertCFIInstructions(MachineFunction &MF) const;
+
+private:
+ typedef std::vector<CalleeSavedInfo> CSIVect;
+
+ void expandAlloca(MachineInstr *AI, const HexagonInstrInfo &TII,
+ unsigned SP, unsigned CF) const;
+ void insertPrologueInBlock(MachineBasicBlock &MBB) const;
+ void insertEpilogueInBlock(MachineBasicBlock &MBB) const;
+ bool insertCSRSpillsInBlock(MachineBasicBlock &MBB, const CSIVect &CSI,
+ const HexagonRegisterInfo &HRI) const;
+ bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI,
+ const HexagonRegisterInfo &HRI) const;
+ void insertCFIInstructionsAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator At) const;
+
+ void adjustForCalleeSavedRegsSpillCall(MachineFunction &MF) const;
+ bool replacePredRegPseudoSpillCode(MachineFunction &MF) const;
+ bool replaceVecPredRegPseudoSpillCode(MachineFunction &MF) const;
+
+ void findShrunkPrologEpilog(MachineFunction &MF, MachineBasicBlock *&PrologB,
+ MachineBasicBlock *&EpilogB) const;
+
+ bool shouldInlineCSR(llvm::MachineFunction &MF, const CSIVect &CSI) const;
+ bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const;
+ bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
new file mode 100644
index 0000000..f26e2ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
@@ -0,0 +1,259 @@
+//===--- HexagonGenExtract.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned> ExtractCutoff("extract-cutoff", cl::init(~0U),
+ cl::Hidden, cl::desc("Cutoff for generating \"extract\""
+ " instructions"));
+
+// This prevents generating extract instructions that have the offset of 0.
+// One of the reasons for "extract" is to put a sequence of bits in a regis-
+// ter, starting at offset 0 (so that these bits can then be used by an
+// "insert"). If the bits are already at offset 0, it is better not to gene-
+// rate "extract", since logical bit operations can be merged into compound
+// instructions (as opposed to "extract").
+static cl::opt<bool> NoSR0("extract-nosr0", cl::init(true), cl::Hidden,
+ cl::desc("No extract instruction with offset 0"));
+
+static cl::opt<bool> NeedAnd("extract-needand", cl::init(true), cl::Hidden,
+ cl::desc("Require & in extract patterns"));
+
+namespace llvm {
+ void initializeHexagonGenExtractPass(PassRegistry&);
+ FunctionPass *createHexagonGenExtract();
+}
+
+
+namespace {
+ class HexagonGenExtract : public FunctionPass {
+ public:
+ static char ID;
+ HexagonGenExtract() : FunctionPass(ID), ExtractCount(0) {
+ initializeHexagonGenExtractPass(*PassRegistry::getPassRegistry());
+ }
+ virtual const char *getPassName() const override {
+ return "Hexagon generate \"extract\" instructions";
+ }
+ virtual bool runOnFunction(Function &F) override;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+ private:
+ bool visitBlock(BasicBlock *B);
+ bool convert(Instruction *In);
+
+ unsigned ExtractCount;
+ DominatorTree *DT;
+ };
+
+ char HexagonGenExtract::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(HexagonGenExtract, "hextract", "Hexagon generate "
+ "\"extract\" instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(HexagonGenExtract, "hextract", "Hexagon generate "
+ "\"extract\" instructions", false, false)
+
+
+bool HexagonGenExtract::convert(Instruction *In) {
+ using namespace PatternMatch;
+ Value *BF = 0;
+ ConstantInt *CSL = 0, *CSR = 0, *CM = 0;
+ BasicBlock *BB = In->getParent();
+ LLVMContext &Ctx = BB->getContext();
+ bool LogicalSR;
+
+ // (and (shl (lshr x, #sr), #sl), #m)
+ LogicalSR = true;
+ bool Match = match(In, m_And(m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CSL)),
+ m_ConstantInt(CM)));
+
+ if (!Match) {
+ // (and (shl (ashr x, #sr), #sl), #m)
+ LogicalSR = false;
+ Match = match(In, m_And(m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CSL)),
+ m_ConstantInt(CM)));
+ }
+ if (!Match) {
+ // (and (shl x, #sl), #m)
+ LogicalSR = true;
+ CSR = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+ Match = match(In, m_And(m_Shl(m_Value(BF), m_ConstantInt(CSL)),
+ m_ConstantInt(CM)));
+ if (Match && NoSR0)
+ return false;
+ }
+ if (!Match) {
+ // (and (lshr x, #sr), #m)
+ LogicalSR = true;
+ CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+ Match = match(In, m_And(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CM)));
+ }
+ if (!Match) {
+ // (and (ashr x, #sr), #m)
+ LogicalSR = false;
+ CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+ Match = match(In, m_And(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CM)));
+ }
+ if (!Match) {
+ CM = 0;
+ // (shl (lshr x, #sr), #sl)
+ LogicalSR = true;
+ Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CSL)));
+ }
+ if (!Match) {
+ CM = 0;
+ // (shl (ashr x, #sr), #sl)
+ LogicalSR = false;
+ Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
+ m_ConstantInt(CSL)));
+ }
+ if (!Match)
+ return false;
+
+ Type *Ty = BF->getType();
+ if (!Ty->isIntegerTy())
+ return false;
+ unsigned BW = Ty->getPrimitiveSizeInBits();
+ if (BW != 32 && BW != 64)
+ return false;
+
+ uint32_t SR = CSR->getZExtValue();
+ uint32_t SL = CSL->getZExtValue();
+
+ if (!CM) {
+ // If there was no and, and the shift left did not remove all potential
+ // sign bits created by the shift right, then extractu cannot reproduce
+ // this value.
+ if (!LogicalSR && (SR > SL))
+ return false;
+ APInt A = APInt(BW, ~0ULL).lshr(SR).shl(SL);
+ CM = ConstantInt::get(Ctx, A);
+ }
+
+ // CM is the shifted-left mask. Shift it back right to remove the zero
+ // bits on least-significant positions.
+ APInt M = CM->getValue().lshr(SL);
+ uint32_t T = M.countTrailingOnes();
+
+ // During the shifts some of the bits will be lost. Calculate how many
+ // of the original value will remain after shift right and then left.
+ uint32_t U = BW - std::max(SL, SR);
+ // The width of the extracted field is the minimum of the original bits
+ // that remain after the shifts and the number of contiguous 1s in the mask.
+ uint32_t W = std::min(U, T);
+ if (W == 0)
+ return false;
+
+ // Check if the extracted bits are contained within the mask that it is
+ // and-ed with. The extract operation will copy these bits, and so the
+ // mask cannot have any holes in it that would clear any of the bits of the
+ // extracted field.
+ if (!LogicalSR) {
+ // If the shift right was arithmetic, it could have included some 1 bits.
+ // It is still ok to generate extract, but only if the mask eliminates
+ // those bits (i.e. M does not have any bits set beyond U).
+ APInt C = APInt::getHighBitsSet(BW, BW-U);
+ if (M.intersects(C) || !APIntOps::isMask(W, M))
+ return false;
+ } else {
+ // Check if M starts with a contiguous sequence of W times 1 bits. Get
+ // the low U bits of M (which eliminates the 0 bits shifted in on the
+ // left), and check if the result is APInt's "mask":
+ if (!APIntOps::isMask(W, M.getLoBits(U)))
+ return false;
+ }
+
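+ // Generate a call to the Hexagon "extract" intrinsic (extractu for 32-bit,
+ // extractup for 64-bit values), and re-apply the left shift, if any, to
+ // reproduce the original value.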
+ IRBuilder<> IRB(In);
+ Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu
+ : Intrinsic::hexagon_S2_extractup;
+ Module *Mod = BB->getParent()->getParent();
+ Value *ExtF = Intrinsic::getDeclaration(Mod, IntId);
+ Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)});
+ if (SL != 0)
+ NewIn = IRB.CreateShl(NewIn, SL, CSL->getName());
+ In->replaceAllUsesWith(NewIn);
+ return true;
+}
+
+
+bool HexagonGenExtract::visitBlock(BasicBlock *B) {
+ // Depth-first, bottom-up traversal.
+ DomTreeNode *DTN = DT->getNode(B);
+ typedef GraphTraits<DomTreeNode*> GTN;
+ typedef GTN::ChildIteratorType Iter;
+ for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I)
+ visitBlock((*I)->getBlock());
+
+ // Allow limiting the number of generated extracts for debugging purposes.
+ bool HasCutoff = ExtractCutoff.getPosition();
+ unsigned Cutoff = ExtractCutoff;
+
+ bool Changed = false;
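+ // Walk the block backwards. NextI is captured before calling convert(), so
+ // that any instructions convert() inserts in front of In are not revisited.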
+ BasicBlock::iterator I = std::prev(B->end()), NextI, Begin = B->begin();
+ while (true) {
+ if (HasCutoff && (ExtractCount >= Cutoff))
+ return Changed;
+ bool Last = (I == Begin);
+ if (!Last)
+ NextI = std::prev(I);
+ Instruction *In = &*I;
+ bool Done = convert(In);
+ if (HasCutoff && Done)
+ ExtractCount++;
+ Changed |= Done;
+ if (Last)
+ break;
+ I = NextI;
+ }
+ return Changed;
+}
+
+
+bool HexagonGenExtract::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ bool Changed;
+
+ // Traverse the function bottom-up, to see super-expressions before their
+ // sub-expressions.
+ BasicBlock *Entry = GraphTraits<Function*>::getEntryNode(&F);
+ Changed = visitBlock(Entry);
+
+ return Changed;
+}
+
+
+FunctionPass *llvm::createHexagonGenExtract() {
+ return new HexagonGenExtract();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
new file mode 100644
index 0000000..64a2b6c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -0,0 +1,1599 @@
+//===--- HexagonGenInsert.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexinsert"
+
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include "Hexagon.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonBitTracker.h"
+
+#include <map>
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<unsigned> VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg# cutoff for insert generation."));
+// The distance cutoff is selected based on the precheckin-perf results:
+// cutoffs 20, 25, 35, and 40 are worse than 30.
+static cl::opt<unsigned> VRegDistCutoff("insert-dist-cutoff", cl::init(30U),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert "
+ "generation."));
+
+static cl::opt<bool> OptTiming("insert-timing", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore, cl::desc("Enable timing of insert generation"));
+static cl::opt<bool> OptTimingDetail("insert-timing-detail", cl::init(false),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Enable detailed timing of insert "
+ "generation"));
+
+static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore);
+static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore);
+// Whether to construct constant values via "insert". Could eliminate constant
+// extenders, but often not practical.
+static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore);
+
+namespace {
+ // The preprocessor gets confused when the DEBUG macro is passed larger
+ // chunks of code. Use this function to detect debugging.
+ inline bool isDebug() {
+#ifndef NDEBUG
+ return ::llvm::DebugFlag && ::llvm::isCurrentDebugType(DEBUG_TYPE);
+#else
+ return false;
+#endif
+ }
+}
+
+
+namespace {
+ // Set of virtual registers, based on BitVector.
+ struct RegisterSet : private BitVector {
+ RegisterSet() = default;
+ explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
+
+ using BitVector::clear;
+
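+ // find_first/find_next return 0 when there are no (more) elements. Since
+ // 0 is never a valid virtual register, callers can use it as a sentinel.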
+ unsigned find_first() const {
+ int First = BitVector::find_first();
+ if (First < 0)
+ return 0;
+ return x2v(First);
+ }
+
+ unsigned find_next(unsigned Prev) const {
+ int Next = BitVector::find_next(v2x(Prev));
+ if (Next < 0)
+ return 0;
+ return x2v(Next);
+ }
+
+ RegisterSet &insert(unsigned R) {
+ unsigned Idx = v2x(R);
+ ensure(Idx);
+ return static_cast<RegisterSet&>(BitVector::set(Idx));
+ }
+ RegisterSet &remove(unsigned R) {
+ unsigned Idx = v2x(R);
+ if (Idx >= size())
+ return *this;
+ return static_cast<RegisterSet&>(BitVector::reset(Idx));
+ }
+
+ RegisterSet &insert(const RegisterSet &Rs) {
+ return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
+ }
+ RegisterSet &remove(const RegisterSet &Rs) {
+ return static_cast<RegisterSet&>(BitVector::reset(Rs));
+ }
+
+ reference operator[](unsigned R) {
+ unsigned Idx = v2x(R);
+ ensure(Idx);
+ return BitVector::operator[](Idx);
+ }
+ bool operator[](unsigned R) const {
+ unsigned Idx = v2x(R);
+ assert(Idx < size());
+ return BitVector::operator[](Idx);
+ }
+ bool has(unsigned R) const {
+ unsigned Idx = v2x(R);
+ if (Idx >= size())
+ return false;
+ return BitVector::test(Idx);
+ }
+
+ bool empty() const {
+ return !BitVector::any();
+ }
+ bool includes(const RegisterSet &Rs) const {
+ // A.BitVector::test(B) <=> A-B != {}
+ return !Rs.BitVector::test(*this);
+ }
+ bool intersects(const RegisterSet &Rs) const {
+ return BitVector::anyCommon(Rs);
+ }
+
+ private:
+ void ensure(unsigned Idx) {
+ if (size() <= Idx)
+ resize(std::max(Idx+1, 32U));
+ }
+ static inline unsigned v2x(unsigned v) {
+ return TargetRegisterInfo::virtReg2Index(v);
+ }
+ static inline unsigned x2v(unsigned x) {
+ return TargetRegisterInfo::index2VirtReg(x);
+ }
+ };
+
+
+ struct PrintRegSet {
+ PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
+ : RS(S), TRI(RI) {}
+ friend raw_ostream &operator<< (raw_ostream &OS,
+ const PrintRegSet &P);
+ private:
+ const RegisterSet &RS;
+ const TargetRegisterInfo *TRI;
+ };
+
+ raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
+ OS << '{';
+ for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
+ OS << ' ' << PrintReg(R, P.TRI);
+ OS << " }";
+ return OS;
+ }
+}
+
+
+namespace {
+ // A convenience class to associate unsigned numbers (such as virtual
+ // registers) with unsigned numbers.
+ struct UnsignedMap : public DenseMap<unsigned,unsigned> {
+ UnsignedMap() : BaseType() {}
+ private:
+ typedef DenseMap<unsigned,unsigned> BaseType;
+ };
+
+ // A utility to establish an ordering between virtual registers:
+ // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB]
+ // This is meant as a cache for the ordering of virtual registers defined
+ // by a potentially expensive comparison function, or obtained by a proce-
+ // dure that should not be repeated each time two registers are compared.
+ struct RegisterOrdering : public UnsignedMap {
+ RegisterOrdering() : UnsignedMap() {}
+ unsigned operator[](unsigned VR) const {
+ const_iterator F = find(VR);
+ assert(F != end());
+ return F->second;
+ }
+ // Add operator(), so that objects of this class can be used as
+ // comparators in std::sort et al.
+ bool operator() (unsigned VR1, unsigned VR2) const {
+ return operator[](VR1) < operator[](VR2);
+ }
+ };
+}
+
+
+namespace {
+ // Ordering of bit values. This class does not have operator[], but
+ // it supplies a comparison operator() for use in std:: algorithms.
+ // The order is as follows:
+ // - 0 < 1 < ref
+ // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg),
+ // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos.
+ struct BitValueOrdering {
+ BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {}
+ bool operator() (const BitTracker::BitValue &V1,
+ const BitTracker::BitValue &V2) const;
+ const RegisterOrdering &BaseOrd;
+ };
+}
+
+
+bool BitValueOrdering::operator() (const BitTracker::BitValue &V1,
+ const BitTracker::BitValue &V2) const {
+ if (V1 == V2)
+ return false;
+ // V1==0 => true, V2==0 => false
+ if (V1.is(0) || V2.is(0))
+ return V1.is(0);
+ // Neither of V1,V2 is 0, and V1!=V2.
+ // V2==1 => false, V1==1 => true
+ if (V2.is(1) || V1.is(1))
+ return !V2.is(1);
+ // Both V1,V2 are refs.
+ unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg];
+ if (Ind1 != Ind2)
+ return Ind1 < Ind2;
+ // Both refer to the same register; since V1 != V2, compare the positions.
+ assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different");
+ return V1.RefI.Pos < V2.RefI.Pos;
+}
+
+
+namespace {
+ // Cache for the BitTracker's cell map. Map lookup has logarithmic
+ // complexity, so this class memoizes the lookup results to reduce
+ // the access time for repeated lookups of the same cell.
+ struct CellMapShadow {
+ CellMapShadow(const BitTracker &T) : BT(T) {}
+ const BitTracker::RegisterCell &lookup(unsigned VR) {
+ unsigned RInd = TargetRegisterInfo::virtReg2Index(VR);
+ // Grow the vector so that RInd is a valid index (to at least 32 elements).
+ if (RInd >= CVect.size())
+ CVect.resize(std::max(RInd+16, 32U), 0);
+ const BitTracker::RegisterCell *CP = CVect[RInd];
+ if (CP == 0)
+ CP = CVect[RInd] = &BT.lookup(VR);
+ return *CP;
+ }
+
+ const BitTracker &BT;
+
+ private:
+ typedef std::vector<const BitTracker::RegisterCell*> CellVectType;
+ CellVectType CVect;
+ };
+}
+
+
+namespace {
+ // Comparator class for lexicographic ordering of virtual registers
+ // according to the corresponding BitTracker::RegisterCell objects.
+ struct RegisterCellLexCompare {
+ RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M)
+ : BitOrd(BO), CM(M) {}
+ bool operator() (unsigned VR1, unsigned VR2) const;
+ private:
+ const BitValueOrdering &BitOrd;
+ CellMapShadow &CM;
+ };
+
+ // Comparator class for lexicographic ordering of virtual registers
+ // according to the specified bits of the corresponding BitTracker::
+ // RegisterCell objects.
+ // Specifically, this class will be used to compare bit B of a register
+ // cell for a selected virtual register R with bit N of any register
+ // other than R.
+ struct RegisterCellBitCompareSel {
+ RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N,
+ const BitValueOrdering &BO, CellMapShadow &M)
+ : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {}
+ bool operator() (unsigned VR1, unsigned VR2) const;
+ private:
+ const unsigned SelR, SelB;
+ const unsigned BitN;
+ const BitValueOrdering &BitOrd;
+ CellMapShadow &CM;
+ };
+}
+
+
+bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const {
+ // Ordering of registers, made up from two given orderings:
+ // - the ordering of the register numbers, and
+ // - the ordering of register cells.
+ // Def. R1 < R2 if:
+ // - cell(R1) < cell(R2), or
+ // - cell(R1) == cell(R2), and index(R1) < index(R2).
+ //
+ // For register cells, the ordering is lexicographic, with index 0 being
+ // the most significant.
+ if (VR1 == VR2)
+ return false;
+
+ const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2);
+ uint16_t W1 = RC1.width(), W2 = RC2.width();
+ for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) {
+ const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i];
+ if (V1 != V2)
+ return BitOrd(V1, V2);
+ }
+ // Cells are equal up until the common length.
+ if (W1 != W2)
+ return W1 < W2;
+
+ return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2];
+}
+
+
+bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const {
+ if (VR1 == VR2)
+ return false;
+ const BitTracker::RegisterCell &RC1 = CM.lookup(VR1);
+ const BitTracker::RegisterCell &RC2 = CM.lookup(VR2);
+ uint16_t W1 = RC1.width(), W2 = RC2.width();
+ uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN;
+ uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN;
+ // If Bit1 exceeds the width of VR1, then:
+ // - return false, if at the same time Bit2 exceeds VR2, or
+ // - return true, otherwise.
+ // (I.e. "a bit value that does not exist is less than any bit value
+ // that does exist".)
+ if (W1 <= Bit1)
+ return Bit2 < W2;
+ // If Bit1 is within VR1, but Bit2 is not within VR2, return false.
+ if (W2 <= Bit2)
+ return false;
+
+ const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2];
+ if (V1 != V2)
+ return BitOrd(V1, V2);
+ return false;
+}
+
+
+namespace {
+ class OrderedRegisterList {
+ typedef std::vector<unsigned> ListType;
+ public:
+ OrderedRegisterList(const RegisterOrdering &RO) : Ord(RO) {}
+ void insert(unsigned VR);
+ void remove(unsigned VR);
+ unsigned operator[](unsigned Idx) const {
+ assert(Idx < Seq.size());
+ return Seq[Idx];
+ }
+ unsigned size() const {
+ return Seq.size();
+ }
+
+ typedef ListType::iterator iterator;
+ typedef ListType::const_iterator const_iterator;
+ iterator begin() { return Seq.begin(); }
+ iterator end() { return Seq.end(); }
+ const_iterator begin() const { return Seq.begin(); }
+ const_iterator end() const { return Seq.end(); }
+
+ // Convenience function to convert an iterator to the corresponding index.
+ unsigned idx(iterator It) const { return It-begin(); }
+ private:
+ ListType Seq;
+ const RegisterOrdering &Ord;
+ };
+
+
+ struct PrintORL {
+ PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI)
+ : RL(L), TRI(RI) {}
+ friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P);
+ private:
+ const OrderedRegisterList &RL;
+ const TargetRegisterInfo *TRI;
+ };
+
+ raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) {
+ OS << '(';
+ OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end();
+ for (OrderedRegisterList::const_iterator I = B; I != E; ++I) {
+ if (I != B)
+ OS << ", ";
+ OS << PrintReg(*I, P.TRI);
+ }
+ OS << ')';
+ return OS;
+ }
+}
+
+
+void OrderedRegisterList::insert(unsigned VR) {
+ iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord);
+ if (L == Seq.end())
+ Seq.push_back(VR);
+ else
+ Seq.insert(L, VR);
+}
+
+
+void OrderedRegisterList::remove(unsigned VR) {
+ iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord);
+ assert(L != Seq.end());
+ Seq.erase(L);
+}
+
+
+namespace {
+ // A record of the insert form. The fields correspond to the operands
+ // of the "insert" instruction:
+ // ... = insert(SrcR, InsR, #Wdh, #Off)
+ struct IFRecord {
+ IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0)
+ : SrcR(SR), InsR(IR), Wdh(W), Off(O) {}
+ unsigned SrcR, InsR;
+ uint16_t Wdh, Off;
+ };
+
+ struct PrintIFR {
+ PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI)
+ : IFR(R), TRI(RI) {}
+ private:
+ const IFRecord &IFR;
+ const TargetRegisterInfo *TRI;
+ friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P);
+ };
+
+ raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) {
+ unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR;
+ OS << '(' << PrintReg(SrcR, P.TRI) << ',' << PrintReg(InsR, P.TRI)
+ << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')';
+ return OS;
+ }
+
+ typedef std::pair<IFRecord,RegisterSet> IFRecordWithRegSet;
+}
+
+
+namespace llvm {
+ void initializeHexagonGenInsertPass(PassRegistry&);
+ FunctionPass *createHexagonGenInsert();
+}
+
+
+namespace {
+ class HexagonGenInsert : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonGenInsert() : MachineFunctionPass(ID), HII(0), HRI(0) {
+ initializeHexagonGenInsertPass(*PassRegistry::getPassRegistry());
+ }
+ virtual const char *getPassName() const {
+ return "Hexagon generate \"insert\" instructions";
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ typedef DenseMap<std::pair<unsigned,unsigned>,unsigned> PairMapType;
+
+ void buildOrderingMF(RegisterOrdering &RO) const;
+ void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const;
+ bool isIntClass(const TargetRegisterClass *RC) const;
+ bool isConstant(unsigned VR) const;
+ bool isSmallConstant(unsigned VR) const;
+ bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR,
+ uint16_t L, uint16_t S) const;
+ bool findSelfReference(unsigned VR) const;
+ bool findNonSelfReference(unsigned VR) const;
+ void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const;
+ void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const;
+ unsigned distance(const MachineBasicBlock *FromB,
+ const MachineBasicBlock *ToB, const UnsignedMap &RPO,
+ PairMapType &M) const;
+ unsigned distance(MachineBasicBlock::const_iterator FromI,
+ MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
+ PairMapType &M) const;
+ bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs);
+ void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs);
+ void findRemovableRegisters(unsigned VR, IFRecord IF,
+ RegisterSet &RMs) const;
+ void computeRemovableRegisters();
+
+ void pruneEmptyLists();
+ void pruneCoveredSets(unsigned VR);
+ void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M);
+ void pruneRegCopies(unsigned VR);
+ void pruneCandidates();
+ void selectCandidates();
+ bool generateInserts();
+
+ bool removeDeadCode(MachineDomTreeNode *N);
+
+ // IFRecord coupled with a set of potentially removable registers:
+ typedef std::vector<IFRecordWithRegSet> IFListType;
+ typedef DenseMap<unsigned,IFListType> IFMapType; // vreg -> IFListType
+
+ void dump_map() const;
+
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
+
+ MachineFunction *MFN;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *MDT;
+ CellMapShadow *CMS;
+
+ RegisterOrdering BaseOrd;
+ RegisterOrdering CellOrd;
+ IFMapType IFMap;
+ };
+
+ char HexagonGenInsert::ID = 0;
+}
+
+
+void HexagonGenInsert::dump_map() const {
+ typedef IFMapType::const_iterator iterator;
+ for (iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
+ dbgs() << " " << PrintReg(I->first, HRI) << ":\n";
+ const IFListType &LL = I->second;
+ for (unsigned i = 0, n = LL.size(); i < n; ++i)
+ dbgs() << " " << PrintIFR(LL[i].first, HRI) << ", "
+ << PrintRegSet(LL[i].second, HRI) << '\n';
+ }
+}
+
+
+void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const {
+ unsigned Index = 0;
+ typedef MachineFunction::const_iterator mf_iterator;
+ for (mf_iterator A = MFN->begin(), Z = MFN->end(); A != Z; ++A) {
+ const MachineBasicBlock &B = *A;
+ if (!CMS->BT.reached(&B))
+ continue;
+ typedef MachineBasicBlock::const_iterator mb_iterator;
+ for (mb_iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ const MachineInstr *MI = &*I;
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned R = MO.getReg();
+ assert(MO.getSubReg() == 0 && "Unexpected subregister in definition");
+ if (TargetRegisterInfo::isVirtualRegister(R))
+ RO.insert(std::make_pair(R, Index++));
+ }
+ }
+ }
+ }
+ // Since some virtual registers may have had their def and uses eliminated,
+ // they are no longer referenced in the code, and so they will not appear
+ // in the map.
+}
+
+
+void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,
+ RegisterOrdering &RO) const {
+ // Create a vector of all virtual registers (collect them from the base
+ // ordering RB), and then sort it using the RegisterCell comparator.
+ BitValueOrdering BVO(RB);
+ RegisterCellLexCompare LexCmp(BVO, *CMS);
+ typedef std::vector<unsigned> SortableVectorType;
+ SortableVectorType VRs;
+ for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I)
+ VRs.push_back(I->first);
+ std::sort(VRs.begin(), VRs.end(), LexCmp);
+ // Transfer the results to the outgoing register ordering.
+ for (unsigned i = 0, n = VRs.size(); i < n; ++i)
+ RO.insert(std::make_pair(VRs[i], i));
+}
+
+
+inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const {
+ return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass;
+}
+
+
+bool HexagonGenInsert::isConstant(unsigned VR) const {
+ const BitTracker::RegisterCell &RC = CMS->lookup(VR);
+ uint16_t W = RC.width();
+ for (uint16_t i = 0; i < W; ++i) {
+ const BitTracker::BitValue &BV = RC[i];
+ if (BV.is(0) || BV.is(1))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+
+bool HexagonGenInsert::isSmallConstant(unsigned VR) const {
+ const BitTracker::RegisterCell &RC = CMS->lookup(VR);
+ uint16_t W = RC.width();
+ if (W > 64)
+ return false;
+ uint64_t V = 0, B = 1;
+ for (uint16_t i = 0; i < W; ++i) {
+ const BitTracker::BitValue &BV = RC[i];
+ if (BV.is(1))
+ V |= B;
+ else if (!BV.is(0))
+ return false;
+ B <<= 1;
+ }
+
+ // For 32-bit registers, consider: Rd = #s16.
+ if (W == 32)
+ return isInt<16>(V);
+
+ // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8)
+ return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V));
+}
+
+
+bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR,
+ unsigned InsR, uint16_t L, uint16_t S) const {
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstR);
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR);
+ const TargetRegisterClass *InsRC = MRI->getRegClass(InsR);
+ // Only integer (32-/64-bit) register classes.
+ if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC))
+ return false;
+ // The "source" register must be of the same class as DstR.
+ if (DstRC != SrcRC)
+ return false;
+ if (DstRC == InsRC)
+ return true;
+ // A 64-bit register can only be generated from other 64-bit registers.
+ if (DstRC == &Hexagon::DoubleRegsRegClass)
+ return false;
+ // Otherwise, the range given by S and L cannot span a 32-bit word boundary.
+ if (S < 32 && S+L > 32)
+ return false;
+ return true;
+}
+
+
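+ // Return true if any bit in the register cell of VR is a reference to VR
+ // itself.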
+bool HexagonGenInsert::findSelfReference(unsigned VR) const {
+ const BitTracker::RegisterCell &RC = CMS->lookup(VR);
+ for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
+ const BitTracker::BitValue &V = RC[i];
+ if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR)
+ return true;
+ }
+ return false;
+}
+
+
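+ // Return true if any bit in the register cell of VR is a reference to a
+ // register other than VR.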
+bool HexagonGenInsert::findNonSelfReference(unsigned VR) const {
+ BitTracker::RegisterCell RC = CMS->lookup(VR);
+ for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
+ const BitTracker::BitValue &V = RC[i];
+ if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR)
+ return true;
+ }
+ return false;
+}
+
+
+void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
+ RegisterSet &Defs) const {
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned R = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ Defs.insert(R);
+ }
+}
+
+
+void HexagonGenInsert::getInstrUses(const MachineInstr *MI,
+ RegisterSet &Uses) const {
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned R = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ Uses.insert(R);
+ }
+}
+
+
+unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB,
+ const MachineBasicBlock *ToB, const UnsignedMap &RPO,
+ PairMapType &M) const {
+ // Forward distance from the end of a block to the beginning of it does
+ // not make sense. This function should not be called with FromB == ToB.
+ assert(FromB != ToB);
+
+ unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber();
+ // If we have already computed it, return the cached result.
+ PairMapType::iterator F = M.find(std::make_pair(FromN, ToN));
+ if (F != M.end())
+ return F->second;
+ unsigned ToRPO = RPO.lookup(ToN);
+
+ unsigned MaxD = 0;
+ typedef MachineBasicBlock::const_pred_iterator pred_iterator;
+ for (pred_iterator I = ToB->pred_begin(), E = ToB->pred_end(); I != E; ++I) {
+ const MachineBasicBlock *PB = *I;
+ // Skip back edges. Also, if FromB is a predecessor of ToB, the distance
+ // along that path will be 0, and we don't need to do any calculations
+ // on it.
+ if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO)
+ continue;
+ unsigned D = PB->size() + distance(FromB, PB, RPO, M);
+ if (D > MaxD)
+ MaxD = D;
+ }
+
+ // Memoize the result for later lookup.
+ M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD));
+ return MaxD;
+}
+
+
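+ // Instruction-level distance: within a single block it is the iterator
+ // distance; across blocks it is the distance from FromI to the end of its
+ // block, plus the block-level distance, plus the beginning of ToI's block.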
+unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI,
+ MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
+ PairMapType &M) const {
+ const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent();
+ if (FB == TB)
+ return std::distance(FromI, ToI);
+ unsigned D1 = std::distance(TB->begin(), ToI);
+ unsigned D2 = distance(FB, TB, RPO, M);
+ unsigned D3 = std::distance(FromI, FB->end());
+ return D1+D2+D3;
+}
+
+
+bool HexagonGenInsert::findRecordInsertForms(unsigned VR,
+ OrderedRegisterList &AVs) {
+ if (isDebug()) {
+ dbgs() << LLVM_FUNCTION_NAME << ": " << PrintReg(VR, HRI)
+ << " AVs: " << PrintORL(AVs, HRI) << "\n";
+ }
+ if (AVs.size() == 0)
+ return false;
+
+ typedef OrderedRegisterList::iterator iterator;
+ BitValueOrdering BVO(BaseOrd);
+ const BitTracker::RegisterCell &RC = CMS->lookup(VR);
+ uint16_t W = RC.width();
+
+ typedef std::pair<unsigned,uint16_t> RSRecord; // (reg,shift)
+ typedef std::vector<RSRecord> RSListType;
+ // Have a map, with key being the matching prefix length, and the value
+ // being the list of pairs (R,S), where R's prefix matches VR at S.
+ // (DenseMap<uint16_t,RSListType> fails to instantiate.)
+ typedef DenseMap<unsigned,RSListType> LRSMapType;
+ LRSMapType LM;
+
+ // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S,
+ // and find matching prefixes from AVs with the rotated RC. Such a prefix
+ // would match a string of bits (of length L) in RC starting at S.
+ for (uint16_t S = 0; S < W; ++S) {
+ iterator B = AVs.begin(), E = AVs.end();
+ // The registers in AVs are ordered according to the lexical order of
+ // the corresponding register cells. This means that the range of regis-
+ // ters in AVs that match a prefix of length L+1 will be contained in
+ // the range that matches a prefix of length L. This means that we can
+ // keep narrowing the search space as the prefix length goes up. This
+ // helps reduce the overall complexity of the search.
+ uint16_t L;
+ for (L = 0; L < W-S; ++L) {
+ // Compare against VR's bits starting at S, which emulates rotation
+ // of VR by S.
+ RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS);
+ iterator NewB = std::lower_bound(B, E, VR, RCB);
+ iterator NewE = std::upper_bound(NewB, E, VR, RCB);
+ // For the registers that are eliminated from the next range, L is
+ // the longest prefix matching VR at position S (their prefixes
+ // differ from VR at S+L). If L>0, record this information for later
+ // use.
+ if (L > 0) {
+ for (iterator I = B; I != NewB; ++I)
+ LM[L].push_back(std::make_pair(*I, S));
+ for (iterator I = NewE; I != E; ++I)
+ LM[L].push_back(std::make_pair(*I, S));
+ }
+ B = NewB, E = NewE;
+ if (B == E)
+ break;
+ }
+ // Record the final register range. If this range is non-empty, then
+ // L=W-S.
+ assert(B == E || L == W-S);
+ if (B != E) {
+ for (iterator I = B; I != E; ++I)
+ LM[L].push_back(std::make_pair(*I, S));
+ // If B!=E, then we found a range of registers whose prefixes cover the
+ // rest of VR from position S. There is no need to further advance S.
+ break;
+ }
+ }
+
+ if (isDebug()) {
+ dbgs() << "Prefixes matching register " << PrintReg(VR, HRI) << "\n";
+ for (LRSMapType::iterator I = LM.begin(), E = LM.end(); I != E; ++I) {
+ dbgs() << " L=" << I->first << ':';
+ const RSListType &LL = I->second;
+ for (unsigned i = 0, n = LL.size(); i < n; ++i)
+ dbgs() << " (" << PrintReg(LL[i].first, HRI) << ",@"
+ << LL[i].second << ')';
+ dbgs() << '\n';
+ }
+ }
+
+
+ bool Recorded = false;
+
+ for (iterator I = AVs.begin(), E = AVs.end(); I != E; ++I) {
+ unsigned SrcR = *I;
+ int FDi = -1, LDi = -1; // First/last different bit.
+ const BitTracker::RegisterCell &AC = CMS->lookup(SrcR);
+ uint16_t AW = AC.width();
+ for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) {
+ if (RC[i] == AC[i])
+ continue;
+ if (FDi == -1)
+ FDi = i;
+ LDi = i;
+ }
+ if (FDi == -1)
+ continue; // TODO (future): Record identical registers.
+ // Look for a register whose prefix could patch the range [FD..LD]
+ // where VR and SrcR differ.
+ uint16_t FD = FDi, LD = LDi; // Switch to unsigned type.
+ uint16_t MinL = LD-FD+1;
+ for (uint16_t L = MinL; L < W; ++L) {
+ LRSMapType::iterator F = LM.find(L);
+ if (F == LM.end())
+ continue;
+ RSListType &LL = F->second;
+ for (unsigned i = 0, n = LL.size(); i < n; ++i) {
+ uint16_t S = LL[i].second;
+ // MinL is the minimum length of the prefix. Any length above MinL
+ // allows some flexibility as to where the prefix can start:
+ // given the extra length EL=L-MinL, the prefix must start between
+ // max(0,FD-EL) and FD.
+ if (S > FD) // Starts too late.
+ continue;
+ uint16_t EL = L-MinL;
+ uint16_t LowS = (EL < FD) ? FD-EL : 0;
+ if (S < LowS) // Starts too early.
+ continue;
+ unsigned InsR = LL[i].first;
+ if (!isValidInsertForm(VR, SrcR, InsR, L, S))
+ continue;
+ if (isDebug()) {
+ dbgs() << PrintReg(VR, HRI) << " = insert(" << PrintReg(SrcR, HRI)
+ << ',' << PrintReg(InsR, HRI) << ",#" << L << ",#"
+ << S << ")\n";
+ }
+ IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet());
+ IFMap[VR].push_back(RR);
+ Recorded = true;
+ }
+ }
+ }
+
+ return Recorded;
+}
+
+
+void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
+ OrderedRegisterList &AVs) {
+ if (isDebug())
+ dbgs() << "visiting block BB#" << B->getNumber() << "\n";
+
+ // First, check if this block is reachable at all. If not, the bit tracker
+ // will not have any information about registers in it.
+ if (!CMS->BT.reached(B))
+ return;
+
+ bool DoConst = OptConst;
+ // Keep a separate set of registers defined in this block, so that we
+ // can remove them from the list of available registers once all DT
+ // successors have been processed.
+ RegisterSet BlockDefs, InsDefs;
+ for (MachineBasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ InsDefs.clear();
+ getInstrDefs(MI, InsDefs);
+ // Leave those alone. They are more transparent than "insert".
+ bool Skip = MI->isCopy() || MI->isRegSequence();
+
+ if (!Skip) {
+ // Visit all defined registers, and attempt to find the corresponding
+ // "insert" representations.
+ for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) {
+ // Do not collect registers that are known to be compile-time cons-
+ // tants, unless requested.
+ if (!DoConst && isConstant(VR))
+ continue;
+ // If VR's cell contains a reference to VR, then VR cannot be defined
+ // via "insert". If VR is a constant that can be generated in a single
+ // instruction (without constant extenders), generating it via insert
+ // makes no sense.
+ if (findSelfReference(VR) || isSmallConstant(VR))
+ continue;
+
+ findRecordInsertForms(VR, AVs);
+ }
+ }
+
+ // Insert the defined registers into the list of available registers
+ // after they have been processed.
+ for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR))
+ AVs.insert(VR);
+ BlockDefs.insert(InsDefs);
+ }
+
+ MachineDomTreeNode *N = MDT->getNode(B);
+ typedef GraphTraits<MachineDomTreeNode*> GTN;
+ typedef GTN::ChildIteratorType ChildIter;
+ for (ChildIter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) {
+ MachineBasicBlock *SB = (*I)->getBlock();
+ collectInBlock(SB, AVs);
+ }
+
+ for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR))
+ AVs.remove(VR);
+}
+
+
+void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF,
+ RegisterSet &RMs) const {
+ // For a given register VR and an insert form, find the registers that are
+ // used by the current definition of VR, and which would no longer be
+ // needed for it after the definition of VR is replaced with the insert
+ // form. These are the registers that could potentially become dead.
+ RegisterSet Regs[2];
+
+ unsigned S = 0; // Register set selector.
+ Regs[S].insert(VR);
+
+ while (!Regs[S].empty()) {
+ // Breadth-first search.
+ unsigned OtherS = 1-S;
+ Regs[OtherS].clear();
+ for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) {
+ Regs[S].remove(R);
+ if (R == IF.SrcR || R == IF.InsR)
+ continue;
+ // Check if a given register has bits that are references to any other
+ // registers. This is to detect situations where the instruction that
+ // defines register R takes register Q as an operand, but R itself does
+ // not contain any bits from Q. Loads are examples of how this could
+ // happen:
+ // R = load Q
+ // In this case (assuming we do not have any knowledge about the loaded
+ // value), we must not treat R as a "conveyance" of the bits from Q.
+ // (The information in BT about R's bits would have them as constants,
+ // in case of zero-extending loads, or refs to R.)
+ if (!findNonSelfReference(R))
+ continue;
+ RMs.insert(R);
+ const MachineInstr *DefI = MRI->getVRegDef(R);
+ assert(DefI);
+ // Do not iterate past PHI nodes to avoid infinite loops. This can
+ // make the final set a bit less accurate, but the removable register
+ // sets are an approximation anyway.
+ if (DefI->isPHI())
+ continue;
+ getInstrUses(DefI, Regs[OtherS]);
+ }
+ S = OtherS;
+ }
+ // The register VR is added to the list as a side-effect of the algorithm,
+ // but it is not "potentially removable". A potentially removable register
+ // is one that may become unused (dead) after conversion to the insert form
+ // IF, and obviously VR (or its replacement) will not become dead by apply-
+ // ing IF.
+ RMs.remove(VR);
+}
+
+
+void HexagonGenInsert::computeRemovableRegisters() {
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
+ IFListType &LL = I->second;
+ for (unsigned i = 0, n = LL.size(); i < n; ++i)
+ findRemovableRegisters(I->first, LL[i].first, LL[i].second);
+ }
+}
+
+
+void HexagonGenInsert::pruneEmptyLists() {
+ // Remove all entries from the map, where the register has no insert forms
+ // associated with it.
+ typedef SmallVector<IFMapType::iterator,16> IterListType;
+ IterListType Prune;
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
+ if (I->second.size() == 0)
+ Prune.push_back(I);
+ }
+ for (unsigned i = 0, n = Prune.size(); i < n; ++i)
+ IFMap.erase(Prune[i]);
+}
+
+
+void HexagonGenInsert::pruneCoveredSets(unsigned VR) {
+ IFMapType::iterator F = IFMap.find(VR);
+ assert(F != IFMap.end());
+ IFListType &LL = F->second;
+
+ // First, examine the IF candidates for register VR whose removable-regis-
+ // ter sets are empty. This means that a given candidate will not help eli-
+ // minate any registers, but since "insert" is not a constant-extendable
+ // instruction, using such a candidate may reduce code size if the defini-
+ // tion of VR is constant-extended.
+ // If there exists a candidate with a non-empty set, the ones with empty
+ // sets will not be used and can be removed.
+ MachineInstr *DefVR = MRI->getVRegDef(VR);
+ bool DefEx = HII->isConstExtended(DefVR);
+ bool HasNE = false;
+ for (unsigned i = 0, n = LL.size(); i < n; ++i) {
+ if (LL[i].second.empty())
+ continue;
+ HasNE = true;
+ break;
+ }
+ if (!DefEx || HasNE) {
+ // The definition of VR is not constant-extended, or there is a candidate
+ // with a non-empty set. Remove all candidates with empty sets.
+ auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool {
+ return IR.second.empty();
+ };
+ auto End = std::remove_if(LL.begin(), LL.end(), IsEmpty);
+ if (End != LL.end())
+ LL.erase(End, LL.end());
+ } else {
+ // The definition of VR is constant-extended, and all candidates have
+ // empty removable-register sets. Pick the maximum candidate, and remove
+ // all others. The "maximum" does not have any special meaning here, it
+ // is only so that the candidate that will remain on the list is selec-
+ // ted deterministically.
+ IFRecord MaxIF = LL[0].first;
+ for (unsigned i = 1, n = LL.size(); i < n; ++i) {
+ // If LL[MaxI] < LL[i], then MaxI = i.
+ const IFRecord &IF = LL[i].first;
+ unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR];
+ unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR];
+ if (M0 > R0)
+ continue;
+ if (M0 == R0) {
+ if (M1 > R1)
+ continue;
+ if (M1 == R1) {
+ if (MaxIF.Wdh > IF.Wdh)
+ continue;
+ if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off)
+ continue;
+ }
+ }
+ // MaxIF < IF.
+ MaxIF = IF;
+ }
+ // Remove everything except the maximum candidate. All register sets
+ // are empty, so no need to preserve anything.
+ LL.clear();
+ LL.push_back(std::make_pair(MaxIF, RegisterSet()));
+ }
+
+ // Now, remove those whose sets of potentially removable registers are
+ // contained in another IF candidate for VR. For example, given these
+ // candidates for vreg45,
+ // %vreg45:
+ // (%vreg44,%vreg41,#9,#8), { %vreg42 }
+ // (%vreg43,%vreg41,#9,#8), { %vreg42 %vreg44 }
+ // remove the first one, since it is contained in the second one.
+ for (unsigned i = 0, n = LL.size(); i < n; ) {
+ const RegisterSet &RMi = LL[i].second;
+ unsigned j = 0;
+ while (j < n) {
+ if (j != i && LL[j].second.includes(RMi))
+ break;
+ j++;
+ }
+ if (j == n) { // RMi not contained in anything else.
+ i++;
+ continue;
+ }
+ LL.erase(LL.begin()+i);
+ n = LL.size();
+ }
+}
+
+
+void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO,
+ PairMapType &M) {
+ IFMapType::iterator F = IFMap.find(VR);
+ assert(F != IFMap.end());
+ IFListType &LL = F->second;
+ unsigned Cutoff = VRegDistCutoff;
+ const MachineInstr *DefV = MRI->getVRegDef(VR);
+
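+ // Erase a candidate if either of its operands (SrcR or InsR) is defined
+ // at least the distance cutoff away from the definition of VR.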
+ for (unsigned i = LL.size(); i > 0; --i) {
+ unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR;
+ const MachineInstr *DefS = MRI->getVRegDef(SR);
+ const MachineInstr *DefI = MRI->getVRegDef(IR);
+ unsigned DSV = distance(DefS, DefV, RPO, M);
+ if (DSV < Cutoff) {
+ unsigned DIV = distance(DefI, DefV, RPO, M);
+ if (DIV < Cutoff)
+ continue;
+ }
+ LL.erase(LL.begin()+(i-1));
+ }
+}
+
+
+void HexagonGenInsert::pruneRegCopies(unsigned VR) {
+ IFMapType::iterator F = IFMap.find(VR);
+ assert(F != IFMap.end());
+ IFListType &LL = F->second;
+
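+ // An insert that covers a full 32-bit word (width 32 at offset 0 or 32) is
+ // equivalent to a register (or subregister) copy, so it offers no benefit.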
+ auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool {
+ return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32);
+ };
+ auto End = std::remove_if(LL.begin(), LL.end(), IsCopy);
+ if (End != LL.end())
+ LL.erase(End, LL.end());
+}
+
+
+void HexagonGenInsert::pruneCandidates() {
+ // Remove candidates that are not beneficial, regardless of the final
+ // selection method.
+ // First, remove candidates whose potentially removable set is a subset
+ // of another candidate's set.
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I)
+ pruneCoveredSets(I->first);
+
+ UnsignedMap RPO;
+ typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType;
+ RPOTType RPOT(MFN);
+ unsigned RPON = 0;
+ for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
+ RPO[(*I)->getNumber()] = RPON++;
+
+ PairMapType Memo; // Memoization map for distance calculation.
+ // Remove candidates that would use registers defined too far away.
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I)
+ pruneUsesTooFar(I->first, RPO, Memo);
+
+ pruneEmptyLists();
+
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I)
+ pruneRegCopies(I->first);
+}
+
+
+namespace {
+ // Class for comparing IF candidates for registers that have multiple of
+ // them. The smaller the candidate, according to this ordering, the better.
+ // First, compare the number of zeros in the associated potentially remova-
+ // ble register sets. "Zero" indicates that the register is very likely to
+ // become dead after this transformation.
+ // Second, compare "averages", i.e. use-count per size. The lower wins.
+ // After that, it does not really matter which one is smaller. Resolve
+ // the tie in some deterministic way.
+ struct IFOrdering {
+ IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO)
+ : UseC(UC), BaseOrd(BO) {}
+ bool operator() (const IFRecordWithRegSet &A,
+ const IFRecordWithRegSet &B) const;
+ private:
+ void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
+ unsigned &Sum) const;
+ const UnsignedMap &UseC;
+ const RegisterOrdering &BaseOrd;
+ };
+}
+
+
+bool IFOrdering::operator() (const IFRecordWithRegSet &A,
+ const IFRecordWithRegSet &B) const {
+ unsigned SizeA = 0, ZeroA = 0, SumA = 0;
+ unsigned SizeB = 0, ZeroB = 0, SumB = 0;
+ stats(A.second, SizeA, ZeroA, SumA);
+ stats(B.second, SizeB, ZeroB, SumB);
+
+ // We will pick the minimum element. The more zeros, the better.
+ if (ZeroA != ZeroB)
+ return ZeroA > ZeroB;
+ // Compare SumA/SizeA with SumB/SizeB, lower is better.
+ uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA;
+ if (AvgA != AvgB)
+ return AvgA < AvgB;
+
+ // The sets compare identical so far. Resort to comparing the IF records.
+ // The actual values don't matter, this is only for determinism.
+ unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR];
+ if (OSA != OSB)
+ return OSA < OSB;
+ unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR];
+ if (OIA != OIB)
+ return OIA < OIB;
+ if (A.first.Wdh != B.first.Wdh)
+ return A.first.Wdh < B.first.Wdh;
+ return A.first.Off < B.first.Off;
+}
+
+
+void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
+ unsigned &Sum) const {
+ for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) {
+ UnsignedMap::const_iterator F = UseC.find(R);
+ assert(F != UseC.end());
+ unsigned UC = F->second;
+ if (UC == 0)
+ Zero++;
+ Sum += UC;
+ Size++;
+ }
+}
+
+
+void HexagonGenInsert::selectCandidates() {
+ // Some registers may have multiple valid candidates. Pick the best one
+ // (or decide not to use any).
+
+ // Compute the "removability" measure of R:
+ // For each potentially removable register R, record the number of regis-
+ // ters with IF candidates, where R appears in at least one set.
+ RegisterSet AllRMs;
+ UnsignedMap UseC, RemC;
+ IFMapType::iterator End = IFMap.end();
+
+ for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
+ const IFListType &LL = I->second;
+ RegisterSet TT;
+ for (unsigned i = 0, n = LL.size(); i < n; ++i)
+ TT.insert(LL[i].second);
+ for (unsigned R = TT.find_first(); R; R = TT.find_next(R))
+ RemC[R]++;
+ AllRMs.insert(TT);
+ }
+
+ for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) {
+ typedef MachineRegisterInfo::use_nodbg_iterator use_iterator;
+ typedef SmallSet<const MachineInstr*,16> InstrSet;
+ InstrSet UIs;
+ // Count the number of instructions in which R is used, rather than the
+ // number of operands that use R.
+ use_iterator E = MRI->use_nodbg_end();
+ for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I)
+ UIs.insert(I->getParent());
+ unsigned C = UIs.size();
+ // Calculate a measure, which is the number of instructions using R,
+ // minus the "removability" count computed earlier.
+ unsigned D = RemC[R];
+ UseC[R] = (C > D) ? C-D : 0; // doz: difference or zero
+ }
+
+
+ bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0;
+ if (!SelectAll0 && !SelectHas0)
+ SelectAll0 = true;
+
+ // The smaller the number UseC for a given register R, the "less used"
+ // R is aside from the opportunities for removal offered by generating
+ // "insert" instructions.
+ // Iterate over the IF map, and for those registers that have multiple
+ // candidates, pick the minimum one according to IFOrdering.
+ IFOrdering IFO(UseC, BaseOrd);
+ for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
+ IFListType &LL = I->second;
+ if (LL.empty())
+ continue;
+ // Get the minimum element, remember it and clear the list. If the
+ // element found is adequate, we will put it back on the list, other-
+ // wise the list will remain empty, and the entry for this register
+ // will be removed (i.e. this register will not be replaced by insert).
+ IFListType::iterator MinI = std::min_element(LL.begin(), LL.end(), IFO);
+ assert(MinI != LL.end());
+ IFRecordWithRegSet M = *MinI;
+ LL.clear();
+
+ // We want to make sure that this replacement will have a chance to be
+ // beneficial, and that means that we want to have indication that some
+ // register will be removed. The most likely registers to be eliminated
+ // are the use operands in the definition of I->first. Accept/reject a
+ // candidate based on how many of its uses it can potentially eliminate.
+
+ RegisterSet Us;
+ const MachineInstr *DefI = MRI->getVRegDef(I->first);
+ getInstrUses(DefI, Us);
+ bool Accept = false;
+
+ if (SelectAll0) {
+ bool All0 = true;
+ for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
+ if (UseC[R] == 0)
+ continue;
+ All0 = false;
+ break;
+ }
+ Accept = All0;
+ } else if (SelectHas0) {
+ bool Has0 = false;
+ for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
+ if (UseC[R] != 0)
+ continue;
+ Has0 = true;
+ break;
+ }
+ Accept = Has0;
+ }
+ if (Accept)
+ LL.push_back(M);
+ }
+
+ // Remove candidates that add uses of removable registers, unless the
+ // removable registers are among replacement candidates.
+ // Recompute the removable registers, since some candidates may have
+ // been eliminated.
+ AllRMs.clear();
+ for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
+ const IFListType &LL = I->second;
+ if (LL.size() > 0)
+ AllRMs.insert(LL[0].second);
+ }
+ for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
+ IFListType &LL = I->second;
+ if (LL.size() == 0)
+ continue;
+ unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR;
+ if (AllRMs[SR] || AllRMs[IR])
+ LL.clear();
+ }
+
+ pruneEmptyLists();
+}
+
+
+bool HexagonGenInsert::generateInserts() {
+ // Create a new register for each one from IFMap, and store them in the
+ // map.
+ UnsignedMap RegMap;
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
+ unsigned VR = I->first;
+ const TargetRegisterClass *RC = MRI->getRegClass(VR);
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ RegMap[VR] = NewVR;
+ }
+
+ // We can generate the "insert" instructions using potentially stale re-
+ // gisters: SrcR and InsR for a given VR may be among other registers that
+ // are also replaced. This is fine, we will do the mass "rauw" a bit later.
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
+ MachineInstr *MI = MRI->getVRegDef(I->first);
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned NewR = RegMap[I->first];
+ bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass;
+ const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert)
+ : HII->get(Hexagon::S2_insertp);
+ IFRecord IF = I->second[0].first;
+ unsigned Wdh = IF.Wdh, Off = IF.Off;
+ unsigned InsS = 0;
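+ // When the bits being inserted come from a 64-bit register but the
+ // destination is 32-bit, select the subregister (low or high word) that
+ // actually supplies the bits.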
+ if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) {
+ InsS = Hexagon::subreg_loreg;
+ if (Off >= 32) {
+ InsS = Hexagon::subreg_hireg;
+ Off -= 32;
+ }
+ }
+ // Advance to the proper location for inserting instructions. This could
+ // be B.end().
+ MachineBasicBlock::iterator At = MI;
+ if (MI->isPHI())
+ At = B.getFirstNonPHI();
+
+ BuildMI(B, At, DL, D, NewR)
+ .addReg(IF.SrcR)
+ .addReg(IF.InsR, 0, InsS)
+ .addImm(Wdh)
+ .addImm(Off);
+
+ MRI->clearKillFlags(IF.SrcR);
+ MRI->clearKillFlags(IF.InsR);
+ }
+
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
+ MachineInstr *DefI = MRI->getVRegDef(I->first);
+ MRI->replaceRegWith(I->first, RegMap[I->first]);
+ DefI->eraseFromParent();
+ }
+
+ return true;
+}
+
+
+bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
+ bool Changed = false;
+ typedef GraphTraits<MachineDomTreeNode*> GTN;
+ for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
+ Changed |= removeDeadCode(*I);
+
+ MachineBasicBlock *B = N->getBlock();
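+ // Collect the instructions first (in reverse order), since erasing them
+ // while iterating over the block would invalidate the iterators.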
+ std::vector<MachineInstr*> Instrs;
+ for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
+ Instrs.push_back(&*I);
+
+ for (auto I = Instrs.begin(), E = Instrs.end(); I != E; ++I) {
+ MachineInstr *MI = *I;
+ unsigned Opc = MI->getOpcode();
+ // Do not touch lifetime markers. This is why the target-independent DCE
+ // cannot be used.
+ if (Opc == TargetOpcode::LIFETIME_START ||
+ Opc == TargetOpcode::LIFETIME_END)
+ continue;
+ bool Store = false;
+ if (MI->isInlineAsm() || !MI->isSafeToMove(nullptr, Store))
+ continue;
+
+ bool AllDead = true;
+ SmallVector<unsigned,2> Regs;
+ for (ConstMIOperands Op(MI); Op.isValid(); ++Op) {
+ if (!Op->isReg() || !Op->isDef())
+ continue;
+ unsigned R = Op->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R) ||
+ !MRI->use_nodbg_empty(R)) {
+ AllDead = false;
+ break;
+ }
+ Regs.push_back(R);
+ }
+ if (!AllDead)
+ continue;
+
+ B->erase(MI);
+ for (unsigned I = 0, N = Regs.size(); I != N; ++I)
+ MRI->markUsesInDebugValueAsUndef(Regs[I]);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+
+bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
+ bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail;
+ bool Changed = false;
+ TimerGroup __G("hexinsert");
+ NamedRegionTimer __T("hexinsert", Timing && !TimingDetail);
+
+ // Sanity check: at most one of these options may be set (not both).
+ assert(!OptSelectAll0 || !OptSelectHas0);
+
+ IFMap.clear();
+ BaseOrd.clear();
+ CellOrd.clear();
+
+ const auto &ST = MF.getSubtarget<HexagonSubtarget>();
+ HII = ST.getInstrInfo();
+ HRI = ST.getRegisterInfo();
+ MFN = &MF;
+ MRI = &MF.getRegInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
+
+ // Clean up before any further processing, so that dead code does not
+ // get used in a newly generated "insert" instruction. Use a custom
+ // version of DCE that preserves lifetime markers. Without it, merging
+ // of stack objects can fail to recognize and merge disjoint objects,
+ // leading to unnecessary stack growth.
+ Changed = removeDeadCode(MDT->getRootNode());
+
+ const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
+ BitTracker BTLoc(HE, MF);
+ BTLoc.trace(isDebug());
+ BTLoc.run();
+ CellMapShadow MS(BTLoc);
+ CMS = &MS;
+
+ buildOrderingMF(BaseOrd);
+ buildOrderingBT(BaseOrd, CellOrd);
+
+ if (isDebug()) {
+ dbgs() << "Cell ordering:\n";
+ for (RegisterOrdering::iterator I = CellOrd.begin(), E = CellOrd.end();
+ I != E; ++I) {
+ unsigned VR = I->first, Pos = I->second;
+ dbgs() << PrintReg(VR, HRI) << " -> " << Pos << "\n";
+ }
+ }
+
+ // Collect candidates for conversion into the insert forms.
+ MachineBasicBlock *RootB = MDT->getRoot();
+ OrderedRegisterList AvailR(CellOrd);
+
+ {
+ NamedRegionTimer _T("collection", "hexinsert", TimingDetail);
+ collectInBlock(RootB, AvailR);
+ // Complete the information gathered in IFMap.
+ computeRemovableRegisters();
+ }
+
+ if (isDebug()) {
+ dbgs() << "Candidates after collection:\n";
+ dump_map();
+ }
+
+ if (IFMap.empty())
+ return Changed;
+
+ {
+ NamedRegionTimer _T("pruning", "hexinsert", TimingDetail);
+ pruneCandidates();
+ }
+
+ if (isDebug()) {
+ dbgs() << "Candidates after pruning:\n";
+ dump_map();
+ }
+
+ if (IFMap.empty())
+ return Changed;
+
+ {
+ NamedRegionTimer _T("selection", "hexinsert", TimingDetail);
+ selectCandidates();
+ }
+
+ if (isDebug()) {
+ dbgs() << "Candidates after selection:\n";
+ dump_map();
+ }
+
+ // Filter out vregs beyond the cutoff.
+ if (VRegIndexCutoff.getPosition()) {
+ unsigned Cutoff = VRegIndexCutoff;
+ typedef SmallVector<IFMapType::iterator,16> IterListType;
+ IterListType Out;
+ for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
+ unsigned Idx = TargetRegisterInfo::virtReg2Index(I->first);
+ if (Idx >= Cutoff)
+ Out.push_back(I);
+ }
+ for (unsigned i = 0, n = Out.size(); i < n; ++i)
+ IFMap.erase(Out[i]);
+ }
+ if (IFMap.empty())
+ return Changed;
+
+ {
+ NamedRegionTimer _T("generation", "hexinsert", TimingDetail);
+ generateInserts();
+ }
+
+ return true;
+}
+
+
+FunctionPass *llvm::createHexagonGenInsert() {
+ return new HexagonGenInsert();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert",
+ "Hexagon generate \"insert\" instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(HexagonGenInsert, "hexinsert",
+ "Hexagon generate \"insert\" instructions", false, false)
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
new file mode 100644
index 0000000..c059d56
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -0,0 +1,319 @@
+//===--- HexagonGenMux.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// During instruction selection, MUX instructions are generated for
+// conditional assignments. Since such assignments often present an
+// opportunity to predicate instructions, HexagonExpandCondsets
+// expands MUXes into pairs of conditional transfers, and then proceeds
+// with predication of the producers/consumers of the registers involved.
+// This happens after exiting from the SSA form, but before the machine
+// instruction scheduler. After the scheduler and after the register
+// allocation there can be cases of pairs of conditional transfers
+// resulting from a MUX where neither of them was further predicated. If
+// these transfers are now placed far enough from the instruction defining
+// the predicate register, they cannot use the .new form. In such cases it
+// is better to collapse them back to a single MUX instruction.
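+//
+// For example (schematic; the register values are illustrative only):
+//   if (p0) r0 = r1          ; A2_tfrt
+//   if (!p0) r0 = #5         ; C2_cmoveif
+// may be collapsed back into
+//   r0 = mux(p0, r1, #5)     ; C2_muxir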
+
+#define DEBUG_TYPE "hexmux"
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+namespace llvm {
+ FunctionPass *createHexagonGenMux();
+ void initializeHexagonGenMuxPass(PassRegistry& Registry);
+}
+
+namespace {
+ class HexagonGenMux : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonGenMux() : MachineFunctionPass(ID), HII(0), HRI(0) {
+ initializeHexagonGenMuxPass(*PassRegistry::getPassRegistry());
+ }
+ const char *getPassName() const override {
+ return "Hexagon generate mux instructions";
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
+
+ struct CondsetInfo {
+ unsigned PredR;
+ unsigned TrueX, FalseX;
+ CondsetInfo() : PredR(0), TrueX(UINT_MAX), FalseX(UINT_MAX) {}
+ };
+ struct DefUseInfo {
+ BitVector Defs, Uses;
+ DefUseInfo() : Defs(), Uses() {}
+ DefUseInfo(const BitVector &D, const BitVector &U) : Defs(D), Uses(U) {}
+ };
+ struct MuxInfo {
+ MachineBasicBlock::iterator At;
+ unsigned DefR, PredR;
+ MachineOperand *SrcT, *SrcF;
+ MachineInstr *Def1, *Def2;
+ MuxInfo(MachineBasicBlock::iterator It, unsigned DR, unsigned PR,
+ MachineOperand *TOp, MachineOperand *FOp,
+ MachineInstr *D1, MachineInstr *D2)
+ : At(It), DefR(DR), PredR(PR), SrcT(TOp), SrcF(FOp), Def1(D1),
+ Def2(D2) {}
+ };
+ typedef DenseMap<MachineInstr*,unsigned> InstrIndexMap;
+ typedef DenseMap<unsigned,DefUseInfo> DefUseInfoMap;
+ typedef SmallVector<MuxInfo,4> MuxInfoList;
+
+ bool isRegPair(unsigned Reg) const {
+ return Hexagon::DoubleRegsRegClass.contains(Reg);
+ }
+ void getSubRegs(unsigned Reg, BitVector &SRs) const;
+ void expandReg(unsigned Reg, BitVector &Set) const;
+ void getDefsUses(const MachineInstr *MI, BitVector &Defs,
+ BitVector &Uses) const;
+ void buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X,
+ DefUseInfoMap &DUM);
+ bool isCondTransfer(unsigned Opc) const;
+ unsigned getMuxOpcode(const MachineOperand &Src1,
+ const MachineOperand &Src2) const;
+ bool genMuxInBlock(MachineBasicBlock &B);
+ };
+
+ char HexagonGenMux::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonGenMux, "hexagon-mux",
+ "Hexagon generate mux instructions", false, false)
+
+
+void HexagonGenMux::getSubRegs(unsigned Reg, BitVector &SRs) const {
+ for (MCSubRegIterator I(Reg, HRI); I.isValid(); ++I)
+ SRs[*I] = true;
+}
+
+
+void HexagonGenMux::expandReg(unsigned Reg, BitVector &Set) const {
+ if (isRegPair(Reg))
+ getSubRegs(Reg, Set);
+ else
+ Set[Reg] = true;
+}
+
+
+void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs,
+ BitVector &Uses) const {
+ // First, get the implicit defs and uses for this instruction.
+ unsigned Opc = MI->getOpcode();
+ const MCInstrDesc &D = HII->get(Opc);
+ if (const MCPhysReg *R = D.ImplicitDefs)
+ while (*R)
+ expandReg(*R++, Defs);
+ if (const MCPhysReg *R = D.ImplicitUses)
+ while (*R)
+ expandReg(*R++, Uses);
+
+ // Look over all operands, and collect explicit defs and uses.
+ for (ConstMIOperands Mo(MI); Mo.isValid(); ++Mo) {
+ if (!Mo->isReg() || Mo->isImplicit())
+ continue;
+ unsigned R = Mo->getReg();
+ BitVector &Set = Mo->isDef() ? Defs : Uses;
+ expandReg(R, Set);
+ }
+}
+
+
+void HexagonGenMux::buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X,
+ DefUseInfoMap &DUM) {
+ unsigned Index = 0;
+ unsigned NR = HRI->getNumRegs();
+ BitVector Defs(NR), Uses(NR);
+
+ for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ I2X.insert(std::make_pair(MI, Index));
+ Defs.reset();
+ Uses.reset();
+ getDefsUses(MI, Defs, Uses);
+ DUM.insert(std::make_pair(Index, DefUseInfo(Defs, Uses)));
+ Index++;
+ }
+}
+
+
+bool HexagonGenMux::isCondTransfer(unsigned Opc) const {
+ switch (Opc) {
+ case Hexagon::A2_tfrt:
+ case Hexagon::A2_tfrf:
+ case Hexagon::C2_cmoveit:
+ case Hexagon::C2_cmoveif:
+ return true;
+ }
+ return false;
+}
+
+
+unsigned HexagonGenMux::getMuxOpcode(const MachineOperand &Src1,
+ const MachineOperand &Src2) const {
+ bool IsReg1 = Src1.isReg(), IsReg2 = Src2.isReg();
+ if (IsReg1)
+ return IsReg2 ? Hexagon::C2_mux : Hexagon::C2_muxir;
+ if (IsReg2)
+ return Hexagon::C2_muxri;
+
+ // Neither is a register. The first source operand is extendable, but the
+ // second is not, so it must fit in s8.
+ if (Src2.isImm() && isInt<8>(Src2.getImm()))
+ return Hexagon::C2_muxii;
+
+ return 0;
+}
+
+
+bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
+ bool Changed = false;
+ InstrIndexMap I2X;
+ DefUseInfoMap DUM;
+ buildMaps(B, I2X, DUM);
+
+ typedef DenseMap<unsigned,CondsetInfo> CondsetMap;
+ CondsetMap CM;
+ MuxInfoList ML;
+
+ MachineBasicBlock::iterator NextI, End = B.end();
+ for (MachineBasicBlock::iterator I = B.begin(); I != End; I = NextI) {
+ MachineInstr *MI = &*I;
+ NextI = std::next(I);
+ unsigned Opc = MI->getOpcode();
+ if (!isCondTransfer(Opc))
+ continue;
+ unsigned DR = MI->getOperand(0).getReg();
+ if (isRegPair(DR))
+ continue;
+
+ unsigned PR = MI->getOperand(1).getReg();
+ unsigned Idx = I2X.lookup(MI);
+ CondsetMap::iterator F = CM.find(DR);
+ bool IfTrue = HII->isPredicatedTrue(Opc);
+
+ // If there is no record of a conditional transfer for this register,
+ // or the predicate register differs, create a new record for it.
+ if (F != CM.end() && F->second.PredR != PR) {
+ CM.erase(F);
+ F = CM.end();
+ }
+ if (F == CM.end()) {
+ auto It = CM.insert(std::make_pair(DR, CondsetInfo()));
+ F = It.first;
+ F->second.PredR = PR;
+ }
+ CondsetInfo &CI = F->second;
+ if (IfTrue)
+ CI.TrueX = Idx;
+ else
+ CI.FalseX = Idx;
+ if (CI.TrueX == UINT_MAX || CI.FalseX == UINT_MAX)
+ continue;
+
+ // There is now a complete definition of DR, i.e. we have the predicate
+ // register, the definition if-true, and definition if-false.
+
+ // First, check if both definitions are far enough from the definition
+ // of the predicate register.
+ unsigned MinX = std::min(CI.TrueX, CI.FalseX);
+ unsigned MaxX = std::max(CI.TrueX, CI.FalseX);
+ unsigned SearchX = (MaxX > 4) ? MaxX-4 : 0;
+ bool NearDef = false;
+ for (unsigned X = SearchX; X < MaxX; ++X) {
+ const DefUseInfo &DU = DUM.lookup(X);
+ if (!DU.Defs[PR])
+ continue;
+ NearDef = true;
+ break;
+ }
+ if (NearDef)
+ continue;
+
+ // The predicate register is not defined in the last few instructions.
+ // Check if the conversion to MUX is possible (either "up", i.e. at the
+ // place of the earlier partial definition, or "down", where the later
+ // definition is located). Examine all defs and uses between these two
+ // definitions.
+ // SR1, SR2 - source registers from the first and the second definition.
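+ // E.g. moving the mux "down" to the location of the second transfer is
+ // only safe if SR1 is not redefined in between, and moving it "up"
+ // likewise requires that SR2 remain unchanged between the two transfers.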
+ MachineBasicBlock::iterator It1 = B.begin(), It2 = B.begin();
+ std::advance(It1, MinX);
+ std::advance(It2, MaxX);
+ MachineInstr *Def1 = It1, *Def2 = It2;
+ MachineOperand *Src1 = &Def1->getOperand(2), *Src2 = &Def2->getOperand(2);
+ unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0;
+ unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0;
+ bool Failure = false, CanUp = true, CanDown = true;
+ for (unsigned X = MinX+1; X < MaxX; X++) {
+ const DefUseInfo &DU = DUM.lookup(X);
+ if (DU.Defs[PR] || DU.Defs[DR] || DU.Uses[DR]) {
+ Failure = true;
+ break;
+ }
+ if (CanDown && DU.Defs[SR1])
+ CanDown = false;
+ if (CanUp && DU.Defs[SR2])
+ CanUp = false;
+ }
+ if (Failure || (!CanUp && !CanDown))
+ continue;
+
+ MachineOperand *SrcT = (MinX == CI.TrueX) ? Src1 : Src2;
+ MachineOperand *SrcF = (MinX == CI.FalseX) ? Src1 : Src2;
+ // Prefer "down", since this will move the MUX farther away from the
+ // predicate definition.
+ MachineBasicBlock::iterator At = CanDown ? Def2 : Def1;
+ ML.push_back(MuxInfo(At, DR, PR, SrcT, SrcF, Def1, Def2));
+ }
+
+ for (unsigned I = 0, N = ML.size(); I < N; ++I) {
+ MuxInfo &MX = ML[I];
+ MachineBasicBlock &B = *MX.At->getParent();
+ DebugLoc DL = MX.At->getDebugLoc();
+ unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF);
+ if (!MxOpc)
+ continue;
+ BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR)
+ .addReg(MX.PredR)
+ .addOperand(*MX.SrcT)
+ .addOperand(*MX.SrcF);
+ B.erase(MX.Def1);
+ B.erase(MX.Def2);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool HexagonGenMux::runOnMachineFunction(MachineFunction &MF) {
+ HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+ bool Changed = false;
+ for (auto &I : MF)
+ Changed |= genMuxInBlock(I);
+ return Changed;
+}
+
+FunctionPass *llvm::createHexagonGenMux() {
+ return new HexagonGenMux();
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
new file mode 100644
index 0000000..d9675b5
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -0,0 +1,525 @@
+//===--- HexagonGenPredicate.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "gen-pred"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "HexagonTargetMachine.h"
+
+#include <functional>
+#include <queue>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+namespace llvm {
+ void initializeHexagonGenPredicatePass(PassRegistry& Registry);
+ FunctionPass *createHexagonGenPredicate();
+}
+
+namespace {
+ struct Register {
+ unsigned R, S;
+ Register(unsigned r = 0, unsigned s = 0) : R(r), S(s) {}
+ Register(const MachineOperand &MO) : R(MO.getReg()), S(MO.getSubReg()) {}
+ bool operator== (const Register &Reg) const {
+ return R == Reg.R && S == Reg.S;
+ }
+ bool operator< (const Register &Reg) const {
+ return R < Reg.R || (R == Reg.R && S < Reg.S);
+ }
+ };
+ struct PrintRegister {
+ PrintRegister(Register R, const TargetRegisterInfo &I) : Reg(R), TRI(I) {}
+ friend raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR);
+ private:
+ Register Reg;
+ const TargetRegisterInfo &TRI;
+ };
+ raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR)
+ LLVM_ATTRIBUTE_UNUSED;
+ raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) {
+ return OS << PrintReg(PR.Reg.R, &PR.TRI, PR.Reg.S);
+ }
+
+ class HexagonGenPredicate : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonGenPredicate() : MachineFunctionPass(ID), TII(0), TRI(0), MRI(0) {
+ initializeHexagonGenPredicatePass(*PassRegistry::getPassRegistry());
+ }
+ virtual const char *getPassName() const {
+ return "Hexagon generate predicate operations";
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ typedef SetVector<MachineInstr*> VectOfInst;
+ typedef std::set<Register> SetOfReg;
+ typedef std::map<Register,Register> RegToRegMap;
+
+ const HexagonInstrInfo *TII;
+ const HexagonRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ SetOfReg PredGPRs;
+ VectOfInst PUsers;
+ RegToRegMap G2P;
+
+ bool isPredReg(unsigned R);
+ void collectPredicateGPR(MachineFunction &MF);
+ void processPredicateGPR(const Register &Reg);
+ unsigned getPredForm(unsigned Opc);
+ bool isConvertibleToPredForm(const MachineInstr *MI);
+ bool isScalarCmp(unsigned Opc);
+ bool isScalarPred(Register PredReg);
+ Register getPredRegFor(const Register &Reg);
+ bool convertToPredForm(MachineInstr *MI);
+ bool eliminatePredCopies(MachineFunction &MF);
+ };
+
+ char HexagonGenPredicate::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(HexagonGenPredicate, "hexagon-gen-pred",
+ "Hexagon generate predicate operations", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(HexagonGenPredicate, "hexagon-gen-pred",
+ "Hexagon generate predicate operations", false, false)
+
+bool HexagonGenPredicate::isPredReg(unsigned R) {
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ return false;
+ const TargetRegisterClass *RC = MRI->getRegClass(R);
+ return RC == &Hexagon::PredRegsRegClass;
+}
+
+
+unsigned HexagonGenPredicate::getPredForm(unsigned Opc) {
+ using namespace Hexagon;
+
+ switch (Opc) {
+ case A2_and:
+ case A2_andp:
+ return C2_and;
+ case A4_andn:
+ case A4_andnp:
+ return C2_andn;
+ case M4_and_and:
+ return C4_and_and;
+ case M4_and_andn:
+ return C4_and_andn;
+ case M4_and_or:
+ return C4_and_or;
+
+ case A2_or:
+ case A2_orp:
+ return C2_or;
+ case A4_orn:
+ case A4_ornp:
+ return C2_orn;
+ case M4_or_and:
+ return C4_or_and;
+ case M4_or_andn:
+ return C4_or_andn;
+ case M4_or_or:
+ return C4_or_or;
+
+ case A2_xor:
+ case A2_xorp:
+ return C2_xor;
+
+ case C2_tfrrp:
+ return COPY;
+ }
+ // The opcode corresponding to 0 is TargetOpcode::PHI. We can use 0 here
+ // to denote "none", but we need to make sure that none of the valid opcodes
+ // that we return will ever be 0.
+ assert(PHI == 0 && "Use different value for <none>");
+ return 0;
+}
+
+
+bool HexagonGenPredicate::isConvertibleToPredForm(const MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ if (getPredForm(Opc) != 0)
+ return true;
+
+ // Comparisons against 0 are also convertible. This does not apply to
+ // A4_rcmpeqi or A4_rcmpneqi, since they produce values 0 or 1, which
+ // may not match the value that the predicate register would have if
+ // it was converted to a predicate form.
+ switch (Opc) {
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C4_cmpneqi:
+ if (MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0)
+ return true;
+ break;
+ }
+ return false;
+}
+
+
+void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) {
+ for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) {
+ MachineBasicBlock &B = *A;
+ for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::C2_tfrpr:
+ case TargetOpcode::COPY:
+ if (isPredReg(MI->getOperand(1).getReg())) {
+ Register RD = MI->getOperand(0);
+ if (TargetRegisterInfo::isVirtualRegister(RD.R))
+ PredGPRs.insert(RD);
+ }
+ break;
+ }
+ }
+ }
+}
+
+
+void HexagonGenPredicate::processPredicateGPR(const Register &Reg) {
+ DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": "
+ << PrintReg(Reg.R, TRI, Reg.S) << "\n");
+ typedef MachineRegisterInfo::use_iterator use_iterator;
+ use_iterator I = MRI->use_begin(Reg.R), E = MRI->use_end();
+ if (I == E) {
+ DEBUG(dbgs() << "Dead reg: " << PrintReg(Reg.R, TRI, Reg.S) << '\n');
+ MachineInstr *DefI = MRI->getVRegDef(Reg.R);
+ DefI->eraseFromParent();
+ return;
+ }
+
+ for (; I != E; ++I) {
+ MachineInstr *UseI = I->getParent();
+ if (isConvertibleToPredForm(UseI))
+ PUsers.insert(UseI);
+ }
+}
+
+
+Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
+ // Create a predicate register for a given Reg. The newly created register
+ // will have its value copied from Reg, so that it can be later used as
+ // an operand in other instructions.
+ assert(TargetRegisterInfo::isVirtualRegister(Reg.R));
+ RegToRegMap::iterator F = G2P.find(Reg);
+ if (F != G2P.end())
+ return F->second;
+
+ DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " << PrintRegister(Reg, *TRI));
+ MachineInstr *DefI = MRI->getVRegDef(Reg.R);
+ assert(DefI);
+ unsigned Opc = DefI->getOpcode();
+ if (Opc == Hexagon::C2_tfrpr || Opc == TargetOpcode::COPY) {
+ assert(DefI->getOperand(0).isDef() && DefI->getOperand(1).isUse());
+ Register PR = DefI->getOperand(1);
+ G2P.insert(std::make_pair(Reg, PR));
+ DEBUG(dbgs() << " -> " << PrintRegister(PR, *TRI) << '\n');
+ return PR;
+ }
+
+ MachineBasicBlock &B = *DefI->getParent();
+ DebugLoc DL = DefI->getDebugLoc();
+ const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
+ unsigned NewPR = MRI->createVirtualRegister(PredRC);
+
+ // For convertible instructions, do not modify them, so that they can
+ // be converted later. Generate a copy from Reg to NewPR.
+ if (isConvertibleToPredForm(DefI)) {
+ MachineBasicBlock::iterator DefIt = DefI;
+ BuildMI(B, std::next(DefIt), DL, TII->get(TargetOpcode::COPY), NewPR)
+ .addReg(Reg.R, 0, Reg.S);
+ G2P.insert(std::make_pair(Reg, Register(NewPR)));
+ DEBUG(dbgs() << " -> !" << PrintRegister(Register(NewPR), *TRI) << '\n');
+ return Register(NewPR);
+ }
+
+ llvm_unreachable("Invalid argument");
+}
+
+
+bool HexagonGenPredicate::isScalarCmp(unsigned Opc) {
+ switch (Opc) {
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpgtp:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C2_cmpgei:
+ case Hexagon::C2_cmpgeui:
+ case Hexagon::C4_cmpneqi:
+ case Hexagon::C4_cmpltei:
+ case Hexagon::C4_cmplteui:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
+ case Hexagon::A4_cmpbeq:
+ case Hexagon::A4_cmpbeqi:
+ case Hexagon::A4_cmpbgtu:
+ case Hexagon::A4_cmpbgtui:
+ case Hexagon::A4_cmpbgt:
+ case Hexagon::A4_cmpbgti:
+ case Hexagon::A4_cmpheq:
+ case Hexagon::A4_cmphgt:
+ case Hexagon::A4_cmphgtu:
+ case Hexagon::A4_cmpheqi:
+ case Hexagon::A4_cmphgti:
+ case Hexagon::A4_cmphgtui:
+ return true;
+ }
+ return false;
+}
+
+
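+// Return true if the given predicate register holds a "scalar" predicate,
+// i.e. a value in which all bits are identical (as produced by scalar, not
+// vector, comparisons). This is a conservative check that walks the defining
+// instructions through copies and predicate logical operations down to the
+// comparisons that feed them.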
+bool HexagonGenPredicate::isScalarPred(Register PredReg) {
+ std::queue<Register> WorkQ;
+ WorkQ.push(PredReg);
+
+ while (!WorkQ.empty()) {
+ Register PR = WorkQ.front();
+ WorkQ.pop();
+ const MachineInstr *DefI = MRI->getVRegDef(PR.R);
+ if (!DefI)
+ return false;
+ unsigned DefOpc = DefI->getOpcode();
+ switch (DefOpc) {
+ case TargetOpcode::COPY: {
+ const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
+ if (MRI->getRegClass(PR.R) != PredRC)
+ return false;
+ // If it is a copy between two predicate registers, fall through.
+ }
+ case Hexagon::C2_and:
+ case Hexagon::C2_andn:
+ case Hexagon::C4_and_and:
+ case Hexagon::C4_and_andn:
+ case Hexagon::C4_and_or:
+ case Hexagon::C2_or:
+ case Hexagon::C2_orn:
+ case Hexagon::C4_or_and:
+ case Hexagon::C4_or_andn:
+ case Hexagon::C4_or_or:
+ case Hexagon::C4_or_orn:
+ case Hexagon::C2_xor:
+ // Add operands to the queue.
+ for (ConstMIOperands Mo(DefI); Mo.isValid(); ++Mo)
+ if (Mo->isReg() && Mo->isUse())
+ WorkQ.push(Register(Mo->getReg()));
+ break;
+
+ // All non-vector compares are ok, everything else is bad.
+ default:
+ return isScalarCmp(DefOpc);
+ }
+ }
+
+ return true;
+}
+
+
+bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
+ DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " << MI << " " << *MI);
+
+ unsigned Opc = MI->getOpcode();
+ assert(isConvertibleToPredForm(MI));
+ unsigned NumOps = MI->getNumOperands();
+ for (unsigned i = 0; i < NumOps; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ Register Reg(MO);
+ if (Reg.S && Reg.S != Hexagon::subreg_loreg)
+ return false;
+ if (!PredGPRs.count(Reg))
+ return false;
+ }
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ unsigned NewOpc = getPredForm(Opc);
+ // Special case for comparisons against 0.
+ if (NewOpc == 0) {
+ switch (Opc) {
+ case Hexagon::C2_cmpeqi:
+ NewOpc = Hexagon::C2_not;
+ break;
+ case Hexagon::C4_cmpneqi:
+ NewOpc = TargetOpcode::COPY;
+ break;
+ default:
+ return false;
+ }
+
+ // If it's a scalar predicate register, then all bits in it are
+ // the same. Otherwise, to determine whether all bits are 0 or not
+ // we would need to use any8.
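+ // E.g. (illustrative): if the GPR R was copied out of predicate P, then
+ //   Q = cmp.eq(R, #0)   becomes   Q = not(P)
+ // and
+ //   Q = !cmp.eq(R, #0)  becomes a plain copy of P.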
+ Register PR = getPredRegFor(MI->getOperand(1));
+ if (!isScalarPred(PR))
+ return false;
+ // This will skip the immediate argument when creating the predicate
+ // version instruction.
+ NumOps = 2;
+ }
+
+ // Some sanity: check that def is in operand #0.
+ MachineOperand &Op0 = MI->getOperand(0);
+ assert(Op0.isDef());
+ Register OutR(Op0);
+
+ // Don't use getPredRegFor, since it will create an association between
+ // the argument and a created predicate register (i.e. it will insert a
+ // copy if a new predicate register is created).
+ const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
+ Register NewPR = MRI->createVirtualRegister(PredRC);
+ MachineInstrBuilder MIB = BuildMI(B, MI, DL, TII->get(NewOpc), NewPR.R);
+
+ // Add predicate counterparts of the GPRs.
+ for (unsigned i = 1; i < NumOps; ++i) {
+ Register GPR = MI->getOperand(i);
+ Register Pred = getPredRegFor(GPR);
+ MIB.addReg(Pred.R, 0, Pred.S);
+ }
+ DEBUG(dbgs() << "generated: " << *MIB);
+
+ // Generate a copy-out: NewGPR = NewPR, and replace all uses of OutR
+ // with NewGPR.
+ const TargetRegisterClass *RC = MRI->getRegClass(OutR.R);
+ unsigned NewOutR = MRI->createVirtualRegister(RC);
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), NewOutR)
+ .addReg(NewPR.R, 0, NewPR.S);
+ MRI->replaceRegWith(OutR.R, NewOutR);
+ MI->eraseFromParent();
+
+ // If the processed instruction was C2_tfrrp (i.e. Rn = Pm; Pk = Rn),
+ // then the output will be a predicate register. Do not visit the
+ // users of it.
+ if (!isPredReg(NewOutR)) {
+ Register R(NewOutR);
+ PredGPRs.insert(R);
+ processPredicateGPR(R);
+ }
+ return true;
+}
+
+
+bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) {
+ DEBUG(dbgs() << LLVM_FUNCTION_NAME << "\n");
+ const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
+ bool Changed = false;
+ VectOfInst Erase;
+
+ // First, replace copies
+ // IntR = PredR1
+ // PredR2 = IntR
+ // with
+ // PredR2 = PredR1
+ // Such sequences can be generated when a copy-into-pred is generated from
+ // a gpr register holding a result of a convertible instruction. After
+ // the convertible instruction is converted, its predicate result will be
+ // copied back into the original gpr.
+
+ for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) {
+ MachineBasicBlock &B = *A;
+ for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ if (I->getOpcode() != TargetOpcode::COPY)
+ continue;
+ Register DR = I->getOperand(0);
+ Register SR = I->getOperand(1);
+ if (!TargetRegisterInfo::isVirtualRegister(DR.R))
+ continue;
+ if (!TargetRegisterInfo::isVirtualRegister(SR.R))
+ continue;
+ if (MRI->getRegClass(DR.R) != PredRC)
+ continue;
+ if (MRI->getRegClass(SR.R) != PredRC)
+ continue;
+ assert(!DR.S && !SR.S && "Unexpected subregister");
+ MRI->replaceRegWith(DR.R, SR.R);
+ Erase.insert(I);
+ Changed = true;
+ }
+ }
+
+ for (VectOfInst::iterator I = Erase.begin(), E = Erase.end(); I != E; ++I)
+ (*I)->eraseFromParent();
+
+ return Changed;
+}
+
+
+bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ TRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ PredGPRs.clear();
+ PUsers.clear();
+ G2P.clear();
+
+ bool Changed = false;
+ collectPredicateGPR(MF);
+ for (SetOfReg::iterator I = PredGPRs.begin(), E = PredGPRs.end(); I != E; ++I)
+ processPredicateGPR(*I);
+
+ bool Again;
+ do {
+ Again = false;
+ VectOfInst Processed, Copy;
+
+ typedef VectOfInst::iterator iterator;
+ Copy = PUsers;
+ for (iterator I = Copy.begin(), E = Copy.end(); I != E; ++I) {
+ MachineInstr *MI = *I;
+ bool Done = convertToPredForm(MI);
+ if (Done) {
+ Processed.insert(MI);
+ Again = true;
+ }
+ }
+ Changed |= Again;
+
+ auto Done = [Processed] (MachineInstr *MI) -> bool {
+ return Processed.count(MI);
+ };
+ PUsers.remove_if(Done);
+ } while (Again);
+
+ Changed |= eliminatePredCopies(MF);
+ return Changed;
+}
+
+
+FunctionPass *llvm::createHexagonGenPredicate() {
+ return new HexagonGenPredicate();
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
new file mode 100644
index 0000000..d20a809
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -0,0 +1,1965 @@
+//===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies loops where we can generate the Hexagon hardware
+// loop instruction. The hardware loop can perform loop branches with a
+// zero-cycle overhead.
+//
+// The pattern that defines the induction variable can change depending on
+// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
+// normalizes induction variables, and the Loop Strength Reduction pass
+// run by 'llc' may also make changes to the induction variable.
+// The pattern detected by this phase is due to running Strength Reduction.
+//
+// Criteria for hardware loops:
+// - Countable loops (w/ ind. var for a trip count)
+// - Assumes loops are normalized by IndVarSimplify
+// - Try inner-most loops first
+// - No function calls in loops.
+//
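+// For example, the generated code takes roughly this shape (schematic
+// assembly):
+//     loop0(.LBB_loop, #N)    // set up hardware loop 0 with trip count N
+//   .LBB_loop:
+//     ...                     // loop body
+//     { ... } :endloop0       // last packet of the loop; zero-cycle branch
+//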
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallSet.h"
+#include "Hexagon.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hwloops"
+
+#ifndef NDEBUG
+static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
+
+// Option to create preheader only for a specific function.
+static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
+ cl::init(""));
+#endif
+
+// Option to create a preheader if one doesn't exist.
+static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
+ cl::Hidden, cl::init(true),
+ cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
+
+STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+
+namespace llvm {
+ FunctionPass *createHexagonHardwareLoops();
+ void initializeHexagonHardwareLoopsPass(PassRegistry&);
+}
+
+namespace {
+ class CountValue;
+ struct HexagonHardwareLoops : public MachineFunctionPass {
+ MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *MDT;
+ const HexagonInstrInfo *TII;
+#ifndef NDEBUG
+ static int Counter;
+#endif
+
+ public:
+ static char ID;
+
+ HexagonHardwareLoops() : MachineFunctionPass(ID) {
+ initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override { return "Hexagon Hardware Loops"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ typedef std::map<unsigned, MachineInstr *> LoopFeederMap;
+
+ /// Kinds of comparisons in the compare instructions.
+ struct Comparison {
+ enum Kind {
+ EQ = 0x01,
+ NE = 0x02,
+ L = 0x04,
+ G = 0x08,
+ U = 0x40,
+ LTs = L,
+ LEs = L | EQ,
+ GTs = G,
+ GEs = G | EQ,
+ LTu = L | U,
+ LEu = L | EQ | U,
+ GTu = G | U,
+ GEu = G | EQ | U
+ };
+
+ static Kind getSwappedComparison(Kind Cmp) {
+ assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
+ if ((Cmp & L) || (Cmp & G))
+ return (Kind)(Cmp ^ (L|G));
+ return Cmp;
+ }
+
+ static Kind getNegatedComparison(Kind Cmp) {
+ if ((Cmp & L) || (Cmp & G))
+ return (Kind)((Cmp ^ (L | G)) ^ EQ);
+ if ((Cmp & NE) || (Cmp & EQ))
+ return (Kind)(Cmp ^ (EQ | NE));
+ return (Kind)0;
+ }
+
+ static bool isSigned(Kind Cmp) {
+ return (Cmp & (L | G) && !(Cmp & U));
+ }
+
+ static bool isUnsigned(Kind Cmp) {
+ return (Cmp & U);
+ }
+
+ };
+
+ /// \brief Find the register that contains the loop controlling
+ /// induction variable.
+ /// If successful, it will return true and set the \p Reg, \p IVBump
+ /// and \p IVOp arguments. Otherwise it will return false.
+ /// The returned induction register is the register R that follows the
+ /// following induction pattern:
+ /// loop:
+ /// R = phi ..., [ R.next, LatchBlock ]
+ /// R.next = R + #bump
+ /// if (R.next < #N) goto loop
+ /// IVBump is the immediate value added to R, and IVOp is the instruction
+ /// "R.next = R + #bump".
+ bool findInductionRegister(MachineLoop *L, unsigned &Reg,
+ int64_t &IVBump, MachineInstr *&IVOp) const;
+
+ /// \brief Return the comparison kind for the specified opcode.
+ Comparison::Kind getComparisonKind(unsigned CondOpc,
+ MachineOperand *InitialValue,
+ const MachineOperand *Endvalue,
+ int64_t IVBump) const;
+
+ /// \brief Analyze the statements in a loop to determine if the loop
+ /// has a computable trip count and, if so, return a value that represents
+ /// the trip count expression.
+ CountValue *getLoopTripCount(MachineLoop *L,
+ SmallVectorImpl<MachineInstr *> &OldInsts);
+
+ /// \brief Return the expression that represents the number of times
+ /// a loop iterates. The function takes the operands that represent the
+ /// loop start value, loop end value, and induction value. Based upon
+ /// these operands, the function attempts to compute the trip count.
+ /// If the trip count is not directly available (as an immediate value,
+ /// or a register), the function will attempt to insert computation of it
+ /// to the loop's preheader.
+ CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
+ const MachineOperand *End, unsigned IVReg,
+ int64_t IVBump, Comparison::Kind Cmp) const;
+
+ /// \brief Return true if the instruction is not valid within a hardware
+ /// loop.
+ bool isInvalidLoopOperation(const MachineInstr *MI,
+ bool IsInnerHWLoop) const;
+
+ /// \brief Return true if the loop contains an instruction that inhibits
+ /// using the hardware loop.
+ bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
+
+ /// \brief Given a loop, check if we can convert it to a hardware loop.
+ /// If so, then perform the conversion and return true.
+ bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
+
+ /// \brief Return true if the instruction is now dead.
+ bool isDead(const MachineInstr *MI,
+ SmallVectorImpl<MachineInstr *> &DeadPhis) const;
+
+ /// \brief Remove the instruction if it is now dead.
+ void removeIfDead(MachineInstr *MI);
+
+ /// \brief Make sure that the "bump" instruction executes before the
+ /// compare. We need that for the IV fixup, so that the compare
+ /// instruction would not use a bumped value that has not yet been
+ /// defined. If the instructions are out of order, try to reorder them.
+ bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
+
+ /// \brief Return true if the (MO, MI) pair is visited only once. If it is
+ /// visited more than once, there is recursion, and false is returned.
+ bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
+ const MachineOperand *MO,
+ LoopFeederMap &LoopFeederPhi) const;
+
+ /// \brief Return true if the Phi may generate a value that may underflow,
+ /// or may wrap.
+ bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
+ MachineBasicBlock *MBB, MachineLoop *L,
+ LoopFeederMap &LoopFeederPhi) const;
+
+ /// \brief Return true if the induction variable may underflow an unsigned
+ /// value in the first iteration.
+ bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
+ const MachineOperand *EndVal,
+ MachineBasicBlock *MBB, MachineLoop *L,
+ LoopFeederMap &LoopFeederPhi) const;
+
+ /// \brief Check if the given operand has a compile-time known constant
+ /// value. Return true if yes, and false otherwise. When returning true, set
+ /// Val to the corresponding constant value.
+ bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
+
+ /// \brief Check if the operand has a compile-time known constant value.
+ bool isImmediate(const MachineOperand &MO) const {
+ int64_t V;
+ return checkForImmediate(MO, V);
+ }
+
+ /// \brief Return the immediate for the specified operand.
+ int64_t getImmediate(const MachineOperand &MO) const {
+ int64_t V;
+ if (!checkForImmediate(MO, V))
+ llvm_unreachable("Invalid operand");
+ return V;
+ }
+
+ /// \brief Reset the given machine operand to now refer to a new immediate
+ /// value. Assumes that the operand was already referencing an immediate
+ /// value, either directly, or via a register.
+ void setImmediate(MachineOperand &MO, int64_t Val);
+
+ /// \brief Fix the data flow of the induction variable.
+ /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
+ /// |
+ /// +-> back to phi
+ /// where "bump" is the increment of the induction variable:
+ /// iv = iv + #const.
+ /// Due to some prior code transformations, the actual flow may look
+ /// like this:
+ /// phi -+-> bump ---> back to phi
+ /// |
+ /// +-> comparison-in-latch (against upper_bound-bump),
+ /// i.e. the comparison that controls the loop execution may be using
+ /// the value of the induction variable from before the increment.
+ ///
+ /// Return true if the loop's flow is the desired one (i.e. it's
+ /// either been fixed, or no fixing was necessary).
+ /// Otherwise, return false. This can happen if the induction variable
+ /// couldn't be identified, or if the value in the latch's comparison
+ /// cannot be adjusted to reflect the post-bump value.
+ bool fixupInductionVariable(MachineLoop *L);
+
+ /// \brief Given a loop, if it does not have a preheader, create one.
+ /// Return the block that is the preheader.
+ MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
+ };
+
+ char HexagonHardwareLoops::ID = 0;
+#ifndef NDEBUG
+ int HexagonHardwareLoops::Counter = 0;
+#endif
+
+ /// \brief Abstraction for a trip count of a loop. A smaller version
+ /// of the MachineOperand class without the concerns of changing the
+ /// operand representation.
+ class CountValue {
+ public:
+ enum CountValueType {
+ CV_Register,
+ CV_Immediate
+ };
+ private:
+ CountValueType Kind;
+ union Values {
+ struct {
+ unsigned Reg;
+ unsigned Sub;
+ } R;
+ unsigned ImmVal;
+ } Contents;
+
+ public:
+ explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
+ Kind = t;
+ if (Kind == CV_Register) {
+ Contents.R.Reg = v;
+ Contents.R.Sub = u;
+ } else {
+ Contents.ImmVal = v;
+ }
+ }
+ bool isReg() const { return Kind == CV_Register; }
+ bool isImm() const { return Kind == CV_Immediate; }
+
+ unsigned getReg() const {
+ assert(isReg() && "Wrong CountValue accessor");
+ return Contents.R.Reg;
+ }
+ unsigned getSubReg() const {
+ assert(isReg() && "Wrong CountValue accessor");
+ return Contents.R.Sub;
+ }
+ unsigned getImm() const {
+ assert(isImm() && "Wrong CountValue accessor");
+ return Contents.ImmVal;
+ }
+
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
+ if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
+ if (isImm()) { OS << Contents.ImmVal; }
+ }
+ };
+} // end anonymous namespace
+
+
+INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
+ "Hexagon Hardware Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
+ "Hexagon Hardware Loops", false, false)
+
+FunctionPass *llvm::createHexagonHardwareLoops() {
+ return new HexagonHardwareLoops();
+}
+
+bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
+
+ bool Changed = false;
+
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MRI = &MF.getRegInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+
+ for (auto &L : *MLI)
+ if (!L->getParentLoop()) {
+ bool L0Used = false;
+ bool L1Used = false;
+ Changed |= convertToHardwareLoop(L, L0Used, L1Used);
+ }
+
+ return Changed;
+}
+
+/// \brief Return the latch block if it's one of the exiting blocks. Otherwise,
+/// return the exiting block. Return 'null' when multiple exiting blocks are
+/// present.
+static MachineBasicBlock* getExitingBlock(MachineLoop *L) {
+ if (MachineBasicBlock *Latch = L->getLoopLatch()) {
+ if (L->isLoopExiting(Latch))
+ return Latch;
+ else
+ return L->getExitingBlock();
+ }
+ return nullptr;
+}
+
+bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
+ unsigned &Reg,
+ int64_t &IVBump,
+ MachineInstr *&IVOp
+ ) const {
+ MachineBasicBlock *Header = L->getHeader();
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ MachineBasicBlock *Latch = L->getLoopLatch();
+ MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+ if (!Header || !Preheader || !Latch || !ExitingBlock)
+ return false;
+
+ // This pair represents an induction register together with an immediate
+ // value that will be added to it in each loop iteration.
+ typedef std::pair<unsigned,int64_t> RegisterBump;
+
+ // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
+ // from an induction operation
+ // R.next = R + bump
+ // where bump is an immediate value.
+ typedef std::map<unsigned,RegisterBump> InductionMap;
+
+ InductionMap IndMap;
+
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *Phi = &*I;
+
+ // Have a PHI instruction. Get the operand that corresponds to the
+ // latch block, and see if it is a result of an addition of the form
+ // "reg+imm", where the "reg" is defined by the PHI node we are looking at.
+ for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
+ if (Phi->getOperand(i+1).getMBB() != Latch)
+ continue;
+
+ unsigned PhiOpReg = Phi->getOperand(i).getReg();
+ MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
+ unsigned UpdOpc = DI->getOpcode();
+ bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp);
+
+ if (isAdd) {
+ // If the register operand to the add is the PHI we're looking at, this
+ // meets the induction pattern.
+ unsigned IndReg = DI->getOperand(1).getReg();
+ MachineOperand &Opnd2 = DI->getOperand(2);
+ int64_t V;
+ if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
+ unsigned UpdReg = DI->getOperand(0).getReg();
+ IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
+ }
+ }
+ } // for (i)
+ } // for (instr)
+
+ SmallVector<MachineOperand,2> Cond;
+ MachineBasicBlock *TB = nullptr, *FB = nullptr;
+ bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false);
+ if (NotAnalyzed)
+ return false;
+
+ unsigned PredR, PredPos, PredRegFlags;
+ if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
+ return false;
+
+ MachineInstr *PredI = MRI->getVRegDef(PredR);
+ if (!PredI->isCompare())
+ return false;
+
+ unsigned CmpReg1 = 0, CmpReg2 = 0;
+ int CmpImm = 0, CmpMask = 0;
+ bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2,
+ CmpMask, CmpImm);
+ // Fail if the compare was not analyzed, or it's not comparing a register
+ // with an immediate value. Not checking the mask here, since we handle
+ // the individual compare opcodes (including A4_cmpb*) later on.
+ if (!CmpAnalyzed)
+ return false;
+
+ // Exactly one of the input registers to the comparison should be among
+ // the induction registers.
+ InductionMap::iterator IndMapEnd = IndMap.end();
+ InductionMap::iterator F = IndMapEnd;
+ if (CmpReg1 != 0) {
+ InductionMap::iterator F1 = IndMap.find(CmpReg1);
+ if (F1 != IndMapEnd)
+ F = F1;
+ }
+ if (CmpReg2 != 0) {
+ InductionMap::iterator F2 = IndMap.find(CmpReg2);
+ if (F2 != IndMapEnd) {
+ if (F != IndMapEnd)
+ return false;
+ F = F2;
+ }
+ }
+ if (F == IndMapEnd)
+ return false;
+
+ Reg = F->second.first;
+ IVBump = F->second.second;
+ IVOp = MRI->getVRegDef(F->first);
+ return true;
+}
+
+// Return the comparison kind for the specified opcode.
+HexagonHardwareLoops::Comparison::Kind
+HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
+ MachineOperand *InitialValue,
+ const MachineOperand *EndValue,
+ int64_t IVBump) const {
+ Comparison::Kind Cmp = (Comparison::Kind)0;
+ switch (CondOpc) {
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ Cmp = Comparison::EQ;
+ break;
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmpneqi:
+ Cmp = Comparison::NE;
+ break;
+ case Hexagon::C4_cmplte:
+ Cmp = Comparison::LEs;
+ break;
+ case Hexagon::C4_cmplteu:
+ Cmp = Comparison::LEu;
+ break;
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtup:
+ Cmp = Comparison::GTu;
+ break;
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtp:
+ Cmp = Comparison::GTs;
+ break;
+ default:
+ return (Comparison::Kind)0;
+ }
+ return Cmp;
+}
+
+/// \brief Analyze the statements in a loop to determine if the loop has
+/// a computable trip count and, if so, return a value that represents
+/// the trip count expression.
+///
+/// This function iterates over the phi nodes in the loop to check for
+/// induction variable patterns that are used in the calculation for
+/// the number of times the loop is executed.
+CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
+ SmallVectorImpl<MachineInstr *> &OldInsts) {
+ MachineBasicBlock *TopMBB = L->getTopBlock();
+ MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
+ assert(PI != TopMBB->pred_end() &&
+ "Loop must have more than one incoming edge!");
+ MachineBasicBlock *Backedge = *PI++;
+ if (PI == TopMBB->pred_end()) // dead loop?
+ return nullptr;
+ MachineBasicBlock *Incoming = *PI++;
+ if (PI != TopMBB->pred_end()) // multiple backedges?
+ return nullptr;
+
+ // Make sure there is one incoming and one backedge and determine which
+ // is which.
+ if (L->contains(Incoming)) {
+ if (L->contains(Backedge))
+ return nullptr;
+ std::swap(Incoming, Backedge);
+ } else if (!L->contains(Backedge))
+ return nullptr;
+
+ // Look for the cmp instruction to determine if we can get a useful trip
+ // count. The trip count can be either a register or an immediate. The
+ // location of the value depends upon the type (reg or imm).
+ MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+ if (!ExitingBlock)
+ return nullptr;
+
+ unsigned IVReg = 0;
+ int64_t IVBump = 0;
+ MachineInstr *IVOp;
+ bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
+ if (!FoundIV)
+ return nullptr;
+
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+
+ MachineOperand *InitialValue = nullptr;
+ MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
+ MachineBasicBlock *Latch = L->getLoopLatch();
+ for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
+ MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
+ if (MBB == Preheader)
+ InitialValue = &IV_Phi->getOperand(i);
+ else if (MBB == Latch)
+ IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
+ }
+ if (!InitialValue)
+ return nullptr;
+
+ SmallVector<MachineOperand,2> Cond;
+ MachineBasicBlock *TB = nullptr, *FB = nullptr;
+ bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false);
+ if (NotAnalyzed)
+ return nullptr;
+
+ MachineBasicBlock *Header = L->getHeader();
+ // TB must be non-null. If FB is also non-null, one of them must be
+ // the header. Otherwise, the branch to TB could be exiting the loop, and
+ // the fall-through could go to the header.
+ assert (TB && "Exit block without a branch?");
+ if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
+ MachineBasicBlock *LTB = 0, *LFB = 0;
+ SmallVector<MachineOperand,2> LCond;
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false);
+ if (NotAnalyzed)
+ return nullptr;
+ if (TB == Latch)
+ TB = (LTB == Header) ? LTB : LFB;
+ else
+ FB = (LTB == Header) ? LTB: LFB;
+ }
+ assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
+ if (!TB || (FB && TB != Header && FB != Header))
+ return nullptr;
+
+ // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
+ // to put imm(0), followed by P in the vector Cond.
+ // If TB is not the header, it means that the "not-taken" path must lead
+ // to the header.
+ bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
+ unsigned PredReg, PredPos, PredRegFlags;
+ if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
+ return nullptr;
+ MachineInstr *CondI = MRI->getVRegDef(PredReg);
+ unsigned CondOpc = CondI->getOpcode();
+
+ unsigned CmpReg1 = 0, CmpReg2 = 0;
+ int Mask = 0, ImmValue = 0;
+ bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2,
+ Mask, ImmValue);
+ if (!AnalyzedCmp)
+ return nullptr;
+
+ // The comparison operator type determines how we compute the loop
+ // trip count.
+ OldInsts.push_back(CondI);
+ OldInsts.push_back(IVOp);
+
+ // Sadly, the following code gets information based on the position
+ // of the operands in the compare instruction. This has to be done
+ // this way, because the comparisons check for a specific relationship
+ // between the operands (e.g. is-less-than), rather than to find out
+ // what relationship the operands are in (as on PPC).
+ Comparison::Kind Cmp;
+ bool isSwapped = false;
+ const MachineOperand &Op1 = CondI->getOperand(1);
+ const MachineOperand &Op2 = CondI->getOperand(2);
+ const MachineOperand *EndValue = nullptr;
+
+ if (Op1.isReg()) {
+ if (Op2.isImm() || Op1.getReg() == IVReg)
+ EndValue = &Op2;
+ else {
+ EndValue = &Op1;
+ isSwapped = true;
+ }
+ }
+
+ if (!EndValue)
+ return nullptr;
+
+ Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
+ if (!Cmp)
+ return nullptr;
+ if (Negated)
+ Cmp = Comparison::getNegatedComparison(Cmp);
+ if (isSwapped)
+ Cmp = Comparison::getSwappedComparison(Cmp);
+
+ if (InitialValue->isReg()) {
+ unsigned R = InitialValue->getReg();
+ MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
+ if (!MDT->properlyDominates(DefBB, Header))
+ return nullptr;
+ OldInsts.push_back(MRI->getVRegDef(R));
+ }
+ if (EndValue->isReg()) {
+ unsigned R = EndValue->getReg();
+ MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
+ if (!MDT->properlyDominates(DefBB, Header))
+ return nullptr;
+ OldInsts.push_back(MRI->getVRegDef(R));
+ }
+
+ return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
+}
+
+/// \brief Helper function that returns the expression that represents the
+/// number of times a loop iterates. The function takes the operands that
+/// represent the loop start value, loop end value, and induction value.
+/// Based upon these operands, the function attempts to compute the trip count.
+CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
+ const MachineOperand *Start,
+ const MachineOperand *End,
+ unsigned IVReg,
+ int64_t IVBump,
+ Comparison::Kind Cmp) const {
+ // Cannot handle comparison EQ, i.e. while (A == B).
+ if (Cmp == Comparison::EQ)
+ return nullptr;
+
+ // Check if either the start or end values are an assignment of an immediate.
+ // If so, use the immediate value rather than the register.
+ if (Start->isReg()) {
+ const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
+ if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
+ StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
+ Start = &StartValInstr->getOperand(1);
+ }
+ if (End->isReg()) {
+ const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
+ if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
+ EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
+ End = &EndValInstr->getOperand(1);
+ }
+
+ if (!Start->isReg() && !Start->isImm())
+ return nullptr;
+ if (!End->isReg() && !End->isImm())
+ return nullptr;
+
+ bool CmpLess = Cmp & Comparison::L;
+ bool CmpGreater = Cmp & Comparison::G;
+ bool CmpHasEqual = Cmp & Comparison::EQ;
+
+ // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
+ if (CmpLess && IVBump < 0)
+ // Loop going while iv is "less" with the iv value going down. Must wrap.
+ return nullptr;
+
+ if (CmpGreater && IVBump > 0)
+ // Loop going while iv is "greater" with the iv value going up. Must wrap.
+ return nullptr;
+
+ // Phis that may feed into the loop.
+ LoopFeederMap LoopFeederPhi;
+
+ // Check if the initial value may be zero and can be decremented in the first
+ // iteration. If the value is zero, the endloop instruction will not decrement
+ // the loop counter, so we shouldn't generate a hardware loop in this case.
+ if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
+ LoopFeederPhi))
+ return nullptr;
+
+ if (Start->isImm() && End->isImm()) {
+ // Both, start and end are immediates.
+ int64_t StartV = Start->getImm();
+ int64_t EndV = End->getImm();
+ int64_t Dist = EndV - StartV;
+ if (Dist == 0)
+ return nullptr;
+
+ bool Exact = (Dist % IVBump) == 0;
+
+ if (Cmp == Comparison::NE) {
+ if (!Exact)
+ return nullptr;
+ if ((Dist < 0) ^ (IVBump < 0))
+ return nullptr;
+ }
+
+ // For comparisons that include the final value (i.e. include equality
+ // with the final value), we need to increase the distance by 1.
+ if (CmpHasEqual)
+ Dist = Dist > 0 ? Dist+1 : Dist-1;
+
+ // For the loop to iterate, CmpLess should imply Dist > 0. Similarly,
+ // CmpGreater should imply Dist < 0. These conditions could actually
+ // fail, for example, in unreachable code (which may still appear to be
+ // reachable in the CFG).
+ if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
+ return nullptr;
+
+ // "Normalized" distance, i.e. with the bump set to +-1.
+ int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump
+ : (-Dist + (-IVBump - 1)) / (-IVBump);
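+ // E.g. Dist = 10 with IVBump = 3 gives Dist1 = (10 + 2) / 3 = 4, matching
+ // the four iterations of a loop stepping the IV through 0, 3, 6, 9.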
+ assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
+
+ uint64_t Count = Dist1;
+
+ if (Count > 0xFFFFFFFFULL)
+ return nullptr;
+
+ return new CountValue(CountValue::CV_Immediate, Count);
+ }
+
+ // A general case: Start and End are some values, but the actual
+ // iteration count may not be available. If it is not, insert
+ // a computation of it into the preheader.
+
+ // If the induction variable bump is not a power of 2, quit.
+ // Otherwise we'd need a general integer division.
+ if (!isPowerOf2_64(std::abs(IVBump)))
+ return nullptr;
+
+ MachineBasicBlock *PH = Loop->getLoopPreheader();
+ assert (PH && "Should have a preheader by now");
+ MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
+ DebugLoc DL;
+ if (InsertPos != PH->end())
+ DL = InsertPos->getDebugLoc();
+
+ // If Start is an immediate and End is a register, the trip count
+ // will be "reg - imm". Hexagon's "subtract immediate" instruction
+ // is actually "reg + -imm".
+
+ // If the loop IV is going downwards, i.e. if the bump is negative,
+ // then the iteration count (computed as End-Start) will need to be
+ // negated. To avoid the negation, just swap Start and End.
+ if (IVBump < 0) {
+ std::swap(Start, End);
+ IVBump = -IVBump;
+ }
+ // Cmp may now have a wrong direction, e.g. LEs may now be GEs.
+ // Signedness and "including equality" are preserved.
+
+ bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
+ bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
+
+ int64_t StartV = 0, EndV = 0;
+ if (Start->isImm())
+ StartV = Start->getImm();
+ if (End->isImm())
+ EndV = End->getImm();
+
+ int64_t AdjV = 0;
+ // To compute the iteration count, we would need this computation:
+ // Count = (End - Start + (IVBump-1)) / IVBump
+ // or, when CmpHasEqual:
+ // Count = (End - Start + (IVBump-1)+1) / IVBump
+ // The "IVBump-1" part is the adjustment (AdjV). We can avoid
+ // generating an instruction specifically to add it if we can adjust
+ // the immediate values for Start or End.
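+  // E.g. for a loop counting from 1 up to 10 in steps of 2 under a strict
+  // comparison, Count = (10 - 1 + (2-1)) / 2 = 5 (iterations at 1, 3, 5, 7, 9).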
+
+ if (CmpHasEqual) {
+ // Need to add 1 to the total iteration count.
+ if (Start->isImm())
+ StartV--;
+ else if (End->isImm())
+ EndV++;
+ else
+ AdjV += 1;
+ }
+
+ if (Cmp != Comparison::NE) {
+ if (Start->isImm())
+ StartV -= (IVBump-1);
+ else if (End->isImm())
+ EndV += (IVBump-1);
+ else
+ AdjV += (IVBump-1);
+ }
+
+ unsigned R = 0, SR = 0;
+ if (Start->isReg()) {
+ R = Start->getReg();
+ SR = Start->getSubReg();
+ } else {
+ R = End->getReg();
+ SR = End->getSubReg();
+ }
+ const TargetRegisterClass *RC = MRI->getRegClass(R);
+ // Hardware loops cannot handle 64-bit registers. If it's a double
+ // register, it has to have a subregister.
+ if (!SR && RC == &Hexagon::DoubleRegsRegClass)
+ return nullptr;
+ const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
+
+ // Compute DistR (register with the distance between Start and End).
+ unsigned DistR, DistSR;
+
+ // Avoid special case, where the start value is an imm(0).
+ if (Start->isImm() && StartV == 0) {
+ DistR = End->getReg();
+ DistSR = End->getSubReg();
+ } else {
+ const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
+ (RegToImm ? TII->get(Hexagon::A2_subri) :
+ TII->get(Hexagon::A2_addi));
+ if (RegToReg || RegToImm) {
+ unsigned SubR = MRI->createVirtualRegister(IntRC);
+ MachineInstrBuilder SubIB =
+ BuildMI(*PH, InsertPos, DL, SubD, SubR);
+
+ if (RegToReg)
+ SubIB.addReg(End->getReg(), 0, End->getSubReg())
+ .addReg(Start->getReg(), 0, Start->getSubReg());
+ else
+ SubIB.addImm(EndV)
+ .addReg(Start->getReg(), 0, Start->getSubReg());
+ DistR = SubR;
+ } else {
+ // If the loop has been unrolled, we should use the original loop count
+ // instead of recalculating the value. This will avoid additional
+ // 'Add' instruction.
+ const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
+ if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
+ EndValInstr->getOperand(2).getImm() == StartV) {
+ DistR = EndValInstr->getOperand(1).getReg();
+ } else {
+ unsigned SubR = MRI->createVirtualRegister(IntRC);
+ MachineInstrBuilder SubIB =
+ BuildMI(*PH, InsertPos, DL, SubD, SubR);
+ SubIB.addReg(End->getReg(), 0, End->getSubReg())
+ .addImm(-StartV);
+ DistR = SubR;
+ }
+ }
+ DistSR = 0;
+ }
+
+ // From DistR, compute AdjR (register with the adjusted distance).
+ unsigned AdjR, AdjSR;
+
+ if (AdjV == 0) {
+ AdjR = DistR;
+ AdjSR = DistSR;
+ } else {
+    // Generate AdjR = ADD DistR, AdjV
+ unsigned AddR = MRI->createVirtualRegister(IntRC);
+ MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
+ BuildMI(*PH, InsertPos, DL, AddD, AddR)
+ .addReg(DistR, 0, DistSR)
+ .addImm(AdjV);
+
+ AdjR = AddR;
+ AdjSR = 0;
+ }
+
+ // From AdjR, compute CountR (register with the final count).
+ unsigned CountR, CountSR;
+
+ if (IVBump == 1) {
+ CountR = AdjR;
+ CountSR = AdjSR;
+ } else {
+ // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
+ unsigned Shift = Log2_32(IVBump);
+
+    // Generate CountR = LSR AdjR, Shift.
+ unsigned LsrR = MRI->createVirtualRegister(IntRC);
+ const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
+ BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
+ .addReg(AdjR, 0, AdjSR)
+ .addImm(Shift);
+
+ CountR = LsrR;
+ CountSR = 0;
+ }
+
+ return new CountValue(CountValue::CV_Register, CountR, CountSR);
+}
+
+/// \brief Return true if the operation is invalid within a hardware loop.
+bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
+ bool IsInnerHWLoop) const {
+
+  // Calls are not allowed because the callee may itself use a hardware loop,
+  // except when the call never returns.
+ if (MI->getDesc().isCall() && MI->getOpcode() != Hexagon::CALLv3nr)
+ return true;
+
+ // Check if the instruction defines a hardware loop register.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned R = MO.getReg();
+ if (IsInnerHWLoop && (R == Hexagon::LC0 || R == Hexagon::SA0 ||
+ R == Hexagon::LC1 || R == Hexagon::SA1))
+ return true;
+ if (!IsInnerHWLoop && (R == Hexagon::LC1 || R == Hexagon::SA1))
+ return true;
+ }
+ return false;
+}
+
+/// \brief Return true if the loop contains an instruction that inhibits
+/// the use of the hardware loop instruction.
+bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
+ bool IsInnerHWLoop) const {
+ const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
+ DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Blocks[i];
+ for (MachineBasicBlock::iterator
+ MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
+ const MachineInstr *MI = &*MII;
+ if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
+ DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump(););
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// \brief Returns true if the instruction is dead. This was essentially
+/// copied from DeadMachineInstructionElim::isDead, but with special cases
+/// for inline asm, physical registers and instructions with side effects
+/// removed.
+bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
+ SmallVectorImpl<MachineInstr *> &DeadPhis) const {
+ // Examine each operand.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (MRI->use_nodbg_empty(Reg))
+ continue;
+
+ typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator;
+
+ // This instruction has users, but if the only user is the phi node for the
+ // parent block, and the only use of that phi node is this instruction, then
+    // this instruction is dead: both it and the phi node can be removed.
+ use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
+ use_nodbg_iterator End = MRI->use_nodbg_end();
+ if (std::next(I) != End || !I->getParent()->isPHI())
+ return false;
+
+ MachineInstr *OnePhi = I->getParent();
+ for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
+ const MachineOperand &OPO = OnePhi->getOperand(j);
+ if (!OPO.isReg() || !OPO.isDef())
+ continue;
+
+ unsigned OPReg = OPO.getReg();
+ use_nodbg_iterator nextJ;
+ for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
+ J != End; J = nextJ) {
+ nextJ = std::next(J);
+ MachineOperand &Use = *J;
+ MachineInstr *UseMI = Use.getParent();
+
+ // If the phi node has a user that is not MI, bail.
+ if (MI != UseMI)
+ return false;
+ }
+ }
+ DeadPhis.push_back(OnePhi);
+ }
+
+ // If there are no defs with uses, the instruction is dead.
+ return true;
+}
+
+void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
+ // This procedure was essentially copied from DeadMachineInstructionElim.
+
+ SmallVector<MachineInstr*, 1> DeadPhis;
+ if (isDead(MI, DeadPhis)) {
+ DEBUG(dbgs() << "HW looping will remove: " << *MI);
+
+ // It is possible that some DBG_VALUE instructions refer to this
+ // instruction. Examine each def operand for such references;
+ // if found, mark the DBG_VALUE as undef (but don't delete it).
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ MachineRegisterInfo::use_iterator nextI;
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
+ E = MRI->use_end(); I != E; I = nextI) {
+ nextI = std::next(I); // I is invalidated by the setReg
+ MachineOperand &Use = *I;
+ MachineInstr *UseMI = I->getParent();
+ if (UseMI == MI)
+ continue;
+ if (Use.isDebug())
+ UseMI->getOperand(0).setReg(0U);
+ }
+ }
+
+ MI->eraseFromParent();
+ for (unsigned i = 0; i < DeadPhis.size(); ++i)
+ DeadPhis[i]->eraseFromParent();
+ }
+}
+
+/// \brief Check if the loop is a candidate for converting to a hardware
+/// loop. If so, then perform the transformation.
+///
+/// This function works on innermost loops first. A loop can be converted
+/// if it is a counting loop whose trip count is either a register value or an immediate.
+///
+/// The code makes several assumptions about the representation of the loop
+/// in llvm.
+bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
+ bool &RecL0used,
+ bool &RecL1used) {
+ // This is just for sanity.
+ assert(L->getHeader() && "Loop without a header?");
+
+ bool Changed = false;
+ bool L0Used = false;
+ bool L1Used = false;
+
+ // Process nested loops first.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
+ L0Used |= RecL0used;
+ L1Used |= RecL1used;
+ }
+
+ // If a nested loop has been converted, then we can't convert this loop.
+ if (Changed && L0Used && L1Used)
+ return Changed;
+
+ unsigned LOOP_i;
+ unsigned LOOP_r;
+ unsigned ENDLOOP;
+
+ // Flag used to track loopN instruction:
+  // 1 - Hardware loop is being generated for the innermost loop.
+ // 0 - Hardware loop is being generated for the outer loop.
+ unsigned IsInnerHWLoop = 1;
+
+ if (L0Used) {
+ LOOP_i = Hexagon::J2_loop1i;
+ LOOP_r = Hexagon::J2_loop1r;
+ ENDLOOP = Hexagon::ENDLOOP1;
+ IsInnerHWLoop = 0;
+ } else {
+ LOOP_i = Hexagon::J2_loop0i;
+ LOOP_r = Hexagon::J2_loop0r;
+ ENDLOOP = Hexagon::ENDLOOP0;
+ }
+
+#ifndef NDEBUG
+ // Stop trying after reaching the limit (if any).
+ int Limit = HWLoopLimit;
+ if (Limit >= 0) {
+ if (Counter >= HWLoopLimit)
+ return false;
+ Counter++;
+ }
+#endif
+
+ // Does the loop contain any invalid instructions?
+ if (containsInvalidInstruction(L, IsInnerHWLoop))
+ return false;
+
+ MachineBasicBlock *LastMBB = getExitingBlock(L);
+ // Don't generate hw loop if the loop has more than one exit.
+ if (!LastMBB)
+ return false;
+
+ MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+ if (LastI == LastMBB->end())
+ return false;
+
+ // Is the induction variable bump feeding the latch condition?
+ if (!fixupInductionVariable(L))
+ return false;
+
+ // Ensure the loop has a preheader: the loop instruction will be
+ // placed there.
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ Preheader = createPreheaderForLoop(L);
+ if (!Preheader)
+ return false;
+ }
+
+ MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
+
+ SmallVector<MachineInstr*, 2> OldInsts;
+ // Are we able to determine the trip count for the loop?
+ CountValue *TripCount = getLoopTripCount(L, OldInsts);
+ if (!TripCount)
+ return false;
+
+ // Is the trip count available in the preheader?
+ if (TripCount->isReg()) {
+ // There will be a use of the register inserted into the preheader,
+ // so make sure that the register is actually defined at that point.
+ MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
+ MachineBasicBlock *BBDef = TCDef->getParent();
+ if (!MDT->dominates(BBDef, Preheader))
+ return false;
+ }
+
+ // Determine the loop start.
+ MachineBasicBlock *TopBlock = L->getTopBlock();
+ MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+ MachineBasicBlock *LoopStart = 0;
+ if (ExitingBlock != L->getLoopLatch()) {
+ MachineBasicBlock *TB = 0, *FB = 0;
+ SmallVector<MachineOperand, 2> Cond;
+
+ if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false))
+ return false;
+
+ if (L->contains(TB))
+ LoopStart = TB;
+ else if (L->contains(FB))
+ LoopStart = FB;
+ else
+ return false;
+ }
+ else
+ LoopStart = TopBlock;
+
+ // Convert the loop to a hardware loop.
+ DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
+ DebugLoc DL;
+ if (InsertPos != Preheader->end())
+ DL = InsertPos->getDebugLoc();
+
+ if (TripCount->isReg()) {
+ // Create a copy of the loop count register.
+ unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
+ .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
+ // Add the Loop instruction to the beginning of the loop.
+ BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
+ .addReg(CountReg);
+ } else {
+ assert(TripCount->isImm() && "Expecting immediate value for trip count");
+ // Add the Loop immediate instruction to the beginning of the loop,
+ // if the immediate fits in the instructions. Otherwise, we need to
+ // create a new virtual register.
+ int64_t CountImm = TripCount->getImm();
+ if (!TII->isValidOffset(LOOP_i, CountImm)) {
+ unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
+ .addImm(CountImm);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
+ .addMBB(LoopStart).addReg(CountReg);
+ } else
+ BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
+ .addMBB(LoopStart).addImm(CountImm);
+ }
+
+ // Make sure the loop start always has a reference in the CFG. We need
+  // to create a BlockAddress operand to get this mechanism to work; both the
+ // MachineBasicBlock and BasicBlock objects need the flag set.
+ LoopStart->setHasAddressTaken();
+ // This line is needed to set the hasAddressTaken flag on the BasicBlock
+ // object.
+ BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
+
+ // Replace the loop branch with an endloop instruction.
+ DebugLoc LastIDL = LastI->getDebugLoc();
+ BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
+
+ // The loop ends with either:
+ // - a conditional branch followed by an unconditional branch, or
+ // - a conditional branch to the loop start.
+ if (LastI->getOpcode() == Hexagon::J2_jumpt ||
+ LastI->getOpcode() == Hexagon::J2_jumpf) {
+    // Delete the conditional branch and, if needed, add an unconditional
+    // branch out of the loop.
+ MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
+ LastI = LastMBB->erase(LastI);
+ if (!L->contains(BranchTarget)) {
+ if (LastI != LastMBB->end())
+ LastI = LastMBB->erase(LastI);
+ SmallVector<MachineOperand, 0> Cond;
+ TII->InsertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
+ }
+ } else {
+ // Conditional branch to loop start; just delete it.
+ LastMBB->erase(LastI);
+ }
+ delete TripCount;
+
+ // The induction operation and the comparison may now be
+ // unneeded. If these are unneeded, then remove them.
+ for (unsigned i = 0; i < OldInsts.size(); ++i)
+ removeIfDead(OldInsts[i]);
+
+ ++NumHWLoops;
+
+ // Set RecL1used and RecL0used only after hardware loop has been
+ // successfully generated. Doing it earlier can cause wrong loop instruction
+ // to be used.
+ if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
+ RecL1used = true;
+ else
+ RecL0used = true;
+
+ return true;
+}
+
+bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
+ MachineInstr *CmpI) {
+ assert (BumpI != CmpI && "Bump and compare in the same instruction?");
+
+ MachineBasicBlock *BB = BumpI->getParent();
+ if (CmpI->getParent() != BB)
+ return false;
+
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+ // Check if things are in order to begin with.
+ for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
+ if (&*I == CmpI)
+ return true;
+
+ // Out of order.
+ unsigned PredR = CmpI->getOperand(0).getReg();
+ bool FoundBump = false;
+ instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
+ for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
+ MachineInstr *In = &*I;
+ for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = In->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ if (MO.getReg() == PredR) // Found an intervening use of PredR.
+ return false;
+ }
+ }
+
+ if (In == BumpI) {
+ BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
+ FoundBump = true;
+ break;
+ }
+ }
+ assert (FoundBump && "Cannot determine instruction order");
+ return FoundBump;
+}
+
+/// This function is required to break recursion. Visiting phis in a loop may
+/// result in recursion during compilation. We break the recursion by making
+/// sure that we visit a MachineOperand and its definition in a
+/// MachineInstruction only once. If we attempt to visit more than once, then
+/// there is recursion, and we return false.
+bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
+ MachineInstr *MI,
+ const MachineOperand *MO,
+ LoopFeederMap &LoopFeederPhi) const {
+ if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
+ const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
+ DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
+ // Ignore all BBs that form Loop.
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Blocks[i];
+ if (A == MBB)
+ return false;
+ }
+ MachineInstr *Def = MRI->getVRegDef(MO->getReg());
+ LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
+ return true;
+ } else
+ // Already visited node.
+ return false;
+}
+
+/// Return true if a Phi may generate a value that can underflow.
+/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
+bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
+ MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
+ MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
+ assert(Phi->isPHI() && "Expecting a Phi.");
+ // Walk through each Phi, and its used operands. Make sure that
+ // if there is recursion in Phi, we won't generate hardware loops.
+ for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
+ if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
+ if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
+ Phi->getParent(), L, LoopFeederPhi))
+ return true;
+ return false;
+}
+
+/// Return true if the induction variable can underflow in the first iteration.
+/// An example is an initial unsigned value that is 0 and is decremented in the
+/// first iteration of a do-while loop. In this case, we cannot generate a
+/// hardware loop because the endloop instruction does not decrement the loop
+/// counter if it is <= 1. We only need to perform this analysis if the
+/// initial value is a register.
+///
+/// This function assumes the initial value may underflow unless proven
+/// otherwise. If the type is signed, then we don't care because signed
+/// underflow is undefined. We attempt to prove the initial value is not
+/// zero by performing a crude analysis of the loop counter. This function
+/// checks if the initial value is used in any comparison prior to the loop
+/// and, if so, assumes the comparison is a range check. This is inexact,
+/// but will catch the simple cases.
+bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
+ const MachineOperand *InitVal, const MachineOperand *EndVal,
+ MachineBasicBlock *MBB, MachineLoop *L,
+ LoopFeederMap &LoopFeederPhi) const {
+ // Only check register values since they are unknown.
+ if (!InitVal->isReg())
+ return false;
+
+ if (!EndVal->isImm())
+ return false;
+
+ // A register value that is assigned an immediate is a known value, and it
+ // won't underflow in the first iteration.
+ int64_t Imm;
+ if (checkForImmediate(*InitVal, Imm))
+ return (EndVal->getImm() == Imm);
+
+ unsigned Reg = InitVal->getReg();
+
+ // We don't know the value of a physical register.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return true;
+
+ MachineInstr *Def = MRI->getVRegDef(Reg);
+ if (!Def)
+ return true;
+
+ // If the initial value is a Phi or copy and the operands may not underflow,
+  // then the definition cannot underflow either.
+ if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
+ L, LoopFeederPhi))
+ return false;
+ if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
+ EndVal, Def->getParent(),
+ L, LoopFeederPhi))
+ return false;
+
+ // Iterate over the uses of the initial value. If the initial value is used
+ // in a compare, then we assume this is a range check that ensures the loop
+ // doesn't underflow. This is not an exact test and should be improved.
+ for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg),
+ E = MRI->use_instr_nodbg_end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ unsigned CmpReg1 = 0, CmpReg2 = 0;
+ int CmpMask = 0, CmpValue = 0;
+
+ if (!TII->analyzeCompare(MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
+ continue;
+
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 2> Cond;
+ if (TII->AnalyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
+ continue;
+
+ Comparison::Kind Cmp = getComparisonKind(MI->getOpcode(), 0, 0, 0);
+ if (Cmp == 0)
+ continue;
+ if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
+ Cmp = Comparison::getNegatedComparison(Cmp);
+ if (CmpReg2 != 0 && CmpReg2 == Reg)
+ Cmp = Comparison::getSwappedComparison(Cmp);
+
+ // Signed underflow is undefined.
+ if (Comparison::isSigned(Cmp))
+ return false;
+
+ // Check if there is a comparison of the initial value. If the initial value
+ // is greater than or not equal to another value, then assume this is a
+ // range check.
+ if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
+ return false;
+ }
+
+ // OK - this is a hack that needs to be improved. We really need to analyze
+ // the instructions performed on the initial value. This works on the simplest
+ // cases only.
+ if (!Def->isCopy() && !Def->isPHI())
+ return false;
+
+ return true;
+}
+
+bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
+ int64_t &Val) const {
+ if (MO.isImm()) {
+ Val = MO.getImm();
+ return true;
+ }
+ if (!MO.isReg())
+ return false;
+
+ // MO is a register. Check whether it is defined as an immediate value,
+ // and if so, get the value of it in TV. That value will then need to be
+ // processed to handle potential subregisters in MO.
+ int64_t TV;
+
+ unsigned R = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ return false;
+ MachineInstr *DI = MRI->getVRegDef(R);
+ unsigned DOpc = DI->getOpcode();
+ switch (DOpc) {
+ case TargetOpcode::COPY:
+ case Hexagon::A2_tfrsi:
+ case Hexagon::A2_tfrpi:
+ case Hexagon::CONST32_Int_Real:
+ case Hexagon::CONST64_Int_Real: {
+ // Call recursively to avoid an extra check whether operand(1) is
+ // indeed an immediate (it could be a global address, for example),
+ // plus we can handle COPY at the same time.
+ if (!checkForImmediate(DI->getOperand(1), TV))
+ return false;
+ break;
+ }
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineir:
+ case Hexagon::A4_combineii:
+ case Hexagon::A4_combineri:
+ case Hexagon::A2_combinew: {
+ const MachineOperand &S1 = DI->getOperand(1);
+ const MachineOperand &S2 = DI->getOperand(2);
+ int64_t V1, V2;
+ if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
+ return false;
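+    // Per the formula below, the first source forms the high 32 bits and the
+    // second source the low 32 bits of the combined 64-bit value.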
+ TV = V2 | (V1 << 32);
+ break;
+ }
+ case TargetOpcode::REG_SEQUENCE: {
+ const MachineOperand &S1 = DI->getOperand(1);
+ const MachineOperand &S3 = DI->getOperand(3);
+ int64_t V1, V3;
+ if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
+ return false;
+ unsigned Sub2 = DI->getOperand(2).getImm();
+ unsigned Sub4 = DI->getOperand(4).getImm();
+ if (Sub2 == Hexagon::subreg_loreg && Sub4 == Hexagon::subreg_hireg)
+ TV = V1 | (V3 << 32);
+ else if (Sub2 == Hexagon::subreg_hireg && Sub4 == Hexagon::subreg_loreg)
+ TV = V3 | (V1 << 32);
+ else
+ llvm_unreachable("Unexpected form of REG_SEQUENCE");
+ break;
+ }
+
+ default:
+ return false;
+ }
+
+  // By now, we should have successfully obtained the immediate value defining
+ // the register referenced in MO. Handle a potential use of a subregister.
+ switch (MO.getSubReg()) {
+ case Hexagon::subreg_loreg:
+ Val = TV & 0xFFFFFFFFULL;
+ break;
+ case Hexagon::subreg_hireg:
+ Val = (TV >> 32) & 0xFFFFFFFFULL;
+ break;
+ default:
+ Val = TV;
+ break;
+ }
+ return true;
+}
+
+void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
+ if (MO.isImm()) {
+ MO.setImm(Val);
+ return;
+ }
+
+ assert(MO.isReg());
+ unsigned R = MO.getReg();
+ MachineInstr *DI = MRI->getVRegDef(R);
+
+ const TargetRegisterClass *RC = MRI->getRegClass(R);
+ unsigned NewR = MRI->createVirtualRegister(RC);
+ MachineBasicBlock &B = *DI->getParent();
+ DebugLoc DL = DI->getDebugLoc();
+ BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
+ MO.setReg(NewR);
+}
+
+static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) {
+ // These two instructions are not extendable.
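+  // A4_cmpbeqi takes an unsigned 8-bit immediate (0..255), while A4_cmpbgti
+  // takes a signed 8-bit immediate (-128..127).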
+ if (CmpOpc == Hexagon::A4_cmpbeqi)
+ return isUInt<8>(Imm);
+ if (CmpOpc == Hexagon::A4_cmpbgti)
+ return isInt<8>(Imm);
+ // The rest of the comparison-with-immediate instructions are extendable.
+ return true;
+}
+
+bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
+ MachineBasicBlock *Header = L->getHeader();
+ MachineBasicBlock *Latch = L->getLoopLatch();
+ MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+
+ if (!(Header && Latch && ExitingBlock))
+ return false;
+
+ // These data structures follow the same concept as the corresponding
+  // ones in findInductionRegister (see the comments there).
+ typedef std::pair<unsigned,int64_t> RegisterBump;
+ typedef std::pair<unsigned,RegisterBump> RegisterInduction;
+ typedef std::set<RegisterInduction> RegisterInductionSet;
+
+ // Register candidates for induction variables, with their associated bumps.
+ RegisterInductionSet IndRegs;
+
+ // Look for induction patterns:
+ // vreg1 = PHI ..., [ latch, vreg2 ]
+ // vreg2 = ADD vreg1, imm
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *Phi = &*I;
+
+ // Have a PHI instruction.
+ for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
+ if (Phi->getOperand(i+1).getMBB() != Latch)
+ continue;
+
+ unsigned PhiReg = Phi->getOperand(i).getReg();
+ MachineInstr *DI = MRI->getVRegDef(PhiReg);
+ unsigned UpdOpc = DI->getOpcode();
+ bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp);
+
+ if (isAdd) {
+ // If the register operand to the add/sub is the PHI we are looking
+ // at, this meets the induction pattern.
+ unsigned IndReg = DI->getOperand(1).getReg();
+ MachineOperand &Opnd2 = DI->getOperand(2);
+ int64_t V;
+ if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
+ unsigned UpdReg = DI->getOperand(0).getReg();
+ IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
+ }
+ }
+ } // for (i)
+ } // for (instr)
+
+ if (IndRegs.empty())
+ return false;
+
+ MachineBasicBlock *TB = nullptr, *FB = nullptr;
+ SmallVector<MachineOperand,2> Cond;
+ // AnalyzeBranch returns true if it fails to analyze branch.
+ bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false);
+ if (NotAnalyzed || Cond.empty())
+ return false;
+
+ if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
+ MachineBasicBlock *LTB = 0, *LFB = 0;
+ SmallVector<MachineOperand,2> LCond;
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false);
+ if (NotAnalyzed)
+ return false;
+
+ // Since latch is not the exiting block, the latch branch should be an
+ // unconditional branch to the loop header.
+ if (TB == Latch)
+ TB = (LTB == Header) ? LTB : LFB;
+ else
+ FB = (LTB == Header) ? LTB : LFB;
+ }
+ if (TB != Header) {
+ if (FB != Header) {
+ // The latch/exit block does not go back to the header.
+ return false;
+ }
+    // FB is the header (i.e., an unconditional jump to the header).
+ // In this case, the LoopBody -> TB should not be a back edge otherwise
+ // it could result in an infinite loop after conversion to hw_loop.
+ // This case can happen when the Latch has two jumps like this:
+ // Jmp_c OuterLoopHeader <-- TB
+ // Jmp InnerLoopHeader <-- FB
+ if (MDT->dominates(TB, FB))
+ return false;
+ }
+
+ // Expecting a predicate register as a condition. It won't be a hardware
+ // predicate register at this point yet, just a vreg.
+ // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
+ // into Cond, followed by the predicate register. For non-negated branches
+ // it's just the register.
+ unsigned CSz = Cond.size();
+ if (CSz != 1 && CSz != 2)
+ return false;
+
+ if (!Cond[CSz-1].isReg())
+ return false;
+
+ unsigned P = Cond[CSz-1].getReg();
+ MachineInstr *PredDef = MRI->getVRegDef(P);
+
+ if (!PredDef->isCompare())
+ return false;
+
+ SmallSet<unsigned,2> CmpRegs;
+ MachineOperand *CmpImmOp = nullptr;
+
+ // Go over all operands to the compare and look for immediate and register
+ // operands. Assume that if the compare has a single register use and a
+ // single immediate operand, then the register is being compared with the
+ // immediate value.
+ for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = PredDef->getOperand(i);
+ if (MO.isReg()) {
+ // Skip all implicit references. In one case there was:
+ // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use>
+ if (MO.isImplicit())
+ continue;
+ if (MO.isUse()) {
+ if (!isImmediate(MO)) {
+ CmpRegs.insert(MO.getReg());
+ continue;
+ }
+ // Consider the register to be the "immediate" operand.
+ if (CmpImmOp)
+ return false;
+ CmpImmOp = &MO;
+ }
+ } else if (MO.isImm()) {
+ if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
+ return false;
+ CmpImmOp = &MO;
+ }
+ }
+
+ if (CmpRegs.empty())
+ return false;
+
+ // Check if the compared register follows the order we want. Fix if needed.
+ for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
+ I != E; ++I) {
+ // This is a success. If the register used in the comparison is one that
+ // we have identified as a bumped (updated) induction register, there is
+ // nothing to do.
+ if (CmpRegs.count(I->first))
+ return true;
+
+ // Otherwise, if the register being compared comes out of a PHI node,
+ // and has been recognized as following the induction pattern, and is
+ // compared against an immediate, we can fix it.
+ const RegisterBump &RB = I->second;
+ if (CmpRegs.count(RB.first)) {
+ if (!CmpImmOp) {
+ // If both operands to the compare instruction are registers, see if
+ // it can be changed to use induction register as one of the operands.
+ MachineInstr *IndI = nullptr;
+ MachineInstr *nonIndI = nullptr;
+ MachineOperand *IndMO = nullptr;
+ MachineOperand *nonIndMO = nullptr;
+
+ for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = PredDef->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RB.first) {
+ DEBUG(dbgs() << "\n DefMI(" << i << ") = "
+ << *(MRI->getVRegDef(I->first)));
+ if (IndI)
+ return false;
+
+ IndI = MRI->getVRegDef(I->first);
+ IndMO = &MO;
+ } else if (MO.isReg()) {
+ DEBUG(dbgs() << "\n DefMI(" << i << ") = "
+ << *(MRI->getVRegDef(MO.getReg())));
+ if (nonIndI)
+ return false;
+
+ nonIndI = MRI->getVRegDef(MO.getReg());
+ nonIndMO = &MO;
+ }
+ }
+ if (IndI && nonIndI &&
+ nonIndI->getOpcode() == Hexagon::A2_addi &&
+ nonIndI->getOperand(2).isImm() &&
+ nonIndI->getOperand(2).getImm() == - RB.second) {
+ bool Order = orderBumpCompare(IndI, PredDef);
+ if (Order) {
+ IndMO->setReg(I->first);
+ nonIndMO->setReg(nonIndI->getOperand(1).getReg());
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // It is not valid to do this transformation on an unsigned comparison
+ // because it may underflow.
+ Comparison::Kind Cmp = getComparisonKind(PredDef->getOpcode(), 0, 0, 0);
+ if (!Cmp || Comparison::isUnsigned(Cmp))
+ return false;
+
+ // If the register is being compared against an immediate, try changing
+ // the compare instruction to use induction register and adjust the
+ // immediate operand.
+ int64_t CmpImm = getImmediate(*CmpImmOp);
+ int64_t V = RB.second;
+ // Handle Overflow (64-bit).
+ if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
+ ((V < 0) && (CmpImm < INT64_MIN - V)))
+ return false;
+ CmpImm += V;
+ // Most comparisons of register against an immediate value allow
+ // the immediate to be constant-extended. There are some exceptions
+ // though. Make sure the new combination will work.
+ if (CmpImmOp->isImm())
+ if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm))
+ return false;
+
+ // Make sure that the compare happens after the bump. Otherwise,
+ // after the fixup, the compare would use a yet-undefined register.
+ MachineInstr *BumpI = MRI->getVRegDef(I->first);
+ bool Order = orderBumpCompare(BumpI, PredDef);
+ if (!Order)
+ return false;
+
+ // Finally, fix the compare instruction.
+ setImmediate(*CmpImmOp, CmpImm);
+ for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = PredDef->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RB.first) {
+ MO.setReg(I->first);
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+/// \brief Create a preheader for a given loop.
+MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
+ MachineLoop *L) {
+ if (MachineBasicBlock *TmpPH = L->getLoopPreheader())
+ return TmpPH;
+
+ if (!HWCreatePreheader)
+ return nullptr;
+
+ MachineBasicBlock *Header = L->getHeader();
+ MachineBasicBlock *Latch = L->getLoopLatch();
+ MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+ MachineFunction *MF = Header->getParent();
+ DebugLoc DL;
+
+#ifndef NDEBUG
+ if ((PHFn != "") && (PHFn != MF->getName()))
+ return nullptr;
+#endif
+
+ if (!Latch || !ExitingBlock || Header->hasAddressTaken())
+ return nullptr;
+
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+
+ // Verify that all existing predecessors have analyzable branches
+ // (or no branches at all).
+ typedef std::vector<MachineBasicBlock*> MBBVector;
+ MBBVector Preds(Header->pred_begin(), Header->pred_end());
+ SmallVector<MachineOperand,2> Tmp1;
+ MachineBasicBlock *TB = nullptr, *FB = nullptr;
+
+ if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
+ return nullptr;
+
+ for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
+ MachineBasicBlock *PB = *I;
+ bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false);
+ if (NotAnalyzed)
+ return nullptr;
+ }
+
+ MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
+ MF->insert(Header->getIterator(), NewPH);
+
+ if (Header->pred_size() > 2) {
+ // Ensure that the header has only two predecessors: the preheader and
+ // the loop latch. Any additional predecessors of the header should
+ // join at the newly created preheader. Inspect all PHI nodes from the
+ // header and create appropriate corresponding PHI nodes in the preheader.
+
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *PN = &*I;
+
+ const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
+ MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
+ NewPH->insert(NewPH->end(), NewPN);
+
+ unsigned PR = PN->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(PR);
+ unsigned NewPR = MRI->createVirtualRegister(RC);
+ NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
+
+ // Copy all non-latch operands of a header's PHI node to the newly
+ // created PHI node in the preheader.
+ for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
+ unsigned PredR = PN->getOperand(i).getReg();
+ unsigned PredRSub = PN->getOperand(i).getSubReg();
+ MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
+ if (PredB == Latch)
+ continue;
+
+ MachineOperand MO = MachineOperand::CreateReg(PredR, false);
+ MO.setSubReg(PredRSub);
+ NewPN->addOperand(MO);
+ NewPN->addOperand(MachineOperand::CreateMBB(PredB));
+ }
+
+ // Remove copied operands from the old PHI node and add the value
+ // coming from the preheader's PHI.
+ for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
+ MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
+ if (PredB != Latch) {
+ PN->RemoveOperand(i+1);
+ PN->RemoveOperand(i);
+ }
+ }
+ PN->addOperand(MachineOperand::CreateReg(NewPR, false));
+ PN->addOperand(MachineOperand::CreateMBB(NewPH));
+ }
+
+ } else {
+ assert(Header->pred_size() == 2);
+
+ // The header has only two predecessors, but the non-latch predecessor
+ // is not a preheader (e.g. it has other successors, etc.)
+ // In such a case we don't need any extra PHI nodes in the new preheader,
+ // all we need is to adjust existing PHIs in the header to now refer to
+ // the new preheader.
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *PN = &*I;
+ for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
+ MachineOperand &MO = PN->getOperand(i+1);
+ if (MO.getMBB() != Latch)
+ MO.setMBB(NewPH);
+ }
+ }
+ }
+
+ // "Reroute" the CFG edges to link in the new preheader.
+ // If any of the predecessors falls through to the header, insert a branch
+ // to the new preheader in that place.
+ SmallVector<MachineOperand,1> Tmp2;
+ SmallVector<MachineOperand,1> EmptyCond;
+
+ TB = FB = nullptr;
+
+ for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
+ MachineBasicBlock *PB = *I;
+ if (PB != Latch) {
+ Tmp2.clear();
+ bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false);
+ (void)NotAnalyzed; // suppress compiler warning
+ assert (!NotAnalyzed && "Should be analyzable!");
+ if (TB != Header && (Tmp2.empty() || FB != Header))
+ TII->InsertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
+ PB->ReplaceUsesOfBlockWith(Header, NewPH);
+ }
+ }
+
+ // It can happen that the latch block will fall through into the header.
+ // Insert an unconditional branch to the header.
+ TB = FB = nullptr;
+ bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false);
+ (void)LatchNotAnalyzed; // suppress compiler warning
+ assert (!LatchNotAnalyzed && "Should be analyzable!");
+ if (!TB && !FB)
+ TII->InsertBranch(*Latch, Header, nullptr, EmptyCond, DL);
+
+ // Finally, the branch from the preheader to the header.
+ TII->InsertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
+ NewPH->addSuccessor(Header);
+
+ MachineLoop *ParentLoop = L->getParentLoop();
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase());
+
+ // Update the dominator information with the new preheader.
+ if (MDT) {
+ MachineDomTreeNode *HDom = MDT->getNode(Header);
+ MDT->addNewBlock(NewPH, HDom->getIDom()->getBlock());
+ MDT->changeImmediateDominator(Header, NewPH);
+ }
+
+ return NewPH;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
new file mode 100644
index 0000000..a0da945
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -0,0 +1,1563 @@
+//===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the Hexagon target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonISelLowering.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-isel"
+
+static
+cl::opt<unsigned>
+MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
+ cl::Hidden, cl::init(2),
+ cl::desc("Maximum number of uses of a global address such that we still us a"
+ "constant extended instruction"));
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+ void initializeHexagonDAGToDAGISelPass(PassRegistry&);
+}
+
+//===--------------------------------------------------------------------===//
+/// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class HexagonDAGToDAGISel : public SelectionDAGISel {
+ const HexagonTargetMachine& HTM;
+ const HexagonSubtarget *HST;
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
+public:
+ explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel), HTM(tm), HST(nullptr), HII(nullptr),
+ HRI(nullptr) {
+ initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // Reset the subtarget each time through.
+ HST = &MF.getSubtarget<HexagonSubtarget>();
+ HII = HST->getInstrInfo();
+ HRI = HST->getRegisterInfo();
+ SelectionDAGISel::runOnMachineFunction(MF);
+ return true;
+ }
+
+ virtual void PreprocessISelDAG() override;
+ virtual void EmitFunctionEntryCode() override;
+
+ SDNode *Select(SDNode *N) override;
+
+ // Complex Pattern Selectors.
+ inline bool SelectAddrGA(SDValue &N, SDValue &R);
+ inline bool SelectAddrGP(SDValue &N, SDValue &R);
+ bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP);
+ bool SelectAddrFI(SDValue &N, SDValue &R);
+
+ const char *getPassName() const override {
+ return "Hexagon DAG->DAG Pattern Instruction Selection";
+ }
+
+ SDNode *SelectFrameIndex(SDNode *N);
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) override;
+ SDNode *SelectLoad(SDNode *N);
+ SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl);
+ SDNode *SelectIndexedLoad(LoadSDNode *LD, SDLoc dl);
+ SDNode *SelectIndexedLoadZeroExtend64(LoadSDNode *LD, unsigned Opcode,
+ SDLoc dl);
+ SDNode *SelectIndexedLoadSignExtend64(LoadSDNode *LD, unsigned Opcode,
+ SDLoc dl);
+ SDNode *SelectBaseOffsetStore(StoreSDNode *ST, SDLoc dl);
+ SDNode *SelectIndexedStore(StoreSDNode *ST, SDLoc dl);
+ SDNode *SelectStore(SDNode *N);
+ SDNode *SelectSHL(SDNode *N);
+ SDNode *SelectMul(SDNode *N);
+ SDNode *SelectZeroExtend(SDNode *N);
+ SDNode *SelectIntrinsicWChain(SDNode *N);
+ SDNode *SelectIntrinsicWOChain(SDNode *N);
+ SDNode *SelectConstant(SDNode *N);
+ SDNode *SelectConstantFP(SDNode *N);
+ SDNode *SelectAdd(SDNode *N);
+ SDNode *SelectBitOp(SDNode *N);
+
+ // XformMskToBitPosU5Imm - Returns the bit position which
+ // the single bit 32 bit mask represents.
+ // Used in Clr and Set bit immediate memops.
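+  // E.g. a mask of 0x00000080 yields bit position 7.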
+ SDValue XformMskToBitPosU5Imm(uint32_t Imm, SDLoc DL) {
+ int32_t bitPos;
+ bitPos = Log2_32(Imm);
+ assert(bitPos >= 0 && bitPos < 32 &&
+ "Constant out of range for 32 BitPos Memops");
+ return CurDAG->getTargetConstant(bitPos, DL, MVT::i32);
+ }
+
+ // XformMskToBitPosU4Imm - Returns the bit position which the single-bit
+ // 16 bit mask represents. Used in Clr and Set bit immediate memops.
+ SDValue XformMskToBitPosU4Imm(uint16_t Imm, SDLoc DL) {
+ return XformMskToBitPosU5Imm(Imm, DL);
+ }
+
+ // XformMskToBitPosU3Imm - Returns the bit position which the single-bit
+ // 8 bit mask represents. Used in Clr and Set bit immediate memops.
+ SDValue XformMskToBitPosU3Imm(uint8_t Imm, SDLoc DL) {
+ return XformMskToBitPosU5Imm(Imm, DL);
+ }
+
+ // Return true if there is exactly one bit set in V, i.e., if V is one of the
+ // following integers: 2^0, 2^1, ..., 2^31.
+ bool ImmIsSingleBit(uint32_t v) const {
+ return isPowerOf2_32(v);
+ }
+
+ // XformM5ToU5Imm - Return a target constant with the specified value, of
+ // type i32 where the negative literal is transformed into a positive literal
+ // for use in -= memops.
+ inline SDValue XformM5ToU5Imm(signed Imm, SDLoc DL) {
+ assert((Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
+ return CurDAG->getTargetConstant(-Imm, DL, MVT::i32);
+ }
+
+ // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
+ // [1..128], used in cmpb.gtu instructions.
+ inline SDValue XformU7ToU7M1Imm(signed Imm, SDLoc DL) {
+ assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
+ return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i8);
+ }
+
+ // XformS8ToS8M1Imm - Return a target constant decremented by 1.
+ inline SDValue XformSToSM1Imm(signed Imm, SDLoc DL) {
+ return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32);
+ }
+
+ // XformU8ToU8M1Imm - Return a target constant decremented by 1.
+ inline SDValue XformUToUM1Imm(unsigned Imm, SDLoc DL) {
+ assert((Imm >= 1) && "Cannot decrement unsigned int less than 1");
+ return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32);
+ }
+
+ // XformSToSM2Imm - Return a target constant decremented by 2.
+ inline SDValue XformSToSM2Imm(unsigned Imm, SDLoc DL) {
+ return CurDAG->getTargetConstant(Imm - 2, DL, MVT::i32);
+ }
+
+ // XformSToSM3Imm - Return a target constant decremented by 3.
+ inline SDValue XformSToSM3Imm(unsigned Imm, SDLoc DL) {
+ return CurDAG->getTargetConstant(Imm - 3, DL, MVT::i32);
+ }
+
+ // Include the pieces autogenerated from the target description.
+ #include "HexagonGenDAGISel.inc"
+
+private:
+ bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src);
+}; // end HexagonDAGToDAGISel
+} // end anonymous namespace
+
+
+/// createHexagonISelDag - This pass converts a legalized DAG into a
+/// Hexagon-specific DAG, ready for instruction scheduling.
+///
+namespace llvm {
+FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new HexagonDAGToDAGISel(TM, OptLevel);
+}
+}
+
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection";
+ PassInfo *PI = new PassInfo(Name, "hexagon-isel",
+ &SelectionDAGISel::ID, nullptr, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+
+// Intrinsics that return a predicate.
+static bool doesIntrinsicReturnPredicate(unsigned ID) {
+ switch (ID) {
+ default:
+ return false;
+ case Intrinsic::hexagon_C2_cmpeq:
+ case Intrinsic::hexagon_C2_cmpgt:
+ case Intrinsic::hexagon_C2_cmpgtu:
+ case Intrinsic::hexagon_C2_cmpgtup:
+ case Intrinsic::hexagon_C2_cmpgtp:
+ case Intrinsic::hexagon_C2_cmpeqp:
+ case Intrinsic::hexagon_C2_bitsset:
+ case Intrinsic::hexagon_C2_bitsclr:
+ case Intrinsic::hexagon_C2_cmpeqi:
+ case Intrinsic::hexagon_C2_cmpgti:
+ case Intrinsic::hexagon_C2_cmpgtui:
+ case Intrinsic::hexagon_C2_cmpgei:
+ case Intrinsic::hexagon_C2_cmpgeui:
+ case Intrinsic::hexagon_C2_cmplt:
+ case Intrinsic::hexagon_C2_cmpltu:
+ case Intrinsic::hexagon_C2_bitsclri:
+ case Intrinsic::hexagon_C2_and:
+ case Intrinsic::hexagon_C2_or:
+ case Intrinsic::hexagon_C2_xor:
+ case Intrinsic::hexagon_C2_andn:
+ case Intrinsic::hexagon_C2_not:
+ case Intrinsic::hexagon_C2_orn:
+ case Intrinsic::hexagon_C2_pxfer_map:
+ case Intrinsic::hexagon_C2_any8:
+ case Intrinsic::hexagon_C2_all8:
+ case Intrinsic::hexagon_A2_vcmpbeq:
+ case Intrinsic::hexagon_A2_vcmpbgtu:
+ case Intrinsic::hexagon_A2_vcmpheq:
+ case Intrinsic::hexagon_A2_vcmphgt:
+ case Intrinsic::hexagon_A2_vcmphgtu:
+ case Intrinsic::hexagon_A2_vcmpweq:
+ case Intrinsic::hexagon_A2_vcmpwgt:
+ case Intrinsic::hexagon_A2_vcmpwgtu:
+ case Intrinsic::hexagon_C2_tfrrp:
+ case Intrinsic::hexagon_S2_tstbit_i:
+ case Intrinsic::hexagon_S2_tstbit_r:
+ return true;
+ }
+}
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
+ unsigned Opcode,
+ SDLoc dl) {
+ SDValue Chain = LD->getChain();
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+
+ if (HII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConst = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
+ MVT::Other, Base, TargetConst,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
+ SDValue(Result_1, 0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2) };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2) };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
+ }
+
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other,
+ Base, TargetConst0, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
+ SDValue(Result_1, 0));
+ SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2) };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_3, 0),
+ SDValue(Result_1, 1) };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
+ unsigned Opcode,
+ SDLoc dl) {
+ SDValue Chain = LD->getChain();
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+
+ if (HII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl,
+ MVT::i64, MVT::Other,
+ TargetConst0,
+ SDValue(Result_1,0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2) };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2) };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
+ }
+
+ // Generate an indirect load.
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl,
+ MVT::i64, MVT::Other,
+ TargetConst0,
+ SDValue(Result_1,0));
+ // Add offset to base.
+ SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2) };
+ const SDValue Tos[] = { SDValue(Result_2, 0), // Load value.
+ SDValue(Result_3, 0), // New address.
+ SDValue(Result_1, 1) };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ // Get the constant value.
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ EVT LoadedVT = LD->getMemoryVT();
+ unsigned Opcode = 0;
+
+ // Check for zero extended loads. Treat any-extend loads as zero extended
+ // loads.
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
+ bool HasVecOffset = false;
+
+ // Figure out the opcode.
+ if (LoadedVT == MVT::i64) {
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = Hexagon::L2_loadrd_pi;
+ else
+ Opcode = Hexagon::L2_loadrd_io;
+ } else if (LoadedVT == MVT::i32) {
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = Hexagon::L2_loadri_pi;
+ else
+ Opcode = Hexagon::L2_loadri_io;
+ } else if (LoadedVT == MVT::i16) {
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = IsZeroExt ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi;
+ else
+ Opcode = IsZeroExt ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io;
+ } else if (LoadedVT == MVT::i8) {
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = IsZeroExt ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi;
+ else
+ Opcode = IsZeroExt ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io;
+ } else if (LoadedVT == MVT::v16i32 || LoadedVT == MVT::v8i64 ||
+ LoadedVT == MVT::v32i16 || LoadedVT == MVT::v64i8) {
+ HasVecOffset = true;
+ if (HII->isValidAutoIncImm(LoadedVT, Val)) {
+ Opcode = Hexagon::V6_vL32b_pi;
+ }
+ else
+ Opcode = Hexagon::V6_vL32b_ai;
+ // 128B
+ } else if (LoadedVT == MVT::v32i32 || LoadedVT == MVT::v16i64 ||
+ LoadedVT == MVT::v64i16 || LoadedVT == MVT::v128i8) {
+ HasVecOffset = true;
+ if (HII->isValidAutoIncImm(LoadedVT, Val)) {
+ Opcode = Hexagon::V6_vL32b_pi_128B;
+ }
+ else
+ Opcode = Hexagon::V6_vL32b_ai_128B;
+ } else
+ llvm_unreachable("unknown memory type");
+
+ // For zero extended i64 loads, we need to add combine instructions.
+ if (LD->getValueType(0) == MVT::i64 && IsZeroExt)
+ return SelectIndexedLoadZeroExtend64(LD, Opcode, dl);
+ // Handle sign extended i64 loads.
+ if (LD->getValueType(0) == MVT::i64 && ExtType == ISD::SEXTLOAD)
+ return SelectIndexedLoadSignExtend64(LD, Opcode, dl);
+
+ if (HII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ if (HasVecOffset) {
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Result, 2)
+ };
+ ReplaceUses(Froms, Tos, 2);
+ } else {
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Result, 1),
+ SDValue(Result, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ }
+ return Result;
+ } else {
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+ SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_1, 0),
+ SDValue(Result_2, 0),
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_1;
+ }
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
+ SDNode *result;
+ SDLoc dl(N);
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+
+ // Handle indexed loads.
+ if (AM != ISD::UNINDEXED) {
+ result = SelectIndexedLoad(LD, dl);
+ } else {
+ result = SelectCode(LD);
+ }
+
+ return result;
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
+ SDValue Chain = ST->getChain();
+ SDValue Base = ST->getBasePtr();
+ SDValue Offset = ST->getOffset();
+ SDValue Value = ST->getValue();
+ SDNode *OffsetNode = Offset.getNode();
+ // Get the constant value.
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ EVT StoredVT = ST->getMemoryVT();
+ EVT ValueVT = Value.getValueType();
+
+ // Offset value must be within representable range
+ // and must have correct alignment properties.
+ if (HII->isValidAutoIncImm(StoredVT, Val)) {
+ unsigned Opcode = 0;
+
+ // Figure out the post inc version of opcode.
+ if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_pi;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_pi;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_pi;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi;
+ else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 ||
+ StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8) {
+ Opcode = Hexagon::V6_vS32b_pi;
+ }
+ // 128B
+ else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 ||
+ StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8) {
+ Opcode = Hexagon::V6_vS32b_pi_128B;
+ } else llvm_unreachable("unknown memory type");
+
+ if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
+ assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
+ Value = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg,
+ dl, MVT::i32, Value);
+ }
+ SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, dl, MVT::i32), Value,
+ Chain};
+ // Build post increment store.
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Ops);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(ST, Result);
+ ReplaceUses(SDValue(ST,1), SDValue(Result,1));
+ return Result;
+ }
+
+ // Note: Order of operands matches the def of instruction:
+ // def S2_storerd_io
+ // : STInst<(outs), (ins IntRegs:$base, imm:$offset, DoubleRegs:$src1), ...
+ // and it differs for POST_ST* for instance.
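+  // Sketch of the fallback: emit a plain base+offset store plus a separate
+  // add for the post-incremented address, roughly
+  //   memw(Rbase+#0) = Rvalue
+  //   Rnew = add(Rbase, #Val)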
+ SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, dl, MVT::i32), Value,
+ Chain};
+ unsigned Opcode = 0;
+
+ // Figure out the opcode.
+ if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_io;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io;
+ else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 ||
+ StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8)
+ Opcode = Hexagon::V6_vS32b_ai;
+ // 128B
+ else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 ||
+ StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8)
+ Opcode = Hexagon::V6_vS32b_ai_128B;
+ else llvm_unreachable("unknown memory type");
+
+ // Build regular store.
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+ SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
+  // Build the split increment instruction.
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
+ Base,
+ TargetConstVal,
+ SDValue(Result_1, 0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(SDValue(ST,0), SDValue(Result_2,0));
+ ReplaceUses(SDValue(ST,1), SDValue(Result_1,0));
+ return Result_2;
+}
+
+SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
+ SDLoc dl(N);
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+
+ // Handle indexed stores.
+ if (AM != ISD::UNINDEXED) {
+ return SelectIndexedStore(ST, dl);
+ }
+
+ return SelectCode(ST);
+}
+
+SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
+ SDLoc dl(N);
+
+ //
+ // %conv.i = sext i32 %tmp1 to i64
+ // %conv2.i = sext i32 %add to i64
+ // %mul.i = mul nsw i64 %conv2.i, %conv.i
+ //
+ // --- match with the following ---
+ //
+ // %mul.i = mpy (%tmp1, %add)
+ //
+
+ if (N->getValueType(0) == MVT::i64) {
+ // Shifting a i64 signed multiply.
+ SDValue MulOp0 = N->getOperand(0);
+ SDValue MulOp1 = N->getOperand(1);
+
+ SDValue OP0;
+ SDValue OP1;
+
+ // Handle sign_extend and sextload.
+ if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext0 = MulOp0.getOperand(0);
+ if (Sext0.getNode()->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ OP0 = Sext0;
+ } else if (MulOp0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(), TargetConst0,
+ Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Same goes for the second operand.
+ if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext1 = MulOp1.getOperand(0);
+ if (Sext1.getNode()->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ OP1 = Sext1;
+ } else if (MulOp1.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(), TargetConst0,
+ Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Generate a mpy instruction.
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_dpmpyss_s0, dl, MVT::i64,
+ OP0, OP1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+
+ return SelectCode(N);
+}
+
+SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
+ SDLoc dl(N);
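+  // Two folds are attempted here (the shift amounts must be constants):
+  //   (shl (mul x, C1), C2)          -> mpyi(x, C1 << C2)
+  //   (shl (sub 0, (shl x, C3)), C2) -> mpyi(x, -(1 << (C2 + C3)))
+  // as long as the resulting multiplier fits a signed 9-bit immediate.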
+ if (N->getValueType(0) == MVT::i32) {
+ SDValue Shl_0 = N->getOperand(0);
+ SDValue Shl_1 = N->getOperand(1);
+ // RHS is const.
+ if (Shl_1.getOpcode() == ISD::Constant) {
+ if (Shl_0.getOpcode() == ISD::MUL) {
+ SDValue Mul_0 = Shl_0.getOperand(0); // Val
+ SDValue Mul_1 = Shl_0.getOperand(1); // Const
+ // RHS of mul is const.
+ if (Mul_1.getOpcode() == ISD::Constant) {
+ int32_t ShlConst =
+ cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+ int32_t MulConst =
+ cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue();
+ int32_t ValConst = MulConst << ShlConst;
+ SDValue Val = CurDAG->getTargetConstant(ValConst, dl,
+ MVT::i32);
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode()))
+ if (isInt<9>(CN->getSExtValue())) {
+ SDNode* Result =
+ CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
+ MVT::i32, Mul_0, Val);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+
+ }
+ } else if (Shl_0.getOpcode() == ISD::SUB) {
+ SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
+ SDValue Sub_1 = Shl_0.getOperand(1); // Val
+ if (Sub_0.getOpcode() == ISD::Constant) {
+ int32_t SubConst =
+ cast<ConstantSDNode>(Sub_0.getNode())->getSExtValue();
+ if (SubConst == 0) {
+ if (Sub_1.getOpcode() == ISD::SHL) {
+ SDValue Shl2_0 = Sub_1.getOperand(0); // Val
+ SDValue Shl2_1 = Sub_1.getOperand(1); // Const
+ if (Shl2_1.getOpcode() == ISD::Constant) {
+ int32_t ShlConst =
+ cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+ int32_t Shl2Const =
+ cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue();
+ int32_t ValConst = 1 << (ShlConst+Shl2Const);
+ SDValue Val = CurDAG->getTargetConstant(-ValConst, dl,
+ MVT::i32);
+ if (ConstantSDNode *CN =
+ dyn_cast<ConstantSDNode>(Val.getNode()))
+ if (isInt<9>(CN->getSExtValue())) {
+ SDNode* Result =
+ CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, MVT::i32,
+ Shl2_0, Val);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return SelectCode(N);
+}
+
+
+//
+// If there is a zero_extend that follows an intrinsic in the DAG (which
+// means the result of the intrinsic is a predicate), convert the
+// zero_extend into a transfer instruction.
+//
+// The zero_extend -> transfer lowering happens here. Otherwise, the
+// zero_extend would be converted into a MUX, because predicate registers
+// are defined as 1 bit in the compiler while the architecture defines them
+// as 8-bit registers. We want to preserve all of the lower 8 bits, not
+// just the LSB.
+//
+SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
+ SDLoc dl(N);
+
+ SDValue Op0 = N->getOperand(0);
+ EVT OpVT = Op0.getValueType();
+ unsigned OpBW = OpVT.getSizeInBits();
+
+ // Special handling for zero-extending a vector of booleans.
+ if (OpVT.isVector() && OpVT.getVectorElementType() == MVT::i1 && OpBW <= 64) {
+ SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0);
+ unsigned NE = OpVT.getVectorNumElements();
+ EVT ExVT = N->getValueType(0);
+ unsigned ES = ExVT.getVectorElementType().getSizeInBits();
+ uint64_t MV = 0, Bit = 1;
+ for (unsigned i = 0; i < NE; ++i) {
+ MV |= Bit;
+ Bit <<= ES;
+ }
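+    // E.g. for a v4i1 -> v4i8 zero-extend, NE = 4 and ES = 8, so MV becomes
+    // 0x01010101: it keeps only the low bit of each byte produced by
+    // C2_mask.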
+ SDValue Ones = CurDAG->getTargetConstant(MV, dl, MVT::i64);
+ SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64_Int_Real, dl,
+ MVT::i64, Ones);
+ if (ExVT.getSizeInBits() == 32) {
+ SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64,
+ SDValue(Mask,0), SDValue(OnesReg,0));
+ SDValue SubR = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl,
+ MVT::i32);
+ return CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT,
+ SDValue(And,0), SubR);
+ }
+ return CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT,
+ SDValue(Mask,0), SDValue(OnesReg,0));
+ }
+
+ SDNode *IsIntrinsic = N->getOperand(0).getNode();
+ if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
+ unsigned ID =
+ cast<ConstantSDNode>(IsIntrinsic->getOperand(0))->getZExtValue();
+ if (doesIntrinsicReturnPredicate(ID)) {
+ // Now we need to differentiate target data types.
+ if (N->getValueType(0) == MVT::i64) {
+ // Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs).
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
+ MVT::i32,
+ SDValue(IsIntrinsic, 0));
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl,
+ MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2, 0),
+ SDValue(Result_1, 0));
+ ReplaceUses(N, Result_3);
+ return Result_3;
+ }
+ if (N->getValueType(0) == MVT::i32) {
+ // Convert the zero_extend to Rs = Pd
+ SDNode* RsPd = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
+ MVT::i32,
+ SDValue(IsIntrinsic, 0));
+ ReplaceUses(N, RsPd);
+ return RsPd;
+ }
+ llvm_unreachable("Unexpected value type");
+ }
+ }
+ return SelectCode(N);
+}
+
+//
+// Check for circular load/store and bit-reverse load/store intrinsics in
+// order to select the correct lowered operation.
+//
+SDNode *HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::hexagon_circ_ldd ||
+ IntNo == Intrinsic::hexagon_circ_ldw ||
+ IntNo == Intrinsic::hexagon_circ_lduh ||
+ IntNo == Intrinsic::hexagon_circ_ldh ||
+ IntNo == Intrinsic::hexagon_circ_ldub ||
+ IntNo == Intrinsic::hexagon_circ_ldb) {
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Base = N->getOperand(2);
+ SDValue Load = N->getOperand(3);
+ SDValue ModifierExpr = N->getOperand(4);
+ SDValue Offset = N->getOperand(5);
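+    // Informal operand roles: Base is the circular-buffer address, Load is
+    // the pointer through which the loaded value is written back (see the
+    // store built below), ModifierExpr is the circular-addressing modifier,
+    // and Offset is the post-increment amount.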
+
+    // We need to add the return type for the load. This intrinsic has
+ // two return types, one for the load and one for the post-increment.
+ // Only the *_ld instructions push the extra return type, and bump the
+ // result node operand number correspondingly.
+ std::vector<EVT> ResTys;
+ unsigned opc;
+ unsigned memsize, align;
+ MVT MvtSize = MVT::i32;
+
+ if (IntNo == Intrinsic::hexagon_circ_ldd) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i64);
+ opc = Hexagon::L2_loadrd_pci_pseudo;
+ memsize = 8;
+ align = 8;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldw) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadri_pci_pseudo;
+ memsize = 4;
+ align = 4;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrh_pci_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_circ_lduh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadruh_pci_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrb_pci_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldub) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrub_pci_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else
+ llvm_unreachable("no opc");
+
+ ResTys.push_back(MVT::Other);
+
+    // Copy over the arguments, which are mostly the same.
+ SmallVector<SDValue, 5> Ops;
+ Ops.push_back(Base);
+ Ops.push_back(Load);
+ Ops.push_back(ModifierExpr);
+ int32_t Val = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
+ Ops.push_back(CurDAG->getTargetConstant(Val, dl, MVT::i32));
+ Ops.push_back(Chain);
+ SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
+
+ SDValue ST;
+ MachineMemOperand *Mem =
+ MF->getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, memsize, align);
+ if (MvtSize != MVT::i32)
+ ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
+ MvtSize, Mem);
+ else
+ ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
+
+ SDNode* Store = SelectStore(ST.getNode());
+
+ const SDValue Froms[] = { SDValue(N, 0),
+ SDValue(N, 1) };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Store, 0) };
+ ReplaceUses(Froms, Tos, 2);
+ return Result;
+ }
+
+ if (IntNo == Intrinsic::hexagon_brev_ldd ||
+ IntNo == Intrinsic::hexagon_brev_ldw ||
+ IntNo == Intrinsic::hexagon_brev_ldh ||
+ IntNo == Intrinsic::hexagon_brev_lduh ||
+ IntNo == Intrinsic::hexagon_brev_ldb ||
+ IntNo == Intrinsic::hexagon_brev_ldub) {
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Base = N->getOperand(2);
+ SDValue Load = N->getOperand(3);
+ SDValue ModifierExpr = N->getOperand(4);
+
+    // We need to add the return type for the load. This intrinsic has
+ // two return types, one for the load and one for the post-increment.
+ std::vector<EVT> ResTys;
+ unsigned opc;
+ unsigned memsize, align;
+ MVT MvtSize = MVT::i32;
+
+ if (IntNo == Intrinsic::hexagon_brev_ldd) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i64);
+ opc = Hexagon::L2_loadrd_pbr_pseudo;
+ memsize = 8;
+ align = 8;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldw) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadri_pbr_pseudo;
+ memsize = 4;
+ align = 4;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrh_pbr_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_brev_lduh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadruh_pbr_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrb_pbr_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldub) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrub_pbr_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else
+ llvm_unreachable("no opc");
+
+ ResTys.push_back(MVT::Other);
+
+    // Copy over the arguments, which are mostly the same.
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(Base);
+ Ops.push_back(Load);
+ Ops.push_back(ModifierExpr);
+ Ops.push_back(Chain);
+ SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
+ SDValue ST;
+ MachineMemOperand *Mem =
+ MF->getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, memsize, align);
+ if (MvtSize != MVT::i32)
+ ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
+ MvtSize, Mem);
+ else
+ ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
+
+ SDNode* Store = SelectStore(ST.getNode());
+
+ const SDValue Froms[] = { SDValue(N, 0),
+ SDValue(N, 1) };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Store, 0) };
+ ReplaceUses(Froms, Tos, 2);
+ return Result;
+ }
+
+ return SelectCode(N);
+}
+
+//
+// Checking for intrinsics which have predicate registers as operand(s)
+// and lowering to the actual intrinsic.
+//
+SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned Bits;
+ switch (IID) {
+ case Intrinsic::hexagon_S2_vsplatrb:
+ Bits = 8;
+ break;
+ case Intrinsic::hexagon_S2_vsplatrh:
+ Bits = 16;
+ break;
+ default:
+ return SelectCode(N);
+ }
+
+ SDValue const &V = N->getOperand(1);
+ SDValue U;
+ if (isValueExtension(V, Bits, U)) {
+ SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ N->getOperand(0), U);
+ return SelectCode(R.getNode());
+ }
+ return SelectCode(N);
+}
+
+//
+// Map floating point constant values.
+//
+SDNode *HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
+ SDLoc dl(N);
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ APFloat APF = CN->getValueAPF();
+ if (N->getValueType(0) == MVT::f32) {
+ return CurDAG->getMachineNode(Hexagon::TFRI_f, dl, MVT::f32,
+ CurDAG->getTargetConstantFP(APF.convertToFloat(), dl, MVT::f32));
+ }
+ else if (N->getValueType(0) == MVT::f64) {
+ return CurDAG->getMachineNode(Hexagon::CONST64_Float_Real, dl, MVT::f64,
+ CurDAG->getTargetConstantFP(APF.convertToDouble(), dl, MVT::f64));
+ }
+
+ return SelectCode(N);
+}
+
+//
+// Map i1 constants: true (encoded as -1 in LLVM) to TFR_PdTrue and false
+// (0) to TFR_PdFalse.
+//
+SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
+ SDLoc dl(N);
+ if (N->getValueType(0) == MVT::i1) {
+ SDNode* Result = 0;
+ int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ if (Val == -1) {
+ Result = CurDAG->getMachineNode(Hexagon::TFR_PdTrue, dl, MVT::i1);
+ } else if (Val == 0) {
+ Result = CurDAG->getMachineNode(Hexagon::TFR_PdFalse, dl, MVT::i1);
+ }
+ if (Result) {
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+//
+// Map an add followed by an asr -> asr +=.
+//
+SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
+ SDLoc dl(N);
+ if (N->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+ // Identify nodes of the form: add(asr(...)).
+ SDNode* Src1 = N->getOperand(0).getNode();
+ if (Src1->getOpcode() != ISD::SRA || !Src1->hasOneUse()
+ || Src1->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ // Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that
+ // Rd and Rd' are assigned to the same register
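+  // E.g. (add (sra x, s), y) is selected as "y += asr(x, s)".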
+ SDNode* Result = CurDAG->getMachineNode(Hexagon::S2_asr_r_r_acc, dl, MVT::i32,
+ N->getOperand(1),
+ Src1->getOperand(0),
+ Src1->getOperand(1));
+ ReplaceUses(N, Result);
+
+ return Result;
+}
+
+//
+// Map the following, where possible.
+// AND/FABS -> clrbit
+// OR -> setbit
+// XOR/FNEG -> toggle_bit.
+//
+SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
+ SDLoc dl(N);
+ EVT ValueVT = N->getValueType(0);
+
+ // We handle only 32 and 64-bit bit ops.
+ if (!(ValueVT == MVT::i32 || ValueVT == MVT::i64 ||
+ ValueVT == MVT::f32 || ValueVT == MVT::f64))
+ return SelectCode(N);
+
+  // We handle only fabs and fneg for V5.
+ unsigned Opc = N->getOpcode();
+ if ((Opc == ISD::FABS || Opc == ISD::FNEG) && !HST->hasV5TOps())
+ return SelectCode(N);
+
+ int64_t Val = 0;
+ if (Opc != ISD::FABS && Opc != ISD::FNEG) {
+ if (N->getOperand(1).getOpcode() == ISD::Constant)
+ Val = cast<ConstantSDNode>((N)->getOperand(1))->getSExtValue();
+ else
+ return SelectCode(N);
+ }
+
+ if (Opc == ISD::AND) {
+ // Check if this is a bit-clearing AND, if not select code the usual way.
+ if ((ValueVT == MVT::i32 && isPowerOf2_32(~Val)) ||
+ (ValueVT == MVT::i64 && isPowerOf2_64(~Val)))
+ Val = ~Val;
+ else
+ return SelectCode(N);
+ }
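+  // E.g. (and x, 0xFFFFFFFB) has ~Val == 4 == 1 << 2 and so becomes
+  // clrbit(x, #2); likewise (or x, 4) becomes setbit(x, #2).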
+
+  // If the OR or AND is fed by a shl, srl, or sra, don't do this change,
+  // because Hexagon provides |= and &= forms of shl, srl, and sra.
+  // Look at the operand to see if it is a shl, srl, or sra.
+ if (Opc == ISD::OR || Opc == ISD::AND) {
+ switch (N->getOperand(0)->getOpcode()) {
+ default:
+ break;
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL:
+ return SelectCode(N);
+ }
+ }
+
+ // Make sure it's power of 2.
+ unsigned BitPos = 0;
+ if (Opc != ISD::FABS && Opc != ISD::FNEG) {
+ if ((ValueVT == MVT::i32 && !isPowerOf2_32(Val)) ||
+ (ValueVT == MVT::i64 && !isPowerOf2_64(Val)))
+ return SelectCode(N);
+
+ // Get the bit position.
+ BitPos = countTrailingZeros(uint64_t(Val));
+ } else {
+    // For fabs and fneg, it's always bit 31 (the sign bit).
+ BitPos = 31;
+ }
+
+ unsigned BitOpc = 0;
+ // Set the right opcode for bitwise operations.
+ switch (Opc) {
+ default:
+ llvm_unreachable("Only bit-wise/abs/neg operations are allowed.");
+ case ISD::AND:
+ case ISD::FABS:
+ BitOpc = Hexagon::S2_clrbit_i;
+ break;
+ case ISD::OR:
+ BitOpc = Hexagon::S2_setbit_i;
+ break;
+ case ISD::XOR:
+ case ISD::FNEG:
+ BitOpc = Hexagon::S2_togglebit_i;
+ break;
+ }
+
+ SDNode *Result;
+ // Get the right SDVal for the opcode.
+ SDValue SDVal = CurDAG->getTargetConstant(BitPos, dl, MVT::i32);
+
+ if (ValueVT == MVT::i32 || ValueVT == MVT::f32) {
+ Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT,
+ N->getOperand(0), SDVal);
+ } else {
+    // 64-bit gymnastics to use REG_SEQUENCE. But it's worth it.
+ EVT SubValueVT;
+ if (ValueVT == MVT::i64)
+ SubValueVT = MVT::i32;
+ else
+ SubValueVT = MVT::f32;
+
+ SDNode *Reg = N->getOperand(0).getNode();
+ SDValue RegClass = CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID,
+ dl, MVT::i64);
+
+ SDValue SubregHiIdx = CurDAG->getTargetConstant(Hexagon::subreg_hireg, dl,
+ MVT::i32);
+ SDValue SubregLoIdx = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl,
+ MVT::i32);
+
+ SDValue SubregHI = CurDAG->getTargetExtractSubreg(Hexagon::subreg_hireg, dl,
+ MVT::i32, SDValue(Reg, 0));
+
+ SDValue SubregLO = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, dl,
+ MVT::i32, SDValue(Reg, 0));
+
+ // Clear/set/toggle hi or lo registers depending on the bit position.
+ if (SubValueVT != MVT::f32 && BitPos < 32) {
+ SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
+ SubregLO, SDVal);
+ const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx,
+ SDValue(Result0, 0), SubregLoIdx };
+ Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+ dl, ValueVT, Ops);
+ } else {
+ if (Opc != ISD::FABS && Opc != ISD::FNEG)
+ SDVal = CurDAG->getTargetConstant(BitPos-32, dl, MVT::i32);
+ SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
+ SubregHI, SDVal);
+ const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx,
+ SubregLO, SubregLoIdx };
+ Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+ dl, ValueVT, Ops);
+ }
+ }
+
+ ReplaceUses(N, Result);
+ return Result;
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) {
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ const HexagonFrameLowering *HFI = HST->getFrameLowering();
+ int FX = cast<FrameIndexSDNode>(N)->getIndex();
+ unsigned StkA = HFI->getStackAlignment();
+ unsigned MaxA = MFI->getMaxAlignment();
+ SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32);
+ SDLoc DL(N);
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ SDNode *R = 0;
+
+ // Use TFR_FI when:
+ // - the object is fixed, or
+ // - there are no objects with higher-than-default alignment, or
+ // - there are no dynamically allocated objects.
+ // Otherwise, use TFR_FIA.
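+  // Roughly: TFR_FI resolves to a plain frame-index (SP/FP-relative)
+  // offset, while TFR_FIA adds the offset to the aligned-stack base
+  // register created in EmitFunctionEntryCode below.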
+ if (FX < 0 || MaxA <= StkA || !MFI->hasVarSizedObjects()) {
+ R = CurDAG->getMachineNode(Hexagon::TFR_FI, DL, MVT::i32, FI, Zero);
+ } else {
+ auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>();
+ unsigned AR = HMFI.getStackAlignBaseVReg();
+ SDValue CH = CurDAG->getEntryNode();
+ SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero };
+ R = CurDAG->getMachineNode(Hexagon::TFR_FIA, DL, MVT::i32, Ops);
+ }
+
+ if (N->getHasDebugValue())
+ CurDAG->TransferDbgValues(SDValue(N, 0), SDValue(R, 0));
+ return R;
+}
+
+
+SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
+ return nullptr; // Already selected.
+ }
+
+ switch (N->getOpcode()) {
+ case ISD::Constant:
+ return SelectConstant(N);
+
+ case ISD::ConstantFP:
+ return SelectConstantFP(N);
+
+ case ISD::FrameIndex:
+ return SelectFrameIndex(N);
+
+ case ISD::ADD:
+ return SelectAdd(N);
+
+ case ISD::SHL:
+ return SelectSHL(N);
+
+ case ISD::LOAD:
+ return SelectLoad(N);
+
+ case ISD::STORE:
+ return SelectStore(N);
+
+ case ISD::MUL:
+ return SelectMul(N);
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::FABS:
+ case ISD::FNEG:
+ return SelectBitOp(N);
+
+ case ISD::ZERO_EXTEND:
+ return SelectZeroExtend(N);
+
+ case ISD::INTRINSIC_W_CHAIN:
+ return SelectIntrinsicWChain(N);
+
+ case ISD::INTRINSIC_WO_CHAIN:
+ return SelectIntrinsicWOChain(N);
+ }
+
+ return SelectCode(N);
+}
+
+bool HexagonDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) {
+ SDValue Inp = Op, Res;
+
+ switch (ConstraintID) {
+ default:
+ return true;
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_o: // Offsetable.
+ case InlineAsm::Constraint_v: // Not offsetable.
+ case InlineAsm::Constraint_m: // Memory.
+ if (SelectAddrFI(Inp, Res))
+ OutOps.push_back(Res);
+ else
+ OutOps.push_back(Inp);
+ break;
+ }
+
+ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
+ return false;
+}
+
+
+void HexagonDAGToDAGISel::PreprocessISelDAG() {
+ SelectionDAG &DAG = *CurDAG;
+ std::vector<SDNode*> Nodes;
+ for (SDNode &Node : DAG.allnodes())
+ Nodes.push_back(&Node);
+
+ // Simplify: (or (select c x 0) z) -> (select c (or x z) z)
+ // (or (select c 0 y) z) -> (select c z (or y z))
+ // This may not be the right thing for all targets, so do it here.
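+  // Informally, pushing the OR into the select lets the arm that was 0
+  // collapse to the other operand, which tends to expose better selection
+  // opportunities later.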
+ for (auto I: Nodes) {
+ if (I->getOpcode() != ISD::OR)
+ continue;
+
+ auto IsZero = [] (const SDValue &V) -> bool {
+ if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode()))
+ return SC->isNullValue();
+ return false;
+ };
+ auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
+ if (Op.getOpcode() != ISD::SELECT)
+ return false;
+ return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2));
+ };
+
+ SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
+ EVT VT = I->getValueType(0);
+ bool SelN0 = IsSelect0(N0);
+ SDValue SOp = SelN0 ? N0 : N1;
+ SDValue VOp = SelN0 ? N1 : N0;
+
+ if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) {
+ SDValue SC = SOp.getOperand(0);
+ SDValue SX = SOp.getOperand(1);
+ SDValue SY = SOp.getOperand(2);
+ SDLoc DLS = SOp;
+ if (IsZero(SY)) {
+ SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
+ SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
+ DAG.ReplaceAllUsesWith(I, NewSel.getNode());
+ } else if (IsZero(SX)) {
+ SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
+ SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
+ DAG.ReplaceAllUsesWith(I, NewSel.getNode());
+ }
+ }
+ }
+}
+
+void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
+ auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget());
+ auto &HFI = *HST.getFrameLowering();
+ if (!HFI.needsAligna(*MF))
+ return;
+
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ MachineBasicBlock *EntryBB = &MF->front();
+ unsigned AR = FuncInfo->CreateReg(MVT::i32);
+ unsigned MaxA = MFI->getMaxAlignment();
+ BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::ALIGNA), AR)
+ .addImm(MaxA);
+ MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
+}
+
+// Match a frame index that can be used in an addressing mode.
+bool HexagonDAGToDAGISel::SelectAddrFI(SDValue& N, SDValue &R) {
+ if (N.getOpcode() != ISD::FrameIndex)
+ return false;
+ auto &HFI = *HST->getFrameLowering();
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ int FX = cast<FrameIndexSDNode>(N)->getIndex();
+ if (!MFI->isFixedObjectIndex(FX) && HFI.needsAligna(*MF))
+ return false;
+ R = CurDAG->getTargetFrameIndex(FX, MVT::i32);
+ return true;
+}
+
+inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) {
+ return SelectGlobalAddress(N, R, false);
+}
+
+inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) {
+ return SelectGlobalAddress(N, R, true);
+}
+
+bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R,
+ bool UseGP) {
+ switch (N.getOpcode()) {
+ case ISD::ADD: {
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ unsigned GAOpc = N0.getOpcode();
+ if (UseGP && GAOpc != HexagonISD::CONST32_GP)
+ return false;
+ if (!UseGP && GAOpc != HexagonISD::CONST32)
+ return false;
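+    // Fold the constant addend into the global's offset, e.g. matching
+    // (add CONST32(@g), #16) as the address @g+16.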
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) {
+ SDValue Addr = N0.getOperand(0);
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) {
+ if (GA->getOpcode() == ISD::TargetGlobalAddress) {
+ uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue();
+ R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const),
+ N.getValueType(), NewOff);
+ return true;
+ }
+ }
+ }
+ break;
+ }
+ case HexagonISD::CONST32:
+ // The operand(0) of CONST32 is TargetGlobalAddress, which is what we
+ // want in the instruction.
+ if (!UseGP)
+ R = N.getOperand(0);
+ return !UseGP;
+ case HexagonISD::CONST32_GP:
+ if (UseGP)
+ R = N.getOperand(0);
+ return UseGP;
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val,
+ unsigned FromBits, SDValue &Src) {
+ unsigned Opc = Val.getOpcode();
+ switch (Opc) {
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: {
+ SDValue const &Op0 = Val.getOperand(0);
+ EVT T = Op0.getValueType();
+ if (T.isInteger() && T.getSizeInBits() == FromBits) {
+ Src = Op0;
+ return true;
+ }
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ if (Val.getOperand(0).getValueType().isInteger()) {
+ VTSDNode *T = cast<VTSDNode>(Val.getOperand(1));
+ if (T->getVT().getSizeInBits() == FromBits) {
+ Src = Val.getOperand(0);
+ return true;
+ }
+ }
+ break;
+ case ISD::AND: {
+ // Check if this is an AND with "FromBits" of lower bits set to 1.
+ uint64_t FromMask = (1 << FromBits) - 1;
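+    // E.g. with FromBits == 8, FromMask == 0xFF, so (and x, 0xFF) shows
+    // that only the low 8 bits of x are significant.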
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
+ if (C->getZExtValue() == FromMask) {
+ Src = Val.getOperand(1);
+ return true;
+ }
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
+ if (C->getZExtValue() == FromMask) {
+ Src = Val.getOperand(0);
+ return true;
+ }
+ }
+ break;
+ }
+ case ISD::OR:
+ case ISD::XOR: {
+ // OR/XOR with the lower "FromBits" bits set to 0.
+ uint64_t FromMask = (1 << FromBits) - 1;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
+ if ((C->getZExtValue() & FromMask) == 0) {
+ Src = Val.getOperand(1);
+ return true;
+ }
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
+ if ((C->getZExtValue() & FromMask) == 0) {
+ Src = Val.getOperand(0);
+ return true;
+ }
+ }
+ }
+ default:
+ break;
+ }
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
new file mode 100644
index 0000000..0167090
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -0,0 +1,2894 @@
+//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Hexagon uses to lower LLVM code
+// into a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonISelLowering.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonTargetObjectFile.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-lowering"
+
+static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
+ cl::init(true), cl::Hidden,
+ cl::desc("Control jump table emission on Hexagon target"));
+
+static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable Hexagon SDNode scheduling"));
+
+static cl::opt<bool> EnableFastMath("ffast-math",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable Fast Math processing"));
+
+static cl::opt<int> MinimumJumpTables("minimum-jump-tables",
+ cl::Hidden, cl::ZeroOrMore, cl::init(5),
+ cl::desc("Set minimum jump tables"));
+
+static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy",
+ cl::Hidden, cl::ZeroOrMore, cl::init(6),
+ cl::desc("Max #stores to inline memcpy"));
+
+static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os",
+ cl::Hidden, cl::ZeroOrMore, cl::init(4),
+ cl::desc("Max #stores to inline memcpy"));
+
+static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove",
+ cl::Hidden, cl::ZeroOrMore, cl::init(6),
+ cl::desc("Max #stores to inline memmove"));
+
+static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os",
+ cl::Hidden, cl::ZeroOrMore, cl::init(4),
+ cl::desc("Max #stores to inline memmove"));
+
+static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset",
+ cl::Hidden, cl::ZeroOrMore, cl::init(8),
+ cl::desc("Max #stores to inline memset"));
+
+static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
+ cl::Hidden, cl::ZeroOrMore, cl::init(4),
+ cl::desc("Max #stores to inline memset"));
+
+
+namespace {
+class HexagonCCState : public CCState {
+ unsigned NumNamedVarArgParams;
+
+public:
+ HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
+ int NumNamedVarArgParams)
+ : CCState(CC, isVarArg, MF, locs, C),
+ NumNamedVarArgParams(NumNamedVarArgParams) {}
+
+ unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
+};
+}
+
+// Implement calling convention for Hexagon.
+
+static bool IsHvxVectorType(MVT ty);
+
+static bool
+CC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_HexagonVector(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ HexagonCCState &HState = static_cast<HexagonCCState &>(State);
+
+ if (ValNo < HState.getNumNamedVarArgParams()) {
+ // Deal with named arguments.
+ return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
+ }
+
+  // Deal with unnamed arguments.
+ unsigned ofst;
+ if (ArgFlags.isByVal()) {
+ // If pass-by-value, the size allocated on stack is decided
+ // by ArgFlags.getByValSize(), not by the size of LocVT.
+ ofst = State.AllocateStack(ArgFlags.getByValSize(),
+ ArgFlags.getByValAlign());
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+ if (LocVT == MVT::i32 || LocVT == MVT::f32) {
+ ofst = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::i64 || LocVT == MVT::f64) {
+ ofst = State.AllocateStack(8, 8);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::v2i64 || LocVT == MVT::v4i32 || LocVT == MVT::v8i16 ||
+ LocVT == MVT::v16i8) {
+ ofst = State.AllocateStack(16, 16);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::v4i64 || LocVT == MVT::v8i32 || LocVT == MVT::v16i16 ||
+ LocVT == MVT::v32i8) {
+ ofst = State.AllocateStack(32, 32);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
+ LocVT == MVT::v64i8 || LocVT == MVT::v512i1) {
+ ofst = State.AllocateStack(64, 64);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
+ LocVT == MVT::v128i8 || LocVT == MVT::v1024i1) {
+ ofst = State.AllocateStack(128, 128);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 ||
+ LocVT == MVT::v256i8) {
+ ofst = State.AllocateStack(256, 256);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+
+ llvm_unreachable(nullptr);
+}
+
+
+static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ if (ArgFlags.isByVal()) {
+ // Passed on stack.
+ unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(),
+ ArgFlags.getByValAlign());
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+
+ if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) {
+ LocVT = MVT::i32;
+ LocInfo = CCValAssign::BCvt;
+ } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
+ LocVT = MVT::i64;
+ LocInfo = CCValAssign::BCvt;
+ }
+
+ if (LocVT == MVT::i32 || LocVT == MVT::f32) {
+ if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ if (LocVT == MVT::i64 || LocVT == MVT::f64) {
+ if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ if (LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || LocVT == MVT::v32i8) {
+ unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 32);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+
+ if (IsHvxVectorType(LocVT)) {
+ if (!CC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ return true; // CC didn't match.
+}
+
+
+static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ static const MCPhysReg RegList[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
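+  // The first six 32-bit arguments travel in R0..R5; anything beyond that
+  // goes to the stack in 4-byte slots.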
+ if (unsigned Reg = State.AllocateReg(RegList)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ unsigned Offset = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ static const MCPhysReg RegList1[] = {
+ Hexagon::D1, Hexagon::D2
+ };
+ static const MCPhysReg RegList2[] = {
+ Hexagon::R1, Hexagon::R3
+ };
+ if (unsigned Reg = State.AllocateReg(RegList1, RegList2)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ unsigned Offset = State.AllocateStack(8, 8, Hexagon::D2);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool CC_HexagonVector(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ static const MCPhysReg VecLstS[] = { Hexagon::V0, Hexagon::V1,
+ Hexagon::V2, Hexagon::V3,
+ Hexagon::V4, Hexagon::V5,
+ Hexagon::V6, Hexagon::V7,
+ Hexagon::V8, Hexagon::V9,
+ Hexagon::V10, Hexagon::V11,
+ Hexagon::V12, Hexagon::V13,
+ Hexagon::V14, Hexagon::V15};
+ static const MCPhysReg VecLstD[] = { Hexagon::W0, Hexagon::W1,
+ Hexagon::W2, Hexagon::W3,
+ Hexagon::W4, Hexagon::W5,
+ Hexagon::W6, Hexagon::W7};
+ auto &MF = State.getMachineFunction();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ bool UseHVX = HST.useHVXOps();
+ bool UseHVXDbl = HST.useHVXDblOps();
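+  // In 64-byte HVX mode a vector register holds 512 bits, so 512-bit types
+  // (v16i32 etc.) go in the V argument registers and 1024-bit types in the
+  // W vector pairs; in 128-byte mode the register is 1024 bits wide, so
+  // 1024-bit types go in the V registers and 2048-bit types in the W pairs.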
+
+ if ((UseHVX && !UseHVXDbl) &&
+ (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
+ LocVT == MVT::v64i8 || LocVT == MVT::v512i1)) {
+ if (unsigned Reg = State.AllocateReg(VecLstS)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ unsigned Offset = State.AllocateStack(64, 64);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+ if ((UseHVX && !UseHVXDbl) &&
+ (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
+ LocVT == MVT::v128i8)) {
+ if (unsigned Reg = State.AllocateReg(VecLstD)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ unsigned Offset = State.AllocateStack(128, 128);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+ // 128B Mode
+ if ((UseHVX && UseHVXDbl) &&
+ (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 ||
+ LocVT == MVT::v256i8)) {
+ if (unsigned Reg = State.AllocateReg(VecLstD)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ unsigned Offset = State.AllocateStack(256, 256);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+ if ((UseHVX && UseHVXDbl) &&
+ (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
+ LocVT == MVT::v128i8 || LocVT == MVT::v1024i1)) {
+ if (unsigned Reg = State.AllocateReg(VecLstS)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ unsigned Offset = State.AllocateStack(128, 128);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+ return true;
+}
+
+static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ auto &MF = State.getMachineFunction();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ bool UseHVX = HST.useHVXOps();
+ bool UseHVXDbl = HST.useHVXDblOps();
+
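+  // Canonicalize the return type first: small integers are promoted to
+  // i32, short vectors are bitcast to i32/i64, and HVX types are mapped
+  // onto v16i32/v32i32/v64i32 before any registers are assigned below.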
+ if (LocVT == MVT::i1 ||
+ LocVT == MVT::i8 ||
+ LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) {
+ LocVT = MVT::i32;
+ LocInfo = CCValAssign::BCvt;
+ } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
+ LocVT = MVT::i64;
+ LocInfo = CCValAssign::BCvt;
+ } else if (LocVT == MVT::v64i8 || LocVT == MVT::v32i16 ||
+ LocVT == MVT::v16i32 || LocVT == MVT::v8i64 ||
+ LocVT == MVT::v512i1) {
+ LocVT = MVT::v16i32;
+ ValVT = MVT::v16i32;
+ LocInfo = CCValAssign::Full;
+ } else if (LocVT == MVT::v128i8 || LocVT == MVT::v64i16 ||
+ LocVT == MVT::v32i32 || LocVT == MVT::v16i64 ||
+ (LocVT == MVT::v1024i1 && UseHVX && UseHVXDbl)) {
+ LocVT = MVT::v32i32;
+ ValVT = MVT::v32i32;
+ LocInfo = CCValAssign::Full;
+ } else if (LocVT == MVT::v256i8 || LocVT == MVT::v128i16 ||
+ LocVT == MVT::v64i32 || LocVT == MVT::v32i64) {
+ LocVT = MVT::v64i32;
+ ValVT = MVT::v64i32;
+ LocInfo = CCValAssign::Full;
+ }
+ if (LocVT == MVT::i32 || LocVT == MVT::f32) {
+ if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ if (LocVT == MVT::i64 || LocVT == MVT::f64) {
+ if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+ if (LocVT == MVT::v16i32 || LocVT == MVT::v32i32 || LocVT == MVT::v64i32) {
+ if (!RetCC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+ return true; // CC didn't match.
+}
+
+static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (LocVT == MVT::i32 || LocVT == MVT::f32) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::R0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ unsigned Offset = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ if (LocVT == MVT::i64 || LocVT == MVT::f64) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ unsigned Offset = State.AllocateStack(8, 8);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ auto &MF = State.getMachineFunction();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ bool UseHVX = HST.useHVXOps();
+ bool UseHVXDbl = HST.useHVXDblOps();
+
+ unsigned OffSiz = 64;
+ if (LocVT == MVT::v16i32) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::V0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ } else if (LocVT == MVT::v32i32) {
+ unsigned Req = (UseHVX && UseHVXDbl) ? Hexagon::V0 : Hexagon::W0;
+ if (unsigned Reg = State.AllocateReg(Req)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ OffSiz = 128;
+ } else if (LocVT == MVT::v64i32) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::W0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ OffSiz = 256;
+ }
+
+ unsigned Offset = State.AllocateStack(OffSiz, OffSiz);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) {
+ if (VT != PromotedLdStVT) {
+ setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::LOAD, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+
+ setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::STORE, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+ }
+}
+
+SDValue
+HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
+const {
+ return SDValue();
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" of size "Size". Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter. Sometimes what we are copying is the end of a
+/// larger object, the part that does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ SDLoc dl) {
+
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ /*isTailCall=*/false,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
+static bool IsHvxVectorType(MVT ty) {
+ return (ty == MVT::v8i64 || ty == MVT::v16i32 || ty == MVT::v32i16 ||
+ ty == MVT::v64i8 ||
+ ty == MVT::v16i64 || ty == MVT::v32i32 || ty == MVT::v64i16 ||
+ ty == MVT::v128i8 ||
+ ty == MVT::v32i64 || ty == MVT::v64i32 || ty == MVT::v128i16 ||
+ ty == MVT::v256i8 ||
+ ty == MVT::v512i1 || ty == MVT::v1024i1);
+}
+
+// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
+// passed by value, the function prototype is modified to return void and
+// the value is stored in memory pointed to by a pointer passed by the caller.
+SDValue
+HexagonTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
+
+ // Analyze return values of ISD::RET
+ CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
+}
+
+bool HexagonTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ // If either no tail call or told not to tail call at all, don't.
+ auto Attr =
+ CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
+ if (!CI->isTailCall() || Attr.getValueAsString() == "true")
+ return false;
+
+ return true;
+}
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. Returns a SDNode with the same number of values as the
+/// ISD::CALL.
+SDValue
+HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const
+ SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDValue Callee) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl,
+ RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+/// LowerCall - Functions arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+SDValue
+HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ SDLoc &dl = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+ bool doesNotReturn = CLI.DoesNotReturn;
+
+ bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ MachineFunction &MF = DAG.getMachineFunction();
+ auto PtrVT = getPointerTy(MF.getDataLayout());
+
+ // Check for varargs.
+ int NumNamedVarArgParams = -1;
+ if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = GAN->getGlobal();
+ Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+ if (const Function* F = dyn_cast<Function>(GV)) {
+ // If a function has zero args and is a vararg function, that's
+ // disallowed so it must be an undeclared function. Do not assume
+ // varargs if the callee is undefined.
+ if (F->isVarArg() && F->getFunctionType()->getNumParams() != 0)
+ NumNamedVarArgParams = F->getFunctionType()->getNumParams();
+ }
+ }
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ HexagonCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext(), NumNamedVarArgParams);
+
+ if (isVarArg)
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
+ else
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
+
+ auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
+ if (Attr.getValueAsString() == "true")
+ isTailCall = false;
+
+ if (isTailCall) {
+ bool StructAttrFlag = MF.getFunction()->hasStructRetAttr();
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet,
+ StructAttrFlag,
+ Outs, OutVals, Ins, DAG);
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isMemLoc()) {
+ isTailCall = false;
+ break;
+ }
+ }
+ DEBUG(dbgs() << (isTailCall ? "Eligible for Tail Call\n"
+ : "Argument must be passed on stack. "
+ "Not eligible for Tail Call\n"));
+ }
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ auto &HRI = *Subtarget.getRegisterInfo();
+ SDValue StackPtr =
+ DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
+
+ bool NeedsArgAlign = false;
+ unsigned LargestAlignSeen = 0;
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ // Record if we need > 8 byte alignment on an argument.
+ bool ArgAlign = IsHvxVectorType(VA.getValVT());
+ NeedsArgAlign |= ArgAlign;
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default:
+ // Loc info must be one of Full, SExt, ZExt, or AExt.
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::BCvt:
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isMemLoc()) {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
+ StackPtr.getValueType());
+ MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
+ if (ArgAlign)
+ LargestAlignSeen = std::max(LargestAlignSeen,
+ VA.getLocVT().getStoreSizeInBits() >> 3);
+ if (Flags.isByVal()) {
+ // The argument is a struct passed by value. According to LLVM, "Arg"
+        // is a pointer.
+ MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
+ Flags, DAG, dl));
+ } else {
+ MachinePointerInfo LocPI = MachinePointerInfo::getStack(
+ DAG.getMachineFunction(), LocMemOffset);
+ SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI, false,
+ false, 0);
+ MemOpChains.push_back(S);
+ }
+ continue;
+ }
+
+ // Arguments that can be passed on register must be kept at RegsToPass
+ // vector.
+ if (VA.isRegLoc())
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ }
+
+ if (NeedsArgAlign && Subtarget.hasV60TOps()) {
+ DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
+ MachineFrameInfo* MFI = DAG.getMachineFunction().getFrameInfo();
+    // V6 vectors passed by value have 64- or 128-byte alignment, depending
+    // on whether we are in 64-byte or 128-byte vector mode.
+ bool UseHVXDbl = Subtarget.useHVXDblOps();
+ assert(Subtarget.useHVXOps());
+ const unsigned ObjAlign = UseHVXDbl ? 128 : 64;
+ LargestAlignSeen = std::max(LargestAlignSeen, ObjAlign);
+ MFI->ensureMaxAlignment(LargestAlignSeen);
+ }
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+
+ if (!isTailCall) {
+ SDValue C = DAG.getConstant(NumBytes, dl, PtrVT, true);
+ Chain = DAG.getCALLSEQ_START(Chain, C, dl);
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ if (!isTailCall) {
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ } else {
+ // For tail calls lower the arguments to the 'real' stack slot.
+ //
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+ //
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ InFlag = SDValue();
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT);
+ } else if (ExternalSymbolSDNode *S =
+ dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT);
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ if (isTailCall) {
+ MF.getFrameInfo()->setHasTailCall();
+ return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
+ }
+
+ int OpCode = doesNotReturn ? HexagonISD::CALLv3nr : HexagonISD::CALLv3;
+ Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ InVals, OutVals, Callee);
+}
+
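+// Helper for getPostIndexedAddressParts: match a pointer of the form
+// (add Base, ConstantOffset) whose memory type supports post-incremented
+// addressing (scalar i8/i16/i32/i64, or the HVX vector types valid for the
+// current single/double vector mode), and return the base and constant
+// offset.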
+static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD)
+ return false;
+
+ auto &HST = static_cast<const HexagonSubtarget&>(DAG.getSubtarget());
+ bool UseHVX = HST.useHVXOps();
+ bool UseHVXDbl = HST.useHVXDblOps();
+
+ bool ValidHVXDblType =
+ (UseHVX && UseHVXDbl) && (VT == MVT::v32i32 || VT == MVT::v16i64 ||
+ VT == MVT::v64i16 || VT == MVT::v128i8);
+ bool ValidHVXType =
+ UseHVX && !UseHVXDbl && (VT == MVT::v16i32 || VT == MVT::v8i64 ||
+ VT == MVT::v32i16 || VT == MVT::v64i8);
+
+ if (ValidHVXDblType || ValidHVXType ||
+ VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ // Ensure that Offset is a constant.
+ return (isa<ConstantSDNode>(Offset));
+ }
+
+ return false;
+}
+
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const
+{
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) {
+ return false;
+ }
+ } else {
+ return false;
+ }
+
+ bool isInc = false;
+ bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ if (isLegal) {
+ auto &HII = *Subtarget.getInstrInfo();
+ int32_t OffsetVal = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
+ if (HII.isValidAutoIncImm(VT, OffsetVal)) {
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+ }
+ }
+
+ return false;
+}
+
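+// Scan the operand list of an INLINEASM node for an early-clobber register
+// definition of the return-address register (LR); if one is present, record
+// in the machine function info that LR is clobbered so it gets saved and
+// restored.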
+SDValue
+HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ MachineFunction &MF = DAG.getMachineFunction();
+ auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
+ switch (Node->getOpcode()) {
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the flag operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ if (FuncInfo.hasClobberLR())
+ break;
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default: llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegUse:
+ case InlineAsm::Kind_Imm:
+ case InlineAsm::Kind_Clobber:
+ case InlineAsm::Kind_Mem: {
+ for (; NumVals; --NumVals, ++i) {}
+ break;
+ }
+ case InlineAsm::Kind_RegDefEarlyClobber: {
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg =
+ cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+
+ // Check it to be lr
+ const HexagonRegisterInfo *QRI = Subtarget.getRegisterInfo();
+ if (Reg == QRI->getRARegister()) {
+ FuncInfo.setHasClobberLR(true);
+ break;
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+ } // Node->getOpcode
+ return Op;
+}
+
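+// Lower a dynamic stack allocation to a HexagonISD::ALLOCA node. An alignment
+// of zero means "use the natural stack alignment" from the frame lowering.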
+SDValue
+HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ SDValue Align = Op.getOperand(2);
+ SDLoc dl(Op);
+
+ ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
+ assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");
+
+ unsigned A = AlignConst->getSExtValue();
+ auto &HFI = *Subtarget.getFrameLowering();
+ // "Zero" means natural stack alignment.
+ if (A == 0)
+ A = HFI.getStackAlignment();
+
+ DEBUG({
+ dbgs () << LLVM_FUNCTION_NAME << " Align: " << A << " Size: ";
+ Size.getNode()->dump(&DAG);
+ dbgs() << "\n";
+ });
+
+ SDValue AC = DAG.getConstant(A, dl, MVT::i32);
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+ SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
+ if (Op.getNode()->getHasDebugValue())
+ DAG.TransferDbgValues(Op, AA);
+ return AA;
+}
+
+SDValue
+HexagonTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const
+ SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
+
+  // For LLVM, when returning a struct by value (> 8 bytes), the first
+  // argument is a pointer to the location on the caller's stack where the
+  // return value will be stored. For Hexagon, the location on the caller's
+  // stack is passed only when the struct size is smaller than (or equal to)
+  // 8 bytes. Otherwise, no address is passed into the callee and the callee
+  // returns the result directly through R0/R1.
+
+ SmallVector<SDValue, 8> MemOps;
+ bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps();
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ unsigned ObjSize;
+ unsigned StackLocation;
+ int FI;
+
+ if ( (VA.isRegLoc() && !Flags.isByVal())
+ || (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() > 8)) {
+ // Arguments passed in registers
+ // 1. int, long long, ptr args that get allocated in register.
+      // 2. Large struct that gets a register to put its address in.
+ EVT RegVT = VA.getLocVT();
+ if (RegVT == MVT::i8 || RegVT == MVT::i16 ||
+ RegVT == MVT::i32 || RegVT == MVT::f32) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (RegVT == MVT::i64 || RegVT == MVT::f64) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+
+ // Single Vector
+ } else if ((RegVT == MVT::v8i64 || RegVT == MVT::v16i32 ||
+ RegVT == MVT::v32i16 || RegVT == MVT::v64i8)) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VectorRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (UseHVX && UseHVXDbl &&
+ ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 ||
+ RegVT == MVT::v64i16 || RegVT == MVT::v128i8))) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VectorRegs128BRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+
+ // Double Vector
+ } else if ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 ||
+ RegVT == MVT::v64i16 || RegVT == MVT::v128i8)) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VecDblRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (UseHVX && UseHVXDbl &&
+ ((RegVT == MVT::v32i64 || RegVT == MVT::v64i32 ||
+ RegVT == MVT::v128i16 || RegVT == MVT::v256i8))) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VecDblRegs128BRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (RegVT == MVT::v512i1 || RegVT == MVT::v1024i1) {
+ assert(0 && "need to support VecPred regs");
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VecPredRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else {
+ assert (0);
+ }
+ } else if (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() <= 8) {
+ assert (0 && "ByValSize must be bigger than 8 bytes");
+ } else {
+ // Sanity check.
+ assert(VA.isMemLoc());
+
+ if (Flags.isByVal()) {
+ // If it's a byval parameter, then we need to compute the
+ // "real" size, not the size of the pointer.
+ ObjSize = Flags.getByValSize();
+ } else {
+ ObjSize = VA.getLocVT().getStoreSizeInBits() >> 3;
+ }
+
+ StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
+ // Create the frame index object for this incoming parameter...
+ FI = MFI->CreateFixedObject(ObjSize, StackLocation, true);
+
+      // Create the SelectionDAG nodes corresponding to a load
+ // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+
+ if (Flags.isByVal()) {
+ // If it's a pass-by-value aggregate, then do not dereference the stack
+ // location. Instead, we should generate a reference to the stack
+ // location.
+ InVals.push_back(FIN);
+ } else {
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo(), false, false,
+ false, 0));
+ }
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+
+ if (isVarArg) {
+ // This will point to the next argument passed via stack.
+ int FrameIndex = MFI->CreateFixedObject(Hexagon_PointerSize,
+ HEXAGON_LRFP_SIZE +
+ CCInfo.getNextStackOffset(),
+ true);
+ FuncInfo.setVarArgsFrameIndex(FrameIndex);
+ }
+
+ return Chain;
+}
+
+SDValue
+HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ // VASTART stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ MachineFunction &MF = DAG.getMachineFunction();
+ HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
+ SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr,
+ Op.getOperand(1), MachinePointerInfo(SV), false,
+ false, 0);
+}
+
+// Creates a SPLAT instruction for a constant value VAL.
+static SDValue createSplat(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue Val) {
+ if (VT.getSimpleVT() == MVT::v4i8)
+ return DAG.getNode(HexagonISD::VSPLATB, dl, VT, Val);
+
+ if (VT.getSimpleVT() == MVT::v4i16)
+ return DAG.getNode(HexagonISD::VSPLATH, dl, VT, Val);
+
+ return SDValue();
+}
+
+static bool isSExtFree(SDValue N) {
+ // A sign-extend of a truncate of a sign-extend is free.
+ if (N.getOpcode() == ISD::TRUNCATE &&
+ N.getOperand(0).getOpcode() == ISD::AssertSext)
+ return true;
+ // We have sign-extended loads.
+ if (N.getOpcode() == ISD::LOAD)
+ return true;
+ return false;
+}
+
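+// Lower a 64-bit CTPOP: fold constant inputs directly, otherwise emit the
+// Hexagon popcount node (which produces an i32 result) and zero-extend it
+// back to i64.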
+SDValue HexagonTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue InpVal = Op.getOperand(0);
+ if (isa<ConstantSDNode>(InpVal)) {
+ uint64_t V = cast<ConstantSDNode>(InpVal)->getZExtValue();
+ return DAG.getTargetConstant(countPopulation(V), dl, MVT::i64);
+ }
+ SDValue PopOut = DAG.getNode(HexagonISD::POPCOUNT, dl, MVT::i32, InpVal);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, PopOut);
+}
+
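+// Custom SETCC lowering: widen v2i16 compares to v2i32; for small scalar
+// equality compares, sign-extend the operands when that is free or when the
+// RHS is a negative constant (the default would zero-extend). Other vector
+// types are left as-is, and the remaining cases fall back to the default
+// handling.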
+SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue Cmp = Op.getOperand(2);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get();
+
+ EVT VT = Op.getValueType();
+ EVT LHSVT = LHS.getValueType();
+ EVT RHSVT = RHS.getValueType();
+
+ if (LHSVT == MVT::v2i16) {
+ assert(ISD::isSignedIntSetCC(CC) || ISD::isUnsignedIntSetCC(CC));
+ unsigned ExtOpc = ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ SDValue LX = DAG.getNode(ExtOpc, dl, MVT::v2i32, LHS);
+ SDValue RX = DAG.getNode(ExtOpc, dl, MVT::v2i32, RHS);
+ SDValue SC = DAG.getNode(ISD::SETCC, dl, MVT::v2i1, LX, RX, Cmp);
+ return SC;
+ }
+
+ // Treat all other vector types as legal.
+ if (VT.isVector())
+ return Op;
+
+ // Equals and not equals should use sign-extend, not zero-extend, since
+ // we can represent small negative values in the compare instructions.
+ // The LLVM default is to use zero-extend arbitrarily in these cases.
+ if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ (RHSVT == MVT::i8 || RHSVT == MVT::i16) &&
+ (LHSVT == MVT::i8 || LHSVT == MVT::i16)) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
+ if (C && C->getAPIntValue().isNegative()) {
+ LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
+ RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
+ return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
+ LHS, RHS, Op.getOperand(2));
+ }
+ if (isSExtFree(LHS) || isSExtFree(RHS)) {
+ LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
+ RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
+ return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
+ LHS, RHS, Op.getOperand(2));
+ }
+ }
+ return SDValue();
+}
+
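+// Lower VSELECT on v2i16 by zero-extending both value operands to v2i32,
+// selecting in the wider type, and truncating the result back to v2i16.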
+SDValue
+HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue PredOp = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
+ EVT OpVT = Op1.getValueType();
+ SDLoc DL(Op);
+
+ if (OpVT == MVT::v2i16) {
+ SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1);
+ SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2);
+ SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2);
+ SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL);
+ return TR;
+ }
+
+ return SDValue();
+}
+
+// Handle only specific vector loads.
+SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+ SDValue Chain = LoadNode->getChain();
+ SDValue Ptr = Op.getOperand(1);
+ SDValue LoweredLoad;
+ SDValue Result;
+ SDValue Base = LoadNode->getBasePtr();
+ ISD::LoadExtType Ext = LoadNode->getExtensionType();
+ unsigned Alignment = LoadNode->getAlignment();
+ SDValue LoadChain;
+
+  if (Ext == ISD::NON_EXTLOAD)
+ Ext = ISD::ZEXTLOAD;
+
+ if (VT == MVT::v4i16) {
+ if (Alignment == 2) {
+ SDValue Loads[4];
+ // Base load.
+ Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // Base+2 load.
+ SDValue Increment = DAG.getConstant(2, DL, MVT::i32);
+ Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
+ Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // SHL 16, then OR base and base+2.
+ SDValue ShiftAmount = DAG.getConstant(16, DL, MVT::i32);
+ SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount);
+ SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]);
+ // Base + 4.
+ Increment = DAG.getConstant(4, DL, MVT::i32);
+ Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
+ Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // Base + 6.
+ Increment = DAG.getConstant(6, DL, MVT::i32);
+ Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
+ Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // SHL 16, then OR base+4 and base+6.
+ Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]);
+ // Combine to i64. This could be optimised out later if we can
+ // affect reg allocation of this code.
+ Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2);
+ LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ Loads[0].getValue(1), Loads[1].getValue(1),
+ Loads[2].getValue(1), Loads[3].getValue(1));
+ } else {
+ // Perform default type expansion.
+ Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(),
+ LoadNode->isVolatile(), LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(), LoadNode->getAlignment());
+ LoadChain = Result.getValue(1);
+ }
+ } else
+ llvm_unreachable("Custom lowering unsupported load");
+
+ Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
+ // Since we pretend to lower a load, we need the original chain
+ // info attached to the result.
+ SDValue Ops[] = { Result, LoadChain };
+
+ return DAG.getMergeValues(Ops, DL);
+}
+
+
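+// Lower a constant-pool reference: wrap the target constant pool in an
+// AT_PCREL node under PIC, or in a CP node otherwise.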
+SDValue
+HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
+ EVT ValTy = Op.getValueType();
+ ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
+ unsigned Align = CPN->getAlignment();
+ Reloc::Model RM = HTM.getRelocationModel();
+ unsigned char TF = (RM == Reloc::PIC_) ? HexagonII::MO_PCREL : 0;
+
+ SDValue T;
+ if (CPN->isMachineConstantPoolEntry())
+ T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, TF);
+ else
+ T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, TF);
+ if (RM == Reloc::PIC_)
+ return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
+ return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
+}
+
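+// Lower a jump-table reference the same way: PC-relative under PIC,
+// otherwise a plain JT node.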
+SDValue
+HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int Idx = cast<JumpTableSDNode>(Op)->getIndex();
+ Reloc::Model RM = HTM.getRelocationModel();
+ if (RM == Reloc::PIC_) {
+ SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
+ return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
+ }
+
+ SDValue T = DAG.getTargetJumpTable(Idx, VT);
+ return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
+}
+
+SDValue
+HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
+ const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
+
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ MachinePointerInfo(), false, false, false, 0);
+ }
+
+ // Return LR, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+}
+
+SDValue
+HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
+ MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ HRI.getFrameRegister(), VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ return FrameAddr;
+}
+
+SDValue
+HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
+ SDLoc dl(Op);
+ return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
+}
+
+
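+// Lower a global address: CONST32/CONST32_GP under static relocation,
+// PC-relative addressing for locally visible globals under PIC, and a GOT
+// lookup for everything else.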
+SDValue
+HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ auto *GAN = cast<GlobalAddressSDNode>(Op);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ auto *GV = GAN->getGlobal();
+ int64_t Offset = GAN->getOffset();
+
+ auto &HLOF = *HTM.getObjFileLowering();
+ Reloc::Model RM = HTM.getRelocationModel();
+
+ if (RM == Reloc::Static) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
+ if (HLOF.IsGlobalInSmallSection(GV, HTM))
+ return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
+ return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
+ }
+
+ bool UsePCRel = GV->hasInternalLinkage() || GV->hasHiddenVisibility() ||
+ (GV->hasLocalLinkage() && !isa<Function>(GV));
+ if (UsePCRel) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
+ HexagonII::MO_PCREL);
+ return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
+ }
+
+ // Use GOT index.
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
+ SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
+ return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
+}
+
+// Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
+SDValue
+HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDLoc dl(Op);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+ Reloc::Model RM = HTM.getRelocationModel();
+ if (RM == Reloc::Static) {
+ SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
+ return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
+ }
+
+ SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
+ return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
+}
+
+SDValue
+HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
+ const {
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
+ HexagonII::MO_PCREL);
+ return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
+ const HexagonSubtarget &ST)
+ : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
+ Subtarget(ST) {
+ bool IsV4 = !Subtarget.hasV5TOps();
+ auto &HRI = *Subtarget.getRegisterInfo();
+ bool UseHVX = Subtarget.useHVXOps();
+ bool UseHVXSgl = Subtarget.useHVXSglOps();
+ bool UseHVXDbl = Subtarget.useHVXDblOps();
+
+ setPrefLoopAlignment(4);
+ setPrefFunctionAlignment(4);
+ setMinFunctionAlignment(2);
+ setInsertFencesForAtomic(false);
+ setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
+
+ if (EnableHexSDNodeSched)
+ setSchedulingPreference(Sched::VLIW);
+ else
+ setSchedulingPreference(Sched::Source);
+
+ // Limits for inline expansion of memcpy/memmove
+ MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
+ MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
+ MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
+ MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
+ MaxStoresPerMemset = MaxStoresPerMemsetCL;
+ MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;
+
+ //
+ // Set up register classes.
+ //
+
+ addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
+ addRegisterClass(MVT::v2i1, &Hexagon::PredRegsRegClass); // bbbbaaaa
+ addRegisterClass(MVT::v4i1, &Hexagon::PredRegsRegClass); // ddccbbaa
+ addRegisterClass(MVT::v8i1, &Hexagon::PredRegsRegClass); // hgfedcba
+ addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v8i8, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
+
+ if (Subtarget.hasV5TOps()) {
+ addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
+ }
+
+ if (Subtarget.hasV60TOps()) {
+ if (Subtarget.useHVXSglOps()) {
+ addRegisterClass(MVT::v64i8, &Hexagon::VectorRegsRegClass);
+ addRegisterClass(MVT::v32i16, &Hexagon::VectorRegsRegClass);
+ addRegisterClass(MVT::v16i32, &Hexagon::VectorRegsRegClass);
+ addRegisterClass(MVT::v8i64, &Hexagon::VectorRegsRegClass);
+ addRegisterClass(MVT::v128i8, &Hexagon::VecDblRegsRegClass);
+ addRegisterClass(MVT::v64i16, &Hexagon::VecDblRegsRegClass);
+ addRegisterClass(MVT::v32i32, &Hexagon::VecDblRegsRegClass);
+ addRegisterClass(MVT::v16i64, &Hexagon::VecDblRegsRegClass);
+ addRegisterClass(MVT::v512i1, &Hexagon::VecPredRegsRegClass);
+ } else if (Subtarget.useHVXDblOps()) {
+ addRegisterClass(MVT::v128i8, &Hexagon::VectorRegs128BRegClass);
+ addRegisterClass(MVT::v64i16, &Hexagon::VectorRegs128BRegClass);
+ addRegisterClass(MVT::v32i32, &Hexagon::VectorRegs128BRegClass);
+ addRegisterClass(MVT::v16i64, &Hexagon::VectorRegs128BRegClass);
+ addRegisterClass(MVT::v256i8, &Hexagon::VecDblRegs128BRegClass);
+ addRegisterClass(MVT::v128i16, &Hexagon::VecDblRegs128BRegClass);
+ addRegisterClass(MVT::v64i32, &Hexagon::VecDblRegs128BRegClass);
+ addRegisterClass(MVT::v32i64, &Hexagon::VecDblRegs128BRegClass);
+ addRegisterClass(MVT::v1024i1, &Hexagon::VecPredRegs128BRegClass);
+ }
+
+ }
+
+ //
+ // Handling of scalar operations.
+ //
+ // All operations default to "legal", except:
+ // - indexed loads and stores (pre-/post-incremented),
+ // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
+ // ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
+ // FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
+ // FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
+ // which default to "expand" for at least one type.
+
+ // Misc operations.
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal); // Default: expand
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand
+
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+ setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ // Custom legalize GlobalAddress nodes into CONST32.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+
+ // Hexagon needs to optimize cases with negative constants.
+ setOperationAction(ISD::SETCC, MVT::i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::i16, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+
+ if (EmitJumpTables)
+ setMinimumJumpTableEntries(2);
+ else
+ setMinimumJumpTableEntries(MinimumJumpTables);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ // Hexagon has instructions for add/sub with carry. The problem with
+ // modeling these instructions is that they produce 2 results: Rdd and Px.
+ // To model the update of Px, we will have to use Defs[p0..p3] which will
+  // cause any predicate live range to spill. So, we pretend we don't have
+ // these instructions.
+ setOperationAction(ISD::ADDE, MVT::i8, Expand);
+ setOperationAction(ISD::ADDE, MVT::i16, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i64, Expand);
+ setOperationAction(ISD::SUBE, MVT::i8, Expand);
+ setOperationAction(ISD::SUBE, MVT::i16, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i64, Expand);
+ setOperationAction(ISD::ADDC, MVT::i8, Expand);
+ setOperationAction(ISD::ADDC, MVT::i16, Expand);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDC, MVT::i64, Expand);
+ setOperationAction(ISD::SUBC, MVT::i8, Expand);
+ setOperationAction(ISD::SUBC, MVT::i16, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i64, Expand);
+
+ // Only add and sub that detect overflow are the saturating ones.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::UADDO, VT, Expand);
+ setOperationAction(ISD::SADDO, VT, Expand);
+ setOperationAction(ISD::USUBO, VT, Expand);
+ setOperationAction(ISD::SSUBO, VT, Expand);
+ }
+
+ setOperationAction(ISD::CTLZ, MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ, MVT::i16, Promote);
+ setOperationAction(ISD::CTTZ, MVT::i8, Promote);
+ setOperationAction(ISD::CTTZ, MVT::i16, Promote);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Promote);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote);
+
+ // In V5, popcount can count # of 1s in i64 but returns i32.
+ // On V4 it will be expanded (set later).
+ setOperationAction(ISD::CTPOP, MVT::i8, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i16, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i32, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+
+ // We custom lower i64 to i64 mul, so that it is not considered as a legal
+ // operation. There is a pattern that will match i64 mul and transform it
+ // to a series of instructions.
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+
+ for (unsigned IntExpOp :
+ { ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
+ ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
+ ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
+ ISD::SMUL_LOHI, ISD::UMUL_LOHI }) {
+ setOperationAction(IntExpOp, MVT::i32, Expand);
+ setOperationAction(IntExpOp, MVT::i64, Expand);
+ }
+
+ for (unsigned FPExpOp :
+ {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
+ ISD::FPOW, ISD::FCOPYSIGN}) {
+ setOperationAction(FPExpOp, MVT::f32, Expand);
+ setOperationAction(FPExpOp, MVT::f64, Expand);
+ }
+
+ // No extending loads from i32.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
+ }
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ // Turn FP extload into load/fextend.
+ for (MVT VT : MVT::fp_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+
+ // Expand BR_CC and SELECT_CC for all integer and fp types.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::BR_CC, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ }
+ for (MVT VT : MVT::fp_valuetypes()) {
+ setOperationAction(ISD::BR_CC, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ }
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+ //
+ // Handling of vector operations.
+ //
+
+  // Custom lower v4i16 load only. Let the v4i16 store be
+  // promoted for now.
+ promoteLdStType(MVT::v4i8, MVT::i32);
+ promoteLdStType(MVT::v2i16, MVT::i32);
+ promoteLdStType(MVT::v8i8, MVT::i64);
+ promoteLdStType(MVT::v2i32, MVT::i64);
+
+ setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i16, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::i64);
+ AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64);
+
+ // Set the action for vector operations to "expand", then override it with
+ // either "custom" or "legal" for specific cases.
+ static const unsigned VectExpOps[] = {
+ // Integer arithmetic:
+ ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV,
+ ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::ADDC,
+ ISD::SUBC, ISD::SADDO, ISD::UADDO, ISD::SSUBO, ISD::USUBO,
+ ISD::SMUL_LOHI, ISD::UMUL_LOHI,
+ // Logical/bit:
+ ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR,
+ ISD::CTPOP, ISD::CTLZ, ISD::CTTZ, ISD::CTLZ_ZERO_UNDEF,
+ ISD::CTTZ_ZERO_UNDEF,
+ // Floating point arithmetic/math functions:
+ ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV,
+ ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN,
+ ISD::FCOS, ISD::FPOWI, ISD::FPOW, ISD::FLOG, ISD::FLOG2,
+ ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC,
+ ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR,
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
+ // Misc:
+ ISD::SELECT, ISD::ConstantPool,
+ // Vector:
+ ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR,
+ ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT,
+ ISD::EXTRACT_SUBVECTOR, ISD::INSERT_SUBVECTOR,
+ ISD::CONCAT_VECTORS, ISD::VECTOR_SHUFFLE
+ };
+
+ for (MVT VT : MVT::vector_valuetypes()) {
+ for (unsigned VectExpOp : VectExpOps)
+ setOperationAction(VectExpOp, VT, Expand);
+
+ // Expand all extended loads and truncating stores:
+ for (MVT TargetVT : MVT::vector_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
+ setTruncStoreAction(VT, TargetVT, Expand);
+ }
+
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+ }
+
+ // Types natively supported:
+ for (MVT NativeVT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v32i1, MVT::v64i1,
+ MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v1i32,
+ MVT::v2i32, MVT::v1i64}) {
+ setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, NativeVT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, NativeVT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, NativeVT, Custom);
+
+ setOperationAction(ISD::ADD, NativeVT, Legal);
+ setOperationAction(ISD::SUB, NativeVT, Legal);
+ setOperationAction(ISD::MUL, NativeVT, Legal);
+ setOperationAction(ISD::AND, NativeVT, Legal);
+ setOperationAction(ISD::OR, NativeVT, Legal);
+ setOperationAction(ISD::XOR, NativeVT, Legal);
+ }
+
+ setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
+ if (UseHVX) {
+ if (UseHVXSgl) {
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom);
+ } else if (UseHVXDbl) {
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom);
+ } else {
+ llvm_unreachable("Unrecognized HVX mode");
+ }
+ }
+ // Subtarget-specific operation actions.
+ //
+ if (Subtarget.hasV5TOps()) {
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
+
+ } else { // V4
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i8, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i16, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+
+ // Expand these operations for both f32 and f64:
+ for (unsigned FPExpOpV4 :
+ {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA}) {
+ setOperationAction(FPExpOpV4, MVT::f32, Expand);
+ setOperationAction(FPExpOpV4, MVT::f64, Expand);
+ }
+
+ for (ISD::CondCode FPExpCCV4 :
+ {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE,
+ ISD::SETUO, ISD::SETO}) {
+ setCondCodeAction(FPExpCCV4, MVT::f32, Expand);
+ setCondCodeAction(FPExpCCV4, MVT::f64, Expand);
+ }
+ }
+
+ // Handling of indexed loads/stores: default is "expand".
+ //
+ for (MVT LSXTy : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
+ setIndexedLoadAction(ISD::POST_INC, LSXTy, Legal);
+ setIndexedStoreAction(ISD::POST_INC, LSXTy, Legal);
+ }
+
+ if (UseHVXDbl) {
+ for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) {
+ setIndexedLoadAction(ISD::POST_INC, VT, Legal);
+ setIndexedStoreAction(ISD::POST_INC, VT, Legal);
+ }
+ }
+
+ computeRegisterProperties(&HRI);
+
+ //
+ // Library calls for unsupported operations
+ //
+ bool FastMath = EnableFastMath;
+
+ setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
+ setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
+ setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
+ setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
+ setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
+ setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
+ setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
+
+ setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
+ setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
+ setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
+ setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
+ setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
+
+ if (IsV4) {
+ // Handle single-precision floating point operations on V4.
+ if (FastMath) {
+ setLibcallName(RTLIB::ADD_F32, "__hexagon_fast_addsf3");
+ setLibcallName(RTLIB::SUB_F32, "__hexagon_fast_subsf3");
+ setLibcallName(RTLIB::MUL_F32, "__hexagon_fast_mulsf3");
+ setLibcallName(RTLIB::OGT_F32, "__hexagon_fast_gtsf2");
+ setLibcallName(RTLIB::OLT_F32, "__hexagon_fast_ltsf2");
+ // Double-precision compares.
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_fast_gtdf2");
+ setLibcallName(RTLIB::OLT_F64, "__hexagon_fast_ltdf2");
+ } else {
+ setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
+ setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
+ setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
+ setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
+ setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
+ // Double-precision compares.
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
+ setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
+ }
+ }
+
+ // This is the only fast library function for sqrtd.
+ if (FastMath)
+ setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");
+
+ // Prefix is: nothing for "slow-math",
+ // "fast2_" for V4 fast-math and V5+ fast-math double-precision
+ // (actually, keep fast-math and fast-math2 separate for now)
+ if (FastMath) {
+ setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
+ setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
+ setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
+ setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
+ // Calling __hexagon_fast2_divsf3 with fast-math on V5 (ok).
+ setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
+ } else {
+ setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
+ setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
+ setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
+ setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
+ setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
+ }
+
+ if (Subtarget.hasV5TOps()) {
+ if (FastMath)
+ setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
+ else
+ setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
+ } else {
+ // V4
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
+ setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
+ setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
+ setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
+ setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
+ setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
+ setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
+ setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
+ setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
+ setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
+ setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
+ setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
+ setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
+ }
+
+ // These cause problems when the shift amount is non-constant.
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
+}
+
+
+const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch ((HexagonISD::NodeType)Opcode) {
+ case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA";
+ case HexagonISD::ARGEXTEND: return "HexagonISD::ARGEXTEND";
+ case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT";
+ case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL";
+ case HexagonISD::BARRIER: return "HexagonISD::BARRIER";
+ case HexagonISD::CALLR: return "HexagonISD::CALLR";
+ case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr";
+ case HexagonISD::CALLv3: return "HexagonISD::CALLv3";
+ case HexagonISD::COMBINE: return "HexagonISD::COMBINE";
+ case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
+ case HexagonISD::CONST32: return "HexagonISD::CONST32";
+ case HexagonISD::CP: return "HexagonISD::CP";
+ case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH";
+ case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
+ case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU";
+ case HexagonISD::EXTRACTURP: return "HexagonISD::EXTRACTURP";
+ case HexagonISD::FCONST32: return "HexagonISD::FCONST32";
+ case HexagonISD::INSERT: return "HexagonISD::INSERT";
+ case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP";
+ case HexagonISD::JT: return "HexagonISD::JT";
+ case HexagonISD::PACKHL: return "HexagonISD::PACKHL";
+ case HexagonISD::POPCOUNT: return "HexagonISD::POPCOUNT";
+ case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
+ case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB";
+ case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH";
+ case HexagonISD::SHUFFOB: return "HexagonISD::SHUFFOB";
+ case HexagonISD::SHUFFOH: return "HexagonISD::SHUFFOH";
+ case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
+ case HexagonISD::VCMPBEQ: return "HexagonISD::VCMPBEQ";
+ case HexagonISD::VCMPBGT: return "HexagonISD::VCMPBGT";
+ case HexagonISD::VCMPBGTU: return "HexagonISD::VCMPBGTU";
+ case HexagonISD::VCMPHEQ: return "HexagonISD::VCMPHEQ";
+ case HexagonISD::VCMPHGT: return "HexagonISD::VCMPHGT";
+ case HexagonISD::VCMPHGTU: return "HexagonISD::VCMPHGTU";
+ case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ";
+ case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT";
+ case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU";
+ case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE";
+ case HexagonISD::VSHLH: return "HexagonISD::VSHLH";
+ case HexagonISD::VSHLW: return "HexagonISD::VSHLW";
+  case HexagonISD::VSPLATB:      return "HexagonISD::VSPLATB";
+ case HexagonISD::VSPLATH: return "HexagonISD::VSPLATH";
+ case HexagonISD::VSRAH: return "HexagonISD::VSRAH";
+ case HexagonISD::VSRAW: return "HexagonISD::VSRAW";
+ case HexagonISD::VSRLH: return "HexagonISD::VSRLH";
+ case HexagonISD::VSRLW: return "HexagonISD::VSRLW";
+ case HexagonISD::VSXTBH: return "HexagonISD::VSXTBH";
+ case HexagonISD::VSXTBW: return "HexagonISD::VSXTBW";
+ case HexagonISD::OP_END: break;
+ }
+ return nullptr;
+}
+
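+// Truncating i64 to i32 is free on Hexagon: the low word of a 64-bit register
+// pair can be used directly.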
+bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ EVT MTy1 = EVT::getEVT(Ty1);
+ EVT MTy2 = EVT::getEVT(Ty2);
+ if (!MTy1.isSimple() || !MTy2.isSimple())
+ return false;
+ return (MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32);
+}
+
+bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isSimple() || !VT2.isSimple())
+ return false;
+ return (VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32);
+}
+
+// shouldExpandBuildVectorWithShuffles
+// Should we expand the build vector with shuffles?
+bool
+HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
+ unsigned DefinedValues) const {
+
+ // Hexagon vector shuffle operates on element sizes of bytes or halfwords
+ EVT EltVT = VT.getVectorElementType();
+ int EltBits = EltVT.getSizeInBits();
+ if ((EltBits != 8) && (EltBits != 16))
+ return false;
+
+ return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
+}
+
+// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3). V1 and
+// V2 are the two vectors to select data from, V3 is the permutation.
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+
+ if (V2.getOpcode() == ISD::UNDEF)
+ V2 = V1;
+
+ if (SVN->isSplat()) {
+ int Lane = SVN->getSplatIndex();
+ if (Lane == -1) Lane = 0;
+
+ // Test if V1 is a SCALAR_TO_VECTOR.
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return createSplat(DAG, dl, VT, V1.getOperand(0));
+
+ // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
+ // (and probably will turn into a SCALAR_TO_VECTOR once legalization
+ // reaches it).
+ if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
+ !isa<ConstantSDNode>(V1.getOperand(0))) {
+ bool IsScalarToVector = true;
+ for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
+ if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
+ IsScalarToVector = false;
+ break;
+ }
+ if (IsScalarToVector)
+ return createSplat(DAG, dl, VT, V1.getOperand(0));
+ }
+ return createSplat(DAG, dl, VT, DAG.getConstant(Lane, dl, MVT::i32));
+ }
+
+  // FIXME: We need to support more general vector shuffles. See the comment
+  // below, borrowed from the ARM backend, about handling the general case.
+  // For now, let expand handle these.
+ return SDValue();
+
+ // If the shuffle is not directly supported and it has 4 elements, use
+ // the PerfectShuffle-generated table to synthesize it from other shuffles.
+}
+
+// If BUILD_VECTOR has same base element repeated several times,
+// report true.
+static bool isCommonSplatElement(BuildVectorSDNode *BVN) {
+ unsigned NElts = BVN->getNumOperands();
+ SDValue V0 = BVN->getOperand(0);
+
+ for (unsigned i = 1, e = NElts; i != e; ++i) {
+ if (BVN->getOperand(i) != V0)
+ return false;
+ }
+ return true;
+}
+
+// LowerVECTOR_SHIFT - Lower a vector shift. Try to convert
+// <VT> = SHL/SRA/SRL <VT> by <VT> to Hexagon specific
+// <VT> = SHL/SRA/SRL <VT> by <IT/i32>.
+static SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) {
+  BuildVectorSDNode *BVN = nullptr;
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDValue V3;
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+
+ if ((BVN = dyn_cast<BuildVectorSDNode>(V1.getNode())) &&
+ isCommonSplatElement(BVN))
+ V3 = V2;
+ else if ((BVN = dyn_cast<BuildVectorSDNode>(V2.getNode())) &&
+ isCommonSplatElement(BVN))
+ V3 = V1;
+ else
+ return SDValue();
+
+ SDValue CommonSplat = BVN->getOperand(0);
+ SDValue Result;
+
+ if (VT.getSimpleVT() == MVT::v4i16) {
+ switch (Op.getOpcode()) {
+ case ISD::SRA:
+ Result = DAG.getNode(HexagonISD::VSRAH, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SHL:
+ Result = DAG.getNode(HexagonISD::VSHLH, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SRL:
+ Result = DAG.getNode(HexagonISD::VSRLH, dl, VT, V3, CommonSplat);
+ break;
+ default:
+ return SDValue();
+ }
+ } else if (VT.getSimpleVT() == MVT::v2i32) {
+ switch (Op.getOpcode()) {
+ case ISD::SRA:
+ Result = DAG.getNode(HexagonISD::VSRAW, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SHL:
+ Result = DAG.getNode(HexagonISD::VSHLW, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SRL:
+ Result = DAG.getNode(HexagonISD::VSRLW, dl, VT, V3, CommonSplat);
+ break;
+ default:
+ return SDValue();
+ }
+ } else {
+ return SDValue();
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+}
+
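+// Lower BUILD_VECTOR for vectors of at most 64 bits: try a splat first, then
+// COMBINE for v2i32 and PACKHL for v2i16 with non-constant elements; in the
+// general case, materialize the constant part as a single i32/i64 constant
+// and insert the non-constant elements with INSERTRP.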
+SDValue
+HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+
+ unsigned Size = VT.getSizeInBits();
+
+ // Only handle vectors of 64 bits or shorter.
+ if (Size > 64)
+ return SDValue();
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned NElts = BVN->getNumOperands();
+
+ // Try to generate a SPLAT instruction.
+ if ((VT.getSimpleVT() == MVT::v4i8 || VT.getSimpleVT() == MVT::v4i16) &&
+ (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, true) && SplatBitSize <= 16)) {
+ unsigned SplatBits = APSplatBits.getZExtValue();
+ int32_t SextVal = ((int32_t) (SplatBits << (32 - SplatBitSize)) >>
+ (32 - SplatBitSize));
+ return createSplat(DAG, dl, VT, DAG.getConstant(SextVal, dl, MVT::i32));
+ }
+
+ // Try to generate COMBINE to build v2i32 vectors.
+ if (VT.getSimpleVT() == MVT::v2i32) {
+ SDValue V0 = BVN->getOperand(0);
+ SDValue V1 = BVN->getOperand(1);
+
+ if (V0.getOpcode() == ISD::UNDEF)
+ V0 = DAG.getConstant(0, dl, MVT::i32);
+ if (V1.getOpcode() == ISD::UNDEF)
+ V1 = DAG.getConstant(0, dl, MVT::i32);
+
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(V0);
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(V1);
+ // If the element isn't a constant, it is in a register:
+ // generate a COMBINE Register Register instruction.
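+    // (COMBINE takes the most-significant word first, so (V1, V0) places
+    // lane 1 in the upper half and lane 0 in the lower half of the pair.)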
+ if (!C0 || !C1)
+ return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0);
+
+ // If one of the operands is an 8 bit integer constant, generate
+ // a COMBINE Immediate Immediate instruction.
+ if (isInt<8>(C0->getSExtValue()) ||
+ isInt<8>(C1->getSExtValue()))
+ return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0);
+ }
+
+ // Try to generate a S2_packhl to build v2i16 vectors.
+ if (VT.getSimpleVT() == MVT::v2i16) {
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
+ continue;
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(BVN->getOperand(i));
+ // If the element isn't a constant, it is in a register:
+ // generate a S2_packhl instruction.
+ if (!Cst) {
+ SDValue pack = DAG.getNode(HexagonISD::PACKHL, dl, MVT::v4i16,
+ BVN->getOperand(1), BVN->getOperand(0));
+
+ return DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::v2i16,
+ pack);
+ }
+ }
+ }
+
+ // In the general case, generate a CONST32 or a CONST64 for constant vectors,
+ // and insert_vector_elt for all the other cases.
+ uint64_t Res = 0;
+ unsigned EltSize = Size / NElts;
+ SDValue ConstVal;
+ uint64_t Mask = ~uint64_t(0ULL) >> (64 - EltSize);
+ bool HasNonConstantElements = false;
+
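+  // Walk the lanes from the highest-numbered one down, packing each constant
+  // into Res.  For example, a constant v4i16 <1, 2, 3, 4> produces
+  // Res = 0x0004000300020001.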
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ // LLVM's BUILD_VECTOR operands are in Little Endian mode, whereas Hexagon's
+ // combine, const64, etc. are Big Endian.
+ unsigned OpIdx = NElts - i - 1;
+ SDValue Operand = BVN->getOperand(OpIdx);
+ if (Operand.getOpcode() == ISD::UNDEF)
+ continue;
+
+ int64_t Val = 0;
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Operand))
+ Val = Cst->getSExtValue();
+ else
+ HasNonConstantElements = true;
+
+ Val &= Mask;
+ Res = (Res << EltSize) | Val;
+ }
+
+ if (Size == 64)
+ ConstVal = DAG.getConstant(Res, dl, MVT::i64);
+ else
+ ConstVal = DAG.getConstant(Res, dl, MVT::i32);
+
+ // When there are non constant operands, add them with INSERT_VECTOR_ELT to
+ // ConstVal, the constant part of the vector.
+ if (HasNonConstantElements) {
+ EVT EltVT = VT.getVectorElementType();
+ SDValue Width = DAG.getConstant(EltVT.getSizeInBits(), dl, MVT::i64);
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, dl, MVT::i64));
+
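+    // The third operand of INSERTRP packs the field width into bits 63:32 and
+    // the bit offset into bits 31:0 (Combined, computed per element below).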
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ // LLVM's BUILD_VECTOR operands are in Little Endian mode, whereas Hexagon
+ // is Big Endian.
+ unsigned OpIdx = NElts - i - 1;
+ SDValue Operand = BVN->getOperand(OpIdx);
+ if (isa<ConstantSDNode>(Operand))
+ // This operand is already in ConstVal.
+ continue;
+
+ if (VT.getSizeInBits() == 64 &&
+ Operand.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, dl, MVT::i32);
+ Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand);
+ }
+
+ SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64);
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width);
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+ const SDValue Ops[] = {ConstVal, Operand, Combined};
+
+ if (VT.getSizeInBits() == 32)
+ ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
+ else
+ ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
+ }
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal);
+}
+
+SDValue
+HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ bool UseHVX = Subtarget.useHVXOps();
+ EVT VT = Op.getValueType();
+ unsigned NElts = Op.getNumOperands();
+ SDValue Vec0 = Op.getOperand(0);
+ EVT VecVT = Vec0.getValueType();
+ unsigned Width = VecVT.getSizeInBits();
+
+ if (NElts == 2) {
+ MVT ST = VecVT.getSimpleVT();
+ // We are trying to concat two v2i16 to a single v4i16, or two v4i8
+ // into a single v8i8.
+ if (ST == MVT::v2i16 || ST == MVT::v4i8)
+ return DAG.getNode(HexagonISD::COMBINE, dl, VT, Op.getOperand(1), Vec0);
+
+ if (UseHVX) {
+ assert((Width == 64*8 && Subtarget.useHVXSglOps()) ||
+ (Width == 128*8 && Subtarget.useHVXDblOps()));
+ SDValue Vec1 = Op.getOperand(1);
+ MVT OpTy = Subtarget.useHVXSglOps() ? MVT::v16i32 : MVT::v32i32;
+ MVT ReTy = Subtarget.useHVXSglOps() ? MVT::v32i32 : MVT::v64i32;
+ SDValue B0 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec0);
+ SDValue B1 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec1);
+ SDValue VC = DAG.getNode(HexagonISD::VCOMBINE, dl, ReTy, B1, B0);
+ return DAG.getNode(ISD::BITCAST, dl, VT, VC);
+ }
+ }
+
+ if (VT.getSizeInBits() != 32 && VT.getSizeInBits() != 64)
+ return SDValue();
+
+ SDValue C0 = DAG.getConstant(0, dl, MVT::i64);
+ SDValue C32 = DAG.getConstant(32, dl, MVT::i64);
+ SDValue W = DAG.getConstant(Width, dl, MVT::i64);
+ // Create the "width" part of the argument to insert_rp/insertp_rp.
+ SDValue S = DAG.getNode(ISD::SHL, dl, MVT::i64, W, C32);
+ SDValue V = C0;
+
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ unsigned N = NElts-i-1;
+ SDValue OpN = Op.getOperand(N);
+
+ if (VT.getSizeInBits() == 64 && OpN.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, dl, MVT::i32);
+ OpN = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, OpN);
+ }
+ SDValue Idx = DAG.getConstant(N, dl, MVT::i64);
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, W);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset);
+ if (VT.getSizeInBits() == 32)
+ V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or});
+ else
+ V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or});
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, V);
+}
+
+SDValue
+HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int VTN = VT.isVector() ? VT.getVectorNumElements() : 1;
+ SDLoc dl(Op);
+ SDValue Idx = Op.getOperand(1);
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ int EltSize = EltVT.getSizeInBits();
+ SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT ?
+ EltSize : VTN * EltSize, dl, MVT::i64);
+
+ // Constant element number.
+ if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Idx)) {
+ uint64_t X = CI->getZExtValue();
+ SDValue Offset = DAG.getConstant(X * EltSize, dl, MVT::i32);
+ const SDValue Ops[] = {Vec, Width, Offset};
+
+ ConstantSDNode *CW = dyn_cast<ConstantSDNode>(Width);
+ assert(CW && "Non constant width in LowerEXTRACT_VECTOR");
+
+ SDValue N;
+ MVT SVT = VecVT.getSimpleVT();
+ uint64_t W = CW->getZExtValue();
+
+ if (W == 32) {
+ // Translate this node into EXTRACT_SUBREG.
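+      // The hireg cases below all correspond to an extract whose first bit
+      // is exactly bit 32 of the 64-bit source register pair.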
+      unsigned Subreg = 0;
+
+      if (X == 0)
+        Subreg = Hexagon::subreg_loreg;
+ else if (SVT == MVT::v2i32 && X == 1)
+ Subreg = Hexagon::subreg_hireg;
+ else if (SVT == MVT::v4i16 && X == 2)
+ Subreg = Hexagon::subreg_hireg;
+ else if (SVT == MVT::v8i8 && X == 4)
+ Subreg = Hexagon::subreg_hireg;
+ else
+ llvm_unreachable("Bad offset");
+ N = DAG.getTargetExtractSubreg(Subreg, dl, MVT::i32, Vec);
+
+ } else if (VecVT.getSizeInBits() == 32) {
+ N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i32, Ops);
+ } else {
+ N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i64, Ops);
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N);
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+ }
+
+ // Variable element number.
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx,
+ DAG.getConstant(EltSize, dl, MVT::i32));
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, dl, MVT::i64));
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+
+ const SDValue Ops[] = {Vec, Combined};
+
+ SDValue N;
+ if (VecVT.getSizeInBits() == 32) {
+ N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i32, Ops);
+ } else {
+ N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i64, Ops);
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+}
+
+SDValue
+HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int VTN = VT.isVector() ? VT.getVectorNumElements() : 1;
+ SDLoc dl(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue Val = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ int EltSize = EltVT.getSizeInBits();
+ SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::INSERT_VECTOR_ELT ?
+ EltSize : VTN * EltSize, dl, MVT::i64);
+
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Idx)) {
+ SDValue Offset = DAG.getConstant(C->getSExtValue() * EltSize, dl, MVT::i32);
+ const SDValue Ops[] = {Vec, Val, Width, Offset};
+
+ SDValue N;
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, Ops);
+ else
+ N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, Ops);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+ }
+
+ // Variable element number.
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx,
+ DAG.getConstant(EltSize, dl, MVT::i32));
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, dl, MVT::i64));
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+
+ if (VT.getSizeInBits() == 64 &&
+ Val.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, dl, MVT::i32);
+ Val = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Val);
+ }
+
+ const SDValue Ops[] = {Vec, Val, Combined};
+
+ SDValue N;
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
+ else
+ N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+}
+
+bool
+HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
+ // Assuming the caller does not have either a signext or zeroext modifier, and
+ // only one value is accepted, any reasonable truncation is allowed.
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+
+ // FIXME: in principle up to 64-bit could be made safe, but it would be very
+ // fragile at the moment: any support for multiple value returns would be
+ // liable to disallow tail calls involving i64 -> iN truncation in many cases.
+ return Ty1->getPrimitiveSizeInBits() <= 32;
+}
+
+SDValue
+HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ SDLoc dl(Op);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+ // Mark function as containing a call to EH_RETURN.
+ HexagonMachineFunctionInfo *FuncInfo =
+ DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
+ FuncInfo->setHasEHReturn();
+
+ unsigned OffsetReg = Hexagon::R28;
+
+ SDValue StoreAddr =
+ DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
+ DAG.getIntPtrConstant(4, dl));
+ Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
+ false, false, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
+
+  // Not needed; we already use it as an explicit input to EH_RETURN.
+ // MF.getRegInfo().addLiveOut(OffsetReg);
+
+ return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
+}
+
+SDValue
+HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ default:
+#ifndef NDEBUG
+ Op.getNode()->dumpr(&DAG);
+ if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
+ errs() << "Check for a non-legal type in this operation\n";
+#endif
+ llvm_unreachable("Should not custom lower this!");
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::INSERT_SUBVECTOR: return LowerINSERT_VECTOR(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_VECTOR(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR(Op, DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SRA:
+ case ISD::SHL:
+ case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
+ // Frame & Return address. Currently unimplemented.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
+ case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ // Custom lower some vector loads.
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VSELECT: return LowerVSELECT(Op, DAG);
+ case ISD::CTPOP: return LowerCTPOP(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
+ }
+}
+
+/// Returns relocation base for the given PIC jumptable.
+SDValue
+HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ int Idx = cast<JumpTableSDNode>(Table)->getIndex();
+ EVT VT = Table.getValueType();
+ SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
+ return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
+}
+
+MachineBasicBlock *
+HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB)
+ const {
+ switch (MI->getOpcode()) {
+ case Hexagon::ALLOCA: {
+ MachineFunction *MF = BB->getParent();
+ auto *FuncInfo = MF->getInfo<HexagonMachineFunctionInfo>();
+ FuncInfo->addAllocaAdjustInst(MI);
+ return BB;
+ }
+ default: llvm_unreachable("Unexpected instr type to insert");
+ } // switch
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::pair<unsigned, const TargetRegisterClass *>
+HexagonTargetLowering::getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
+ bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps();
+
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r': // R0-R31
+ switch (VT.SimpleTy) {
+ default:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
+ case MVT::i32:
+ case MVT::i16:
+ case MVT::i8:
+ case MVT::f32:
+ return std::make_pair(0U, &Hexagon::IntRegsRegClass);
+ case MVT::i64:
+ case MVT::f64:
+ return std::make_pair(0U, &Hexagon::DoubleRegsRegClass);
+ }
+ case 'q': // q0-q3
+ switch (VT.SimpleTy) {
+ default:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
+ case MVT::v1024i1:
+ case MVT::v512i1:
+ case MVT::v32i16:
+ case MVT::v16i32:
+ case MVT::v64i8:
+ case MVT::v8i64:
+ return std::make_pair(0U, &Hexagon::VecPredRegsRegClass);
+ }
+ case 'v': // V0-V31
+ switch (VT.SimpleTy) {
+ default:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
+ case MVT::v16i32:
+ case MVT::v32i16:
+ case MVT::v64i8:
+ case MVT::v8i64:
+ return std::make_pair(0U, &Hexagon::VectorRegsRegClass);
+ case MVT::v32i32:
+ case MVT::v64i16:
+ case MVT::v16i64:
+ case MVT::v128i8:
+ if (Subtarget.hasV60TOps() && UseHVX && UseHVXDbl)
+ return std::make_pair(0U, &Hexagon::VectorRegs128BRegClass);
+ else
+ return std::make_pair(0U, &Hexagon::VecDblRegsRegClass);
+ case MVT::v256i8:
+ case MVT::v128i16:
+ case MVT::v64i32:
+ case MVT::v32i64:
+ return std::make_pair(0U, &Hexagon::VecDblRegs128BRegClass);
+ }
+
+ default:
+ llvm_unreachable("Unknown asm register class");
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
+
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ return Subtarget.hasV5TOps();
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented by
+/// AM is legal for this target, for a load/store of the specified type.
+bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
+ unsigned AS) const {
+  // Allow a sign-extended 11-bit immediate field; the check below uses the
+  // widest scaled range (+/-2^13) that such a field can address.
+ if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ int Scale = AM.Scale;
+ if (Scale < 0) Scale = -Scale;
+ switch (Scale) {
+ case 0: // No scale reg, "r+i", "r", or just "i".
+ break;
+ default: // No scaled addressing mode.
+ return false;
+ }
+ return true;
+}
+
+/// Return true if folding a constant offset with the given GlobalAddress is
+/// legal. It is frequently not legal in PIC relocation models.
+bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
+ const {
+ return HTM.getRelocationModel() == Reloc::Static;
+}
+
+
+/// isLegalICmpImmediate - Return true if the specified immediate is legal
+/// icmp immediate, that is the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
+bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ return Imm >= -512 && Imm <= 511;
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
+ SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // ***************************************************************************
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes.
+ // ***************************************************************************
+
+ // If this is a tail call via a function pointer, then don't do it!
+ if (!(isa<GlobalAddressSDNode>(Callee)) &&
+ !(isa<ExternalSymbolSDNode>(Callee))) {
+ return false;
+ }
+
+ // Do not optimize if the calling conventions do not match.
+ if (!CCMatch)
+ return false;
+
+ // Do not tail call optimize vararg calls.
+ if (isVarArg)
+ return false;
+
+ // Also avoid tail call optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+  // In addition to the cases above, we would also like to disable tail call
+  // optimization if the calling convention requires at least one outgoing
+  // argument to go on the stack. We cannot check that here because at this
+  // point that information is not available.
+ return true;
+}
+
+// Return true when the given node fits in a positive half word.
+bool llvm::isPositiveHalfWord(SDNode *N) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (CN && CN->getSExtValue() > 0 && isInt<16>(CN->getSExtValue()))
+ return true;
+
+ switch (N->getOpcode()) {
+ default:
+ return false;
+ case ISD::SIGN_EXTEND_INREG:
+ return true;
+ }
+}
+
+std::pair<const TargetRegisterClass*, uint8_t>
+HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const {
+ const TargetRegisterClass *RRC = nullptr;
+
+ uint8_t Cost = 1;
+ switch (VT.SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(TRI, VT);
+ case MVT::v64i8:
+ case MVT::v32i16:
+ case MVT::v16i32:
+ case MVT::v8i64:
+ RRC = &Hexagon::VectorRegsRegClass;
+ break;
+ case MVT::v128i8:
+ case MVT::v64i16:
+ case MVT::v32i32:
+ case MVT::v16i64:
+ if (Subtarget.hasV60TOps() && Subtarget.useHVXOps() &&
+ Subtarget.useHVXDblOps())
+ RRC = &Hexagon::VectorRegs128BRegClass;
+ else
+ RRC = &Hexagon::VecDblRegsRegClass;
+ break;
+ case MVT::v256i8:
+ case MVT::v128i16:
+ case MVT::v64i32:
+ case MVT::v32i64:
+ RRC = &Hexagon::VecDblRegs128BRegClass;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
+Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) const {
+ BasicBlock *BB = Builder.GetInsertBlock();
+ Module *M = BB->getParent()->getParent();
+ Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
+ unsigned SZ = Ty->getPrimitiveSizeInBits();
+ assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
+ Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
+ : Intrinsic::hexagon_L4_loadd_locked;
+ Value *Fn = Intrinsic::getDeclaration(M, IntID);
+ return Builder.CreateCall(Fn, Addr, "larx");
+}
+
+/// Perform a store-conditional operation to Addr. Return the status of the
+/// store. This should be 0 if the store succeeded, non-zero otherwise.
+Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder,
+ Value *Val, Value *Addr, AtomicOrdering Ord) const {
+ BasicBlock *BB = Builder.GetInsertBlock();
+ Module *M = BB->getParent()->getParent();
+ Type *Ty = Val->getType();
+ unsigned SZ = Ty->getPrimitiveSizeInBits();
+ assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
+ Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
+ : Intrinsic::hexagon_S4_stored_locked;
+ Value *Fn = Intrinsic::getDeclaration(M, IntID);
+ Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
+ Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
+ Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
+ return Ext;
+}
+
+TargetLowering::AtomicExpansionKind
+HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+ // Do not expand loads and stores that don't exceed 64 bits.
+ return LI->getType()->getPrimitiveSizeInBits() > 64
+ ? AtomicExpansionKind::LLOnly
+ : AtomicExpansionKind::None;
+}
+
+bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+ // Do not expand loads and stores that don't exceed 64 bits.
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
new file mode 100644
index 0000000..bf378b9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -0,0 +1,252 @@
+//===-- HexagonISelLowering.h - Hexagon DAG Lowering Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Hexagon uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H
+
+#include "Hexagon.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+// Return true when the given node fits in a positive half word.
+bool isPositiveHalfWord(SDNode *N);
+
+ namespace HexagonISD {
+ enum NodeType : unsigned {
+ OP_BEGIN = ISD::BUILTIN_OP_END,
+
+ CONST32 = OP_BEGIN,
+ CONST32_GP, // For marking data present in GP.
+ FCONST32,
+ ALLOCA,
+ ARGEXTEND,
+
+ AT_GOT, // Index in GOT.
+ AT_PCREL, // Offset relative to PC.
+
+ CALLv3, // A V3+ call instruction.
+ CALLv3nr, // A V3+ call instruction that doesn't return.
+ CALLR,
+
+ RET_FLAG, // Return with a flag operand.
+ BARRIER, // Memory barrier.
+ JT, // Jump table.
+ CP, // Constant pool.
+
+ POPCOUNT,
+ COMBINE,
+ PACKHL,
+ VSPLATB,
+ VSPLATH,
+ SHUFFEB,
+ SHUFFEH,
+ SHUFFOB,
+ SHUFFOH,
+ VSXTBH,
+ VSXTBW,
+ VSRAW,
+ VSRAH,
+ VSRLW,
+ VSRLH,
+ VSHLW,
+ VSHLH,
+ VCMPBEQ,
+ VCMPBGT,
+ VCMPBGTU,
+ VCMPHEQ,
+ VCMPHGT,
+ VCMPHGTU,
+ VCMPWEQ,
+ VCMPWGT,
+ VCMPWGTU,
+
+ INSERT,
+ INSERTRP,
+ EXTRACTU,
+ EXTRACTURP,
+ VCOMBINE,
+ TC_RETURN,
+ EH_RETURN,
+ DCFETCH,
+
+ OP_END
+ };
+ }
+
+ class HexagonSubtarget;
+
+ class HexagonTargetLowering : public TargetLowering {
+ int VarArgsFrameOffset; // Frame offset to start of varargs area.
+
+ bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize)
+ const;
+ void promoteLdStType(EVT VT, EVT PromotedLdStVT);
+ const HexagonTargetMachine &HTM;
+ const HexagonSubtarget &Subtarget;
+
+ public:
+ explicit HexagonTargetLowering(const TargetMachine &TM,
+ const HexagonSubtarget &ST);
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet,
+ bool isCallerStructRet, const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const;
+
+ bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
+ bool isTruncateFree(EVT VT1, EVT VT2) const override;
+
+ bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
+
+ // Should we expand the build vector with shuffles?
+ bool shouldExpandBuildVectorWithShuffles(EVT VT,
+ unsigned DefinedValues) const override;
+
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ const char *getTargetNodeName(unsigned Opcode) const override;
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+ const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const;
+
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, SDLoc dl,
+ SelectionDAG &DAG) const override;
+
+ bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+ return Hexagon::R0;
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+ return Hexagon::R1;
+ }
+
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ EVT getSetCCResultType(const DataLayout &, LLVMContext &C,
+ EVT VT) const override {
+ if (!VT.isVector())
+ return MVT::i1;
+ else
+ return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
+ }
+
+ bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const override;
+
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint, MVT VT) const override;
+
+ unsigned
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
+ if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
+ else if (ConstraintCode == "v")
+ return InlineAsm::Constraint_v;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
+ // Intrinsics
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ /// The type may be VoidTy, in which case only return true if the addressing
+ /// mode is legal for a load/store of any legal type.
+ /// TODO: Handle pre/postinc as well.
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS) const override;
+ /// Return true if folding a constant offset with the given GlobalAddress
+ /// is legal. It is frequently not legal in PIC relocation models.
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ bool isLegalICmpImmediate(int64_t Imm) const override;
+
+ /// Returns relocation base for the given PIC jumptable.
+ SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
+ const override;
+
+ // Handling of atomic RMW instructions.
+ Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) const override;
+ Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
+ Value *Addr, AtomicOrdering Ord) const override;
+ AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+ bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+ AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override {
+ return AtomicExpansionKind::LLSC;
+ }
+
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT)
+ const override;
+ };
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td
new file mode 100644
index 0000000..5a1a69b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td
@@ -0,0 +1,462 @@
+//==- HexagonInstrAlias.td - Hexagon Instruction Aliases ---*- tablegen -*--==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Hexagon Instruction Mappings
+//===----------------------------------------------------------------------===//
+
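+// Each InstAlias below maps an assembler string onto an existing instruction,
+// with any omitted operands (typically the immediate offset) fixed to a
+// constant in the result pattern.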
+
+def : InstAlias<"memb({GP}+#$addr) = $Nt.new",
+ (S2_storerbnewgp u16_0Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memh({GP}+#$addr) = $Nt.new",
+ (S2_storerhnewgp u16_1Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memw({GP}+#$addr) = $Nt.new",
+ (S2_storerinewgp u16_2Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memb({GP}+#$addr) = $Nt",
+ (S2_storerbgp u16_0Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memh({GP}+#$addr) = $Nt",
+ (S2_storerhgp u16_1Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memh({GP}+#$addr) = $Nt.h",
+ (S2_storerfgp u16_1Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memw({GP}+#$addr) = $Nt",
+ (S2_storerigp u16_2Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memd({GP}+#$addr) = $Nt",
+ (S2_storerdgp u16_3Imm:$addr, DoubleRegs:$Nt)>;
+
+def : InstAlias<"$Nt = memb({GP}+#$addr)",
+ (L2_loadrbgp IntRegs:$Nt, u16_0Imm:$addr)>;
+def : InstAlias<"$Nt = memub({GP}+#$addr)",
+ (L2_loadrubgp IntRegs:$Nt, u16_0Imm:$addr)>;
+def : InstAlias<"$Nt = memh({GP}+#$addr)",
+ (L2_loadrhgp IntRegs:$Nt, u16_1Imm:$addr)>;
+def : InstAlias<"$Nt = memuh({GP}+#$addr)",
+ (L2_loadruhgp IntRegs:$Nt, u16_1Imm:$addr)>;
+def : InstAlias<"$Nt = memw({GP}+#$addr)",
+ (L2_loadrigp IntRegs:$Nt, u16_2Imm:$addr)>;
+def : InstAlias<"$Nt = memd({GP}+#$addr)",
+ (L2_loadrdgp DoubleRegs:$Nt, u16_3Imm:$addr)>;
+
+// Alias of: memXX($Rs+#XX) = $Rt to memXX($Rs) = $Rt
+def : InstAlias<"memb($Rs) = $Rt",
+ (S2_storerb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memh($Rs) = $Rt",
+ (S2_storerh_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memh($Rs) = $Rt.h",
+ (S2_storerf_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memw($Rs) = $Rt",
+ (S2_storeri_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memb($Rs) = $Rt.new",
+ (S2_storerbnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memh($Rs) = $Rt.new",
+ (S2_storerhnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memw($Rs) = $Rt.new",
+ (S2_storerinew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memb($Rs) = #$S8",
+ (S4_storeirb_io IntRegs:$Rs, 0, s8Ext:$S8), 0>;
+
+def : InstAlias<"memh($Rs) = #$S8",
+ (S4_storeirh_io IntRegs:$Rs, 0, s8Ext:$S8), 0>;
+
+def : InstAlias<"memw($Rs) = #$S8",
+ (S4_storeiri_io IntRegs:$Rs, 0, s8Ext:$S8), 0>;
+
+def : InstAlias<"memd($Rs) = $Rtt",
+ (S2_storerd_io IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
+
+def : InstAlias<"memb($Rs) = setbit(#$U5)",
+ (L4_ior_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+def : InstAlias<"memh($Rs) = setbit(#$U5)",
+ (L4_ior_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+def : InstAlias<"memw($Rs) = setbit(#$U5)",
+ (L4_ior_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+def : InstAlias<"memb($Rs) = clrbit(#$U5)",
+ (L4_iand_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+def : InstAlias<"memh($Rs) = clrbit(#$U5)",
+ (L4_iand_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+def : InstAlias<"memw($Rs) = clrbit(#$U5)",
+ (L4_iand_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+// Alias of: $Rd = memXX($Rs+#XX) to $Rd = memXX($Rs)
+def : InstAlias<"$Rd = memb($Rs)",
+ (L2_loadrb_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = memub($Rs)",
+ (L2_loadrub_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = memh($Rs)",
+ (L2_loadrh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = memuh($Rs)",
+ (L2_loadruh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = memw($Rs)",
+ (L2_loadri_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rdd = memd($Rs)",
+ (L2_loadrd_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = memubh($Rs)",
+ (L2_loadbzw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rdd = memubh($Rs)",
+ (L2_loadbzw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = membh($Rs)",
+ (L2_loadbsw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rdd = membh($Rs)",
+ (L2_loadbsw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rdd = memb_fifo($Rs)",
+ (L2_loadalignb_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rdd = memh_fifo($Rs)",
+ (L2_loadalignh_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
+
+// Alias of: if ($Pt) $Rd = memXX($Rs + #$u6_X)
+// to: if ($Pt) $Rd = memXX($Rs)
+def : InstAlias<"if ($Pt) $Rd = memb($Rs)",
+ (L2_ploadrbt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt) $Rd = memub($Rs)",
+ (L2_ploadrubt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt) $Rd = memh($Rs)",
+ (L2_ploadrht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt) $Rd = memuh($Rs)",
+ (L2_ploadruht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt) $Rd = memw($Rs)",
+ (L2_ploadrit_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt) $Rdd = memd($Rs)",
+ (L2_ploadrdt_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+// Alias of: if ($Pt) memXX($Rs + #$u6_X) = $Rt
+// to: if ($Pt) memXX($Rs) = $Rt
+def : InstAlias<"if ($Pt) memb($Rs) = $Rt",
+ (S2_pstorerbt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memh($Rs) = $Rt",
+ (S2_pstorerht_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memh($Rs) = $Rt.h",
+ (S2_pstorerft_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memw($Rs) = $Rt",
+ (S2_pstorerit_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memd($Rs) = $Rtt",
+ (S2_pstorerdt_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
+
+def : InstAlias<"if ($Pt) memb($Rs) = $Rt.new",
+ (S2_pstorerbnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memh($Rs) = $Rt.new",
+ (S2_pstorerhnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memw($Rs) = $Rt.new",
+ (S2_pstorerinewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt.new) memb($Rs) = $Rt.new",
+ (S4_pstorerbnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt.new) memh($Rs) = $Rt.new",
+ (S4_pstorerhnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt.new) memw($Rs) = $Rt.new",
+ (S4_pstorerinewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+
+// Alias of: if (!$Pt) $Rd = memXX($Rs + #$u6_X)
+// to: if (!$Pt) $Rd = memXX($Rs)
+def : InstAlias<"if (!$Pt) $Rd = memb($Rs)",
+ (L2_ploadrbf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt) $Rd = memub($Rs)",
+ (L2_ploadrubf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt) $Rd = memh($Rs)",
+ (L2_ploadrhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt) $Rd = memuh($Rs)",
+ (L2_ploadruhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt) $Rd = memw($Rs)",
+ (L2_ploadrif_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt) $Rdd = memd($Rs)",
+ (L2_ploadrdf_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+// Alias of: if (!$Pt) memXX($Rs + #$u6_X) = $Rt
+// to: if (!$Pt) memXX($Rs) = $Rt
+def : InstAlias<"if (!$Pt) memb($Rs) = $Rt",
+ (S2_pstorerbf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memh($Rs) = $Rt",
+ (S2_pstorerhf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.h",
+ (S2_pstorerff_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memw($Rs) = $Rt",
+ (S2_pstorerif_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memd($Rs) = $Rtt",
+ (S2_pstorerdf_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
+
+def : InstAlias<"if (!$Pt) memb($Rs) = $Rt.new",
+ (S2_pstorerbnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.new",
+ (S2_pstorerhnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memw($Rs) = $Rt.new",
+ (S2_pstorerinewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt.new) memb($Rs) = $Rt.new",
+ (S4_pstorerbnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt.new) memh($Rs) = $Rt.new",
+ (S4_pstorerhnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt.new) memw($Rs) = $Rt.new",
+ (S4_pstorerinewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memb($Rs) = #$S6",
+ (S4_storeirbt_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if ($Pt) memh($Rs) = #$S6",
+ (S4_storeirht_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if ($Pt) memw($Rs) = #$S6",
+ (S4_storeirit_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if ($Pt.new) memb($Rs) = #$S6",
+ (S4_storeirbtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if ($Pt.new) memh($Rs) = #$S6",
+ (S4_storeirhtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if ($Pt.new) memw($Rs) = #$S6",
+ (S4_storeiritnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt) memb($Rs) = #$S6",
+ (S4_storeirbf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt) memh($Rs) = #$S6",
+ (S4_storeirhf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt) memw($Rs) = #$S6",
+ (S4_storeirif_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt.new) memb($Rs) = #$S6",
+ (S4_storeirbfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt.new) memh($Rs) = #$S6",
+ (S4_storeirhfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt.new) memw($Rs) = #$S6",
+ (S4_storeirifnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+// Alias of: memXX($Rs + $u6_X) |= $Rt, also &=, +=, -=
+// to: memXX($Rs) |= $Rt
+def : InstAlias<"memb($Rs) &= $Rt",
+ (L4_and_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memb($Rs) |= $Rt",
+ (L4_or_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memb($Rs) += $Rt",
+ (L4_add_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memb($Rs) -= $Rt",
+ (L4_sub_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memb($Rs) += #$U5",
+ (L4_iadd_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memb($Rs) -= #$U5",
+ (L4_isub_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) &= $Rt",
+ (L4_and_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) |= $Rt",
+ (L4_or_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) += $Rt",
+ (L4_add_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) -= $Rt",
+ (L4_sub_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) += #$U5",
+ (L4_iadd_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) -= #$U5",
+ (L4_isub_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) &= $Rt",
+ (L4_and_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) |= $Rt",
+ (L4_or_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) += $Rt",
+ (L4_add_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) -= $Rt",
+ (L4_sub_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) += #$U5",
+ (L4_iadd_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) -= #$U5",
+ (L4_isub_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+//
+// Alias of: if ($Pv.new) memX($Rs) = $Rt
+// to: if ($Pv.new) memX($Rs + #0) = $Rt
+def : InstAlias<"if ($Pv.new) memb($Rs) = $Rt",
+ (S4_pstorerbtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt",
+ (S4_pstorerhtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt.h",
+ (S4_pstorerftnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pv.new) memw($Rs) = $Rt",
+ (S4_pstoreritnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pv.new) memd($Rs) = $Rtt",
+ (S4_pstorerdtnew_io
+ PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
+
+def : InstAlias<"if (!$Pv.new) memb($Rs) = $Rt",
+ (S4_pstorerbfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt",
+ (S4_pstorerhfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt.h",
+ (S4_pstorerffnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pv.new) memw($Rs) = $Rt",
+ (S4_pstorerifnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pv.new) memd($Rs) = $Rtt",
+ (S4_pstorerdfnew_io
+ PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
+
+//
+// Alias of: if ($Pt.new) $Rd = memub($Rs) -- And if (!$Pt.new) ...
+// to: if ($Pt.new) $Rd = memub($Rs + #$u6_0)
+def : InstAlias<"if ($Pt.new) $Rd = memub($Rs)",
+ (L2_ploadrubtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt.new) $Rd = memb($Rs)",
+ (L2_ploadrbtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt.new) $Rd = memh($Rs)",
+ (L2_ploadrhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt.new) $Rd = memuh($Rs)",
+ (L2_ploadruhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt.new) $Rd = memw($Rs)",
+ (L2_ploadritnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt.new) $Rdd = memd($Rs)",
+ (L2_ploadrdtnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rd = memub($Rs)",
+ (L2_ploadrubfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rd = memb($Rs)",
+ (L2_ploadrbfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rd = memh($Rs)",
+ (L2_ploadrhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rd = memuh($Rs)",
+ (L2_ploadruhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rd = memw($Rs)",
+ (L2_ploadrifnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rdd = memd($Rs)",
+ (L2_ploadrdfnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"dcfetch($Rs)",
+ (Y2_dcfetchbo IntRegs:$Rs, 0), 0>;
+
+// Alias of some insn mappings, others must be handled by the parser
+def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)",
+ (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
+def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)",
+ (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
+
+// Rd=neg(Rs) is aliased to Rd=sub(#0,Rs)
+def : InstAlias<"$Rd = neg($Rs)",
+ (A2_subri IntRegs:$Rd, 0, IntRegs:$Rs), 0>;
+
+def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>;
+def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>;
+def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>;
+def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>;
+
+def : InstAlias<"$Pd = $Ps",
+ (C2_or PredRegs:$Pd, PredRegs:$Ps, PredRegs:$Ps), 0>;
+
+def : InstAlias<"$Rdd = vaddb($Rss, $Rtt)",
+ (A2_vaddub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 1>;
+
+def : InstAlias<"$Rdd = vsubb($Rss,$Rtt)",
+ (A2_vsubub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 0>;
+
+def : InstAlias<"$Rd = mpyui($Rs,$Rt)",
+ (M2_mpyi IntRegs:$Rd, IntRegs:$Rs, IntRegs:$Rt), 0>;
+
+// Assembler mapped insns: cmp.lt(a,b) -> cmp.gt(b,a)
+def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)",
+ (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
+def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)",
+ (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td
new file mode 100644
index 0000000..280832f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td
@@ -0,0 +1,1019 @@
+class Enc_COPROC_VX_3op_v<bits<15> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { opc{14-4}, src2};
+ let Inst{13-0} = { opc{3}, src1, opc{2-0}, dst};
+}
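+// The two concatenations above give the field layout:
+//   Inst{31-21} = opc{14-4},  Inst{20-16} = src2,
+//   Inst{13}    = opc{3},     Inst{12-8}  = src1,
+//   Inst{7-5}   = opc{2-0},   Inst{4-0}   = dst.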
+
+class V6_vtmpyb_enc : Enc_COPROC_VX_3op_v<0b000110010000000>;
+class V6_vtmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000001>;
+class V6_vdmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110010000010>;
+class V6_vrmpyub_enc : Enc_COPROC_VX_3op_v<0b000110010000011>;
+class V6_vrmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000100>;
+class V6_vdsaduh_enc : Enc_COPROC_VX_3op_v<0b000110010000101>;
+class V6_vdmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000110>;
+class V6_vdmpybus_dv_enc : Enc_COPROC_VX_3op_v<0b000110010000111>;
+class V6_vtmpyb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001000>;
+class V6_vtmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001001>;
+class V6_vtmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001010>;
+class V6_vdmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001011>;
+class V6_vrmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001100>;
+class V6_vrmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001101>;
+class V6_vdmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001110>;
+class V6_vdmpybus_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001111>;
+class V6_vdmpyhsusat_enc : Enc_COPROC_VX_3op_v<0b000110010010000>;
+class V6_vdmpyhsuisat_enc : Enc_COPROC_VX_3op_v<0b000110010010001>;
+class V6_vdmpyhsat_enc : Enc_COPROC_VX_3op_v<0b000110010010010>;
+class V6_vdmpyhisat_enc : Enc_COPROC_VX_3op_v<0b000110010010011>;
+class V6_vdmpyhb_dv_enc : Enc_COPROC_VX_3op_v<0b000110010010100>;
+class V6_vmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010010101>;
+class V6_vmpabus_enc : Enc_COPROC_VX_3op_v<0b000110010010110>;
+class V6_vmpahb_enc : Enc_COPROC_VX_3op_v<0b000110010010111>;
+class V6_vdmpyhsusat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011000>;
+class V6_vdmpyhsuisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011001>;
+class V6_vdmpyhisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011010>;
+class V6_vdmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011011>;
+class V6_vdmpyhb_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011100>;
+class V6_vmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011101>;
+class V6_vmpabus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011110>;
+class V6_vmpahb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011111>;
+class V6_vmpyh_enc : Enc_COPROC_VX_3op_v<0b000110010100000>;
+class V6_vmpyhss_enc : Enc_COPROC_VX_3op_v<0b000110010100001>;
+class V6_vmpyhsrs_enc : Enc_COPROC_VX_3op_v<0b000110010100010>;
+class V6_vmpyuh_enc : Enc_COPROC_VX_3op_v<0b000110010100011>;
+class V6_vmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101000>;
+class V6_vmpyuh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101001>;
+class V6_vmpyiwb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101010>;
+class V6_vmpyiwh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101011>;
+class V6_vmpyihb_enc : Enc_COPROC_VX_3op_v<0b000110010110000>;
+class V6_vror_enc : Enc_COPROC_VX_3op_v<0b000110010110001>;
+class V6_vasrw_enc : Enc_COPROC_VX_3op_v<0b000110010110101>;
+class V6_vasrh_enc : Enc_COPROC_VX_3op_v<0b000110010110110>;
+class V6_vaslw_enc : Enc_COPROC_VX_3op_v<0b000110010110111>;
+class V6_vdsaduh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111000>;
+class V6_vmpyihb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111001>;
+class V6_vaslw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111010>;
+class V6_vasrw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111101>;
+class V6_vaslh_enc : Enc_COPROC_VX_3op_v<0b000110011000000>;
+class V6_vlsrw_enc : Enc_COPROC_VX_3op_v<0b000110011000001>;
+class V6_vlsrh_enc : Enc_COPROC_VX_3op_v<0b000110011000010>;
+class V6_vmpyiwh_enc : Enc_COPROC_VX_3op_v<0b000110011000111>;
+class V6_vmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110011001000>;
+class V6_vmpyiwb_enc : Enc_COPROC_VX_3op_v<0b000110011010000>;
+class V6_vtmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110011010100>;
+class V6_vmpyub_enc : Enc_COPROC_VX_3op_v<0b000110011100000>;
+class V6_vrmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000000>;
+class V6_vrmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000001>;
+class V6_vrmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000010>;
+class V6_vdmpyhvsat_enc : Enc_COPROC_VX_3op_v<0b000111000000011>;
+class V6_vmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000100>;
+class V6_vmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000101>;
+class V6_vmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000110>;
+class V6_vmpyhv_enc : Enc_COPROC_VX_3op_v<0b000111000000111>;
+class V6_vrmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001000>;
+class V6_vrmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001001>;
+class V6_vrmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001010>;
+class V6_vdmpyhvsat_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001011>;
+class V6_vmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001100>;
+class V6_vmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001101>;
+class V6_vmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001110>;
+class V6_vmpyhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001111>;
+class V6_vmpyuhv_enc : Enc_COPROC_VX_3op_v<0b000111000010000>;
+class V6_vmpyhvsrs_enc : Enc_COPROC_VX_3op_v<0b000111000010001>;
+class V6_vmpyhus_enc : Enc_COPROC_VX_3op_v<0b000111000010010>;
+class V6_vmpabusv_enc : Enc_COPROC_VX_3op_v<0b000111000010011>;
+class V6_vmpyih_enc : Enc_COPROC_VX_3op_v<0b000111000010100>;
+class V6_vand_enc : Enc_COPROC_VX_3op_v<0b000111000010101>;
+class V6_vor_enc : Enc_COPROC_VX_3op_v<0b000111000010110>;
+class V6_vxor_enc : Enc_COPROC_VX_3op_v<0b000111000010111>;
+class V6_vmpyuhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011000>;
+class V6_vmpyhus_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011001>;
+class V6_vmpyih_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011100>;
+class V6_vmpyiewuh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011101>;
+class V6_vmpyowh_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011110>;
+class V6_vmpyowh_rnd_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011111>;
+class V6_vaddw_enc : Enc_COPROC_VX_3op_v<0b000111000100000>;
+class V6_vaddubsat_enc : Enc_COPROC_VX_3op_v<0b000111000100001>;
+class V6_vadduhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100010>;
+class V6_vaddhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100011>;
+class V6_vaddwsat_enc : Enc_COPROC_VX_3op_v<0b000111000100100>;
+class V6_vsubb_enc : Enc_COPROC_VX_3op_v<0b000111000100101>;
+class V6_vsubh_enc : Enc_COPROC_VX_3op_v<0b000111000100110>;
+class V6_vsubw_enc : Enc_COPROC_VX_3op_v<0b000111000100111>;
+class V6_vmpyiewh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000101000>;
+class V6_vsububsat_enc : Enc_COPROC_VX_3op_v<0b000111000110000>;
+class V6_vsubuhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110001>;
+class V6_vsubhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110010>;
+class V6_vsubwsat_enc : Enc_COPROC_VX_3op_v<0b000111000110011>;
+class V6_vaddb_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110100>;
+class V6_vaddh_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110101>;
+class V6_vaddw_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110110>;
+class V6_vaddubsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110111>;
+class V6_vadduhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000000>;
+class V6_vaddhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000001>;
+class V6_vaddwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000010>;
+class V6_vsubb_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000011>;
+class V6_vsubh_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000100>;
+class V6_vsubw_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000101>;
+class V6_vsububsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000110>;
+class V6_vsubuhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000111>;
+class V6_vsubhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010000>;
+class V6_vsubwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010001>;
+class V6_vaddubh_enc : Enc_COPROC_VX_3op_v<0b000111001010010>;
+class V6_vadduhw_enc : Enc_COPROC_VX_3op_v<0b000111001010011>;
+class V6_vaddhw_enc : Enc_COPROC_VX_3op_v<0b000111001010100>;
+class V6_vsububh_enc : Enc_COPROC_VX_3op_v<0b000111001010101>;
+class V6_vsubuhw_enc : Enc_COPROC_VX_3op_v<0b000111001010110>;
+class V6_vsubhw_enc : Enc_COPROC_VX_3op_v<0b000111001010111>;
+class V6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b000111001100000>;
+class V6_vabsdiffh_enc : Enc_COPROC_VX_3op_v<0b000111001100001>;
+class V6_vabsdiffuh_enc : Enc_COPROC_VX_3op_v<0b000111001100010>;
+class V6_vabsdiffw_enc : Enc_COPROC_VX_3op_v<0b000111001100011>;
+class V6_vavgub_enc : Enc_COPROC_VX_3op_v<0b000111001100100>;
+class V6_vavguh_enc : Enc_COPROC_VX_3op_v<0b000111001100101>;
+class V6_vavgh_enc : Enc_COPROC_VX_3op_v<0b000111001100110>;
+class V6_vavgw_enc : Enc_COPROC_VX_3op_v<0b000111001100111>;
+class V6_vnavgub_enc : Enc_COPROC_VX_3op_v<0b000111001110000>;
+class V6_vnavgh_enc : Enc_COPROC_VX_3op_v<0b000111001110001>;
+class V6_vnavgw_enc : Enc_COPROC_VX_3op_v<0b000111001110010>;
+class V6_vavgubrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110011>;
+class V6_vavguhrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110100>;
+class V6_vavghrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110101>;
+class V6_vavgwrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110110>;
+class V6_vmpabuuv_enc : Enc_COPROC_VX_3op_v<0b000111001110111>;
+class V6_vminub_enc : Enc_COPROC_VX_3op_v<0b000111110000001>;
+class V6_vminuh_enc : Enc_COPROC_VX_3op_v<0b000111110000010>;
+class V6_vminh_enc : Enc_COPROC_VX_3op_v<0b000111110000011>;
+class V6_vminw_enc : Enc_COPROC_VX_3op_v<0b000111110000100>;
+class V6_vmaxub_enc : Enc_COPROC_VX_3op_v<0b000111110000101>;
+class V6_vmaxuh_enc : Enc_COPROC_VX_3op_v<0b000111110000110>;
+class V6_vmaxh_enc : Enc_COPROC_VX_3op_v<0b000111110000111>;
+class V6_vmaxw_enc : Enc_COPROC_VX_3op_v<0b000111110010000>;
+class V6_vdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010001>;
+class V6_vrdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010011>;
+class V6_vdealb4w_enc : Enc_COPROC_VX_3op_v<0b000111110010111>;
+class V6_vmpyowh_rnd_enc : Enc_COPROC_VX_3op_v<0b000111110100000>;
+class V6_vshuffeb_enc : Enc_COPROC_VX_3op_v<0b000111110100001>;
+class V6_vshuffob_enc : Enc_COPROC_VX_3op_v<0b000111110100010>;
+class V6_vshufeh_enc : Enc_COPROC_VX_3op_v<0b000111110100011>;
+class V6_vshufoh_enc : Enc_COPROC_VX_3op_v<0b000111110100100>;
+class V6_vshufoeh_enc : Enc_COPROC_VX_3op_v<0b000111110100101>;
+class V6_vshufoeb_enc : Enc_COPROC_VX_3op_v<0b000111110100110>;
+class V6_vcombine_enc : Enc_COPROC_VX_3op_v<0b000111110100111>;
+class V6_vmpyieoh_enc : Enc_COPROC_VX_3op_v<0b000111110110000>;
+class V6_vsathub_enc : Enc_COPROC_VX_3op_v<0b000111110110010>;
+class V6_vsatwh_enc : Enc_COPROC_VX_3op_v<0b000111110110011>;
+class V6_vroundwh_enc : Enc_COPROC_VX_3op_v<0b000111110110100>;
+class V6_vroundwuh_enc : Enc_COPROC_VX_3op_v<0b000111110110101>;
+class V6_vroundhb_enc : Enc_COPROC_VX_3op_v<0b000111110110110>;
+class V6_vroundhub_enc : Enc_COPROC_VX_3op_v<0b000111110110111>;
+class V6_vasrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010000>;
+class V6_vlsrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010001>;
+class V6_vlsrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010010>;
+class V6_vasrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010011>;
+class V6_vaslwv_enc : Enc_COPROC_VX_3op_v<0b000111111010100>;
+class V6_vaslhv_enc : Enc_COPROC_VX_3op_v<0b000111111010101>;
+class V6_vaddb_enc : Enc_COPROC_VX_3op_v<0b000111111010110>;
+class V6_vaddh_enc : Enc_COPROC_VX_3op_v<0b000111111010111>;
+class V6_vmpyiewuh_enc : Enc_COPROC_VX_3op_v<0b000111111100000>;
+class V6_vmpyiowh_enc : Enc_COPROC_VX_3op_v<0b000111111100001>;
+class V6_vpackeb_enc : Enc_COPROC_VX_3op_v<0b000111111100010>;
+class V6_vpackeh_enc : Enc_COPROC_VX_3op_v<0b000111111100011>;
+class V6_vpackhub_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100101>;
+class V6_vpackhb_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100110>;
+class V6_vpackwuh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100111>;
+class V6_vpackwh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111110000>;
+class V6_vpackob_enc : Enc_COPROC_VX_3op_v<0b000111111110001>;
+class V6_vpackoh_enc : Enc_COPROC_VX_3op_v<0b000111111110010>;
+class V6_vmpyewuh_enc : Enc_COPROC_VX_3op_v<0b000111111110101>;
+class V6_vmpyowh_enc : Enc_COPROC_VX_3op_v<0b000111111110111>;
+class V6_extractw_enc : Enc_COPROC_VX_3op_v<0b100100100000001>;
+class M6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b111010001010000>;
+class M6_vabsdiffb_enc : Enc_COPROC_VX_3op_v<0b111010001110000>;
+
+class Enc_COPROC_VX_cmp<bits<13> opc> : OpcodeHexagon {
+ bits<2> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b00011, opc{12-7}, src2{4-0} };
+ let Inst{13-0} = { opc{6}, src1{4-0}, opc{5-0}, dst{1-0} };
+}
+
+class V6_vandvrt_acc_enc : Enc_COPROC_VX_cmp<0b0010111100000>;
+class V6_vandvrt_enc : Enc_COPROC_VX_cmp<0b0011010010010>;
+class V6_veqb_and_enc : Enc_COPROC_VX_cmp<0b1001001000000>;
+class V6_veqh_and_enc : Enc_COPROC_VX_cmp<0b1001001000001>;
+class V6_veqw_and_enc : Enc_COPROC_VX_cmp<0b1001001000010>;
+class V6_vgtb_and_enc : Enc_COPROC_VX_cmp<0b1001001000100>;
+class V6_vgth_and_enc : Enc_COPROC_VX_cmp<0b1001001000101>;
+class V6_vgtw_and_enc : Enc_COPROC_VX_cmp<0b1001001000110>;
+class V6_vgtub_and_enc : Enc_COPROC_VX_cmp<0b1001001001000>;
+class V6_vgtuh_and_enc : Enc_COPROC_VX_cmp<0b1001001001001>;
+class V6_vgtuw_and_enc : Enc_COPROC_VX_cmp<0b1001001001010>;
+class V6_veqb_or_enc : Enc_COPROC_VX_cmp<0b1001001010000>;
+class V6_veqh_or_enc : Enc_COPROC_VX_cmp<0b1001001010001>;
+class V6_veqw_or_enc : Enc_COPROC_VX_cmp<0b1001001010010>;
+class V6_vgtb_or_enc : Enc_COPROC_VX_cmp<0b1001001010100>;
+class V6_vgth_or_enc : Enc_COPROC_VX_cmp<0b1001001010101>;
+class V6_vgtw_or_enc : Enc_COPROC_VX_cmp<0b1001001010110>;
+class V6_vgtub_or_enc : Enc_COPROC_VX_cmp<0b1001001011000>;
+class V6_vgtuh_or_enc : Enc_COPROC_VX_cmp<0b1001001011001>;
+class V6_vgtuw_or_enc : Enc_COPROC_VX_cmp<0b1001001011010>;
+class V6_veqb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100000>;
+class V6_veqh_xor_enc : Enc_COPROC_VX_cmp<0b1001001100001>;
+class V6_veqw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100010>;
+class V6_vgtb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100100>;
+class V6_vgth_xor_enc : Enc_COPROC_VX_cmp<0b1001001100101>;
+class V6_vgtw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100110>;
+class V6_vgtub_xor_enc : Enc_COPROC_VX_cmp<0b1001001101000>;
+class V6_vgtuh_xor_enc : Enc_COPROC_VX_cmp<0b1001001101001>;
+class V6_vgtuw_xor_enc : Enc_COPROC_VX_cmp<0b1001001101010>;
+class V6_veqb_enc : Enc_COPROC_VX_cmp<0b1111000000000>;
+class V6_veqh_enc : Enc_COPROC_VX_cmp<0b1111000000001>;
+class V6_veqw_enc : Enc_COPROC_VX_cmp<0b1111000000010>;
+class V6_vgtb_enc : Enc_COPROC_VX_cmp<0b1111000000100>;
+class V6_vgth_enc : Enc_COPROC_VX_cmp<0b1111000000101>;
+class V6_vgtw_enc : Enc_COPROC_VX_cmp<0b1111000000110>;
+class V6_vgtub_enc : Enc_COPROC_VX_cmp<0b1111000001000>;
+class V6_vgtuh_enc : Enc_COPROC_VX_cmp<0b1111000001001>;
+class V6_vgtuw_enc : Enc_COPROC_VX_cmp<0b1111000001010>;
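For readers tracing these compare encodings, here is a short worked example (an illustration, not part of the patch) of how a 13-bit opc value is scattered across the instruction word by Enc_COPROC_VX_cmp; the operand roles in parentheses are inferred from the field names only.

// Worked example: V6_veqb_enc, opc = 0b1111000000000.
//   Inst{31-27} = 0b00011              (fixed prefix)
//   Inst{26-21} = opc{12-7} = 0b111100
//   Inst{20-16} = src2{4-0}            (5-bit source register field)
//   Inst{13}    = opc{6}    = 0
//   Inst{12-8}  = src1{4-0}            (5-bit source register field)
//   Inst{7-2}   = opc{5-0}  = 0b000000
//   Inst{1-0}   = dst{1-0}             (2-bit destination field)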
+
+class Enc_COPROC_VX_p2op<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> dst;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b00011110, src1{1-0}, 0b0000, opc{4-3} };
+ let Inst{13-0} = { 1, src2{4-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vaddbq_enc : Enc_COPROC_VX_p2op<0b01000>;
+class V6_vaddhq_enc : Enc_COPROC_VX_p2op<0b01001>;
+class V6_vaddwq_enc : Enc_COPROC_VX_p2op<0b01010>;
+class V6_vaddbnq_enc : Enc_COPROC_VX_p2op<0b01011>;
+class V6_vaddhnq_enc : Enc_COPROC_VX_p2op<0b01100>;
+class V6_vaddwnq_enc : Enc_COPROC_VX_p2op<0b01101>;
+class V6_vsubbq_enc : Enc_COPROC_VX_p2op<0b01110>;
+class V6_vsubhq_enc : Enc_COPROC_VX_p2op<0b01111>;
+class V6_vsubwq_enc : Enc_COPROC_VX_p2op<0b10000>;
+class V6_vsubbnq_enc : Enc_COPROC_VX_p2op<0b10001>;
+class V6_vsubhnq_enc : Enc_COPROC_VX_p2op<0b10010>;
+class V6_vsubwnq_enc : Enc_COPROC_VX_p2op<0b10011>;
+
+class Enc_COPROC_VX_2op<bits<6> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+
+ let Inst{31-16} = { 0b00011110000000, opc{5-4} };
+ let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vabsh_enc : Enc_COPROC_VX_2op<0b000000>;
+class V6_vabsh_sat_enc : Enc_COPROC_VX_2op<0b000001>;
+class V6_vabsw_enc : Enc_COPROC_VX_2op<0b000010>;
+class V6_vabsw_sat_enc : Enc_COPROC_VX_2op<0b000011>;
+class V6_vnot_enc : Enc_COPROC_VX_2op<0b000100>;
+class V6_vdealh_enc : Enc_COPROC_VX_2op<0b000110>;
+class V6_vdealb_enc : Enc_COPROC_VX_2op<0b000111>;
+class V6_vunpackob_enc : Enc_COPROC_VX_2op<0b001000>;
+class V6_vunpackoh_enc : Enc_COPROC_VX_2op<0b001001>;
+class V6_vunpackub_enc : Enc_COPROC_VX_2op<0b010000>;
+class V6_vunpackuh_enc : Enc_COPROC_VX_2op<0b010001>;
+class V6_vunpackb_enc : Enc_COPROC_VX_2op<0b010010>;
+class V6_vunpackh_enc : Enc_COPROC_VX_2op<0b010011>;
+class V6_vshuffh_enc : Enc_COPROC_VX_2op<0b010111>;
+class V6_vshuffb_enc : Enc_COPROC_VX_2op<0b100000>;
+class V6_vzb_enc : Enc_COPROC_VX_2op<0b100001>;
+class V6_vzh_enc : Enc_COPROC_VX_2op<0b100010>;
+class V6_vsb_enc : Enc_COPROC_VX_2op<0b100011>;
+class V6_vsh_enc : Enc_COPROC_VX_2op<0b100100>;
+class V6_vcl0w_enc : Enc_COPROC_VX_2op<0b100101>;
+class V6_vpopcounth_enc : Enc_COPROC_VX_2op<0b100110>;
+class V6_vcl0h_enc : Enc_COPROC_VX_2op<0b100111>;
+class V6_vnormamtw_enc : Enc_COPROC_VX_2op<0b110100>;
+class V6_vnormamth_enc : Enc_COPROC_VX_2op<0b110101>;
+class V6_vassign_enc : Enc_COPROC_VX_2op<0b111111>;
+
+class Enc_COPROC_VMEM_vL32_b_ai<bits<4> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<10> src2;
+ bits<4> src2_vector;
+
+ let src2_vector = src2{9-6};
+ let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vL32b_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0000>;
+class V6_vL32b_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0001>;
+class V6_vL32b_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0010>;
+class V6_vL32Ub_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0111>;
+class V6_vL32b_nt_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1000>;
+class V6_vL32b_nt_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1001>;
+class V6_vL32b_nt_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1010>;
+
+class Enc_COPROC_VMEM_vL32_b_ai_128B<bits<4> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<11> src2;
+ bits<4> src2_vector;
+
+ let src2_vector = src2{10-7};
+ let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vL32b_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0000>;
+class V6_vL32b_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0001>;
+class V6_vL32b_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0010>;
+class V6_vL32Ub_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0111>;
+class V6_vL32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1000>;
+class V6_vL32b_nt_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1001>;
+class V6_vL32b_nt_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1010>;
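A brief note on the offset fields in the two vector-load encoding classes above; the scaling interpretation is inferred from the bit selections, not stated in the patch.

// Enc_COPROC_VMEM_vL32_b_ai       keeps src2_vector = src2{9-6}
// Enc_COPROC_VMEM_vL32_b_ai_128B  keeps src2_vector = src2{10-7}
// Only four bits of the immediate survive, starting at bit 6 for the 64-byte
// forms and bit 7 for the 128-byte forms, so the discarded low bits match the
// vector length; this is consistent with an offset that is a multiple of the
// vector size, as one would expect for vector-aligned vmem accesses.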
+
+class Enc_COPROC_VMEM_vS32_b_ai_64B<bits<4> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<10> src2;
+ bits<4> src2_vector;
+ bits<5> src3;
+
+ let src2_vector = src2{9-6};
+ let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} };
+}
+
+class Enc_COPROC_VMEM_vS32_b_ai_128B<bits<4> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<11> src2;
+ bits<4> src2_vector;
+ bits<5> src3;
+
+ let src2_vector = src2{10-7};
+ let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} };
+}
+
+class V6_vS32b_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0000>;
+class V6_vS32Ub_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0111>;
+class V6_vS32b_nt_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b1000>;
+
+class V6_vS32b_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0000>;
+class V6_vS32Ub_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0111>;
+class V6_vS32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b1000>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<bits<1> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<10> src2;
+ bits<4> src2_vector;
+ bits<3> src3;
+
+ let src2_vector = src2{9-6};
+ let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} };
+}
+
+class V6_vS32b_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<0>;
+class V6_vS32b_nt_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<1>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<bits<1> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<11> src2;
+ bits<4> src2_vector;
+ bits<3> src3;
+
+ let src2_vector = src2{10-7};
+ let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} };
+}
+
+class V6_vS32b_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<0>;
+class V6_vS32b_nt_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<1>;
+
+class Enc_COPROC_VMEM_vS32_b_pred_ai<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<10> src3;
+ bits<4> src3_vector;
+ bits<5> src4;
+
+ let src3_vector = src3{9-6};
+ let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} };
+ let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
+}
+
+class Enc_COPROC_VMEM_vS32_b_pred_ai_128B<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<11> src3;
+ bits<4> src3_vector;
+ bits<5> src4;
+
+ let src3_vector = src3{10-7};
+ let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} };
+ let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
+}
+
+class V6_vS32b_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00000>;
+class V6_vS32b_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00001>;
+class V6_vS32b_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01000>;
+class V6_vS32b_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01001>;
+class V6_vS32Ub_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01110>;
+class V6_vS32Ub_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01111>;
+class V6_vS32b_nt_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10000>;
+class V6_vS32b_nt_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10001>;
+class V6_vS32b_nt_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11000>;
+class V6_vS32b_nt_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11001>;
+
+class V6_vS32b_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00000>;
+class V6_vS32b_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00001>;
+class V6_vS32b_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01000>;
+class V6_vS32b_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01001>;
+class V6_vS32Ub_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01110>;
+class V6_vS32Ub_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01111>;
+class V6_vS32b_nt_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10000>;
+class V6_vS32b_nt_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10001>;
+class V6_vS32b_nt_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11000>;
+class V6_vS32b_nt_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11001>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<bits<4> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<10> src3;
+ bits<4> src3_vector;
+ bits<3> src4;
+
+ let src3_vector = src3{9-6};
+ let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} };
+ let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0000>;
+class V6_vS32b_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0101>;
+class V6_vS32b_nt_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1010>;
+class V6_vS32b_nt_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1111>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<bits<4> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<11> src3;
+ bits<4> src3_vector;
+ bits<3> src4;
+
+ let src3_vector = src3{10-7};
+ let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} };
+ let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0000>;
+class V6_vS32b_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0101>;
+class V6_vS32b_nt_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1010>;
+class V6_vS32b_nt_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1111>;
+
+// TODO: Change script to generate dst, src1, src2 instead of
+// dst, dst2, src1.
+class Enc_COPROC_VMEM_vL32_b_pi<bits<4> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<9> src2;
+ bits<3> src2_vector;
+
+ let src2_vector = src2{8-6};
+ let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} };
+ let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vL32b_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0000>;
+class V6_vL32b_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0001>;
+class V6_vL32b_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0010>;
+class V6_vL32Ub_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0111>;
+class V6_vL32b_nt_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1000>;
+class V6_vL32b_nt_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1001>;
+class V6_vL32b_nt_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1010>;
+
+class Enc_COPROC_VMEM_vL32_b_pi_128B<bits<4> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<10> src2;
+ bits<3> src2_vector;
+
+ let src2_vector = src2{9-7};
+ let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} };
+ let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vL32b_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0000>;
+class V6_vL32b_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0001>;
+class V6_vL32b_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0010>;
+class V6_vL32Ub_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0111>;
+class V6_vL32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1000>;
+class V6_vL32b_nt_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1001>;
+class V6_vL32b_nt_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1010>;
+
+
+// TODO: Change script to generate src1, src2 and src3 instead of
+// dst, src1, src2.
+class Enc_COPROC_VMEM_vS32_b_pi<bits<4> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<9> src2;
+ bits<3> src2_vector;
+ bits<5> src3;
+
+ let src2_vector = src2{8-6};
+ let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} };
+ let Inst{10-0} = { src2_vector{2-0}, opc{2-0}, src3{4-0} };
+}
+
+class V6_vS32b_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0000>;
+class V6_vS32Ub_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0111>;
+class V6_vS32b_nt_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b1000>;
+
+class Enc_COPROC_VMEM_vS32_b_pi_128B<bits<4> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<10> src2;
+ bits<3> src2_vector;
+ bits<5> src3;
+
+ let src2_vector = src2{9-7};
+ let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} };
+ let Inst{10-0} = { src2_vector{2-0}, opc{2-0}, src3{4-0} };
+}
+
+class V6_vS32b_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0000>;
+class V6_vS32Ub_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0111>;
+class V6_vS32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b1000>;
+
+// TODO: Change script to generate src1, src2 and src3 instead of
+// dst, src1, src2.
+class Enc_COPROC_VMEM_vS32b_n_ew_pi<bits<1> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<9> src2;
+ bits<3> src2_vector;
+ bits<3> src3;
+
+ let src2_vector = src2{8-6};
+ let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} };
+ let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} };
+}
+
+class V6_vS32b_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<0>;
+class V6_vS32b_nt_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<1>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<bits<1> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<10> src2;
+ bits<3> src2_vector;
+ bits<3> src3;
+
+ let src2_vector = src2{9-7};
+ let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} };
+ let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} };
+}
+
+class V6_vS32b_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<0>;
+class V6_vS32b_nt_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<1>;
+
+// TODO: Change script to generate src1, src2,src3 and src4 instead of
+// dst, src1, src2, src3.
+class Enc_COPROC_VMEM_vS32_b_pred_pi<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<9> src3;
+ bits<3> src3_vector;
+ bits<5> src4;
+
+ let src3_vector = src3{8-6};
+ let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} };
+ let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
+}
+
+class V6_vS32b_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00000>;
+class V6_vS32b_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00001>;
+class V6_vS32b_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01000>;
+class V6_vS32b_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01001>;
+class V6_vS32Ub_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01110>;
+class V6_vS32Ub_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01111>;
+class V6_vS32b_nt_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10000>;
+class V6_vS32b_nt_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10001>;
+class V6_vS32b_nt_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11000>;
+class V6_vS32b_nt_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11001>;
+
+// TODO: Change script to generate src1, src2,src3 and src4 instead of
+// dst, src1, src2, src3.
+class Enc_COPROC_VMEM_vS32_b_pred_pi_128B<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<10> src3;
+ bits<3> src3_vector;
+ bits<5> src4;
+
+ let src3_vector = src3{9-7};
+ let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} };
+ let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
+}
+
+class V6_vS32b_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00000>;
+class V6_vS32b_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00001>;
+class V6_vS32b_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01000>;
+class V6_vS32b_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01001>;
+class V6_vS32Ub_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01110>;
+class V6_vS32Ub_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01111>;
+class V6_vS32b_nt_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10000>;
+class V6_vS32b_nt_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10001>;
+class V6_vS32b_nt_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11000>;
+class V6_vS32b_nt_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11001>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<bits<4> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<9> src3;
+ bits<3> src3_vector;
+ bits<3> src4;
+
+ let src3_vector = src3{8-6};
+ let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} };
+ let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0000>;
+class V6_vS32b_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0101>;
+class V6_vS32b_nt_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1010>;
+class V6_vS32b_nt_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1111>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<bits<4> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<10> src3;
+ bits<3> src3_vector;
+ bits<3> src4;
+
+ let src3_vector = src3{9-7};
+ let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} };
+ let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0000>;
+class V6_vS32b_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0101>;
+class V6_vS32b_nt_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1010>;
+class V6_vS32b_nt_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1111>;
+
+class Enc_LD_load_m<bits<13> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<1> src2;
+
+ let Inst{31-16} = { opc{12}, 0, opc{11-10}, 1, opc{9-4}, src1{4-0} };
+ let Inst{13-0} = { src2{0}, 0b000, opc{3}, 0, opc{2-0}, dst{4-0} };
+}
+
+class V6_vL32b_ppu_enc : Enc_LD_load_m<0b0100110000000>;
+class V6_vL32b_cur_ppu_enc : Enc_LD_load_m<0b0100110000001>;
+class V6_vL32b_tmp_ppu_enc : Enc_LD_load_m<0b0100110000010>;
+class V6_vL32Ub_ppu_enc : Enc_LD_load_m<0b0100110000111>;
+class V6_vL32b_nt_ppu_enc : Enc_LD_load_m<0b0100110100000>;
+class V6_vL32b_nt_cur_ppu_enc : Enc_LD_load_m<0b0100110100001>;
+class V6_vL32b_nt_tmp_ppu_enc : Enc_LD_load_m<0b0100110100010>;
+
+class Enc_COPROC_VMEM_vS32_b_ppu<bits<4> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<1> src2;
+ bits<5> src3;
+
+ let Inst{31-16} = { 0b001010110, opc{3}, 1, src1{4-0} };
+ let Inst{13-0} = { src2{0}, 0b00000, opc{2-0}, src3{4-0} };
+}
+
+class V6_vS32b_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0000>;
+class V6_vS32Ub_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0111>;
+class V6_vS32b_nt_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b1000>;
+
+class Enc_COPROC_VMEM_vS32b_new_ppu<bits<1> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<1> src2;
+ bits<3> src3;
+
+ let Inst{31-16} = { 0b001010110, opc{0}, 1, src1{4-0} };
+ let Inst{13-0} = { src2{0}, 0b0000000100, src3{2-0} };
+}
+
+class V6_vS32b_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<0>;
+class V6_vS32b_nt_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<1>;
+
+class Enc_COPROC_VMEM_vS32_b_pred_ppu<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<1> src3;
+ bits<5> src4;
+
+ let Inst{31-16} = { 0b001010111, opc{4-3}, src2{4-0} };
+ let Inst{13-0} = { src3{0}, src1{1-0}, 0b000, opc{2-0}, src4{4-0} };
+}
+
+class V6_vS32b_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00000>;
+class V6_vS32b_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00001>;
+class V6_vS32b_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01000>;
+class V6_vS32b_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01001>;
+class V6_vS32Ub_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01110>;
+class V6_vS32Ub_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01111>;
+class V6_vS32b_nt_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10000>;
+class V6_vS32b_nt_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10001>;
+class V6_vS32b_nt_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11000>;
+class V6_vS32b_nt_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11001>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<bits<4> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<1> src3;
+ bits<3> src4;
+
+ let Inst{31-16} = { 0b001010111, opc{3}, 1, src2{4-0} };
+ let Inst{13-0} = { src3{0}, src1{1-0}, 0b00001, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0000>;
+class V6_vS32b_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0101>;
+class V6_vS32b_nt_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1010>;
+class V6_vS32b_nt_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1111>;
+
+
+class Enc_COPROC_VX_4op_i<bits<5> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<1> src3;
+
+ let Inst{31-16} = { 0b00011001, opc{4-2}, src2{4-0} };
+ let Inst{13-0} = { opc{1}, src1{4-0}, 1, opc{0}, src3{0}, dst{4-0} };
+}
+
+class V6_vrmpybusi_enc : Enc_COPROC_VX_4op_i<0b01000>;
+class V6_vrsadubi_enc : Enc_COPROC_VX_4op_i<0b01001>;
+class V6_vrmpybusi_acc_enc : Enc_COPROC_VX_4op_i<0b01010>;
+class V6_vrsadubi_acc_enc : Enc_COPROC_VX_4op_i<0b01011>;
+class V6_vrmpyubi_acc_enc : Enc_COPROC_VX_4op_i<0b01111>;
+class V6_vrmpyubi_enc : Enc_COPROC_VX_4op_i<0b10101>;
+
+class Enc_COPROC_VX_vandqrt<bits<5> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b00011001, opc{4-3}, 1, src2{4-0} };
+ let Inst{13-0} = { opc{2}, 0b000, src1{1-0}, opc{1-0}, 1, dst{4-0} };
+}
+
+class V6_vandqrt_acc_enc : Enc_COPROC_VX_vandqrt<0b01101>;
+class V6_vandqrt_enc : Enc_COPROC_VX_vandqrt<0b10010>;
+
+class Enc_COPROC_VX_cards<bits<2> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<5> src2;
+ bits<5> src3;
+
+ let Inst{31-16} = { 0b00011001111, src3{4-0} };
+ let Inst{13-0} = { 1, src1{4-0}, 0, opc{1-0}, src2{4-0} };
+}
+
+class V6_vshuff_enc : Enc_COPROC_VX_cards<0b01>;
+class V6_vdeal_enc : Enc_COPROC_VX_cards<0b10>;
+
+
+class Enc_COPROC_VX_v_cmov<bits<1> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> dst;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b0001101000, opc{0}, 0b00000 };
+ let Inst{13-0} = { 0, src2{4-0}, 0, src1{1-0}, dst{4-0} };
+}
+
+class V6_vcmov_enc : Enc_COPROC_VX_v_cmov<0>;
+class V6_vncmov_enc : Enc_COPROC_VX_v_cmov<1>;
+
+class Enc_X_p3op<bits<8> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> dst;
+ bits<5> src2;
+ bits<5> src3;
+
+ let Inst{31-16} = { opc{7-5}, 0b1101, opc{4}, 0, opc{3-2}, src3{4-0} };
+ let Inst{13-0} = { opc{1}, src2{4-0}, opc{0}, src1{1-0}, dst{4-0} };
+}
+
+class V6_vnccombine_enc : Enc_X_p3op<0b00001000>;
+class V6_vccombine_enc : Enc_X_p3op<0b00001100>;
+
+class Enc_COPROC_VX_4op_r<bits<4> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<3> src3;
+
+ let Inst{31-16} = { 0b00011011, src2{4-0}, src3{2-0} };
+ let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_valignb_enc : Enc_COPROC_VX_4op_r<0b0000>;
+class V6_vlalignb_enc : Enc_COPROC_VX_4op_r<0b0001>;
+class V6_vasrwh_enc : Enc_COPROC_VX_4op_r<0b0010>;
+class V6_vasrwhsat_enc : Enc_COPROC_VX_4op_r<0b0011>;
+class V6_vasrwhrndsat_enc : Enc_COPROC_VX_4op_r<0b0100>;
+class V6_vasrwuhsat_enc : Enc_COPROC_VX_4op_r<0b0101>;
+class V6_vasrhubsat_enc : Enc_COPROC_VX_4op_r<0b0110>;
+class V6_vasrhubrndsat_enc : Enc_COPROC_VX_4op_r<0b0111>;
+class V6_vasrhbrndsat_enc : Enc_COPROC_VX_4op_r<0b1000>;
+class V6_vlutvvb_enc : Enc_COPROC_VX_4op_r<0b1001>;
+class V6_vshuffvdd_enc : Enc_COPROC_VX_4op_r<0b1011>;
+class V6_vdealvdd_enc : Enc_COPROC_VX_4op_r<0b1100>;
+class V6_vlutvvb_oracc_enc : Enc_COPROC_VX_4op_r<0b1101>;
+class V6_vlutvwh_enc : Enc_COPROC_VX_4op_r<0b1110>;
+class V6_vlutvwh_oracc_enc : Enc_COPROC_VX_4op_r<0b1111>;
+
+class Enc_S_3op_valign_i<bits<9> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<3> src3;
+
+ let Inst{31-16} = { opc{8-7}, 0, opc{6-3}, 0b00, opc{2-1}, src2{4-0} };
+ let Inst{13-0} = { opc{0}, src1{4-0}, src3{2-0}, dst{4-0} };
+}
+
+class V6_vlutb_enc : Enc_S_3op_valign_i<0b001100000>;
+class V6_vlutb_dv_enc : Enc_S_3op_valign_i<0b001100010>;
+class V6_vlutb_acc_enc : Enc_S_3op_valign_i<0b001100100>;
+class V6_vlutb_dv_acc_enc : Enc_S_3op_valign_i<0b001100110>;
+class V6_valignbi_enc : Enc_S_3op_valign_i<0b001111011>;
+class V6_vlalignbi_enc : Enc_S_3op_valign_i<0b001111111>;
+class S2_valignib_enc : Enc_S_3op_valign_i<0b110000000>;
+class S2_addasl_rrri_enc : Enc_S_3op_valign_i<0b110010000>;
+
+class Enc_COPROC_VX_3op_q<bits<3> opc> : OpcodeHexagon {
+ bits<2> dst;
+ bits<2> src1;
+ bits<2> src2;
+
+ let Inst{31-16} = { 0b00011110, src2{1-0}, 0b000011 };
+ let Inst{13-0} = { 0b0000, src1{1-0}, 0b000, opc{2-0}, dst{1-0} };
+}
+
+class V6_pred_and_enc : Enc_COPROC_VX_3op_q<0b000>;
+class V6_pred_or_enc : Enc_COPROC_VX_3op_q<0b001>;
+class V6_pred_xor_enc : Enc_COPROC_VX_3op_q<0b011>;
+class V6_pred_or_n_enc : Enc_COPROC_VX_3op_q<0b100>;
+class V6_pred_and_n_enc : Enc_COPROC_VX_3op_q<0b101>;
+
+class V6_pred_not_enc : OpcodeHexagon {
+ bits<2> dst;
+ bits<2> src1;
+
+ let Inst{31-16} = { 0b0001111000000011 };
+ let Inst{13-0} = { 0b0000, src1{1-0}, 0b000010, dst{1-0} };
+}
+
+class Enc_COPROC_VX_4op_q<bits<1> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+ bits<5> src3;
+
+ let Inst{31-16} = { 0b000111101, opc{0}, 1, src3{4-0} };
+ let Inst{13-0} = { 1, src2{4-0}, 0, src1{1-0}, dst{4-0} };
+}
+
+class V6_vswap_enc : Enc_COPROC_VX_4op_q<0>;
+class V6_vmux_enc : Enc_COPROC_VX_4op_q<1>;
+
+class Enc_X_2op<bits<16> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+
+ let Inst{31-16} = { opc{15-5}, src1{4-0} };
+ let Inst{13-0} = { opc{4-3}, 0b0000, opc{2-0}, dst{4-0} };
+}
+
+class V6_lvsplatw_enc : Enc_X_2op<0b0001100110100001>;
+class V6_vinsertwr_enc : Enc_X_2op<0b0001100110110001>;
+class S6_vsplatrbp_enc : Enc_X_2op<0b1000010001000100>;
+
+
+class Enc_CR_2op_r<bits<12> opc> : OpcodeHexagon {
+ bits<2> dst;
+ bits<5> src1;
+
+ let Inst{31-16} = { opc{11}, 0, opc{10-7}, 0, opc{6-3}, src1{4-0} };
+ let Inst{13-0} = { opc{2}, 0b000000, opc{1}, 0b000, opc{0}, dst{1-0} };
+}
+
+class V6_pred_scalar2_enc : Enc_CR_2op_r<0b001101101011>;
+class Y5_l2locka_enc : Enc_CR_2op_r<0b110000111100>;
+
+class Enc_S_3op_i6<bits<9> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<6> src2;
+
+ let Inst{31-16} = { 0b1000, opc{8-6}, 0, opc{5-3}, src1{4-0} };
+ let Inst{13-0} = { src2{5-0}, opc{2-0}, dst{4-0} };
+}
+
+class S6_rol_i_p_enc : Enc_S_3op_i6<0b000000011>;
+class S6_rol_i_p_nac_enc : Enc_S_3op_i6<0b001000011>;
+class S6_rol_i_p_acc_enc : Enc_S_3op_i6<0b001000111>;
+class S6_rol_i_p_and_enc : Enc_S_3op_i6<0b001010011>;
+class S6_rol_i_p_or_enc : Enc_S_3op_i6<0b001010111>;
+class S6_rol_i_p_xacc_enc : Enc_S_3op_i6<0b001100011>;
+
+class Enc_X_3op_r<bits<15> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { opc{14-4}, src1{4-0} };
+ let Inst{13-0} = { opc{3}, src2{4-0}, opc{2-0}, dst{4-0} };
+}
+
+class S6_rol_i_r_enc : Enc_X_3op_r<0b100011000000011>;
+class S6_rol_i_r_nac_enc : Enc_X_3op_r<0b100011100000011>;
+class S6_rol_i_r_acc_enc : Enc_X_3op_r<0b100011100000111>;
+class S6_rol_i_r_and_enc : Enc_X_3op_r<0b100011100100011>;
+class S6_rol_i_r_or_enc : Enc_X_3op_r<0b100011100100111>;
+class S6_rol_i_r_xacc_enc : Enc_X_3op_r<0b100011101000011>;
+class S6_vtrunehb_ppp_enc : Enc_X_3op_r<0b110000011000011>;
+class S6_vtrunohb_ppp_enc : Enc_X_3op_r<0b110000011000101>;
+
+class Enc_no_operands<bits<25> opc> : OpcodeHexagon {
+
+ let Inst{31-16} = { opc{24-10}, 0 };
+ let Inst{13-0} = { opc{9-7}, 0b000, opc{6-0}, 0 };
+}
+
+class Y5_l2gunlock_enc : Enc_no_operands<0b1010100000100000010000000>;
+class Y5_l2gclean_enc : Enc_no_operands<0b1010100000100000100000000>;
+class Y5_l2gcleaninv_enc : Enc_no_operands<0b1010100000100000110000000>;
+class V6_vhist_enc : Enc_no_operands<0b0001111000000001001000000>;
+
+class Enc_J_jumpr<bits<13> opc> : OpcodeHexagon {
+ bits<5> src1;
+
+ let Inst{31-16} = { opc{12-6}, 0, opc{5-3}, src1{4-0} };
+ let Inst{13-0} = { 0b00, opc{2}, 0b0000, opc{1-0}, 0b00000 };
+}
+
+class Y5_l2unlocka_enc : Enc_J_jumpr<0b1010011011000>;
+class Y2_l2cleaninvidx_enc : Enc_J_jumpr<0b1010100011000>;
+
+class Enc_ST_l2gclean_pa<bits<2> opc> : OpcodeHexagon {
+ bits<5> src1;
+
+ let Inst{31-16} = { 0b101001101, opc{1-0}, 0b00000 };
+ let Inst{13-0} = { 0, src1{4-0}, 0b00000000 };
+}
+
+class Y6_l2gcleanpa_enc : Enc_ST_l2gclean_pa<0b01>;
+class Y6_l2gcleaninvpa_enc : Enc_ST_l2gclean_pa<0b10>;
+
+class A5_ACS_enc : OpcodeHexagon {
+ bits<5> dst1;
+ bits<2> dst2;
+ bits<5> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b11101010101, src1{4-0} };
+ let Inst{13-0} = { 0, src2{4-0}, 0, dst2{1-0}, dst1{4-0} };
+}
+
+class Enc_X_4op_r<bits<8> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<2> src3;
+
+ let Inst{31-16} = { 0b11, opc{7}, 0, opc{6-5}, 1, opc{4-1}, src1{4-0} };
+ let Inst{13-0} = { 0, src2{4-0}, opc{0}, src3{1-0}, dst{4-0} };
+}
+
+class S2_vsplicerb_enc : Enc_X_4op_r<0b00001000>;
+class S2_cabacencbin_enc : Enc_X_4op_r<0b00001010>;
+class F2_sffma_sc_enc : Enc_X_4op_r<0b11110111>;
+
+class V6_vhistq_enc : OpcodeHexagon {
+ bits<2> src1;
+
+ let Inst{31-16} = { 0b00011110, src1{1-0}, 0b000010 };
+ let Inst{13-0} = { 0b10000010000000 };
+}
+
+// TODO: Change script to generate dst1 instead of dst.
+class A6_vminub_RdP_enc : OpcodeHexagon {
+ bits<5> dst1;
+ bits<2> dst2;
+ bits<5> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b11101010111, src2{4-0} };
+ let Inst{13-0} = { 0, src1{4-0}, 0, dst2{1-0}, dst1{4-0} };
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
new file mode 100644
index 0000000..3c5ec17
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -0,0 +1,448 @@
+//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Hexagon Instruction Flags +
+//
+// *** Must match HexagonBaseInfo.h ***
+//===----------------------------------------------------------------------===//
+
+class IType<bits<5> t> {
+ bits<5> Value = t;
+}
+def TypePSEUDO : IType<0>;
+def TypeALU32 : IType<1>;
+def TypeCR : IType<2>;
+def TypeJR : IType<3>;
+def TypeJ : IType<4>;
+def TypeLD : IType<5>;
+def TypeST : IType<6>;
+def TypeSYSTEM : IType<7>;
+def TypeXTYPE : IType<8>;
+def TypeENDLOOP: IType<31>;
+
+// Maintain list of valid subtargets for each instruction.
+class SubTarget<bits<6> value> {
+ bits<6> Value = value;
+}
+
+def HasAnySubT : SubTarget<0x3f>; // 111111
+def HasV5SubT : SubTarget<0x3e>; // 111110
+def HasV55SubT : SubTarget<0x3c>; // 111100
+def HasV60SubT : SubTarget<0x38>; // 111000
+
+// Addressing modes for load/store instructions
+class AddrModeType<bits<3> value> {
+ bits<3> Value = value;
+}
+
+def NoAddrMode : AddrModeType<0>; // No addressing mode
+def Absolute : AddrModeType<1>; // Absolute addressing mode
+def AbsoluteSet : AddrModeType<2>; // Absolute set addressing mode
+def BaseImmOffset : AddrModeType<3>; // Indirect with offset
+def BaseLongOffset : AddrModeType<4>; // Indirect with long offset
+def BaseRegOffset : AddrModeType<5>; // Indirect with register offset
+def PostInc : AddrModeType<6>; // Post increment addressing mode
+
+class MemAccessSize<bits<4> value> {
+ bits<4> Value = value;
+}
+
+def NoMemAccess : MemAccessSize<0>;// Not a memory access instruction.
+def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb).
+def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh).
+def WordAccess : MemAccessSize<3>;// Word access instruction (memw).
+def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd)
+def Vector64Access : MemAccessSize<7>;// Vector access instruction (memv)
+def Vector128Access : MemAccessSize<8>;// Vector access instruction (memv)
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Declaration +
+//===----------------------------------------------------------------------===//
+
+class OpcodeHexagon {
+ field bits<32> Inst = ?; // Default to an invalid insn.
+ bits<4> IClass = 0; // ICLASS
+
+ let Inst{31-28} = IClass;
+
+ bits<1> zero = 0;
+}
+
+class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr, InstrItinClass itin, IType type>
+ : Instruction {
+ let Namespace = "Hexagon";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Constraints = cstr;
+ let Itinerary = itin;
+ let Size = 4;
+
+ // SoftFail is a field the disassembler can use to provide a way for
+ // instructions to not match without killing the whole decode process. It is
+ // mainly used for ARM, but Tablegen expects this field to exist or it fails
+ // to build the decode table.
+ field bits<32> SoftFail = 0;
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
+ // Instruction type according to the ISA.
+ IType Type = type;
+ let TSFlags{4-0} = Type.Value;
+
+ // Solo instructions, i.e., those that cannot be in a packet with others.
+ bits<1> isSolo = 0;
+ let TSFlags{5} = isSolo;
+ // Packed only with A or X-type instructions.
+ bits<1> isSoloAX = 0;
+ let TSFlags{6} = isSoloAX;
+ // Only A-type instruction in first slot or nothing.
+ bits<1> isSoloAin1 = 0;
+ let TSFlags{7} = isSoloAin1;
+
+ // Predicated instructions.
+ bits<1> isPredicated = 0;
+ let TSFlags{8} = isPredicated;
+ bits<1> isPredicatedFalse = 0;
+ let TSFlags{9} = isPredicatedFalse;
+ bits<1> isPredicatedNew = 0;
+ let TSFlags{10} = isPredicatedNew;
+ bits<1> isPredicateLate = 0;
+ let TSFlags{11} = isPredicateLate; // Late predicate producer insn.
+
+ // New-value insn helper fields.
+ bits<1> isNewValue = 0;
+ let TSFlags{12} = isNewValue; // New-value consumer insn.
+ bits<1> hasNewValue = 0;
+ let TSFlags{13} = hasNewValue; // New-value producer insn.
+ bits<3> opNewValue = 0;
+ let TSFlags{16-14} = opNewValue; // New-value produced operand.
+ bits<1> isNVStorable = 0;
+ let TSFlags{17} = isNVStorable; // Store that can become new-value store.
+ bits<1> isNVStore = 0;
+ let TSFlags{18} = isNVStore; // New-value store insn.
+ bits<1> isCVLoadable = 0;
+ let TSFlags{19} = isCVLoadable; // Load that can become cur-value load.
+ bits<1> isCVLoad = 0;
+ let TSFlags{20} = isCVLoad; // Cur-value load insn.
+
+ // Immediate extender helper fields.
+ bits<1> isExtendable = 0;
+ let TSFlags{21} = isExtendable; // Insn may be extended.
+ bits<1> isExtended = 0;
+ let TSFlags{22} = isExtended; // Insn must be extended.
+ bits<3> opExtendable = 0;
+ let TSFlags{25-23} = opExtendable; // Which operand may be extended.
+ bits<1> isExtentSigned = 0;
+ let TSFlags{26} = isExtentSigned; // Signed or unsigned range.
+ bits<5> opExtentBits = 0;
+ let TSFlags{31-27} = opExtentBits; // Number of bits of range before extending.
+ bits<2> opExtentAlign = 0;
+ let TSFlags{33-32} = opExtentAlign; // Alignment exponent before extending.
+
+ // If an instruction is valid on a subtarget, set the corresponding
+ // bit from validSubTargets.
+ // By default, instruction is valid on all subtargets.
+ SubTarget validSubTargets = HasAnySubT;
+ let TSFlags{39-34} = validSubTargets.Value;
+
+ // Addressing mode for load/store instructions.
+ AddrModeType addrMode = NoAddrMode;
+ let TSFlags{42-40} = addrMode.Value;
+
+ // Memory access size for mem access instructions (load/store)
+ MemAccessSize accessSize = NoMemAccess;
+ let TSFlags{46-43} = accessSize.Value;
+
+ bits<1> isTaken = 0;
+ let TSFlags {47} = isTaken; // Branch prediction.
+
+ bits<1> isFP = 0;
+ let TSFlags {48} = isFP; // Floating-point.
+
+ bits<1> hasNewValue2 = 0;
+ let TSFlags{50} = hasNewValue2; // Second New-value producer insn.
+ bits<3> opNewValue2 = 0;
+ let TSFlags{53-51} = opNewValue2; // Second New-value produced operand.
+
+ bits<1> isAccumulator = 0;
+ let TSFlags{54} = isAccumulator;
+
+ // Fields used for relation models.
+ bit isNonTemporal = 0;
+ string isNT = ""; // set to "true" for non-temporal vector stores.
+ string BaseOpcode = "";
+ string CextOpcode = "";
+ string PredSense = "";
+ string PNewValue = "";
+ string NValueST = ""; // Set to "true" for new-value stores.
+ string InputType = ""; // Input is "imm" or "reg" type.
+ string isFloat = "false"; // Set to "true" for the floating-point load/store.
+ string isBrTaken = !if(isTaken, "true", "false"); // Set to "true"/"false" for jump instructions
+
+ let PredSense = !if(isPredicated, !if(isPredicatedFalse, "false", "true"),
+ "");
+ let PNewValue = !if(isPredicatedNew, "new", "");
+ let NValueST = !if(isNVStore, "true", "false");
+ let isNT = !if(isNonTemporal, "true", "false");
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+}
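As a small sketch of how these fields are meant to be consumed (not part of the patch; the particular values are chosen only for illustration), a derived format or definition would normally fill them in with `let`, and the `let TSFlags{...}` assignments above then pack them into the flag word:

//   let validSubTargets = HasV60SubT;      // packed into TSFlags{39-34}
//   let addrMode        = BaseImmOffset;   // packed into TSFlags{42-40}
//   let accessSize      = Vector64Access;  // packed into TSFlags{46-43}
//   let isPredicated    = 1;               // packed into TSFlags{8}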
+
+//===----------------------------------------------------------------------===//
+// Instruction Classes Definitions +
+//===----------------------------------------------------------------------===//
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+let mayLoad = 1 in
+class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
+
+let mayLoad = 1 in
+class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+class CONSTLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayLoad = 1 in
+class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
+
+let mayLoad = 1 in
+class LD1Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>;
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+let mayStore = 1 in
+class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon;
+
+class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayStore = 1 in
+class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon;
+
+// Same as ST0Inst but doesn't derive from OpcodeHexagon.
+let mayStore = 1 in
+class ST1Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
+ : STInst<outs, ins, asmstr, pattern, cstr, itin>;
+
+// SYSTEM Instruction Class in V4 can take SLOT0 only.
+// In V2/V3 we used ST for this but in V4 ST can take SLOT0 or SLOT1.
+class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ST_tc_3stall_SLOT0>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>,
+ OpcodeHexagon;
+
+// ALU32 Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>, OpcodeHexagon;
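To show the instruction-class plumbing end to end, here is a minimal sketch of a definition built on ALU32Inst (not part of the patch): the mnemonic is real Hexagon syntax, but the def name is invented and IntRegs is assumed to come from the target's register definitions.

def SKETCH_add_rr : ALU32Inst<(outs IntRegs:$Rd),
                              (ins IntRegs:$Rs, IntRegs:$Rt),
                              "$Rd = add($Rs, $Rt)",
                              [(set (i32 IntRegs:$Rd),
                                    (add (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)))]>;
// The itinerary and IType default to ALU32_2op_tc_1_SLOT0123 / TypeALU32; the
// encoding bits (Inst{...}) would still have to be supplied by an Enc_* mixin.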
+
+// ALU64 Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
+class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ OpcodeHexagon;
+
+class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr, itin>;
+
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ OpcodeHexagon;
+
+// Same as above but doesn't derive from OpcodeHexagon
+class MInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = M_tc_2_SLOT23>
+ : MInst<outs, ins, asmstr, pattern, cstr, itin>;
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ OpcodeHexagon;
+
+class SInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = S_3op_tc_1_SLOT23>
+ : SInst<outs, ins, asmstr, pattern, cstr, itin>;
+
+// J Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = J_tc_2early_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>, OpcodeHexagon;
+
+// JR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = J_tc_2early_SLOT2>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>, OpcodeHexagon;
+
+// CR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CR_tc_2early_SLOT3>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>, OpcodeHexagon;
+
+let isCodeGenOnly = 1, isPseudo = 1 in
+class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = J_tc_2early_SLOT0123>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>,
+ OpcodeHexagon;
+
+let isCodeGenOnly = 1, isPseudo = 1 in
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>,
+ OpcodeHexagon;
+
+let isCodeGenOnly = 1, isPseudo = 1 in
+class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr="">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>,
+ OpcodeHexagon;
+
+//===----------------------------------------------------------------------===//
+// Instruction Classes Definitions -
+//===----------------------------------------------------------------------===//
+
+
+//
+// ALU32 patterns.
+//
+class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
+
+class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
+
+class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
+
+class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
+
+//
+// ALU64 patterns.
+//
+class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU64_tc_1_SLOT23>
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr, itin>;
+
+class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU64_tc_1_SLOT23>
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr, itin>;
+
+// Post increment ST Instruction.
+class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayStore = 1 in
+class STInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
+
+// Post increment LD Instruction.
+class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayLoad = 1 in
+class LDInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+//===----------------------------------------------------------------------===//
+// V4 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormatsV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Instruction Format Definitions -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V60 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormatsV60.td"
+
+//===----------------------------------------------------------------------===//
+// V60 Instruction Format Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
new file mode 100644
index 0000000..2d1dea5
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -0,0 +1,155 @@
+//==- HexagonInstrFormatsV4.td - Hexagon Instruction Formats -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V4 instruction classes in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//----------------------------------------------------------------------------//
+// Hexagon Instruction Flags
+//
+// *** Must match BaseInfo.h ***
+//----------------------------------------------------------------------------//
+
+def TypeMEMOP : IType<9>;
+def TypeNV : IType<10>;
+def TypeDUPLEX : IType<11>;
+def TypeCOMPOUND : IType<12>;
+def TypePREFIX : IType<30>;
+
+//===----------------------------------------------------------------------===//
+// Duplex Instruction Class Declaration
+//===----------------------------------------------------------------------===//
+
+class OpcodeDuplex {
+ field bits<32> Inst = ?; // Default to an invalid insn.
+ bits<4> IClass = 0; // ICLASS
+ bits<13> ISubHi = 0; // High sub-insn
+ bits<13> ISubLo = 0; // Low sub-insn
+
+ let Inst{31-29} = IClass{3-1};
+ let Inst{13} = IClass{0};
+ let Inst{15-14} = 0;
+ let Inst{28-16} = ISubHi;
+ let Inst{12-0} = ISubLo;
+}
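A quick worked example of the duplex layout above; the reading of bits 15-14 as packet-parse bits is an interpretation, not something the patch states.

// For IClass = 0b1010:
//   Inst{31-29} = IClass{3-1} = 0b101
//   Inst{13}    = IClass{0}   = 0
//   Inst{15-14} = 0b00
// with the high sub-instruction in Inst{28-16} and the low one in Inst{12-0}.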
+
+class InstDuplex<bits<4> iClass, list<dag> pattern = [],
+ string cstr = "">
+ : Instruction, OpcodeDuplex {
+ let Namespace = "Hexagon";
+ IType Type = TypeDUPLEX; // uses slot 0,1
+ let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins);
+ let IClass = iClass;
+ let Constraints = cstr;
+ let Itinerary = DUPLEX;
+ let Size = 4;
+
+ // SoftFail is a field the disassembler can use to provide a way for
+ // instructions to not match without killing the whole decode process. It is
+ // mainly used for ARM, but Tablegen expects this field to exist or it fails
+ // to build the decode table.
+ field bits<32> SoftFail = 0;
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
+ let TSFlags{4-0} = Type.Value;
+
+ // Predicated instructions.
+ bits<1> isPredicated = 0;
+ let TSFlags{6} = isPredicated;
+ bits<1> isPredicatedFalse = 0;
+ let TSFlags{7} = isPredicatedFalse;
+ bits<1> isPredicatedNew = 0;
+ let TSFlags{8} = isPredicatedNew;
+
+ // New-value insn helper fields.
+ bits<1> isNewValue = 0;
+ let TSFlags{9} = isNewValue; // New-value consumer insn.
+ bits<1> hasNewValue = 0;
+ let TSFlags{10} = hasNewValue; // New-value producer insn.
+ bits<3> opNewValue = 0;
+ let TSFlags{13-11} = opNewValue; // New-value produced operand.
+ bits<1> isNVStorable = 0;
+ let TSFlags{14} = isNVStorable; // Store that can become new-value store.
+ bits<1> isNVStore = 0;
+ let TSFlags{15} = isNVStore; // New-value store insn.
+
+ // Immediate extender helper fields.
+ bits<1> isExtendable = 0;
+ let TSFlags{16} = isExtendable; // Insn may be extended.
+ bits<1> isExtended = 0;
+ let TSFlags{17} = isExtended; // Insn must be extended.
+ bits<3> opExtendable = 0;
+ let TSFlags{20-18} = opExtendable; // Which operand may be extended.
+ bits<1> isExtentSigned = 0;
+ let TSFlags{21} = isExtentSigned; // Signed or unsigned range.
+ bits<5> opExtentBits = 0;
+ let TSFlags{26-22} = opExtentBits; // Number of bits of range before extending.
+ bits<2> opExtentAlign = 0;
+ let TSFlags{28-27} = opExtentAlign; // Alignment exponent before extending.
+}
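+
+// For illustration: the encoding above assembles a duplex word by forcing the
+// parse bits Inst{15-14} to 0b00, splitting the 4-bit ICLASS across
+// Inst{31-29} and Inst{13}, and placing the two 13-bit sub-instructions in
+// Inst{28-16} (ISubHi) and Inst{12-0} (ISubLo). A hypothetical definition
+// (the iClass and sub-instruction encodings here are made-up placeholders)
+// might look like:
+//
+//   def MY_DUPLEX : InstDuplex<0x3> {
+//     let ISubHi = 0x0123;
+//     let ISubLo = 0x0456;
+//   }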
+
+//----------------------------------------------------------------------------//
+// Instruction Classes Definitions
+//----------------------------------------------------------------------------//
+
+//
+// NV type instructions.
+//
+class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>, OpcodeHexagon;
+
+class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
+ : NVInst<outs, ins, asmstr, pattern, cstr, itin>;
+
+// Definition of Post increment new value store.
+class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0>
+ : NVInst<outs, ins, asmstr, pattern, cstr, itin>;
+
+// Post increment ST Instruction.
+let mayStore = 1 in
+class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0>
+ : NVInst<outs, ins, asmstr, pattern, cstr, itin>;
+
+// New-value conditional branch.
+class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayLoad = 1, mayStore = 1 in
+class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>,
+ OpcodeHexagon;
+
+class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0>
+ : MEMInst<outs, ins, asmstr, pattern, cstr, itin>;
+
+let isCodeGenOnly = 1 in
+class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []>
+ : InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123,
+ TypePREFIX>, OpcodeHexagon;
+
+class SUBInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypeDUPLEX>,
+ OpcodeHexagon;
+
+class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>,
+ OpcodeHexagon;
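+
+// For illustration, a format class from this file is typically instantiated
+// by the V4 instruction definitions (HexagonInstrInfoV4.td). A hypothetical
+// post-increment new-value store (the instruction name, operands and asm
+// string below are placeholders, not real definitions) might look like:
+//
+//   def S2_example_nv_pi : NVInstPI_V4<(outs IntRegs:$base_new),
+//       (ins IntRegs:$base, s4_0Imm:$off, IntRegs:$src),
+//       "memb($base++#$off) = $src.new", []>;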
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td
new file mode 100644
index 0000000..f3d43de
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td
@@ -0,0 +1,238 @@
+//==- HexagonInstrFormatsV60.td - Hexagon Instruction Formats -*- tablegen -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V60 instruction classes in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//----------------------------------------------------------------------------//
+// Hexagon Instruction Flags +
+//
+// *** Must match BaseInfo.h ***
+//----------------------------------------------------------------------------//
+
+def TypeCVI_VA : IType<13>;
+def TypeCVI_VA_DV : IType<14>;
+def TypeCVI_VX : IType<15>;
+def TypeCVI_VX_DV : IType<16>;
+def TypeCVI_VP : IType<17>;
+def TypeCVI_VP_VS : IType<18>;
+def TypeCVI_VS : IType<19>;
+def TypeCVI_VINLANESAT : IType<20>;
+def TypeCVI_VM_LD : IType<21>;
+def TypeCVI_VM_TMP_LD : IType<22>;
+def TypeCVI_VM_CUR_LD : IType<23>;
+def TypeCVI_VM_VP_LDU : IType<24>;
+def TypeCVI_VM_ST : IType<25>;
+def TypeCVI_VM_NEW_ST : IType<26>;
+def TypeCVI_VM_STU : IType<27>;
+def TypeCVI_HIST : IType<28>;
+//----------------------------------------------------------------------------//
+// Instruction Classes Definitions +
+//----------------------------------------------------------------------------//
+
+let validSubTargets = HasV60SubT in
+{
+class CVI_VA_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VA>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VA_DV_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VA_DV>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA_DV>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_LONG>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource_late<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_LATE>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+ Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_DV>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Slot2_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_DV_SLOT2>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_DV_LONG>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VP_LONG>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_VS_Resource_early<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VP_VS_EARLY>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_VS_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VP_VS_LONG>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_VS_Resource_long_early<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VP_VS_LONG_EARLY>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VS_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VS>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VS>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VINLANESAT_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VINLANESAT>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VINLANESAT>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VS_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VS>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VS>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_LD_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_LD>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_LD_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_LD>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_TMP_LD_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_TMP_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_TMP_LD_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_TMP_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_CUR_LD_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_CUR_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_CUR_LD>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_VP_LDU_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_VP_LDU>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_VP_LDU>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_VP_LDU_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_VP_LDU>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_VP_LDU>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_ST_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_ST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_ST>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_ST_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_ST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_ST>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_NEW_ST_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_NEW_ST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_NEW_ST>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_NEW_ST_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_NEW_ST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_NEW_ST>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_STU_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_STU>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_STU>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_STU_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_STU>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_STU>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_HIST_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_HIST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+}
+
+let validSubTargets = HasV60SubT in
+{
+class CVI_VA_Resource1<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VA>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>,
+ Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource1<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_DV>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+ Requires<[HasV60T, UseHVX]>;
+
+class CVI_HIST_Resource1<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_HIST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>,
+ Requires<[HasV60T, UseHVX]>;
+}
+
+
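+// For illustration, these resource classes are intended to be instantiated by
+// the V60/HVX instruction definitions (HexagonInstrInfoV60.td); choosing a
+// class ties an instruction to its HVX functional-unit type and itinerary and
+// gates it on HasV60T/UseHVX. A hypothetical vector add (name, operands and
+// asm string are placeholders) might look like:
+//
+//   def V6_example_vadd : CVI_VA_Resource<(outs VectorRegs:$dst),
+//       (ins VectorRegs:$src1, VectorRegs:$src2),
+//       "$dst = vadd($src1, $src2)", []>;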
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
new file mode 100644
index 0000000..eb3590c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -0,0 +1,3828 @@
+//===-- HexagonInstrInfo.cpp - Hexagon Instruction Information ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonInstrInfo.h"
+#include "Hexagon.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-instrinfo"
+
+#define GET_INSTRINFO_CTOR_DTOR
+#define GET_INSTRMAP_INFO
+#include "HexagonGenInstrInfo.inc"
+#include "HexagonGenDFAPacketizer.inc"
+
+cl::opt<bool> ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden,
+ cl::init(false), cl::desc("Do not consider inline-asm a scheduling/"
+ "packetization boundary."));
+
+static cl::opt<bool> EnableBranchPrediction("hexagon-enable-branch-prediction",
+ cl::Hidden, cl::init(true), cl::desc("Enable branch prediction"));
+
+static cl::opt<bool> DisableNVSchedule("disable-hexagon-nv-schedule",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable schedule adjustment for new value stores."));
+
+static cl::opt<bool> EnableTimingClassLatency(
+ "enable-timing-class-latency", cl::Hidden, cl::init(false),
+ cl::desc("Enable timing class latency"));
+
+static cl::opt<bool> EnableALUForwarding(
+ "enable-alu-forwarding", cl::Hidden, cl::init(true),
+ cl::desc("Enable vec alu forwarding"));
+
+static cl::opt<bool> EnableACCForwarding(
+ "enable-acc-forwarding", cl::Hidden, cl::init(true),
+ cl::desc("Enable vec acc forwarding"));
+
+static cl::opt<bool> BranchRelaxAsmLarge("branch-relax-asm-large",
+ cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("branch relax asm"));
+
+///
+/// Constants for Hexagon instructions.
+///
+const int Hexagon_MEMV_OFFSET_MAX_128B = 2047; // #s7
+const int Hexagon_MEMV_OFFSET_MIN_128B = -2048; // #s7
+const int Hexagon_MEMV_OFFSET_MAX = 1023; // #s6
+const int Hexagon_MEMV_OFFSET_MIN = -1024; // #s6
+const int Hexagon_MEMW_OFFSET_MAX = 4095;
+const int Hexagon_MEMW_OFFSET_MIN = -4096;
+const int Hexagon_MEMD_OFFSET_MAX = 8191;
+const int Hexagon_MEMD_OFFSET_MIN = -8192;
+const int Hexagon_MEMH_OFFSET_MAX = 2047;
+const int Hexagon_MEMH_OFFSET_MIN = -2048;
+const int Hexagon_MEMB_OFFSET_MAX = 1023;
+const int Hexagon_MEMB_OFFSET_MIN = -1024;
+const int Hexagon_ADDI_OFFSET_MAX = 32767;
+const int Hexagon_ADDI_OFFSET_MIN = -32768;
+const int Hexagon_MEMD_AUTOINC_MAX = 56;
+const int Hexagon_MEMD_AUTOINC_MIN = -64;
+const int Hexagon_MEMW_AUTOINC_MAX = 28;
+const int Hexagon_MEMW_AUTOINC_MIN = -32;
+const int Hexagon_MEMH_AUTOINC_MAX = 14;
+const int Hexagon_MEMH_AUTOINC_MIN = -16;
+const int Hexagon_MEMB_AUTOINC_MAX = 7;
+const int Hexagon_MEMB_AUTOINC_MIN = -8;
+const int Hexagon_MEMV_AUTOINC_MAX = 192;
+const int Hexagon_MEMV_AUTOINC_MIN = -256;
+const int Hexagon_MEMV_AUTOINC_MAX_128B = 384;
+const int Hexagon_MEMV_AUTOINC_MIN_128B = -512;
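+// Note, derived from the ranges above: each auto-increment limit corresponds
+// to a small signed immediate field scaled by the access size, e.g. MEMD is a
+// 4-bit signed field scaled by 8 (-8*8 = -64 .. 7*8 = 56), MEMW by 4
+// (-32 .. 28), MEMH by 2 (-16 .. 14), MEMB by 1 (-8 .. 7), and the vector
+// forms are 3-bit fields scaled by the 64- or 128-byte vector length.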
+
+// Pin the vtable to this file.
+void HexagonInstrInfo::anchor() {}
+
+HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
+ : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
+ RI() {}
+
+
+static bool isIntRegForSubInst(unsigned Reg) {
+ return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) ||
+ (Reg >= Hexagon::R16 && Reg <= Hexagon::R23);
+}
+
+
+static bool isDblRegForSubInst(unsigned Reg, const HexagonRegisterInfo &HRI) {
+ return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_loreg)) &&
+ isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_hireg));
+}
+
+
+/// Calculate number of instructions excluding the debug instructions.
+static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB,
+ MachineBasicBlock::const_instr_iterator MIE) {
+ unsigned Count = 0;
+ for (; MIB != MIE; ++MIB) {
+ if (!MIB->isDebugValue())
+ ++Count;
+ }
+ return Count;
+}
+
+
+/// Find the hardware loop instruction used to set-up the specified loop.
+/// On Hexagon, we have two instructions used to set-up the hardware loop
+/// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
+/// to indicate the end of a loop.
+static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
+ SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
+ int LOOPi;
+ int LOOPr;
+ if (EndLoopOp == Hexagon::ENDLOOP0) {
+ LOOPi = Hexagon::J2_loop0i;
+ LOOPr = Hexagon::J2_loop0r;
+ } else { // EndLoopOp == Hexagon::ENDLOOP1
+ LOOPi = Hexagon::J2_loop1i;
+ LOOPr = Hexagon::J2_loop1r;
+ }
+
+ // The loop set-up instruction will be in a predecessor block
+ for (MachineBasicBlock::pred_iterator PB = BB->pred_begin(),
+ PE = BB->pred_end(); PB != PE; ++PB) {
+ // If this block has already been visited, skip it.
+ if (!Visited.insert(*PB).second)
+ continue;
+ if (*PB == BB)
+ continue;
+ for (MachineBasicBlock::reverse_instr_iterator I = (*PB)->instr_rbegin(),
+ E = (*PB)->instr_rend(); I != E; ++I) {
+ int Opc = I->getOpcode();
+ if (Opc == LOOPi || Opc == LOOPr)
+ return &*I;
+ // We've reached a different loop, which means the loop0 has been removed.
+ if (Opc == EndLoopOp)
+ return 0;
+ }
+ // Check the predecessors for the LOOP instruction.
+ MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited);
+ if (loop)
+ return loop;
+ }
+ return 0;
+}
+
+
+/// Gather register def/uses from MI.
+/// This treats possible (predicated) defs as actually happening ones
+/// (conservatively).
+static inline void parseOperands(const MachineInstr *MI,
+ SmallVector<unsigned, 4> &Defs, SmallVector<unsigned, 8> &Uses) {
+ Defs.clear();
+ Uses.clear();
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (MO.isUse())
+ Uses.push_back(MO.getReg());
+
+ if (MO.isDef())
+ Defs.push_back(MO.getReg());
+ }
+}
+
+
+// Position dependent, so check twice for swap.
+static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) {
+ switch (Ga) {
+ case HexagonII::HSIG_None:
+ default:
+ return false;
+ case HexagonII::HSIG_L1:
+ return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_L2:
+ return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+ Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_S1:
+ return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+ Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_S2:
+ return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+ Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_S2 ||
+ Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_A:
+ return (Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_Compound:
+ return (Gb == HexagonII::HSIG_Compound);
+ }
+ return false;
+}
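+
+// For reference, the relation coded above amounts to the following pairing
+// matrix (Ga down the side, Gb across the top; 'x' marks a legal pair):
+//
+//              L1  L2  S1  S2   A  Compound
+//   L1          x   .   .   .   x      .
+//   L2          x   x   .   .   x      .
+//   S1          x   x   x   .   x      .
+//   S2          x   x   x   x   x      .
+//   A           .   .   .   .   x      .
+//   Compound    .   .   .   .   .      x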
+
+
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case Hexagon::L2_loadri_io:
+ case Hexagon::L2_loadrd_io:
+ case Hexagon::L2_loadrh_io:
+ case Hexagon::L2_loadrb_io:
+ case Hexagon::L2_loadrub_io:
+ if (MI->getOperand(2).isFI() &&
+ MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot stored to. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case Hexagon::S2_storeri_io:
+ case Hexagon::S2_storerd_io:
+ case Hexagon::S2_storerh_io:
+ case Hexagon::S2_storerb_io:
+ if (MI->getOperand(2).isFI() &&
+ MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+
+/// This function can analyze one/two way branching only and should (mostly) be
+/// called by target independent side.
+/// First entry is always the opcode of the branching instruction, except when
+/// the Cond vector is supposed to be empty, e.g., when AnalyzeBranch fails, a
+/// BB with only unconditional jump. Subsequent entries depend upon the opcode,
+/// e.g. Jump_c p will have
+/// Cond[0] = Jump_c
+/// Cond[1] = p
+/// HW-loop ENDLOOP:
+/// Cond[0] = ENDLOOP
+/// Cond[1] = MBB
+/// New value jump:
+/// Cond[0] = Hexagon::CMPEQri_f_Jumpnv_t_V4 -- specific opcode
+/// Cond[1] = R
+/// Cond[2] = Imm
+///
+bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ TBB = nullptr;
+ FBB = nullptr;
+ Cond.clear();
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::instr_iterator I = MBB.instr_end();
+ if (I == MBB.instr_begin())
+ return false;
+
+ // A basic block may look like this:
+ //
+ // [ insn
+ // EH_LABEL
+ // insn
+ // insn
+ // insn
+ // EH_LABEL
+ // insn ]
+ //
+ // It has two successors but does not end in a terminator;
+ // we don't know how to handle such a block.
+ do {
+ --I;
+ if (I->isEHLabel())
+ // Don't analyze EH branches.
+ return true;
+ } while (I != MBB.instr_begin());
+
+ I = MBB.instr_end();
+ --I;
+
+ while (I->isDebugValue()) {
+ if (I == MBB.instr_begin())
+ return false;
+ --I;
+ }
+
+ bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump &&
+ I->getOperand(0).isMBB();
+ // Delete the J2_jump if it's equivalent to a fall-through.
+ if (AllowModify && JumpToBlock &&
+ MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ DEBUG(dbgs() << "\nErasing the jump to successor block\n";);
+ I->eraseFromParent();
+ I = MBB.instr_end();
+ if (I == MBB.instr_begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(&*I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = &*I;
+ MachineInstr *SecondLastInst = nullptr;
+ // Find one more terminator if present.
+ for (;;) {
+ if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) {
+ if (!SecondLastInst)
+ SecondLastInst = &*I;
+ else
+ // This is a third branch.
+ return true;
+ }
+ if (I == MBB.instr_begin())
+ break;
+ --I;
+ }
+
+ int LastOpcode = LastInst->getOpcode();
+ int SecLastOpcode = SecondLastInst ? SecondLastInst->getOpcode() : 0;
+ // If the branch target is not a basic block, it could be a tail call.
+ // (It is, if the target is a function.)
+ if (LastOpcode == Hexagon::J2_jump && !LastInst->getOperand(0).isMBB())
+ return true;
+ if (SecLastOpcode == Hexagon::J2_jump &&
+ !SecondLastInst->getOperand(0).isMBB())
+ return true;
+
+ bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode);
+ bool LastOpcodeHasNVJump = isNewValueJump(LastInst);
+
+ // If there is only one terminator instruction, process it.
+ if (LastInst && !SecondLastInst) {
+ if (LastOpcode == Hexagon::J2_jump) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (isEndLoopN(LastOpcode)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ if (LastOpcodeHasJMP_c) {
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Only supporting rr/ri versions of new-value jumps.
+ if (LastOpcodeHasNVJump && (LastInst->getNumExplicitOperands() == 3)) {
+ TBB = LastInst->getOperand(2).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ }
+ DEBUG(dbgs() << "\nCan't analyze BB#" << MBB.getNumber()
+ << " with one jump\n";);
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode);
+ bool SecLastOpcodeHasNVJump = isNewValueJump(SecondLastInst);
+ if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // Only supporting rr/ri versions of new-value jumps.
+ if (SecLastOpcodeHasNVJump &&
+ (SecondLastInst->getNumExplicitOperands() == 3) &&
+ (LastOpcode == Hexagon::J2_jump)) {
+ TBB = SecondLastInst->getOperand(2).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two Hexagon:JMPs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecLastOpcode == Hexagon::J2_jump && LastOpcode == Hexagon::J2_jump) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst->getIterator();
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // If the block ends with an ENDLOOP, and J2_jump, handle it.
+ if (isEndLoopN(SecLastOpcode) && LastOpcode == Hexagon::J2_jump) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ DEBUG(dbgs() << "\nCan't analyze BB#" << MBB.getNumber()
+ << " with two jumps";);
+ // Otherwise, can't handle this.
+ return true;
+}
+
+
+unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber());
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ // Only removing branches from end of MBB.
+ if (!I->isBranch())
+ return Count;
+ if (Count && (I->getOpcode() == Hexagon::J2_jump))
+ llvm_unreachable("Malformed basic block: unconditional branch not last");
+ MBB.erase(&MBB.back());
+ I = MBB.end();
+ ++Count;
+ }
+ return Count;
+}
+
+
+unsigned HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
+ unsigned BOpc = Hexagon::J2_jump;
+ unsigned BccOpc = Hexagon::J2_jumpt;
+ assert(validateBranchCond(Cond) && "Invalid branching condition");
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ // Check if ReverseBranchCondition has asked to reverse this branch
+ // If we want to reverse the branch an odd number of times, we want
+ // J2_jumpf.
+ if (!Cond.empty() && Cond[0].isImm())
+ BccOpc = Cond[0].getImm();
+
+ if (!FBB) {
+ if (Cond.empty()) {
+ // Due to a bug in TailMerging/CFG Optimization, we need to add a
+ // special case handling of a predicated jump followed by an
+ // unconditional jump. If not, Tail Merging and CFG Optimization go
+ // into an infinite loop.
+ MachineBasicBlock *NewTBB, *NewFBB;
+ SmallVector<MachineOperand, 4> Cond;
+ MachineInstr *Term = MBB.getFirstTerminator();
+ if (Term != MBB.end() && isPredicated(Term) &&
+ !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) {
+ MachineBasicBlock *NextBB = &*++MBB.getIterator();
+ if (NewTBB == NextBB) {
+ ReverseBranchCondition(Cond);
+ RemoveBranch(MBB);
+ return InsertBranch(MBB, TBB, nullptr, Cond, DL);
+ }
+ }
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ } else if (isEndLoopN(Cond[0].getImm())) {
+ int EndLoopOp = Cond[0].getImm();
+ assert(Cond[1].isMBB());
+ // Since we're adding an ENDLOOP, there better be a LOOP instruction.
+ // Check for it, and change the BB target if needed.
+ SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+ MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+ assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+ Loop->getOperand(0).setMBB(TBB);
+ // Add the ENDLOOP after finding the LOOP0.
+ BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
+ } else if (isNewValueJump(Cond[0].getImm())) {
+ assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump");
+ // New value jump
+ // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset)
+ // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset)
+ unsigned Flags1 = getUndefRegState(Cond[1].isUndef());
+ DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber(););
+ if (Cond[2].isReg()) {
+ unsigned Flags2 = getUndefRegState(Cond[2].isUndef());
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
+ addReg(Cond[2].getReg(), Flags2).addMBB(TBB);
+ } else if(Cond[2].isImm()) {
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
+ addImm(Cond[2].getImm()).addMBB(TBB);
+ } else
+ llvm_unreachable("Invalid condition for branching");
+ } else {
+ assert((Cond.size() == 2) && "Malformed cond vector");
+ const MachineOperand &RO = Cond[1];
+ unsigned Flags = getUndefRegState(RO.isUndef());
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+ }
+ return 1;
+ }
+ assert((!Cond.empty()) &&
+ "Cond. cannot be empty when multiple branchings are required");
+ assert((!isNewValueJump(Cond[0].getImm())) &&
+ "NV-jump cannot be inserted with another branch");
+ // Special case for hardware loops. The condition is a basic block.
+ if (isEndLoopN(Cond[0].getImm())) {
+ int EndLoopOp = Cond[0].getImm();
+ assert(Cond[1].isMBB());
+ // Since we're adding an ENDLOOP, there better be a LOOP instruction.
+ // Check for it, and change the BB target if needed.
+ SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+ MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+ assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+ Loop->getOperand(0).setMBB(TBB);
+ // Add the ENDLOOP after finding the LOOP0.
+ BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
+ } else {
+ const MachineOperand &RO = Cond[1];
+ unsigned Flags = getUndefRegState(RO.isUndef());
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+ }
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+
+ return 2;
+}
+
+
+bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ BranchProbability Probability) const {
+ return nonDbgBBSize(&MBB) <= 3;
+}
+
+
+bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles, BranchProbability Probability)
+ const {
+ return nonDbgBBSize(&TMBB) <= 3 && nonDbgBBSize(&FMBB) <= 3;
+}
+
+
+bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumInstrs, BranchProbability Probability) const {
+ return NumInstrs <= 4;
+}
+
+
+void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg,
+ unsigned SrcReg, bool KillSrc) const {
+ auto &HRI = getRegisterInfo();
+ if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), DestReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::A2_tfrp), DestReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) {
+ // Map Pd = Ps to Pd = or(Ps, Ps).
+ BuildMI(MBB, I, DL, get(Hexagon::C2_or),
+ DestReg).addReg(SrcReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::DoubleRegsRegClass.contains(DestReg) &&
+ Hexagon::IntRegsRegClass.contains(SrcReg)) {
+ // We can have an overlap between single and double reg: r1:0 = r0.
+ if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) {
+ // r1:0 = r0
+ BuildMI(MBB, I, DL, get(Hexagon::A2_tfrsi), (RI.getSubReg(DestReg,
+ Hexagon::subreg_hireg))).addImm(0);
+ } else {
+ // r1:0 = r1 or no overlap.
+ BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), (RI.getSubReg(DestReg,
+ Hexagon::subreg_loreg))).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(Hexagon::A2_tfrsi), (RI.getSubReg(DestReg,
+ Hexagon::subreg_hireg))).addImm(0);
+ }
+ return;
+ }
+ if (Hexagon::CtrRegsRegClass.contains(DestReg) &&
+ Hexagon::IntRegsRegClass.contains(SrcReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::A2_tfrrcr), DestReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ Hexagon::IntRegsRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
+ Hexagon::PredRegsRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::C2_tfrrp), DestReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::VectorRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::V6_vassign), DestReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::VecDblRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::V6_vcombine), DestReg).
+ addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg),
+ getKillRegState(KillSrc)).
+ addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg),
+ getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::VecPredRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), DestReg).
+ addReg(SrcReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::VecPredRegsRegClass.contains(SrcReg) &&
+ Hexagon::VectorRegsRegClass.contains(DestReg)) {
+ llvm_unreachable("Unimplemented pred to vec");
+ return;
+ }
+ if (Hexagon::VecPredRegsRegClass.contains(DestReg) &&
+ Hexagon::VectorRegsRegClass.contains(SrcReg)) {
+ llvm_unreachable("Unimplemented vec to pred");
+ return;
+ }
+ if (Hexagon::VecPredRegs128BRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and),
+ HRI.getSubReg(DestReg, Hexagon::subreg_hireg)).
+ addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg),
+ getKillRegState(KillSrc));
+ BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and),
+ HRI.getSubReg(DestReg, Hexagon::subreg_loreg)).
+ addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg),
+ getKillRegState(KillSrc));
+ return;
+ }
+
+#ifndef NDEBUG
+ // Show the invalid registers to ease debugging.
+ dbgs() << "Invalid registers for copy in BB#" << MBB.getNumber()
+ << ": " << PrintReg(DestReg, &HRI)
+ << " = " << PrintReg(SrcReg, &HRI) << '\n';
+#endif
+ llvm_unreachable("Unimplemented");
+}
+
+
+void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(I);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), Align);
+
+ if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::S2_storeri_io))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::S2_storerd_io))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else if (Hexagon::PredRegsRegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::STriw_pred))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else {
+ llvm_unreachable("Unimplemented");
+ }
+}
+
+
+void HexagonInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, unsigned DestReg, int FI,
+ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(I);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), Align);
+ if (RC == &Hexagon::IntRegsRegClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::L2_loadri_io), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else if (RC == &Hexagon::DoubleRegsRegClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::L2_loadrd_io), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else if (RC == &Hexagon::PredRegsRegClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else {
+ llvm_unreachable("Can't load this register from stack slot");
+ }
+}
+
+
+/// expandPostRAPseudo - This function is called for all pseudo instructions
+/// that remain after register allocation. Many pseudo instructions are
+/// created to help register allocation. This is the place to convert them
+/// into real instructions. The target can edit MI in place, or it can insert
+/// new instructions and erase MI. The function should return true if
+/// anything was changed.
+bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI)
+ const {
+ const HexagonRegisterInfo &HRI = getRegisterInfo();
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Opc = MI->getOpcode();
+ const unsigned VecOffset = 1;
+ bool Is128B = false;
+
+ switch (Opc) {
+ case Hexagon::ALIGNA:
+ BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI->getOperand(0).getReg())
+ .addReg(HRI.getFrameRegister())
+ .addImm(-MI->getOperand(1).getImm());
+ MBB.erase(MI);
+ return true;
+ case Hexagon::HEXAGON_V6_vassignp_128B:
+ case Hexagon::HEXAGON_V6_vassignp: {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (SrcReg != DstReg)
+ copyPhysReg(MBB, MI, DL, DstReg, SrcReg, MI->getOperand(1).isKill());
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::HEXAGON_V6_lo_128B:
+ case Hexagon::HEXAGON_V6_lo: {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg);
+ copyPhysReg(MBB, MI, DL, DstReg, SrcSubLo, MI->getOperand(1).isKill());
+ MBB.erase(MI);
+ MRI.clearKillFlags(SrcSubLo);
+ return true;
+ }
+ case Hexagon::HEXAGON_V6_hi_128B:
+ case Hexagon::HEXAGON_V6_hi: {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg);
+ copyPhysReg(MBB, MI, DL, DstReg, SrcSubHi, MI->getOperand(1).isKill());
+ MBB.erase(MI);
+ MRI.clearKillFlags(SrcSubHi);
+ return true;
+ }
+ case Hexagon::STrivv_indexed_128B:
+ Is128B = true;
+ case Hexagon::STrivv_indexed: {
+ unsigned SrcReg = MI->getOperand(2).getReg();
+ unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg);
+ unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg);
+ unsigned NewOpcd = Is128B ? Hexagon::V6_vS32b_ai_128B
+ : Hexagon::V6_vS32b_ai;
+ unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
+ MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpcd))
+ .addOperand(MI->getOperand(0))
+ .addImm(MI->getOperand(1).getImm())
+ .addReg(SrcSubLo)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MI1New->getOperand(0).setIsKill(false);
+ BuildMI(MBB, MI, DL, get(NewOpcd))
+ .addOperand(MI->getOperand(0))
+ // The Vectors are indexed in multiples of vector size.
+ .addImm(MI->getOperand(1).getImm()+Offset)
+ .addReg(SrcSubHi)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::LDrivv_pseudo_V6_128B:
+ case Hexagon::LDrivv_indexed_128B:
+ Is128B = true;
+ case Hexagon::LDrivv_pseudo_V6:
+ case Hexagon::LDrivv_indexed: {
+ unsigned NewOpcd = Is128B ? Hexagon::V6_vL32b_ai_128B
+ : Hexagon::V6_vL32b_ai;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
+ MachineInstr *MI1New =
+ BuildMI(MBB, MI, DL, get(NewOpcd),
+ HRI.getSubReg(DstReg, Hexagon::subreg_loreg))
+ .addOperand(MI->getOperand(1))
+ .addImm(MI->getOperand(2).getImm());
+ MI1New->getOperand(1).setIsKill(false);
+ BuildMI(MBB, MI, DL, get(NewOpcd),
+ HRI.getSubReg(DstReg, Hexagon::subreg_hireg))
+ .addOperand(MI->getOperand(1))
+ // The Vectors are indexed in multiples of vector size.
+ .addImm(MI->getOperand(2).getImm() + Offset)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::LDriv_pseudo_V6_128B:
+ Is128B = true;
+ case Hexagon::LDriv_pseudo_V6: {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned NewOpc = Is128B ? Hexagon::V6_vL32b_ai_128B
+ : Hexagon::V6_vL32b_ai;
+ int32_t Off = MI->getOperand(2).getImm();
+ int32_t Idx = Off;
+ BuildMI(MBB, MI, DL, get(NewOpc), DstReg)
+ .addOperand(MI->getOperand(1))
+ .addImm(Idx)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::STriv_pseudo_V6_128B:
+ Is128B = true;
+ case Hexagon::STriv_pseudo_V6: {
+ unsigned NewOpc = Is128B ? Hexagon::V6_vS32b_ai_128B
+ : Hexagon::V6_vS32b_ai;
+ int32_t Off = MI->getOperand(1).getImm();
+ int32_t Idx = Is128B ? (Off >> 7) : (Off >> 6);
+ BuildMI(MBB, MI, DL, get(NewOpc))
+ .addOperand(MI->getOperand(0))
+ .addImm(Idx)
+ .addOperand(MI->getOperand(2))
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::TFR_PdTrue: {
+ unsigned Reg = MI->getOperand(0).getReg();
+ BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::TFR_PdFalse: {
+ unsigned Reg = MI->getOperand(0).getReg();
+ BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::VMULW: {
+ // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies.
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned Src1Reg = MI->getOperand(1).getReg();
+ unsigned Src2Reg = MI->getOperand(2).getReg();
+ unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+ unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+ unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+ unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
+ HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+ .addReg(Src2SubHi);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
+ HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+ .addReg(Src2SubLo);
+ MBB.erase(MI);
+ MRI.clearKillFlags(Src1SubHi);
+ MRI.clearKillFlags(Src1SubLo);
+ MRI.clearKillFlags(Src2SubHi);
+ MRI.clearKillFlags(Src2SubLo);
+ return true;
+ }
+ case Hexagon::VMULW_ACC: {
+ // Expand 64-bit vector multiply with addition into 2 scalar multiplies.
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned Src1Reg = MI->getOperand(1).getReg();
+ unsigned Src2Reg = MI->getOperand(2).getReg();
+ unsigned Src3Reg = MI->getOperand(3).getReg();
+ unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+ unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+ unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+ unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+ unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::subreg_hireg);
+ unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::subreg_loreg);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
+ HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+ .addReg(Src2SubHi).addReg(Src3SubHi);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
+ HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+ .addReg(Src2SubLo).addReg(Src3SubLo);
+ MBB.erase(MI);
+ MRI.clearKillFlags(Src1SubHi);
+ MRI.clearKillFlags(Src1SubLo);
+ MRI.clearKillFlags(Src2SubHi);
+ MRI.clearKillFlags(Src2SubLo);
+ MRI.clearKillFlags(Src3SubHi);
+ MRI.clearKillFlags(Src3SubLo);
+ return true;
+ }
+ case Hexagon::MUX64_rr: {
+ const MachineOperand &Op0 = MI->getOperand(0);
+ const MachineOperand &Op1 = MI->getOperand(1);
+ const MachineOperand &Op2 = MI->getOperand(2);
+ const MachineOperand &Op3 = MI->getOperand(3);
+ unsigned Rd = Op0.getReg();
+ unsigned Pu = Op1.getReg();
+ unsigned Rs = Op2.getReg();
+ unsigned Rt = Op3.getReg();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned K1 = getKillRegState(Op1.isKill());
+ unsigned K2 = getKillRegState(Op2.isKill());
+ unsigned K3 = getKillRegState(Op3.isKill());
+ if (Rd != Rs)
+ BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpt), Rd)
+ .addReg(Pu, (Rd == Rt) ? K1 : 0)
+ .addReg(Rs, K2);
+ if (Rd != Rt)
+ BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpf), Rd)
+ .addReg(Pu, K1)
+ .addReg(Rt, K3);
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::TCRETURNi:
+ MI->setDesc(get(Hexagon::J2_jump));
+ return true;
+ case Hexagon::TCRETURNr:
+ MI->setDesc(get(Hexagon::J2_jumpr));
+ return true;
+ case Hexagon::TFRI_f:
+ case Hexagon::TFRI_cPt_f:
+ case Hexagon::TFRI_cNotPt_f: {
+ unsigned Opx = (Opc == Hexagon::TFRI_f) ? 1 : 2;
+ APFloat FVal = MI->getOperand(Opx).getFPImm()->getValueAPF();
+ APInt IVal = FVal.bitcastToAPInt();
+ MI->RemoveOperand(Opx);
+ unsigned NewOpc = (Opc == Hexagon::TFRI_f) ? Hexagon::A2_tfrsi :
+ (Opc == Hexagon::TFRI_cPt_f) ? Hexagon::C2_cmoveit :
+ Hexagon::C2_cmoveif;
+ MI->setDesc(get(NewOpc));
+ MI->addOperand(MachineOperand::CreateImm(IVal.getZExtValue()));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+// We indicate that we want to reverse the branch by
+// inserting the reversed branching opcode.
+bool HexagonInstrInfo::ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ if (Cond.empty())
+ return true;
+ assert(Cond[0].isImm() && "First entry in the cond vector not imm-val");
+ unsigned opcode = Cond[0].getImm();
+ assert(get(opcode).isBranch() && "Should be a branching condition.");
+ if (isEndLoopN(opcode))
+ return true;
+ unsigned NewOpcode = getInvertedPredicatedOpcode(opcode);
+ Cond[0].setImm(NewOpcode);
+ return false;
+}
+
+
+void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(Hexagon::A2_nop));
+}
+
+
+// Returns true if an instruction is predicated irrespective of the predicate
+// sense. For example, all of the following will return true.
+// if (p0) R1 = add(R2, R3)
+// if (!p0) R1 = add(R2, R3)
+// if (p0.new) R1 = add(R2, R3)
+// if (!p0.new) R1 = add(R2, R3)
+// Note: New-value stores are not included here as in the current
+// implementation, we don't need to check their predicate sense.
+bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask;
+}
+
+
+bool HexagonInstrInfo::PredicateInstruction(MachineInstr *MI,
+ ArrayRef<MachineOperand> Cond) const {
+ if (Cond.empty() || isNewValueJump(Cond[0].getImm()) ||
+ isEndLoopN(Cond[0].getImm())) {
+ DEBUG(dbgs() << "\nCannot predicate:"; MI->dump(););
+ return false;
+ }
+ int Opc = MI->getOpcode();
+ assert (isPredicable(MI) && "Expected predicable instruction");
+ bool invertJump = predOpcodeHasNot(Cond);
+
+ // We have to predicate MI "in place", i.e. after this function returns,
+ // MI will need to be transformed into a predicated form. To avoid com-
+ // plicated manipulations with the operands (handling tied operands,
+ // etc.), build a new temporary instruction, then overwrite MI with it.
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned PredOpc = getCondOpcode(Opc, invertJump);
+ MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc));
+ unsigned NOp = 0, NumOps = MI->getNumOperands();
+ while (NOp < NumOps) {
+ MachineOperand &Op = MI->getOperand(NOp);
+ if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
+ break;
+ T.addOperand(Op);
+ NOp++;
+ }
+
+ unsigned PredReg, PredRegPos, PredRegFlags;
+ bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags);
+ (void)GotPredReg;
+ assert(GotPredReg);
+ T.addReg(PredReg, PredRegFlags);
+ while (NOp < NumOps)
+ T.addOperand(MI->getOperand(NOp++));
+
+ MI->setDesc(get(PredOpc));
+ while (unsigned n = MI->getNumOperands())
+ MI->RemoveOperand(n-1);
+ for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i)
+ MI->addOperand(T->getOperand(i));
+
+ MachineBasicBlock::instr_iterator TI = T->getIterator();
+ B.erase(TI);
+
+ MachineRegisterInfo &MRI = B.getParent()->getRegInfo();
+ MRI.clearKillFlags(PredReg);
+ return true;
+}
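+
+// For illustration (register and predicate names are hypothetical): with
+// Cond = { <conditional-jump opcode>, p0 } and MI being "r2 = add(r3, r4)",
+// the code above rebuilds MI as the predicated form "if (p0) r2 = add(r3, r4)"
+// by switching to the conditional opcode and splicing the predicate register
+// in after the explicit definitions.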
+
+
+bool HexagonInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const {
+ // TODO: Fix this
+ return false;
+}
+
+
+bool HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ auto &HRI = getRegisterInfo();
+ for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) {
+ MachineOperand MO = MI->getOperand(oper);
+ if (MO.isReg() && MO.isDef()) {
+ const TargetRegisterClass* RC = HRI.getMinimalPhysRegClass(MO.getReg());
+ if (RC == &Hexagon::PredRegsRegClass) {
+ Pred.push_back(MO);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
+ bool isPred = MI->getDesc().isPredicable();
+
+ if (!isPred)
+ return false;
+
+ const int Opc = MI->getOpcode();
+ int NumOperands = MI->getNumOperands();
+
+ // Keep a flag for up to 4 operands in the instruction, to indicate if
+ // that operand has been constant extended.
+ bool OpCExtended[4];
+ if (NumOperands > 4)
+ NumOperands = 4;
+
+ for (int i = 0; i < NumOperands; i++)
+ OpCExtended[i] = (isOperandExtended(MI, i) && isConstExtended(MI));
+
+ switch(Opc) {
+ case Hexagon::A2_tfrsi:
+ return (isOperandExtended(MI, 1) && isConstExtended(MI)) ||
+ isInt<12>(MI->getOperand(1).getImm());
+
+ case Hexagon::S2_storerd_io:
+ return isShiftedUInt<6,3>(MI->getOperand(1).getImm());
+
+ case Hexagon::S2_storeri_io:
+ case Hexagon::S2_storerinew_io:
+ return isShiftedUInt<6,2>(MI->getOperand(1).getImm());
+
+ case Hexagon::S2_storerh_io:
+ case Hexagon::S2_storerhnew_io:
+ return isShiftedUInt<6,1>(MI->getOperand(1).getImm());
+
+ case Hexagon::S2_storerb_io:
+ case Hexagon::S2_storerbnew_io:
+ return isUInt<6>(MI->getOperand(1).getImm());
+
+ case Hexagon::L2_loadrd_io:
+ return isShiftedUInt<6,3>(MI->getOperand(2).getImm());
+
+ case Hexagon::L2_loadri_io:
+ return isShiftedUInt<6,2>(MI->getOperand(2).getImm());
+
+ case Hexagon::L2_loadrh_io:
+ case Hexagon::L2_loadruh_io:
+ return isShiftedUInt<6,1>(MI->getOperand(2).getImm());
+
+ case Hexagon::L2_loadrb_io:
+ case Hexagon::L2_loadrub_io:
+ return isUInt<6>(MI->getOperand(2).getImm());
+
+ case Hexagon::L2_loadrd_pi:
+ return isShiftedInt<4,3>(MI->getOperand(3).getImm());
+
+ case Hexagon::L2_loadri_pi:
+ return isShiftedInt<4,2>(MI->getOperand(3).getImm());
+
+ case Hexagon::L2_loadrh_pi:
+ case Hexagon::L2_loadruh_pi:
+ return isShiftedInt<4,1>(MI->getOperand(3).getImm());
+
+ case Hexagon::L2_loadrb_pi:
+ case Hexagon::L2_loadrub_pi:
+ return isInt<4>(MI->getOperand(3).getImm());
+
+ case Hexagon::S4_storeirb_io:
+ case Hexagon::S4_storeirh_io:
+ case Hexagon::S4_storeiri_io:
+ return (OpCExtended[1] || isUInt<6>(MI->getOperand(1).getImm())) &&
+ (OpCExtended[2] || isInt<6>(MI->getOperand(2).getImm()));
+
+ case Hexagon::A2_addi:
+ return isInt<8>(MI->getOperand(2).getImm());
+
+ case Hexagon::A2_aslh:
+ case Hexagon::A2_asrh:
+ case Hexagon::A2_sxtb:
+ case Hexagon::A2_sxth:
+ case Hexagon::A2_zxtb:
+ case Hexagon::A2_zxth:
+ return true;
+ }
+
+ return true;
+}
+
+
+bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB, const MachineFunction &MF) const {
+ // Debug info is never a scheduling boundary. Be explicit about this so
+ // that a dbg_value is never treated as the boundary itself; the boundary
+ // should be the actual instruction preceding the dbg_value instruction(s),
+ // just like it is when debug info is not present.
+ if (MI->isDebugValue())
+ return false;
+
+ // Throwing call is a boundary.
+ if (MI->isCall()) {
+ // If any of the block's successors is a landing pad, this could be a
+ // throwing call.
+ for (auto I : MBB->successors())
+ if (I->isEHPad())
+ return true;
+ }
+
+ // Don't mess around with no-return calls.
+ if (MI->getOpcode() == Hexagon::CALLv3nr)
+ return true;
+
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isPosition())
+ return true;
+
+ if (MI->isInlineAsm() && !ScheduleInlineAsm)
+ return true;
+
+ return false;
+}
+
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorString or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorString or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overloaded in the target code to do that.
+/// Hexagon counts the number of ##'s and adjusts for that many
+/// constant extenders.
+unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const {
+ StringRef AStr(Str);
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0)
+ atInsnStart = true;
+ if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+ strlen(MAI.getCommentString())) == 0)
+ atInsnStart = false;
+ if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+ Length += MAI.getMaxInstLength();
+ atInsnStart = false;
+ }
+ }
+
+ // Add to size number of constant extenders seen * 4.
+ StringRef Occ("##");
+ Length += AStr.count(Occ)*4;
+ return Length;
+}
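+
+// A rough illustration of getInlineAsmLength above: an asm string such as
+// "r0 = ##myglobal\n r1 = add(r0, r2)" has two instruction starts and one
+// "##" occurrence, so, assuming MAI.getMaxInstLength() returns 4, the
+// estimate is 2*4 + 1*4 = 12 bytes. ("myglobal" is only a hypothetical
+// symbol name used for the example.)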
+
+
+ScheduleHazardRecognizer*
+HexagonInstrInfo::CreateTargetPostRAHazardRecognizer(
+ const InstrItineraryData *II, const ScheduleDAG *DAG) const {
+ return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
+}
+
+
+/// \brief For a comparison instruction, return the source registers in
+/// \p SrcReg and \p SrcReg2 if it has two register operands, and the value it
+/// compares against in \p Value. Return true if the comparison instruction
+/// can be analyzed.
+bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2, int &Mask, int &Value) const {
+ unsigned Opc = MI->getOpcode();
+
+ // Set mask and the first source register.
+ switch (Opc) {
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtp:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C4_cmpneqi:
+ case Hexagon::C4_cmplteui:
+ case Hexagon::C4_cmpltei:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = ~0;
+ break;
+ case Hexagon::A4_cmpbeq:
+ case Hexagon::A4_cmpbgt:
+ case Hexagon::A4_cmpbgtu:
+ case Hexagon::A4_cmpbeqi:
+ case Hexagon::A4_cmpbgti:
+ case Hexagon::A4_cmpbgtui:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = 0xFF;
+ break;
+ case Hexagon::A4_cmpheq:
+ case Hexagon::A4_cmphgt:
+ case Hexagon::A4_cmphgtu:
+ case Hexagon::A4_cmpheqi:
+ case Hexagon::A4_cmphgti:
+ case Hexagon::A4_cmphgtui:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = 0xFFFF;
+ break;
+ }
+
+ // Set the value/second source register.
+ switch (Opc) {
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtp:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::A4_cmpbeq:
+ case Hexagon::A4_cmpbgt:
+ case Hexagon::A4_cmpbgtu:
+ case Hexagon::A4_cmpheq:
+ case Hexagon::A4_cmphgt:
+ case Hexagon::A4_cmphgtu:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
+ SrcReg2 = MI->getOperand(2).getReg();
+ return true;
+
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C4_cmpneqi:
+ case Hexagon::C4_cmplteui:
+ case Hexagon::C4_cmpltei:
+ case Hexagon::A4_cmpbeqi:
+ case Hexagon::A4_cmpbgti:
+ case Hexagon::A4_cmpbgtui:
+ case Hexagon::A4_cmpheqi:
+ case Hexagon::A4_cmphgti:
+ case Hexagon::A4_cmphgtui:
+ SrcReg2 = 0;
+ Value = MI->getOperand(2).getImm();
+ return true;
+ }
+
+ return false;
+}
+
+
+unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI, unsigned *PredCost) const {
+ return getInstrTimingClassLatency(ItinData, MI);
+}
+
+
+DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
+ const TargetSubtargetInfo &STI) const {
+ const InstrItineraryData *II = STI.getInstrItineraryData();
+ return static_cast<const HexagonSubtarget&>(STI).createDFAPacketizer(II);
+}
+
+
+// Inspired by this pair:
+// %R13<def> = L2_loadri_io %R29, 136; mem:LD4[FixedStack0]
+// S2_storeri_io %R29, 132, %R1<kill>; flags: mem:ST4[FixedStack1]
+// Currently AA considers the addresses in these instructions to be aliasing.
+bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
+ MachineInstr *MIb, AliasAnalysis *AA) const {
+ int OffsetA = 0, OffsetB = 0;
+ unsigned SizeA = 0, SizeB = 0;
+
+ if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
+ MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
+ return false;
+
+ // Two pure loads (unlike memops, which both load and store) cannot
+ // conflict with each other.
+ if (MIa->mayLoad() && !isMemOp(MIa) && MIb->mayLoad() && !isMemOp(MIb))
+ return true;
+
+ // Get base, offset, and access size in MIa.
+ unsigned BaseRegA = getBaseAndOffset(MIa, OffsetA, SizeA);
+ if (!BaseRegA || !SizeA)
+ return false;
+
+ // Get base, offset, and access size in MIb.
+ unsigned BaseRegB = getBaseAndOffset(MIb, OffsetB, SizeB);
+ if (!BaseRegB || !SizeB)
+ return false;
+
+ if (BaseRegA != BaseRegB)
+ return false;
+
+ // This is a mem access with the same base register and known offsets from it.
+ // Reason about it.
+ if (OffsetA > OffsetB) {
+ uint64_t offDiff = (uint64_t)((int64_t)OffsetA - (int64_t)OffsetB);
+ return (SizeB <= offDiff);
+ } else if (OffsetA < OffsetB) {
+ uint64_t offDiff = (uint64_t)((int64_t)OffsetB - (int64_t)OffsetA);
+ return (SizeA <= offDiff);
+ }
+
+ return false;
+}
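+
+// Tracing the pair quoted above areMemAccessesTriviallyDisjoint: both
+// accesses use base R29, with offsets 136 and 132 and sizes 4 and 4. Since
+// 136 > 132, offDiff is 4, and the lower access (at 132, size 4) ends
+// exactly where the higher one begins, so the accesses are reported as
+// disjoint.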
+
+
+unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *TRC;
+ if (VT == MVT::i1) {
+ TRC = &Hexagon::PredRegsRegClass;
+ } else if (VT == MVT::i32 || VT == MVT::f32) {
+ TRC = &Hexagon::IntRegsRegClass;
+ } else if (VT == MVT::i64 || VT == MVT::f64) {
+ TRC = &Hexagon::DoubleRegsRegClass;
+ } else {
+ llvm_unreachable("Cannot handle this register class");
+ }
+
+ unsigned NewReg = MRI.createVirtualRegister(TRC);
+ return NewReg;
+}
+
+
+bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr* MI) const {
+ return (getAddrMode(MI) == HexagonII::AbsoluteSet);
+}
+
+
+bool HexagonInstrInfo::isAccumulator(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask);
+}
+
+
+bool HexagonInstrInfo::isComplex(const MachineInstr *MI) const {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+ if (!(isTC1(MI))
+ && !(QII->isTC2Early(MI))
+ && !(MI->getDesc().mayLoad())
+ && !(MI->getDesc().mayStore())
+ && (MI->getDesc().getOpcode() != Hexagon::S2_allocframe)
+ && (MI->getDesc().getOpcode() != Hexagon::L2_deallocframe)
+ && !(QII->isMemOp(MI))
+ && !(MI->isBranch())
+ && !(MI->isReturn())
+ && !MI->isCall())
+ return true;
+
+ return false;
+}
+
+
+// Return true if the instruction is a compound branch instruction.
+bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr *MI) const {
+ return (getType(MI) == HexagonII::TypeCOMPOUND && MI->isBranch());
+}
+
+
+bool HexagonInstrInfo::isCondInst(const MachineInstr *MI) const {
+ return (MI->isBranch() && isPredicated(MI)) ||
+ isConditionalTransfer(MI) ||
+ isConditionalALU32(MI) ||
+ isConditionalLoad(MI) ||
+ // Predicated stores which don't have a .new on any operands.
+ (MI->mayStore() && isPredicated(MI) && !isNewValueStore(MI) &&
+ !isPredicatedNew(MI));
+}
+
+
+bool HexagonInstrInfo::isConditionalALU32(const MachineInstr* MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::A2_paddf:
+ case Hexagon::A2_paddfnew:
+ case Hexagon::A2_paddif:
+ case Hexagon::A2_paddifnew:
+ case Hexagon::A2_paddit:
+ case Hexagon::A2_padditnew:
+ case Hexagon::A2_paddt:
+ case Hexagon::A2_paddtnew:
+ case Hexagon::A2_pandf:
+ case Hexagon::A2_pandfnew:
+ case Hexagon::A2_pandt:
+ case Hexagon::A2_pandtnew:
+ case Hexagon::A2_porf:
+ case Hexagon::A2_porfnew:
+ case Hexagon::A2_port:
+ case Hexagon::A2_portnew:
+ case Hexagon::A2_psubf:
+ case Hexagon::A2_psubfnew:
+ case Hexagon::A2_psubt:
+ case Hexagon::A2_psubtnew:
+ case Hexagon::A2_pxorf:
+ case Hexagon::A2_pxorfnew:
+ case Hexagon::A2_pxort:
+ case Hexagon::A2_pxortnew:
+ case Hexagon::A4_paslhf:
+ case Hexagon::A4_paslhfnew:
+ case Hexagon::A4_paslht:
+ case Hexagon::A4_paslhtnew:
+ case Hexagon::A4_pasrhf:
+ case Hexagon::A4_pasrhfnew:
+ case Hexagon::A4_pasrht:
+ case Hexagon::A4_pasrhtnew:
+ case Hexagon::A4_psxtbf:
+ case Hexagon::A4_psxtbfnew:
+ case Hexagon::A4_psxtbt:
+ case Hexagon::A4_psxtbtnew:
+ case Hexagon::A4_psxthf:
+ case Hexagon::A4_psxthfnew:
+ case Hexagon::A4_psxtht:
+ case Hexagon::A4_psxthtnew:
+ case Hexagon::A4_pzxtbf:
+ case Hexagon::A4_pzxtbfnew:
+ case Hexagon::A4_pzxtbt:
+ case Hexagon::A4_pzxtbtnew:
+ case Hexagon::A4_pzxthf:
+ case Hexagon::A4_pzxthfnew:
+ case Hexagon::A4_pzxtht:
+ case Hexagon::A4_pzxthtnew:
+ case Hexagon::C2_ccombinewf:
+ case Hexagon::C2_ccombinewt:
+ return true;
+ }
+ return false;
+}
+
+
+// FIXME - Function name and its functionality don't match.
+// It should be renamed to hasPredNewOpcode().
+bool HexagonInstrInfo::isConditionalLoad(const MachineInstr* MI) const {
+ if (!MI->getDesc().mayLoad() || !isPredicated(MI))
+ return false;
+
+ int PNewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode());
+ // Instruction with valid predicated-new opcode can be promoted to .new.
+ return PNewOpcode >= 0;
+}
+
+
+// Returns true if an instruction is a conditional store.
+//
+// Note: It doesn't include conditional new-value stores as they can't be
+// converted to .new predicate.
+bool HexagonInstrInfo::isConditionalStore(const MachineInstr* MI) const {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case Hexagon::S4_storeirbt_io:
+ case Hexagon::S4_storeirbf_io:
+ case Hexagon::S4_pstorerbt_rr:
+ case Hexagon::S4_pstorerbf_rr:
+ case Hexagon::S2_pstorerbt_io:
+ case Hexagon::S2_pstorerbf_io:
+ case Hexagon::S2_pstorerbt_pi:
+ case Hexagon::S2_pstorerbf_pi:
+ case Hexagon::S2_pstorerdt_io:
+ case Hexagon::S2_pstorerdf_io:
+ case Hexagon::S4_pstorerdt_rr:
+ case Hexagon::S4_pstorerdf_rr:
+ case Hexagon::S2_pstorerdt_pi:
+ case Hexagon::S2_pstorerdf_pi:
+ case Hexagon::S2_pstorerht_io:
+ case Hexagon::S2_pstorerhf_io:
+ case Hexagon::S4_storeirht_io:
+ case Hexagon::S4_storeirhf_io:
+ case Hexagon::S4_pstorerht_rr:
+ case Hexagon::S4_pstorerhf_rr:
+ case Hexagon::S2_pstorerht_pi:
+ case Hexagon::S2_pstorerhf_pi:
+ case Hexagon::S2_pstorerit_io:
+ case Hexagon::S2_pstorerif_io:
+ case Hexagon::S4_storeirit_io:
+ case Hexagon::S4_storeirif_io:
+ case Hexagon::S4_pstorerit_rr:
+ case Hexagon::S4_pstorerif_rr:
+ case Hexagon::S2_pstorerit_pi:
+ case Hexagon::S2_pstorerif_pi:
+
+ // V4 global address store before promoting to dot new.
+ case Hexagon::S4_pstorerdt_abs:
+ case Hexagon::S4_pstorerdf_abs:
+ case Hexagon::S4_pstorerbt_abs:
+ case Hexagon::S4_pstorerbf_abs:
+ case Hexagon::S4_pstorerht_abs:
+ case Hexagon::S4_pstorerhf_abs:
+ case Hexagon::S4_pstorerit_abs:
+ case Hexagon::S4_pstorerif_abs:
+ return true;
+
+ // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded
+ // from the "Conditional Store" list. Because a predicated new value store
+ // would NOT be promoted to a double dot new store.
+ // This function returns yes for those stores that are predicated but not
+ // yet promoted to predicate dot new instructions.
+ }
+}
+
+
+bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::A2_tfrt:
+ case Hexagon::A2_tfrf:
+ case Hexagon::C2_cmoveit:
+ case Hexagon::C2_cmoveif:
+ case Hexagon::A2_tfrtnew:
+ case Hexagon::A2_tfrfnew:
+ case Hexagon::C2_cmovenewit:
+ case Hexagon::C2_cmovenewif:
+ case Hexagon::A2_tfrpt:
+ case Hexagon::A2_tfrpf:
+ return true;
+
+ default:
+ return false;
+ }
+ return false;
+}
+
+
+// TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle
+// isFPImm and later getFPImm as well.
+bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+ if (isExtended) // Instruction must be extended.
+ return true;
+
+ unsigned isExtendable =
+ (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
+ if (!isExtendable)
+ return false;
+
+ if (MI->isCall())
+ return false;
+
+ short ExtOpNum = getCExtOpNum(MI);
+ const MachineOperand &MO = MI->getOperand(ExtOpNum);
+ // Use MO operand flags to determine if MO
+ // has the HMOTF_ConstExtended flag set.
+ if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+ return true;
+ // If this is a Machine BB address we are talking about, and it is
+ // not marked as extended, say so.
+ if (MO.isMBB())
+ return false;
+
+ // We could be using an instruction with an extendable immediate and shoehorn
+ // a global address into it. If it is a global address it will be constant
+ // extended. We do this for COMBINE.
+ // We currently only handle isGlobal() because it is the only kind of
+ // object we are going to end up with here for now.
+ // In the future we probably should add isSymbol(), etc.
+ if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() ||
+ MO.isJTI() || MO.isCPI())
+ return true;
+
+ // If the extendable operand is not 'Immediate' type, the instruction should
+ // have 'isExtended' flag set.
+ assert(MO.isImm() && "Extendable operand must be Immediate type");
+
+ int MinValue = getMinValue(MI);
+ int MaxValue = getMaxValue(MI);
+ int ImmValue = MO.getImm();
+
+ return (ImmValue < MinValue || ImmValue > MaxValue);
+}
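+
+// To illustrate isConstExtended above: an extendable immediate operand forces
+// a constant extender whenever it falls outside [getMinValue(MI),
+// getMaxValue(MI)]; e.g. if an instruction's encodable range were
+// [-32768, 32767], an immediate of 100000 would make this return true.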
+
+
+bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::L4_return :
+ case Hexagon::L4_return_t :
+ case Hexagon::L4_return_f :
+ case Hexagon::L4_return_tnew_pnt :
+ case Hexagon::L4_return_fnew_pnt :
+ case Hexagon::L4_return_tnew_pt :
+ case Hexagon::L4_return_fnew_pt :
+ return true;
+ }
+ return false;
+}
+
+
+// Return true when ConsMI uses a register defined by ProdMI.
+bool HexagonInstrInfo::isDependent(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const {
+ const MCInstrDesc &ProdMCID = ProdMI->getDesc();
+ if (!ProdMCID.getNumDefs())
+ return false;
+
+ auto &HRI = getRegisterInfo();
+
+ SmallVector<unsigned, 4> DefsA;
+ SmallVector<unsigned, 4> DefsB;
+ SmallVector<unsigned, 8> UsesA;
+ SmallVector<unsigned, 8> UsesB;
+
+ parseOperands(ProdMI, DefsA, UsesA);
+ parseOperands(ConsMI, DefsB, UsesB);
+
+ for (auto &RegA : DefsA)
+ for (auto &RegB : UsesB) {
+ // True data dependency.
+ if (RegA == RegB)
+ return true;
+
+ if (Hexagon::DoubleRegsRegClass.contains(RegA))
+ for (MCSubRegIterator SubRegs(RegA, &HRI); SubRegs.isValid(); ++SubRegs)
+ if (RegB == *SubRegs)
+ return true;
+
+ if (Hexagon::DoubleRegsRegClass.contains(RegB))
+ for (MCSubRegIterator SubRegs(RegB, &HRI); SubRegs.isValid(); ++SubRegs)
+ if (RegA == *SubRegs)
+ return true;
+ }
+
+ return false;
+}
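+
+// Note that isDependent above also catches sub-register overlap: a producer
+// that defines the register pair D0 feeds a consumer that reads R0 or R1,
+// since those are D0's sub-registers.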
+
+
+// Returns true if the instruction is already a .cur.
+bool HexagonInstrInfo::isDotCurInst(const MachineInstr* MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::V6_vL32b_cur_pi:
+ case Hexagon::V6_vL32b_cur_ai:
+ case Hexagon::V6_vL32b_cur_pi_128B:
+ case Hexagon::V6_vL32b_cur_ai_128B:
+ return true;
+ }
+ return false;
+}
+
+
+// Returns true if the instruction is a dot-new instruction, whether it is
+// predicated dot-new or a new-value (register) dot-new.
+bool HexagonInstrInfo::isDotNewInst(const MachineInstr* MI) const {
+ if (isNewValueInst(MI) ||
+ (isPredicated(MI) && isPredicatedNew(MI)))
+ return true;
+
+ return false;
+}
+
+
+/// Symmetrical. See if these two instructions are fit for a duplex pair.
+bool HexagonInstrInfo::isDuplexPair(const MachineInstr *MIa,
+ const MachineInstr *MIb) const {
+ HexagonII::SubInstructionGroup MIaG = getDuplexCandidateGroup(MIa);
+ HexagonII::SubInstructionGroup MIbG = getDuplexCandidateGroup(MIb);
+ return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG));
+}
+
+
+bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+
+ if (MI->mayLoad() || MI->mayStore() || MI->isCompare())
+ return true;
+
+ // Multiply
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ if (SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23)
+ return true;
+ return false;
+}
+
+
+bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const {
+ return (Opcode == Hexagon::ENDLOOP0 ||
+ Opcode == Hexagon::ENDLOOP1);
+}
+
+
+bool HexagonInstrInfo::isExpr(unsigned OpType) const {
+ switch(OpType) {
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
+ const MCInstrDesc &MID = MI->getDesc();
+ const uint64_t F = MID.TSFlags;
+ if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask)
+ return true;
+
+ // TODO: This is largely obsolete now. Will need to be removed
+ // in consecutive patches.
+ switch(MI->getOpcode()) {
+ // TFR_FI Remains a special case.
+ case Hexagon::TFR_FI:
+ return true;
+ default:
+ return false;
+ }
+ return false;
+}
+
+
+// This returns true in two cases:
+// - The OP code itself indicates that this is an extended instruction.
+// - One of MOs has been marked with HMOTF_ConstExtended flag.
+bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
+ // First check if this is permanently extended op code.
+ const uint64_t F = MI->getDesc().TSFlags;
+ if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask)
+ return true;
+ // Use MO operand flags to determine if one of MI's operands
+ // has HMOTF_ConstExtended flag set.
+ for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+ E = MI->operands_end(); I != E; ++I) {
+ if (I->getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::isFloat(const MachineInstr *MI) const {
+ unsigned Opcode = MI->getOpcode();
+ const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::FPPos) & HexagonII::FPMask;
+}
+
+
+// No V60 HVX VMEM with A_INDIRECT.
+bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr *I,
+ const MachineInstr *J) const {
+ if (!isV60VectorInstruction(I))
+ return false;
+ if (!I->mayLoad() && !I->mayStore())
+ return false;
+ return J->isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J);
+}
+
+
+bool HexagonInstrInfo::isIndirectCall(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::J2_callr :
+ case Hexagon::J2_callrf :
+ case Hexagon::J2_callrt :
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::L4_return :
+ case Hexagon::L4_return_t :
+ case Hexagon::L4_return_f :
+ case Hexagon::L4_return_fnew_pnt :
+ case Hexagon::L4_return_fnew_pt :
+ case Hexagon::L4_return_tnew_pnt :
+ case Hexagon::L4_return_tnew_pt :
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::isJumpR(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::J2_jumpr :
+ case Hexagon::J2_jumprt :
+ case Hexagon::J2_jumprf :
+ case Hexagon::J2_jumprtnewpt :
+ case Hexagon::J2_jumprfnewpt :
+ case Hexagon::J2_jumprtnew :
+ case Hexagon::J2_jumprfnew :
+ return true;
+ }
+ return false;
+}
+
+
+// Return true if a given MI can accommodate the given offset.
+// Use an abs estimate as opposed to the exact number.
+// TODO: This will need to be changed to use MC level
+// definition of instruction extendable field size.
+bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr *MI,
+ unsigned offset) const {
+ // This selection of jump instructions matches what AnalyzeBranch can
+ // parse, plus NVJ.
+ if (isNewValueJump(MI)) // r9:2
+ return isInt<11>(offset);
+
+ switch (MI->getOpcode()) {
+ // Still missing Jump to address condition on register value.
+ default:
+ return false;
+ case Hexagon::J2_jump: // bits<24> dst; // r22:2
+ case Hexagon::J2_call:
+ case Hexagon::CALLv3nr:
+ return isInt<24>(offset);
+ case Hexagon::J2_jumpt: //bits<17> dst; // r15:2
+ case Hexagon::J2_jumpf:
+ case Hexagon::J2_jumptnew:
+ case Hexagon::J2_jumptnewpt:
+ case Hexagon::J2_jumpfnew:
+ case Hexagon::J2_jumpfnewpt:
+ case Hexagon::J2_callt:
+ case Hexagon::J2_callf:
+ return isInt<17>(offset);
+ case Hexagon::J2_loop0i:
+ case Hexagon::J2_loop0iext:
+ case Hexagon::J2_loop0r:
+ case Hexagon::J2_loop0rext:
+ case Hexagon::J2_loop1i:
+ case Hexagon::J2_loop1iext:
+ case Hexagon::J2_loop1r:
+ case Hexagon::J2_loop1rext:
+ return isInt<9>(offset);
+ // TODO: Add all the compound branches here. Can we do this in Relation model?
+ case Hexagon::J4_cmpeqi_tp0_jump_nt:
+ case Hexagon::J4_cmpeqi_tp1_jump_nt:
+ return isInt<11>(offset);
+ }
+}
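+
+// A worked example for isJumpWithinBranchRange above: an offset of 100000 is
+// out of range for J2_jumpt (isInt<17> covers [-65536, 65535]) but fits
+// J2_jump and J2_call (isInt<24> covers [-8388608, 8388607]).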
+
+
+bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI,
+ const MachineInstr *ESMI) const {
+ if (!LRMI || !ESMI)
+ return false;
+
+ bool isLate = isLateResultInstr(LRMI);
+ bool isEarly = isEarlySourceInstr(ESMI);
+
+ DEBUG(dbgs() << "V60" << (isLate ? "-LR " : " -- "));
+ DEBUG(LRMI->dump());
+ DEBUG(dbgs() << "V60" << (isEarly ? "-ES " : " -- "));
+ DEBUG(ESMI->dump());
+
+ if (isLate && isEarly) {
+ DEBUG(dbgs() << "++Is Late Result feeding Early Source\n");
+ return true;
+ }
+
+ return false;
+}
+
+
+bool HexagonInstrInfo::isLateResultInstr(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+
+ switch (MI->getOpcode()) {
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::PHI:
+ return false;
+ default:
+ break;
+ }
+
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+
+ switch (SchedClass) {
+ case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU64_tc_1_SLOT23:
+ case Hexagon::Sched::EXTENDER_tc_1_SLOT0123:
+ case Hexagon::Sched::S_2op_tc_1_SLOT23:
+ case Hexagon::Sched::S_3op_tc_1_SLOT23:
+ case Hexagon::Sched::V2LDST_tc_ld_SLOT01:
+ case Hexagon::Sched::V2LDST_tc_st_SLOT0:
+ case Hexagon::Sched::V2LDST_tc_st_SLOT01:
+ case Hexagon::Sched::V4LDST_tc_ld_SLOT01:
+ case Hexagon::Sched::V4LDST_tc_st_SLOT0:
+ case Hexagon::Sched::V4LDST_tc_st_SLOT01:
+ return false;
+ }
+ return true;
+}
+
+
+bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+
+ // Instructions with iclass A_CVI_VX and attribute A_CVI_LATE use a multiply
+ // resource, but all operands can be received late like an ALU instruction.
+ return MI->getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE;
+}
+
+
+bool HexagonInstrInfo::isLoopN(const MachineInstr *MI) const {
+ unsigned Opcode = MI->getOpcode();
+ return Opcode == Hexagon::J2_loop0i ||
+ Opcode == Hexagon::J2_loop0r ||
+ Opcode == Hexagon::J2_loop0iext ||
+ Opcode == Hexagon::J2_loop0rext ||
+ Opcode == Hexagon::J2_loop1i ||
+ Opcode == Hexagon::J2_loop1r ||
+ Opcode == Hexagon::J2_loop1iext ||
+ Opcode == Hexagon::J2_loop1rext;
+}
+
+
+bool HexagonInstrInfo::isMemOp(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case Hexagon::L4_iadd_memopw_io :
+ case Hexagon::L4_isub_memopw_io :
+ case Hexagon::L4_add_memopw_io :
+ case Hexagon::L4_sub_memopw_io :
+ case Hexagon::L4_and_memopw_io :
+ case Hexagon::L4_or_memopw_io :
+ case Hexagon::L4_iadd_memoph_io :
+ case Hexagon::L4_isub_memoph_io :
+ case Hexagon::L4_add_memoph_io :
+ case Hexagon::L4_sub_memoph_io :
+ case Hexagon::L4_and_memoph_io :
+ case Hexagon::L4_or_memoph_io :
+ case Hexagon::L4_iadd_memopb_io :
+ case Hexagon::L4_isub_memopb_io :
+ case Hexagon::L4_add_memopb_io :
+ case Hexagon::L4_sub_memopb_io :
+ case Hexagon::L4_and_memopb_io :
+ case Hexagon::L4_or_memopb_io :
+ case Hexagon::L4_ior_memopb_io:
+ case Hexagon::L4_ior_memoph_io:
+ case Hexagon::L4_ior_memopw_io:
+ case Hexagon::L4_iand_memopb_io:
+ case Hexagon::L4_iand_memoph_io:
+ case Hexagon::L4_iand_memopw_io:
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask;
+}
+
+
+bool HexagonInstrInfo::isNewValue(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask;
+}
+
+
+bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const {
+ return isNewValueJump(MI) || isNewValueStore(MI);
+}
+
+
+bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
+ return isNewValue(MI) && MI->isBranch();
+}
+
+
+bool HexagonInstrInfo::isNewValueJump(unsigned Opcode) const {
+ return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode);
+}
+
+
+bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask;
+}
+
+
+bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask;
+}
+
+
+// Returns true if a particular operand is extendable for an instruction.
+bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI,
+ unsigned OperandNum) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
+ == OperandNum;
+}
+
+
+bool HexagonInstrInfo::isPostIncrement(const MachineInstr* MI) const {
+ return getAddrMode(MI) == HexagonII::PostInc;
+}
+
+
+bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ assert(isPredicated(MI));
+ return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask;
+}
+
+
+bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ assert(isPredicated(Opcode));
+ return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask;
+}
+
+
+bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return !((F >> HexagonII::PredicatedFalsePos) &
+ HexagonII::PredicatedFalseMask);
+}
+
+
+bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ // Make sure that the instruction is predicated.
+ assert((F>> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
+ return !((F >> HexagonII::PredicatedFalsePos) &
+ HexagonII::PredicatedFalseMask);
+}
+
+
+bool HexagonInstrInfo::isPredicated(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask;
+}
+
+
+bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask;
+}
+
+
+bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ assert(get(Opcode).isBranch() &&
+ (isPredicatedNew(Opcode) || isNewValue(Opcode)));
+ return (F >> HexagonII::TakenPos) & HexagonII::TakenMask;
+}
+
+
+bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
+ return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 ||
+ MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT;
+}
+
+
+bool HexagonInstrInfo::isSolo(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::SoloPos) & HexagonII::SoloMask;
+}
+
+
+bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::STriw_pred :
+ case Hexagon::LDriw_pred :
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+// Returns true when SU has a timing class TC1.
+bool HexagonInstrInfo::isTC1(const MachineInstr *MI) const {
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ switch (SchedClass) {
+ case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU64_tc_1_SLOT23:
+ case Hexagon::Sched::EXTENDER_tc_1_SLOT0123:
+ //case Hexagon::Sched::M_tc_1_SLOT23:
+ case Hexagon::Sched::S_2op_tc_1_SLOT23:
+ case Hexagon::Sched::S_3op_tc_1_SLOT23:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+
+bool HexagonInstrInfo::isTC2(const MachineInstr *MI) const {
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ switch (SchedClass) {
+ case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123:
+ case Hexagon::Sched::ALU64_tc_2_SLOT23:
+ case Hexagon::Sched::CR_tc_2_SLOT3:
+ case Hexagon::Sched::M_tc_2_SLOT23:
+ case Hexagon::Sched::S_2op_tc_2_SLOT23:
+ case Hexagon::Sched::S_3op_tc_2_SLOT23:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+
+bool HexagonInstrInfo::isTC2Early(const MachineInstr *MI) const {
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ switch (SchedClass) {
+ case Hexagon::Sched::ALU32_2op_tc_2early_SLOT0123:
+ case Hexagon::Sched::ALU32_3op_tc_2early_SLOT0123:
+ case Hexagon::Sched::ALU64_tc_2early_SLOT23:
+ case Hexagon::Sched::CR_tc_2early_SLOT23:
+ case Hexagon::Sched::CR_tc_2early_SLOT3:
+ case Hexagon::Sched::J_tc_2early_SLOT0123:
+ case Hexagon::Sched::J_tc_2early_SLOT2:
+ case Hexagon::Sched::J_tc_2early_SLOT23:
+ case Hexagon::Sched::S_2op_tc_2early_SLOT23:
+ case Hexagon::Sched::S_3op_tc_2early_SLOT23:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+
+bool HexagonInstrInfo::isTC4x(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23;
+}
+
+
+bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+
+ const uint64_t V = getType(MI);
+ return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST;
+}
+
+
+// Check if the Offset is a valid auto-inc imm by Load/Store Type.
+//
+bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const {
+ if (VT == MVT::v16i32 || VT == MVT::v8i64 ||
+ VT == MVT::v32i16 || VT == MVT::v64i8) {
+ return (Offset >= Hexagon_MEMV_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMV_AUTOINC_MAX &&
+ (Offset & 0x3f) == 0);
+ }
+ // 128B
+ if (VT == MVT::v32i32 || VT == MVT::v16i64 ||
+ VT == MVT::v64i16 || VT == MVT::v128i8) {
+ return (Offset >= Hexagon_MEMV_AUTOINC_MIN_128B &&
+ Offset <= Hexagon_MEMV_AUTOINC_MAX_128B &&
+ (Offset & 0x7f) == 0);
+ }
+ if (VT == MVT::i64) {
+ return (Offset >= Hexagon_MEMD_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMD_AUTOINC_MAX &&
+ (Offset & 0x7) == 0);
+ }
+ if (VT == MVT::i32) {
+ return (Offset >= Hexagon_MEMW_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMW_AUTOINC_MAX &&
+ (Offset & 0x3) == 0);
+ }
+ if (VT == MVT::i16) {
+ return (Offset >= Hexagon_MEMH_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMH_AUTOINC_MAX &&
+ (Offset & 0x1) == 0);
+ }
+ if (VT == MVT::i8) {
+ return (Offset >= Hexagon_MEMB_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMB_AUTOINC_MAX);
+ }
+ llvm_unreachable("Not an auto-inc opc!");
+}
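+
+// For example, in isValidAutoIncImm above, an MVT::i32 auto-increment must be
+// 4-byte aligned and lie within [Hexagon_MEMW_AUTOINC_MIN,
+// Hexagon_MEMW_AUTOINC_MAX], so an increment of 6 is rejected for being
+// misaligned; the 512-bit HVX vector types additionally require 64-byte
+// alignment.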
+
+
+bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
+ bool Extend) const {
+ // This function checks whether the "Offset" is in the correct range for
+ // the given "Opcode". If "Offset" is not in the correct range, "A2_addi" is
+ // inserted to calculate the final address. Due to this reason, the function
+ // assumes that the "Offset" has correct alignment.
+ // We used to assert if the offset was not properly aligned, however,
+ // there are cases where a misaligned pointer recast can cause this
+ // problem, and we need to allow for it. The front end warns of such
+ // misaligns with respect to load size.
+
+ switch (Opcode) {
+ case Hexagon::STriq_pred_V6:
+ case Hexagon::STriq_pred_vec_V6:
+ case Hexagon::STriv_pseudo_V6:
+ case Hexagon::STrivv_pseudo_V6:
+ case Hexagon::LDriq_pred_V6:
+ case Hexagon::LDriq_pred_vec_V6:
+ case Hexagon::LDriv_pseudo_V6:
+ case Hexagon::LDrivv_pseudo_V6:
+ case Hexagon::LDrivv_indexed:
+ case Hexagon::STrivv_indexed:
+ case Hexagon::V6_vL32b_ai:
+ case Hexagon::V6_vS32b_ai:
+ case Hexagon::V6_vL32Ub_ai:
+ case Hexagon::V6_vS32Ub_ai:
+ return (Offset >= Hexagon_MEMV_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMV_OFFSET_MAX);
+
+ case Hexagon::STriq_pred_V6_128B:
+ case Hexagon::STriq_pred_vec_V6_128B:
+ case Hexagon::STriv_pseudo_V6_128B:
+ case Hexagon::STrivv_pseudo_V6_128B:
+ case Hexagon::LDriq_pred_V6_128B:
+ case Hexagon::LDriq_pred_vec_V6_128B:
+ case Hexagon::LDriv_pseudo_V6_128B:
+ case Hexagon::LDrivv_pseudo_V6_128B:
+ case Hexagon::LDrivv_indexed_128B:
+ case Hexagon::STrivv_indexed_128B:
+ case Hexagon::V6_vL32b_ai_128B:
+ case Hexagon::V6_vS32b_ai_128B:
+ case Hexagon::V6_vL32Ub_ai_128B:
+ case Hexagon::V6_vS32Ub_ai_128B:
+ return (Offset >= Hexagon_MEMV_OFFSET_MIN_128B) &&
+ (Offset <= Hexagon_MEMV_OFFSET_MAX_128B);
+
+ case Hexagon::J2_loop0i:
+ case Hexagon::J2_loop1i:
+ return isUInt<10>(Offset);
+ }
+
+ if (Extend)
+ return true;
+
+ switch (Opcode) {
+ case Hexagon::L2_loadri_io:
+ case Hexagon::S2_storeri_io:
+ return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMW_OFFSET_MAX);
+
+ case Hexagon::L2_loadrd_io:
+ case Hexagon::S2_storerd_io:
+ return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMD_OFFSET_MAX);
+
+ case Hexagon::L2_loadrh_io:
+ case Hexagon::L2_loadruh_io:
+ case Hexagon::S2_storerh_io:
+ return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMH_OFFSET_MAX);
+
+ case Hexagon::L2_loadrb_io:
+ case Hexagon::L2_loadrub_io:
+ case Hexagon::S2_storerb_io:
+ return (Offset >= Hexagon_MEMB_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMB_OFFSET_MAX);
+
+ case Hexagon::A2_addi:
+ return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
+ (Offset <= Hexagon_ADDI_OFFSET_MAX);
+
+ case Hexagon::L4_iadd_memopw_io :
+ case Hexagon::L4_isub_memopw_io :
+ case Hexagon::L4_add_memopw_io :
+ case Hexagon::L4_sub_memopw_io :
+ case Hexagon::L4_and_memopw_io :
+ case Hexagon::L4_or_memopw_io :
+ return (0 <= Offset && Offset <= 255);
+
+ case Hexagon::L4_iadd_memoph_io :
+ case Hexagon::L4_isub_memoph_io :
+ case Hexagon::L4_add_memoph_io :
+ case Hexagon::L4_sub_memoph_io :
+ case Hexagon::L4_and_memoph_io :
+ case Hexagon::L4_or_memoph_io :
+ return (0 <= Offset && Offset <= 127);
+
+ case Hexagon::L4_iadd_memopb_io :
+ case Hexagon::L4_isub_memopb_io :
+ case Hexagon::L4_add_memopb_io :
+ case Hexagon::L4_sub_memopb_io :
+ case Hexagon::L4_and_memopb_io :
+ case Hexagon::L4_or_memopb_io :
+ return (0 <= Offset && Offset <= 63);
+
+ // LDriw_pred and STriw_pred are pseudo operations, so they have to accept
+ // an offset of any size. A later pass knows how to handle them.
+ case Hexagon::STriw_pred:
+ case Hexagon::LDriw_pred:
+ return true;
+
+ case Hexagon::TFR_FI:
+ case Hexagon::TFR_FIA:
+ case Hexagon::INLINEASM:
+ return true;
+
+ case Hexagon::L2_ploadrbt_io:
+ case Hexagon::L2_ploadrbf_io:
+ case Hexagon::L2_ploadrubt_io:
+ case Hexagon::L2_ploadrubf_io:
+ case Hexagon::S2_pstorerbt_io:
+ case Hexagon::S2_pstorerbf_io:
+ case Hexagon::S4_storeirb_io:
+ case Hexagon::S4_storeirbt_io:
+ case Hexagon::S4_storeirbf_io:
+ return isUInt<6>(Offset);
+
+ case Hexagon::L2_ploadrht_io:
+ case Hexagon::L2_ploadrhf_io:
+ case Hexagon::L2_ploadruht_io:
+ case Hexagon::L2_ploadruhf_io:
+ case Hexagon::S2_pstorerht_io:
+ case Hexagon::S2_pstorerhf_io:
+ case Hexagon::S4_storeirh_io:
+ case Hexagon::S4_storeirht_io:
+ case Hexagon::S4_storeirhf_io:
+ return isShiftedUInt<6,1>(Offset);
+
+ case Hexagon::L2_ploadrit_io:
+ case Hexagon::L2_ploadrif_io:
+ case Hexagon::S2_pstorerit_io:
+ case Hexagon::S2_pstorerif_io:
+ case Hexagon::S4_storeiri_io:
+ case Hexagon::S4_storeirit_io:
+ case Hexagon::S4_storeirif_io:
+ return isShiftedUInt<6,2>(Offset);
+
+ case Hexagon::L2_ploadrdt_io:
+ case Hexagon::L2_ploadrdf_io:
+ case Hexagon::S2_pstorerdt_io:
+ case Hexagon::S2_pstorerdf_io:
+ return isShiftedUInt<6,3>(Offset);
+ } // switch
+
+ llvm_unreachable("No offset range is defined for this opcode. "
+ "Please define it in the above switch statement!");
+}
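+
+// For example, in isValidOffset above, a predicated double-word access such
+// as S2_pstorerdt_io uses isShiftedUInt<6,3>, i.e. offsets that are
+// multiples of 8 in [0, 504]; offsets of 512 or 12 (misaligned) are rejected
+// unless the operand is extended.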
+
+
+bool HexagonInstrInfo::isVecAcc(const MachineInstr *MI) const {
+ return MI && isV60VectorInstruction(MI) && isAccumulator(MI);
+}
+
+
+bool HexagonInstrInfo::isVecALU(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+ const uint64_t F = get(MI->getOpcode()).TSFlags;
+ const uint64_t V = ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
+ return
+ V == HexagonII::TypeCVI_VA ||
+ V == HexagonII::TypeCVI_VA_DV;
+}
+
+
+bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const {
+ if (EnableACCForwarding && isVecAcc(ProdMI) && isVecAcc(ConsMI))
+ return true;
+
+ if (EnableALUForwarding && (isVecALU(ConsMI) || isLateSourceInstr(ConsMI)))
+ return true;
+
+ if (mayBeNewStore(ConsMI))
+ return true;
+
+ return false;
+}
+
+
+/// \brief Can these instructions execute at the same time in a bundle.
+bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr *First,
+ const MachineInstr *Second) const {
+ if (DisableNVSchedule)
+ return false;
+ if (mayBeNewStore(Second)) {
+ // Make sure the definition of the first instruction is the value being
+ // stored.
+ const MachineOperand &Stored =
+ Second->getOperand(Second->getNumOperands() - 1);
+ if (!Stored.isReg())
+ return false;
+ for (unsigned i = 0, e = First->getNumOperands(); i < e; ++i) {
+ const MachineOperand &Op = First->getOperand(i);
+ if (Op.isReg() && Op.isDef() && Op.getReg() == Stored.getReg())
+ return true;
+ }
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const {
+ for (auto &I : *B)
+ if (I.isEHLabel())
+ return true;
+ return false;
+}
+
+
+// Returns true if an instruction can be converted into a non-extended
+// equivalent instruction.
+bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr *MI) const {
+ short NonExtOpcode;
+ // Check if the instruction has a register form that uses register in place
+ // of the extended operand, if so return that as the non-extended form.
+ if (Hexagon::getRegForm(MI->getOpcode()) >= 0)
+ return true;
+
+ if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+ // Check addressing mode and retrieve non-ext equivalent instruction.
+
+ switch (getAddrMode(MI)) {
+ case HexagonII::Absolute :
+ // Load/store with absolute addressing mode can be converted into
+ // base+offset mode.
+ NonExtOpcode = Hexagon::getBaseWithImmOffset(MI->getOpcode());
+ break;
+ case HexagonII::BaseImmOffset :
+ // Load/store with base+offset addressing mode can be converted into
+ // base+register offset addressing mode. However, the left-shift operand
+ // should be set to 0.
+ NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode());
+ break;
+ case HexagonII::BaseLongOffset:
+ NonExtOpcode = Hexagon::getRegShlForm(MI->getOpcode());
+ break;
+ default:
+ return false;
+ }
+ if (NonExtOpcode < 0)
+ return false;
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr *MI) const {
+ return Hexagon::getRealHWInstr(MI->getOpcode(),
+ Hexagon::InstrType_Pseudo) >= 0;
+}
+
+
+bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B)
+ const {
+ MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end();
+ while (I != E) {
+ if (I->isBarrier())
+ return true;
+ ++I;
+ }
+ return false;
+}
+
+
+// Returns true if an LD insn can be promoted to a .cur load.
+bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr *MI) const {
+ auto &HST = MI->getParent()->getParent()->getSubtarget<HexagonSubtarget>();
+ const uint64_t F = MI->getDesc().TSFlags;
+ return ((F >> HexagonII::mayCVLoadPos) & HexagonII::mayCVLoadMask) &&
+ HST.hasV60TOps();
+}
+
+
+// Returns true if an ST insn can be promoted to a new-value store.
+bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask;
+}
+
+
+bool HexagonInstrInfo::producesStall(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const {
+ // There is no stall when ProdMI is not a V60 vector.
+ if (!isV60VectorInstruction(ProdMI))
+ return false;
+
+ // There is no stall when ProdMI and ConsMI are not dependent.
+ if (!isDependent(ProdMI, ConsMI))
+ return false;
+
+ // When Forward Scheduling is enabled, there is no stall if ProdMI and ConsMI
+ // are scheduled in consecutive packets.
+ if (isVecUsableNextPacket(ProdMI, ConsMI))
+ return false;
+
+ return true;
+}
+
+
+bool HexagonInstrInfo::producesStall(const MachineInstr *MI,
+ MachineBasicBlock::const_instr_iterator BII) const {
+ // There is no stall when I is not a V60 vector.
+ if (!isV60VectorInstruction(MI))
+ return false;
+
+ MachineBasicBlock::const_instr_iterator MII = BII;
+ MachineBasicBlock::const_instr_iterator MIE = MII->getParent()->instr_end();
+
+ if (!(*MII).isBundle()) {
+ const MachineInstr *J = &*MII;
+ if (!isV60VectorInstruction(J))
+ return false;
+ else if (isVecUsableNextPacket(J, MI))
+ return false;
+ return true;
+ }
+
+ for (++MII; MII != MIE && MII->isInsideBundle(); ++MII) {
+ const MachineInstr *J = &*MII;
+ if (producesStall(J, MI))
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr *MI,
+ unsigned PredReg) const {
+ for (unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isReg() && MO.isDef() && MO.isImplicit() && (MO.getReg() == PredReg))
+ return false; // Predicate register must be explicitly defined.
+ }
+
+ // The Hexagon Programmer's Reference says that decbin, memw_locked, and
+ // memd_locked also cannot be used as .new,
+ // but we don't seem to have these instructions defined.
+ return MI->getOpcode() != Hexagon::A4_tlbmatch;
+}
+
+
+bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const {
+ return (Opcode == Hexagon::J2_jumpt) ||
+ (Opcode == Hexagon::J2_jumpf) ||
+ (Opcode == Hexagon::J2_jumptnew) ||
+ (Opcode == Hexagon::J2_jumpfnew) ||
+ (Opcode == Hexagon::J2_jumptnewpt) ||
+ (Opcode == Hexagon::J2_jumpfnewpt);
+}
+
+
+bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const {
+ if (Cond.empty() || !isPredicated(Cond[0].getImm()))
+ return false;
+ return !isPredicatedTrue(Cond[0].getImm());
+}
+
+
+unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask;
+}
+
+
+// Returns the base register in a memory access (load/store). The offset is
+// returned in Offset and the access size is returned in AccessSize.
+unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr *MI,
+ int &Offset, unsigned &AccessSize) const {
+ // Return if it is not a base+offset type instruction or a MemOp.
+ if (getAddrMode(MI) != HexagonII::BaseImmOffset &&
+ getAddrMode(MI) != HexagonII::BaseLongOffset &&
+ !isMemOp(MI) && !isPostIncrement(MI))
+ return 0;
+
+ // Since it is a memory access instruction, getMemAccessSize() should never
+ // return 0.
+ assert (getMemAccessSize(MI) &&
+ "BaseImmOffset or BaseLongOffset or MemOp without accessSize");
+
+ // Return Values of getMemAccessSize() are
+ // 0 - Checked in the assert above.
+ // 1, 2, 3, 4 & 7, 8 - The statement below is correct for all these.
+ // MemAccessSize is represented as 1+log2(N) where N is size in bytes.
+ AccessSize = (1U << (getMemAccessSize(MI) - 1));
+
+ unsigned basePos = 0, offsetPos = 0;
+ if (!getBaseAndOffsetPosition(MI, basePos, offsetPos))
+ return 0;
+
+ // Post increment updates its EA after the mem access,
+ // so we need to treat its offset as zero.
+ if (isPostIncrement(MI))
+ Offset = 0;
+ else {
+ Offset = MI->getOperand(offsetPos).getImm();
+ }
+
+ return MI->getOperand(basePos).getReg();
+}
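+
+// For example, in getBaseAndOffset above, a word load has a MemAccessSize
+// code of 3 (1 + log2(4)), so AccessSize becomes 1 << (3 - 1) = 4 bytes; a
+// double-word access (code 4) yields 8 bytes.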
+
+
+/// Return the position of the base and offset operands for this instruction.
+bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr *MI,
+ unsigned &BasePos, unsigned &OffsetPos) const {
+ // Deal with memops first.
+ if (isMemOp(MI)) {
+ assert (MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
+ "Bad Memop.");
+ BasePos = 0;
+ OffsetPos = 1;
+ } else if (MI->mayStore()) {
+ BasePos = 0;
+ OffsetPos = 1;
+ } else if (MI->mayLoad()) {
+ BasePos = 1;
+ OffsetPos = 2;
+ } else
+ return false;
+
+ if (isPredicated(MI)) {
+ BasePos++;
+ OffsetPos++;
+ }
+ if (isPostIncrement(MI)) {
+ BasePos++;
+ OffsetPos++;
+ }
+
+ if (!MI->getOperand(BasePos).isReg() || !MI->getOperand(OffsetPos).isImm())
+ return false;
+
+ return true;
+}
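+
+// To illustrate getBaseAndOffsetPosition above: a plain load keeps its base
+// in operand 1 and its offset in operand 2; if the load is predicated, both
+// positions shift by one (to 2 and 3), and a post-increment form shifts them
+// by one more.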
+
+
+// Inserts branching instructions in reverse order of their occurrence.
+// e.g. jump_t t1 (i1)
+// jump t2 (i2)
+// Jumpers = {i2, i1}
+SmallVector<MachineInstr*, 2> HexagonInstrInfo::getBranchingInstrs(
+ MachineBasicBlock& MBB) const {
+ SmallVector<MachineInstr*, 2> Jumpers;
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::instr_iterator I = MBB.instr_end();
+ if (I == MBB.instr_begin())
+ return Jumpers;
+
+ // A basic block may look like this:
+ //
+ // [ insn
+ // EH_LABEL
+ // insn
+ // insn
+ // insn
+ // EH_LABEL
+ // insn ]
+ //
+ // It has two successors but does not have a terminator.
+ // We don't know how to handle such a block, so give up.
+ do {
+ --I;
+ if (I->isEHLabel())
+ return Jumpers;
+ } while (I != MBB.instr_begin());
+
+ I = MBB.instr_end();
+ --I;
+
+ while (I->isDebugValue()) {
+ if (I == MBB.instr_begin())
+ return Jumpers;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(&*I))
+ return Jumpers;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = &*I;
+ Jumpers.push_back(LastInst);
+ MachineInstr *SecondLastInst = nullptr;
+ // Find one more terminator if present.
+ do {
+ if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) {
+ if (!SecondLastInst) {
+ SecondLastInst = &*I;
+ Jumpers.push_back(SecondLastInst);
+ } else // This is a third branch.
+ return Jumpers;
+ }
+ if (I == MBB.instr_begin())
+ break;
+ --I;
+ } while (true);
+ return Jumpers;
+}
+
+
+// Returns Operand Index for the constant extended instruction.
+unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask;
+}
+
+// See if instruction could potentially be a duplex candidate.
+// If so, return its group. Zero otherwise.
+HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup(
+ const MachineInstr *MI) const {
+ unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
+
+ switch (MI->getOpcode()) {
+ default:
+ return HexagonII::HCG_None;
+ //
+ // Compound pairs.
+ // "p0=cmp.eq(Rs16,Rt16); if (p0.new) jump:nt #r9:2"
+ // "Rd16=#U6 ; jump #r9:2"
+ // "Rd16=Rs16 ; jump #r9:2"
+ //
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtu:
+ DstReg = MI->getOperand(0).getReg();
+ Src1Reg = MI->getOperand(1).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+ (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+ isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg))
+ return HexagonII::HCG_A;
+ break;
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgtui:
+ // P0 = cmp.eq(Rs,#u2)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+ (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+ isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() &&
+ ((isUInt<5>(MI->getOperand(2).getImm())) ||
+ (MI->getOperand(2).getImm() == -1)))
+ return HexagonII::HCG_A;
+ break;
+ case Hexagon::A2_tfr:
+ // Rd = Rs
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg))
+ return HexagonII::HCG_A;
+ break;
+ case Hexagon::A2_tfrsi:
+ // Rd = #u6
+ // Do not test for #u6 size since the const is getting extended
+ // regardless and a compound could still be formed.
+ DstReg = MI->getOperand(0).getReg();
+ if (isIntRegForSubInst(DstReg))
+ return HexagonII::HCG_A;
+ break;
+ case Hexagon::S2_tstbit_i:
+ DstReg = MI->getOperand(0).getReg();
+ Src1Reg = MI->getOperand(1).getReg();
+ if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+ (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+ MI->getOperand(2).isImm() &&
+ isIntRegForSubInst(Src1Reg) && (MI->getOperand(2).getImm() == 0))
+ return HexagonII::HCG_A;
+ break;
+ // The fact that .new form is used pretty much guarantees
+ // that predicate register will match. Nevertheless,
+ // there could be some false positives without additional
+ // checking.
+ case Hexagon::J2_jumptnew:
+ case Hexagon::J2_jumpfnew:
+ case Hexagon::J2_jumptnewpt:
+ case Hexagon::J2_jumpfnewpt:
+ Src1Reg = MI->getOperand(0).getReg();
+ if (Hexagon::PredRegsRegClass.contains(Src1Reg) &&
+ (Hexagon::P0 == Src1Reg || Hexagon::P1 == Src1Reg))
+ return HexagonII::HCG_B;
+ break;
+ // Transfer and jump:
+ // Rd=#U6 ; jump #r9:2
+ // Rd=Rs ; jump #r9:2
+ // Do not test for jump range here.
+ case Hexagon::J2_jump:
+ case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
+ return HexagonII::HCG_C;
+ break;
+ }
+
+ return HexagonII::HCG_None;
+}
+
+
+// Returns -1 when there is no opcode found.
+unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr *GA,
+ const MachineInstr *GB) const {
+ assert(getCompoundCandidateGroup(GA) == HexagonII::HCG_A);
+ assert(getCompoundCandidateGroup(GB) == HexagonII::HCG_B);
+ if ((GA->getOpcode() != Hexagon::C2_cmpeqi) ||
+ (GB->getOpcode() != Hexagon::J2_jumptnew))
+ return -1;
+ unsigned DestReg = GA->getOperand(0).getReg();
+ if (!GB->readsRegister(DestReg))
+ return -1;
+ if (DestReg == Hexagon::P0)
+ return Hexagon::J4_cmpeqi_tp0_jump_nt;
+ if (DestReg == Hexagon::P1)
+ return Hexagon::J4_cmpeqi_tp1_jump_nt;
+ return -1;
+}
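+
+// For example, getCompoundOpcode above only fuses a C2_cmpeqi that defines
+// P0 or P1 with a J2_jumptnew that reads that same predicate, producing
+// J4_cmpeqi_tp0_jump_nt or J4_cmpeqi_tp1_jump_nt respectively.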
+
+
+int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
+ enum Hexagon::PredSense inPredSense;
+ inPredSense = invertPredicate ? Hexagon::PredSense_false :
+ Hexagon::PredSense_true;
+ int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense);
+ if (CondOpcode >= 0) // Valid Conditional opcode/instruction
+ return CondOpcode;
+
+ // This switch case will be removed once all the instructions have been
+ // modified to use relation maps.
+ switch(Opc) {
+ case Hexagon::TFRI_f:
+ return !invertPredicate ? Hexagon::TFRI_cPt_f :
+ Hexagon::TFRI_cNotPt_f;
+ }
+
+ llvm_unreachable("Unexpected predicable instruction");
+}
+
+
+// Return the .cur value instruction for a given load.
+int HexagonInstrInfo::getDotCurOp(const MachineInstr* MI) const {
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown .cur type");
+ case Hexagon::V6_vL32b_pi:
+ return Hexagon::V6_vL32b_cur_pi;
+ case Hexagon::V6_vL32b_ai:
+ return Hexagon::V6_vL32b_cur_ai;
+ //128B
+ case Hexagon::V6_vL32b_pi_128B:
+ return Hexagon::V6_vL32b_cur_pi_128B;
+ case Hexagon::V6_vL32b_ai_128B:
+ return Hexagon::V6_vL32b_cur_ai_128B;
+ }
+ return 0;
+}
+
+
+
+// The diagram below shows the steps involved in the conversion of a predicated
+// store instruction to its .new predicated new-value form.
+//
+// p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ]
+// ^ ^
+// / \ (not OK. it will cause new-value store to be
+// / X conditional on p0.new while R2 producer is
+// / \ on p0)
+// / \.
+// p.new store p.old NV store
+// [if(p0.new)memw(R0+#0)=R2] [if(p0)memw(R0+#0)=R2.new]
+// ^ ^
+// \ /
+// \ /
+// \ /
+// p.old store
+// [if (p0)memw(R0+#0)=R2]
+//
+//
+// The following set of instructions further explains the scenario where
+// conditional new-value store becomes invalid when promoted to .new predicate
+// form.
+//
+// { 1) if (p0) r0 = add(r1, r2)
+// 2) p0 = cmp.eq(r3, #0) }
+//
+// 3) if (p0) memb(r1+#0) = r0 --> this instruction can't be grouped with
+// the first two instructions because in instr 1, r0 is conditional on old value
+// of p0 but its use in instr 3 is conditional on p0 modified by instr 2 which
+// is not valid for new-value stores.
+// Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded
+// from the "Conditional Store" list. Because a predicated new value store
+// would NOT be promoted to a double dot new store. See diagram below:
+// This function returns yes for those stores that are predicated but not
+// yet promoted to predicate dot new instructions.
+//
+// +---------------------+
+// /-----| if (p0) memw(..)=r0 |---------\~
+// || +---------------------+ ||
+// promote || /\ /\ || promote
+// || /||\ /||\ ||
+// \||/ demote || \||/
+// \/ || || \/
+// +-------------------------+ || +-------------------------+
+// | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new |
+// +-------------------------+ || +-------------------------+
+// || || ||
+// || demote \||/
+// promote || \/ NOT possible
+// || || /\~
+// \||/ || /||\~
+// \/ || ||
+// +-----------------------------+
+// | if (p0.new) memw(..)=r0.new |
+// +-----------------------------+
+// Double Dot New Store
+//
+// Returns the most basic instruction for the .new predicated instructions and
+// new-value stores.
+// For example, all of the following instructions will be converted back to the
+// same instruction:
+// 1) if (p0.new) memw(R0+#0) = R1.new --->
+// 2) if (p0) memw(R0+#0)= R1.new -------> if (p0) memw(R0+#0) = R1
+// 3) if (p0.new) memw(R0+#0) = R1 --->
+//
+// To understand the translation of instruction 1 to its original form, consider
+// a packet with 3 instructions.
+// { p0 = cmp.eq(R0,R1)
+// if (p0.new) R2 = add(R3, R4)
+// R5 = add (R3, R1)
+// }
+// if (p0) memw(R5+#0) = R2 <--- trying to include it in the previous packet
+//
+// This instruction can be part of the previous packet only if both p0 and R2
+// are promoted to .new values. This promotion happens in steps, first
+// predicate register is promoted to .new and in the next iteration R2 is
+// promoted. Therefore, in case of dependence check failure (due to R5) during
+// next iteration, it should be converted back to its most basic form.
+
+
+// Return the new value instruction for a given store.
+int HexagonInstrInfo::getDotNewOp(const MachineInstr* MI) const {
+ int NVOpcode = Hexagon::getNewValueOpcode(MI->getOpcode());
+ if (NVOpcode >= 0) // Valid new-value store instruction.
+ return NVOpcode;
+
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown .new type");
+ case Hexagon::S4_storerb_ur:
+ return Hexagon::S4_storerbnew_ur;
+
+ case Hexagon::S2_storerb_pci:
+ return Hexagon::S2_storerb_pci;
+
+ case Hexagon::S2_storeri_pci:
+ return Hexagon::S2_storeri_pci;
+
+ case Hexagon::S2_storerh_pci:
+ return Hexagon::S2_storerh_pci;
+
+ case Hexagon::S2_storerd_pci:
+ return Hexagon::S2_storerd_pci;
+
+ case Hexagon::S2_storerf_pci:
+ return Hexagon::S2_storerf_pci;
+
+ case Hexagon::V6_vS32b_ai:
+ return Hexagon::V6_vS32b_new_ai;
+
+ case Hexagon::V6_vS32b_pi:
+ return Hexagon::V6_vS32b_new_pi;
+
+ // 128B
+ case Hexagon::V6_vS32b_ai_128B:
+ return Hexagon::V6_vS32b_new_ai_128B;
+
+ case Hexagon::V6_vS32b_pi_128B:
+ return Hexagon::V6_vS32b_new_pi_128B;
+ }
+ return 0;
+}
+
+// Returns the opcode to use when converting MI, which is a conditional jump,
+// into a conditional instruction which uses the .new value of the predicate.
+// We also use branch probabilities to add a hint to the jump.
+int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr *MI,
+ const MachineBranchProbabilityInfo *MBPI) const {
+ // We assume that the block can have at most two successors.
+ bool taken = false;
+ const MachineBasicBlock *Src = MI->getParent();
+ const MachineOperand *BrTarget = &MI->getOperand(1);
+ const MachineBasicBlock *Dst = BrTarget->getMBB();
+
+ const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst);
+ if (Prediction >= BranchProbability(1,2))
+ taken = true;
+
+ switch (MI->getOpcode()) {
+ case Hexagon::J2_jumpt:
+ return taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew;
+ case Hexagon::J2_jumpf:
+ return taken ? Hexagon::J2_jumpfnewpt : Hexagon::J2_jumpfnew;
+
+ default:
+ llvm_unreachable("Unexpected jump instruction.");
+ }
+}
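+
+// For example: with the 1/2 threshold above, a J2_jumpt whose edge probability
+// to its target is 3/4 becomes J2_jumptnewpt (hinted taken), while the same
+// jump with probability 1/4 becomes J2_jumptnew.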
+
+
+// Return .new predicate version for an instruction.
+int HexagonInstrInfo::getDotNewPredOp(const MachineInstr *MI,
+ const MachineBranchProbabilityInfo *MBPI) const {
+ int NewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode());
+ if (NewOpcode >= 0) // Valid predicate new instruction
+ return NewOpcode;
+
+ switch (MI->getOpcode()) {
+ // Conditional jumps
+ case Hexagon::J2_jumpt:
+ case Hexagon::J2_jumpf:
+ return getDotNewPredJumpOp(MI, MBPI);
+
+ default:
+ assert(0 && "Unknown .new type");
+ }
+ return 0;
+}
+
+
+int HexagonInstrInfo::getDotOldOp(const int opc) const {
+ int NewOp = opc;
+ if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form
+ NewOp = Hexagon::getPredOldOpcode(NewOp);
+ assert(NewOp >= 0 &&
+ "Couldn't change predicate new instruction to its old form.");
+ }
+
+ if (isNewValueStore(NewOp)) { // Convert into non-new-value format
+ NewOp = Hexagon::getNonNVStore(NewOp);
+ assert(NewOp >= 0 && "Couldn't change new-value store to its old form.");
+ }
+ return NewOp;
+}
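+
+// A minimal usage sketch (illustrative; HII is assumed to be a pointer to this
+// HexagonInstrInfo instance): demoting an instruction back to its most basic
+// form, as described in the promotion/demotion diagram earlier in this file.
+//
+//   int OldOpc = HII->getDotOldOp(MI->getOpcode());
+//   MI->setDesc(HII->get(OldOpc));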
+
+
+// See if instruction could potentially be a duplex candidate.
+// If so, return its group. Zero otherwise.
+HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
+ const MachineInstr *MI) const {
+ unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
+ auto &HRI = getRegisterInfo();
+
+ switch (MI->getOpcode()) {
+ default:
+ return HexagonII::HSIG_None;
+ //
+ // Group L1:
+ //
+ // Rd = memw(Rs+#u4:2)
+ // Rd = memub(Rs+#u4:0)
+ case Hexagon::L2_loadri_io:
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ // Special case this one from Group L2.
+ // Rd = memw(r29+#u5:2)
+ if (isIntRegForSubInst(DstReg)) {
+ if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
+ HRI.getStackRegister() == SrcReg &&
+ MI->getOperand(2).isImm() &&
+ isShiftedUInt<5,2>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_L2;
+ // Rd = memw(Rs+#u4:2)
+ if (isIntRegForSubInst(SrcReg) &&
+ (MI->getOperand(2).isImm() &&
+ isShiftedUInt<4,2>(MI->getOperand(2).getImm())))
+ return HexagonII::HSIG_L1;
+ }
+ break;
+ case Hexagon::L2_loadrub_io:
+ // Rd = memub(Rs+#u4:0)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
+ MI->getOperand(2).isImm() && isUInt<4>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_L1;
+ break;
+ //
+ // Group L2:
+ //
+ // Rd = memh/memuh(Rs+#u3:1)
+ // Rd = memb(Rs+#u3:0)
+ // Rd = memw(r29+#u5:2) - Handled above.
+ // Rdd = memd(r29+#u5:3)
+ // deallocframe
+ // [if ([!]p0[.new])] dealloc_return
+ // [if ([!]p0[.new])] jumpr r31
+ case Hexagon::L2_loadrh_io:
+ case Hexagon::L2_loadruh_io:
+ // Rd = memh/memuh(Rs+#u3:1)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
+ MI->getOperand(2).isImm() &&
+ isShiftedUInt<3,1>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_L2;
+ break;
+ case Hexagon::L2_loadrb_io:
+ // Rd = memb(Rs+#u3:0)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
+ MI->getOperand(2).isImm() &&
+ isUInt<3>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_L2;
+ break;
+ case Hexagon::L2_loadrd_io:
+ // Rdd = memd(r29+#u5:3)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isDblRegForSubInst(DstReg, HRI) &&
+ Hexagon::IntRegsRegClass.contains(SrcReg) &&
+ HRI.getStackRegister() == SrcReg &&
+ MI->getOperand(2).isImm() &&
+ isShiftedUInt<5,3>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_L2;
+ break;
+ // dealloc_return is not documented in the Hexagon Manual, but it is marked
+ // with the A_SUBINSN attribute in iset_v4classic.py.
+ case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
+ case Hexagon::L4_return:
+ case Hexagon::L2_deallocframe:
+ return HexagonII::HSIG_L2;
+ case Hexagon::EH_RETURN_JMPR:
+ case Hexagon::JMPret :
+ // jumpr r31
+ // Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>.
+ DstReg = MI->getOperand(0).getReg();
+ if (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg))
+ return HexagonII::HSIG_L2;
+ break;
+ case Hexagon::JMPrett:
+ case Hexagon::JMPretf:
+ case Hexagon::JMPrettnewpt:
+ case Hexagon::JMPretfnewpt :
+ case Hexagon::JMPrettnew :
+ case Hexagon::JMPretfnew :
+ DstReg = MI->getOperand(1).getReg();
+ SrcReg = MI->getOperand(0).getReg();
+ // [if ([!]p0[.new])] jumpr r31
+ if ((Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ (Hexagon::P0 == SrcReg)) &&
+ (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg)))
+ return HexagonII::HSIG_L2;
+ break;
+ case Hexagon::L4_return_t :
+ case Hexagon::L4_return_f :
+ case Hexagon::L4_return_tnew_pnt :
+ case Hexagon::L4_return_fnew_pnt :
+ case Hexagon::L4_return_tnew_pt :
+ case Hexagon::L4_return_fnew_pt :
+ // [if ([!]p0[.new])] dealloc_return
+ SrcReg = MI->getOperand(0).getReg();
+ if (Hexagon::PredRegsRegClass.contains(SrcReg) && (Hexagon::P0 == SrcReg))
+ return HexagonII::HSIG_L2;
+ break;
+ //
+ // Group S1:
+ //
+ // memw(Rs+#u4:2) = Rt
+ // memb(Rs+#u4:0) = Rt
+ case Hexagon::S2_storeri_io:
+ // Special case this one from Group S2.
+ // memw(r29+#u5:2) = Rt
+ Src1Reg = MI->getOperand(0).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (Hexagon::IntRegsRegClass.contains(Src1Reg) &&
+ isIntRegForSubInst(Src2Reg) &&
+ HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() &&
+ isShiftedUInt<5,2>(MI->getOperand(1).getImm()))
+ return HexagonII::HSIG_S2;
+ // memw(Rs+#u4:2) = Rt
+ if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) &&
+ MI->getOperand(1).isImm() &&
+ isShiftedUInt<4,2>(MI->getOperand(1).getImm()))
+ return HexagonII::HSIG_S1;
+ break;
+ case Hexagon::S2_storerb_io:
+ // memb(Rs+#u4:0) = Rt
+ Src1Reg = MI->getOperand(0).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) &&
+ MI->getOperand(1).isImm() && isUInt<4>(MI->getOperand(1).getImm()))
+ return HexagonII::HSIG_S1;
+ break;
+ //
+ // Group S2:
+ //
+ // memh(Rs+#u3:1) = Rt
+ // memw(r29+#u5:2) = Rt
+ // memd(r29+#s6:3) = Rtt
+ // memw(Rs+#u4:2) = #U1
+ // memb(Rs+#u4) = #U1
+ // allocframe(#u5:3)
+ case Hexagon::S2_storerh_io:
+ // memh(Rs+#u3:1) = Rt
+ Src1Reg = MI->getOperand(0).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) &&
+ MI->getOperand(1).isImm() &&
+ isShiftedUInt<3,1>(MI->getOperand(1).getImm()))
+ return HexagonII::HSIG_S1;
+ break;
+ case Hexagon::S2_storerd_io:
+ // memd(r29+#s6:3) = Rtt
+ Src1Reg = MI->getOperand(0).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (isDblRegForSubInst(Src2Reg, HRI) &&
+ Hexagon::IntRegsRegClass.contains(Src1Reg) &&
+ HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() &&
+ isShiftedInt<6,3>(MI->getOperand(1).getImm()))
+ return HexagonII::HSIG_S2;
+ break;
+ case Hexagon::S4_storeiri_io:
+ // memw(Rs+#u4:2) = #U1
+ Src1Reg = MI->getOperand(0).getReg();
+ if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() &&
+ isShiftedUInt<4,2>(MI->getOperand(1).getImm()) &&
+ MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_S2;
+ break;
+ case Hexagon::S4_storeirb_io:
+ // memb(Rs+#u4) = #U1
+ Src1Reg = MI->getOperand(0).getReg();
+ if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() &&
+ isUInt<4>(MI->getOperand(1).getImm()) &&
+ MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_S2;
+ break;
+ case Hexagon::S2_allocframe:
+ if (MI->getOperand(0).isImm() &&
+ isShiftedUInt<5,3>(MI->getOperand(0).getImm()))
+ return HexagonII::HSIG_S1;
+ break;
+ //
+ // Group A:
+ //
+ // Rx = add(Rx,#s7)
+ // Rd = Rs
+ // Rd = #u6
+ // Rd = #-1
+ // if ([!]P0[.new]) Rd = #0
+ // Rd = add(r29,#u6:2)
+ // Rx = add(Rx,Rs)
+ // P0 = cmp.eq(Rs,#u2)
+ // Rdd = combine(#0,Rs)
+ // Rdd = combine(Rs,#0)
+ // Rdd = combine(#u2,#U2)
+ // Rd = add(Rs,#1)
+ // Rd = add(Rs,#-1)
+ // Rd = sxth/sxtb/zxtb/zxth(Rs)
+ // Rd = and(Rs,#1)
+ case Hexagon::A2_addi:
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg)) {
+ // Rd = add(r29,#u6:2)
+ if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
+ HRI.getStackRegister() == SrcReg && MI->getOperand(2).isImm() &&
+ isShiftedUInt<6,2>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_A;
+ // Rx = add(Rx,#s7)
+ if ((DstReg == SrcReg) && MI->getOperand(2).isImm() &&
+ isInt<7>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_A;
+ // Rd = add(Rs,#1)
+ // Rd = add(Rs,#-1)
+ if (isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() &&
+ ((MI->getOperand(2).getImm() == 1) ||
+ (MI->getOperand(2).getImm() == -1)))
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A2_add:
+ // Rx = add(Rx,Rs)
+ DstReg = MI->getOperand(0).getReg();
+ Src1Reg = MI->getOperand(1).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (isIntRegForSubInst(DstReg) && (DstReg == Src1Reg) &&
+ isIntRegForSubInst(Src2Reg))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A2_andir:
+ // Same as zxtb.
+ // Rd16=and(Rs16,#255)
+ // Rd16=and(Rs16,#1)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
+ MI->getOperand(2).isImm() &&
+ ((MI->getOperand(2).getImm() == 1) ||
+ (MI->getOperand(2).getImm() == 255)))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A2_tfr:
+ // Rd = Rs
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A2_tfrsi:
+ // Rd = #u6
+ // Do not test for #u6 size since the const is getting extended
+ // regardless and compound could be formed.
+ // Rd = #-1
+ DstReg = MI->getOperand(0).getReg();
+ if (isIntRegForSubInst(DstReg))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::C2_cmoveit:
+ case Hexagon::C2_cmovenewit:
+ case Hexagon::C2_cmoveif:
+ case Hexagon::C2_cmovenewif:
+ // if ([!]P0[.new]) Rd = #0
+ // Actual form:
+ // %R16<def> = C2_cmovenewit %P0<internal>, 0, %R16<imp-use,undef>;
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) &&
+ Hexagon::PredRegsRegClass.contains(SrcReg) && Hexagon::P0 == SrcReg &&
+ MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0)
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::C2_cmpeqi:
+ // P0 = cmp.eq(Rs,#u2)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+ Hexagon::P0 == DstReg && isIntRegForSubInst(SrcReg) &&
+ MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineii:
+ // Rdd = combine(#u2,#U2)
+ DstReg = MI->getOperand(0).getReg();
+ if (isDblRegForSubInst(DstReg, HRI) &&
+ ((MI->getOperand(1).isImm() && isUInt<2>(MI->getOperand(1).getImm())) ||
+ (MI->getOperand(1).isGlobal() &&
+ isUInt<2>(MI->getOperand(1).getOffset()))) &&
+ ((MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm())) ||
+ (MI->getOperand(2).isGlobal() &&
+ isUInt<2>(MI->getOperand(2).getOffset()))))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A4_combineri:
+ // Rdd = combine(Rs,#0)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) &&
+ ((MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) ||
+ (MI->getOperand(2).isGlobal() && MI->getOperand(2).getOffset() == 0)))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A4_combineir:
+ // Rdd = combine(#0,Rs)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(2).getReg();
+ if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) &&
+ ((MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) ||
+ (MI->getOperand(1).isGlobal() && MI->getOperand(1).getOffset() == 0)))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A2_sxtb:
+ case Hexagon::A2_sxth:
+ case Hexagon::A2_zxtb:
+ case Hexagon::A2_zxth:
+ // Rd = sxth/sxtb/zxtb/zxth(Rs)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg))
+ return HexagonII::HSIG_A;
+ break;
+ }
+
+ return HexagonII::HSIG_None;
+}
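+
+// For example: "r16 = memw(r17+#8)" meets the Group L1 constraints above
+// (r16 and r17 are in the sub-instruction register range r0-r7/r16-r23 and
+// 8 is a legal #u4:2 offset), whereas the same load with offset #64 is out
+// of range and cannot be part of a duplex.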
+
+
+short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr *MI) const {
+ return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Real);
+}
+
+
+// Return first non-debug instruction in the basic block.
+MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB)
+ const {
+ for (auto MII = BB->instr_begin(), End = BB->instr_end(); MII != End; MII++) {
+ MachineInstr *MI = &*MII;
+ if (MI->isDebugValue())
+ continue;
+ return MI;
+ }
+ return nullptr;
+}
+
+
+unsigned HexagonInstrInfo::getInstrTimingClassLatency(
+ const InstrItineraryData *ItinData, const MachineInstr *MI) const {
+ // Default to one cycle for no itinerary. However, an "empty" itinerary may
+ // still have a MinLatency property, which getStageLatency checks.
+ if (!ItinData)
+ return getInstrLatency(ItinData, MI);
+
+ // Get the latency embedded in the itinerary. If we're not using timing class
+ // latencies or if we are using BSB scheduling, then restrict the maximum
+ // to 1 (that is, either 0 or 1).
+ if (MI->isTransient())
+ return 0;
+ unsigned Latency = ItinData->getStageLatency(MI->getDesc().getSchedClass());
+ if (!EnableTimingClassLatency ||
+ MI->getParent()->getParent()->getSubtarget<HexagonSubtarget>().
+ useBSBScheduling())
+ if (Latency > 1)
+ Latency = 1;
+ return Latency;
+}
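+
+// For example: if the itinerary reports a stage latency of 3 but BSB
+// scheduling is in use (or timing-class latencies are disabled), the returned
+// value is clamped to 1; transient instructions (e.g. COPY and similar
+// pseudos that produce no real code) report 0.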
+
+
+// Inverts the predication logic.
+// p -> NotP
+// NotP -> P
+bool HexagonInstrInfo::getInvertedPredSense(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ if (Cond.empty())
+ return false;
+ unsigned Opc = getInvertedPredicatedOpcode(Cond[0].getImm());
+ Cond[0].setImm(Opc);
+ return true;
+}
+
+
+unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
+ int InvPredOpcode;
+ InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc)
+ : Hexagon::getTruePredOpcode(Opc);
+ if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate.
+ return InvPredOpcode;
+
+ llvm_unreachable("Unexpected predicated instruction");
+}
+
+
+// Returns the max value that doesn't need to be extended.
+int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return ~(-1U << (bits - 1));
+ else
+ return ~(-1U << bits);
+}
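+
+// For example, with an 8-bit extent the expressions above evaluate to
+//   signed:   ~(-1U << 7) == 127
+//   unsigned: ~(-1U << 8) == 255
+// i.e. the largest immediate that still fits without a constant extender.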
+
+
+unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask;
+}
+
+
+// Returns the min value that doesn't need to be extended.
+int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return -1U << (bits - 1);
+ else
+ return 0;
+}
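+
+// For example, with an 8-bit signed extent the expression above is
+// -1U << 7 == 0xffffff80, i.e. -128 when returned as an int; for unsigned
+// extents the minimum is always 0.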
+
+
+// Returns opcode of the non-extended equivalent instruction.
+short HexagonInstrInfo::getNonExtOpcode(const MachineInstr *MI) const {
+ // Check if the instruction has a register form that uses a register in place
+ // of the extended operand; if so, return that as the non-extended form.
+ short NonExtOpcode = Hexagon::getRegForm(MI->getOpcode());
+ if (NonExtOpcode >= 0)
+ return NonExtOpcode;
+
+ if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+ // Check addressing mode and retrieve non-ext equivalent instruction.
+ switch (getAddrMode(MI)) {
+ case HexagonII::Absolute :
+ return Hexagon::getBaseWithImmOffset(MI->getOpcode());
+ case HexagonII::BaseImmOffset :
+ return Hexagon::getBaseWithRegOffset(MI->getOpcode());
+ case HexagonII::BaseLongOffset:
+ return Hexagon::getRegShlForm(MI->getOpcode());
+
+ default:
+ return -1;
+ }
+ }
+ return -1;
+}
+
+
+bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond,
+ unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const {
+ if (Cond.empty())
+ return false;
+ assert(Cond.size() == 2);
+ if (isNewValueJump(Cond[0].getImm()) || Cond[1].isMBB()) {
+ DEBUG(dbgs() << "No predregs for new-value jumps/endloop");
+ return false;
+ }
+ PredReg = Cond[1].getReg();
+ PredRegPos = 1;
+ // See IfConversion.cpp for why we add RegState::Implicit | RegState::Undef.
+ PredRegFlags = 0;
+ if (Cond[1].isImplicit())
+ PredRegFlags = RegState::Implicit;
+ if (Cond[1].isUndef())
+ PredRegFlags |= RegState::Undef;
+ return true;
+}
+
+
+short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr *MI) const {
+ return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Pseudo);
+}
+
+
+short HexagonInstrInfo::getRegForm(const MachineInstr *MI) const {
+ return Hexagon::getRegForm(MI->getOpcode());
+}
+
+
+// Return the number of bytes required to encode the instruction.
+// Hexagon instructions are fixed length, 4 bytes, unless they
+// use a constant extender, which requires another 4 bytes.
+// For debug instructions and prolog labels, return 0.
+unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const {
+ if (MI->isDebugValue() || MI->isPosition())
+ return 0;
+
+ unsigned Size = MI->getDesc().getSize();
+ if (!Size)
+ // Assume the default insn size in case it cannot be determined
+ // for whatever reason.
+ Size = HEXAGON_INSTR_SIZE;
+
+ if (isConstExtended(MI) || isExtended(MI))
+ Size += HEXAGON_INSTR_SIZE;
+
+ // Try to compute the number of instructions in the asm string.
+ if (BranchRelaxAsmLarge && MI->getOpcode() == Hexagon::INLINEASM) {
+ const MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+
+ // Count the number of register definitions to find the asm string.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ assert(NumDefs != MI->getNumOperands()-2 && "No asm string?");
+
+ assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+ // Disassemble the AsmStr and approximate number of instructions.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+ Size = getInlineAsmLength(AsmStr, *MAI);
+ }
+
+ return Size;
+}
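+
+// For example (assuming HEXAGON_INSTR_SIZE is 4): a regular instruction is
+// reported as 4 bytes, while a constant-extended one is reported as 8, since
+// the extender word is emitted alongside it in the packet.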
+
+
+uint64_t HexagonInstrInfo::getType(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::TypePos) & HexagonII::TypeMask;
+}
+
+
+unsigned HexagonInstrInfo::getUnits(const MachineInstr* MI) const {
+ const TargetSubtargetInfo &ST = MI->getParent()->getParent()->getSubtarget();
+ const InstrItineraryData &II = *ST.getInstrItineraryData();
+ const InstrStage &IS = *II.beginStage(MI->getDesc().getSchedClass());
+
+ return IS.getUnits();
+}
+
+
+unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask;
+}
+
+
+// Calculate size of the basic block without debug instructions.
+unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const {
+ return nonDbgMICount(BB->instr_begin(), BB->instr_end());
+}
+
+
+unsigned HexagonInstrInfo::nonDbgBundleSize(
+ MachineBasicBlock::const_iterator BundleHead) const {
+ assert(BundleHead->isBundle() && "Not a bundle header");
+ auto MII = BundleHead.getInstrIterator();
+ // Skip the bundle header.
+ return nonDbgMICount(++MII, getBundleEnd(BundleHead));
+}
+
+
+/// immediateExtend - Changes the instruction in place to one using an immediate
+/// extender.
+void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const {
+ assert((isExtendable(MI)||isConstExtended(MI)) &&
+ "Instruction must be extendable");
+ // Find which operand is extendable.
+ short ExtOpNum = getCExtOpNum(MI);
+ MachineOperand &MO = MI->getOperand(ExtOpNum);
+ // This needs to be something we understand.
+ assert((MO.isMBB() || MO.isImm()) &&
+ "Branch with unknown extendable field type");
+ // Mark given operand as extended.
+ MO.addTargetFlag(HexagonII::HMOTF_ConstExtended);
+}
+
+
+bool HexagonInstrInfo::invertAndChangeJumpTarget(
+ MachineInstr* MI, MachineBasicBlock* NewTarget) const {
+ DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#"
+ << NewTarget->getNumber(); MI->dump(););
+ assert(MI->isBranch());
+ unsigned NewOpcode = getInvertedPredicatedOpcode(MI->getOpcode());
+ int TargetPos = MI->getNumOperands() - 1;
+ // In general the branch target is the last operand, but implicit defs
+ // added at the end might change its position.
+ while ((TargetPos > -1) && !MI->getOperand(TargetPos).isMBB())
+ --TargetPos;
+ assert((TargetPos >= 0) && MI->getOperand(TargetPos).isMBB());
+ MI->getOperand(TargetPos).setMBB(NewTarget);
+ if (EnableBranchPrediction && isPredicatedNew(MI)) {
+ NewOpcode = reversePrediction(NewOpcode);
+ }
+ MI->setDesc(get(NewOpcode));
+ return true;
+}
+
+
+void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const {
+ /* +++ The code below is used to generate the complete set of Hexagon insns +++ */
+ MachineFunction::iterator A = MF.begin();
+ MachineBasicBlock &B = *A;
+ MachineBasicBlock::iterator I = B.begin();
+ MachineInstr *MI = &*I;
+ DebugLoc DL = MI->getDebugLoc();
+ MachineInstr *NewMI;
+
+ for (unsigned insn = TargetOpcode::GENERIC_OP_END+1;
+ insn < Hexagon::INSTRUCTION_LIST_END; ++insn) {
+ NewMI = BuildMI(B, MI, DL, get(insn));
+ DEBUG(dbgs() << "\n" << getName(NewMI->getOpcode()) <<
+ " Class: " << NewMI->getDesc().getSchedClass());
+ NewMI->eraseFromParent();
+ }
+ /* --- The code above is used to generate the complete set of Hexagon insns --- */
+}
+
+
+// Inverts the predication logic.
+// p -> NotP
+// NotP -> P
+bool HexagonInstrInfo::reversePredSense(MachineInstr* MI) const {
+ DEBUG(dbgs() << "\nTrying to reverse pred. sense of:"; MI->dump());
+ MI->setDesc(get(getInvertedPredicatedOpcode(MI->getOpcode())));
+ return true;
+}
+
+
+// Reverse the branch prediction.
+unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const {
+ int PredRevOpcode = -1;
+ if (isPredictedTaken(Opcode))
+ PredRevOpcode = Hexagon::notTakenBranchPrediction(Opcode);
+ else
+ PredRevOpcode = Hexagon::takenBranchPrediction(Opcode);
+ assert(PredRevOpcode > 0);
+ return PredRevOpcode;
+}
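+
+// For example (illustrative): reversing the prediction of J2_jumptnewpt
+// (hinted taken) is expected to yield J2_jumptnew, and vice versa.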
+
+
+// TODO: Add more rigorous validation.
+bool HexagonInstrInfo::validateBranchCond(const ArrayRef<MachineOperand> &Cond)
+ const {
+ return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1));
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
new file mode 100644
index 0000000..9530d9f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -0,0 +1,402 @@
+//===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H
+
+#include "HexagonRegisterInfo.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "HexagonGenInstrInfo.inc"
+
+namespace llvm {
+
+struct EVT;
+class HexagonSubtarget;
+
+class HexagonInstrInfo : public HexagonGenInstrInfo {
+ virtual void anchor();
+ const HexagonRegisterInfo RI;
+
+public:
+ explicit HexagonInstrInfo(HexagonSubtarget &ST);
+
+ /// TargetInstrInfo overrides.
+ ///
+
+ /// If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
+
+ /// If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
+
+ /// Analyze the branching code at the end of MBB, returning
+ /// true if it cannot be understood (e.g. it's a switch dispatch or isn't
+ /// implemented for a target). Upon success, this returns false and returns
+ /// with the following information in various cases:
+ ///
+ /// 1. If this block ends with no branches (it just falls through to its succ)
+ /// just return false, leaving TBB/FBB null.
+ /// 2. If this block ends with only an unconditional branch, it sets TBB to be
+ /// the destination block.
+ /// 3. If this block ends with a conditional branch and it falls through to a
+ /// successor block, it sets TBB to be the branch destination block and a
+ /// list of operands that evaluate the condition. These operands can be
+ /// passed to other TargetInstrInfo methods to create new branches.
+ /// 4. If this block ends with a conditional branch followed by an
+ /// unconditional branch, it returns the 'true' destination in TBB, the
+ /// 'false' destination in FBB, and a list of operands that evaluate the
+ /// condition. These operands can be passed to other TargetInstrInfo
+ /// methods to create new branches.
+ ///
+ /// Note that RemoveBranch and InsertBranch must be implemented to support
+ /// cases where this method returns success.
+ ///
+ /// If AllowModify is true, then this routine is allowed to modify the basic
+ /// block (e.g. delete instructions after the unconditional branch).
+ ///
+ bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const override;
+
+ /// Remove the branching code at the end of the specific MBB.
+ /// This is only invoked in cases where AnalyzeBranch returns success. It
+ /// returns the number of instructions that were removed.
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+
+ /// Insert branch code into the end of the specified MachineBasicBlock.
+ /// The operands to this method are the same as those
+ /// returned by AnalyzeBranch. This is only invoked in cases where
+ /// AnalyzeBranch returns success. It returns the number of instructions
+ /// inserted.
+ ///
+ /// It is also invoked by tail merging to add unconditional branches in
+ /// cases where AnalyzeBranch doesn't apply because there was no original
+ /// branch to analyze. At least this much must be implemented, else tail
+ /// merging needs to be disabled.
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+ DebugLoc DL) const override;
+
+ /// Return true if it's profitable to predicate
+ /// instructions with accumulated instruction latency of "NumCycles"
+ /// of the specified basic block, where the probability of the instructions
+ /// being executed is given by Probability, and Confidence is a measure
+ /// of our confidence that it will be properly predicted.
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ unsigned ExtraPredCycles,
+ BranchProbability Probability) const override;
+
+ /// Second variant of isProfitableToIfCvt. This one
+ /// checks for the case where two basic blocks from true and false path
+ /// of an if-then-else (diamond) are predicated on mutually exclusive
+ /// predicates, where the probability of the true path being taken is given
+ /// by Probability, and Confidence is a measure of our confidence that it
+ /// will be properly predicted.
+ bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ BranchProbability Probability) const override;
+
+ /// Return true if it's profitable for if-converter to duplicate instructions
+ /// of specified accumulated instruction latencies in the specified MBB to
+ /// enable if-conversion.
+ /// The probability of the instructions being executed is given by
+ /// Probability, and Confidence is a measure of our confidence that it
+ /// will be properly predicted.
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ BranchProbability Probability) const override;
+
+ /// Emit instructions to copy a pair of physical registers.
+ ///
+ /// This function should support copies within any legal register class as
+ /// well as any cross-class copies created during instruction selection.
+ ///
+ /// The source and destination registers may overlap, which may require a
+ /// careful implementation when multiple copy instructions are required for
+ /// large registers. See for example the ARM target.
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+
+ /// Store the specified register of the given register class to the specified
+ /// stack frame index. The store instruction is to be added to the given
+ /// machine basic block before the specified machine instruction. If isKill
+ /// is true, the register operand is the last use and must be marked kill.
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ /// Load the specified register of the given register class from the specified
+ /// stack frame index. The load instruction is to be added to the given
+ /// machine basic block before the specified machine instruction.
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ /// This function is called for all pseudo instructions
+ /// that remain after register allocation. Many pseudo instructions are
+ /// created to help register allocation. This is the place to convert them
+ /// into real instructions. The target can edit MI in place, or it can insert
+ /// new instructions and erase MI. The function should return true if
+ /// anything was changed.
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
+
+ /// Reverses the branch condition of the specified condition list,
+ /// returning false on success and true if it cannot be reversed.
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const override;
+
+ /// Insert a noop into the instruction stream at the specified point.
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
+
+ /// Returns true if the instruction is already predicated.
+ bool isPredicated(const MachineInstr *MI) const override;
+
+ /// Convert the instruction into a predicated instruction.
+ /// It returns true if the operation was successful.
+ bool PredicateInstruction(MachineInstr *MI,
+ ArrayRef<MachineOperand> Cond) const override;
+
+ /// Returns true if the first specified predicate
+ /// subsumes the second, e.g. GE subsumes GT.
+ bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const override;
+
+ /// If the specified instruction defines any predicate
+ /// or condition code register(s) used for predication, returns true as well
+ /// as the definition predicate(s) by reference.
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const override;
+
+ /// Return true if the specified instruction can be predicated.
+ /// By default, this returns true for every instruction with a
+ /// PredicateOperand.
+ bool isPredicable(MachineInstr *MI) const override;
+
+ /// Test if the given instruction should be considered a scheduling boundary.
+ /// This primarily includes labels and terminators.
+ bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
+
+ /// Measure the specified inline asm to determine an approximation of its
+ /// length.
+ unsigned getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const override;
+
+ /// Allocate and return a hazard recognizer to use for this target when
+ /// scheduling the machine instructions after register allocation.
+ ScheduleHazardRecognizer*
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData*,
+ const ScheduleDAG *DAG) const override;
+
+ /// For a comparison instruction, return the source registers
+ /// in SrcReg and SrcReg2 if having two register operands, and the value it
+ /// compares against in CmpValue. Return true if the comparison instruction
+ /// can be analyzed.
+ bool analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const override;
+
+ /// Compute the instruction latency of a given instruction.
+ /// If the instruction has higher cost when predicated, it's returned via
+ /// PredCost.
+ unsigned getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = 0) const override;
+
+ /// Create machine specific model for scheduling.
+ DFAPacketizer *
+ CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override;
+
+ // Sometimes, it is possible for the target
+ // to tell, even without aliasing information, that two MIs access different
+ // memory addresses. This function returns true if two MIs access different
+ // memory addresses and false otherwise.
+ bool areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
+ AliasAnalysis *AA = nullptr)
+ const override;
+
+
+ /// HexagonInstrInfo specifics.
+ ///
+
+ const HexagonRegisterInfo &getRegisterInfo() const { return RI; }
+
+ unsigned createVR(MachineFunction* MF, MVT VT) const;
+
+ bool isAbsoluteSet(const MachineInstr* MI) const;
+ bool isAccumulator(const MachineInstr *MI) const;
+ bool isComplex(const MachineInstr *MI) const;
+ bool isCompoundBranchInstr(const MachineInstr *MI) const;
+ bool isCondInst(const MachineInstr *MI) const;
+ bool isConditionalALU32 (const MachineInstr* MI) const;
+ bool isConditionalLoad(const MachineInstr* MI) const;
+ bool isConditionalStore(const MachineInstr* MI) const;
+ bool isConditionalTransfer(const MachineInstr* MI) const;
+ bool isConstExtended(const MachineInstr *MI) const;
+ bool isDeallocRet(const MachineInstr *MI) const;
+ bool isDependent(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const;
+ bool isDotCurInst(const MachineInstr* MI) const;
+ bool isDotNewInst(const MachineInstr* MI) const;
+ bool isDuplexPair(const MachineInstr *MIa, const MachineInstr *MIb) const;
+ bool isEarlySourceInstr(const MachineInstr *MI) const;
+ bool isEndLoopN(unsigned Opcode) const;
+ bool isExpr(unsigned OpType) const;
+ bool isExtendable(const MachineInstr* MI) const;
+ bool isExtended(const MachineInstr* MI) const;
+ bool isFloat(const MachineInstr *MI) const;
+ bool isHVXMemWithAIndirect(const MachineInstr *I,
+ const MachineInstr *J) const;
+ bool isIndirectCall(const MachineInstr *MI) const;
+ bool isIndirectL4Return(const MachineInstr *MI) const;
+ bool isJumpR(const MachineInstr *MI) const;
+ bool isJumpWithinBranchRange(const MachineInstr *MI, unsigned offset) const;
+ bool isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI,
+ const MachineInstr *ESMI) const;
+ bool isLateResultInstr(const MachineInstr *MI) const;
+ bool isLateSourceInstr(const MachineInstr *MI) const;
+ bool isLoopN(const MachineInstr *MI) const;
+ bool isMemOp(const MachineInstr *MI) const;
+ bool isNewValue(const MachineInstr* MI) const;
+ bool isNewValue(unsigned Opcode) const;
+ bool isNewValueInst(const MachineInstr* MI) const;
+ bool isNewValueJump(const MachineInstr* MI) const;
+ bool isNewValueJump(unsigned Opcode) const;
+ bool isNewValueStore(const MachineInstr* MI) const;
+ bool isNewValueStore(unsigned Opcode) const;
+ bool isOperandExtended(const MachineInstr *MI, unsigned OperandNum) const;
+ bool isPostIncrement(const MachineInstr* MI) const;
+ bool isPredicatedNew(const MachineInstr *MI) const;
+ bool isPredicatedNew(unsigned Opcode) const;
+ bool isPredicatedTrue(const MachineInstr *MI) const;
+ bool isPredicatedTrue(unsigned Opcode) const;
+ bool isPredicated(unsigned Opcode) const;
+ bool isPredicateLate(unsigned Opcode) const;
+ bool isPredictedTaken(unsigned Opcode) const;
+ bool isSaveCalleeSavedRegsCall(const MachineInstr *MI) const;
+ bool isSolo(const MachineInstr* MI) const;
+ bool isSpillPredRegOp(const MachineInstr *MI) const;
+ bool isTC1(const MachineInstr *MI) const;
+ bool isTC2(const MachineInstr *MI) const;
+ bool isTC2Early(const MachineInstr *MI) const;
+ bool isTC4x(const MachineInstr *MI) const;
+ bool isV60VectorInstruction(const MachineInstr *MI) const;
+ bool isValidAutoIncImm(const EVT VT, const int Offset) const;
+ bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const;
+ bool isVecAcc(const MachineInstr *MI) const;
+ bool isVecALU(const MachineInstr *MI) const;
+ bool isVecUsableNextPacket(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const;
+
+
+ bool canExecuteInBundle(const MachineInstr *First,
+ const MachineInstr *Second) const;
+ bool hasEHLabel(const MachineBasicBlock *B) const;
+ bool hasNonExtEquivalent(const MachineInstr *MI) const;
+ bool hasPseudoInstrPair(const MachineInstr *MI) const;
+ bool hasUncondBranch(const MachineBasicBlock *B) const;
+ bool mayBeCurLoad(const MachineInstr* MI) const;
+ bool mayBeNewStore(const MachineInstr* MI) const;
+ bool producesStall(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const;
+ bool producesStall(const MachineInstr *MI,
+ MachineBasicBlock::const_instr_iterator MII) const;
+ bool predCanBeUsedAsDotNew(const MachineInstr *MI, unsigned PredReg) const;
+ bool PredOpcodeHasJMP_c(unsigned Opcode) const;
+ bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const;
+
+
+ unsigned getAddrMode(const MachineInstr* MI) const;
+ unsigned getBaseAndOffset(const MachineInstr *MI, int &Offset,
+ unsigned &AccessSize) const;
+ bool getBaseAndOffsetPosition(const MachineInstr *MI, unsigned &BasePos,
+ unsigned &OffsetPos) const;
+ SmallVector<MachineInstr*,2> getBranchingInstrs(MachineBasicBlock& MBB) const;
+ unsigned getCExtOpNum(const MachineInstr *MI) const;
+ HexagonII::CompoundGroup
+ getCompoundCandidateGroup(const MachineInstr *MI) const;
+ unsigned getCompoundOpcode(const MachineInstr *GA,
+ const MachineInstr *GB) const;
+ int getCondOpcode(int Opc, bool sense) const;
+ int getDotCurOp(const MachineInstr* MI) const;
+ int getDotNewOp(const MachineInstr* MI) const;
+ int getDotNewPredJumpOp(const MachineInstr *MI,
+ const MachineBranchProbabilityInfo *MBPI) const;
+ int getDotNewPredOp(const MachineInstr *MI,
+ const MachineBranchProbabilityInfo *MBPI) const;
+ int getDotOldOp(const int opc) const;
+ HexagonII::SubInstructionGroup getDuplexCandidateGroup(const MachineInstr *MI)
+ const;
+ short getEquivalentHWInstr(const MachineInstr *MI) const;
+ MachineInstr *getFirstNonDbgInst(MachineBasicBlock *BB) const;
+ unsigned getInstrTimingClassLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const;
+ bool getInvertedPredSense(SmallVectorImpl<MachineOperand> &Cond) const;
+ unsigned getInvertedPredicatedOpcode(const int Opc) const;
+ int getMaxValue(const MachineInstr *MI) const;
+ unsigned getMemAccessSize(const MachineInstr* MI) const;
+ int getMinValue(const MachineInstr *MI) const;
+ short getNonExtOpcode(const MachineInstr *MI) const;
+ bool getPredReg(ArrayRef<MachineOperand> Cond, unsigned &PredReg,
+ unsigned &PredRegPos, unsigned &PredRegFlags) const;
+ short getPseudoInstrPair(const MachineInstr *MI) const;
+ short getRegForm(const MachineInstr *MI) const;
+ unsigned getSize(const MachineInstr *MI) const;
+ uint64_t getType(const MachineInstr* MI) const;
+ unsigned getUnits(const MachineInstr* MI) const;
+ unsigned getValidSubTargets(const unsigned Opcode) const;
+
+
+ /// Count the number of non-debug instructions in a basic block / bundle.
+ unsigned nonDbgBBSize(const MachineBasicBlock *BB) const;
+ unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const;
+
+
+ void immediateExtend(MachineInstr *MI) const;
+ bool invertAndChangeJumpTarget(MachineInstr* MI,
+ MachineBasicBlock* NewTarget) const;
+ void genAllInsnTimingClasses(MachineFunction &MF) const;
+ bool reversePredSense(MachineInstr* MI) const;
+ unsigned reversePrediction(unsigned Opcode) const;
+ bool validateBranchCond(const ArrayRef<MachineOperand> &Cond) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
new file mode 100644
index 0000000..5cfeba7
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -0,0 +1,5809 @@
+//==- HexagonInstrInfo.td - Target Description for Hexagon -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormats.td"
+include "HexagonOperands.td"
+include "HexagonInstrEnc.td"
+// Pattern fragment that combines the value type and the register class
+// into a single parameter.
+// The pat frags in the definitions below need to have a named register,
+// otherwise i32 will be assumed regardless of the register class. The
+// name of the register does not matter.
+def I1 : PatLeaf<(i1 PredRegs:$R)>;
+def I32 : PatLeaf<(i32 IntRegs:$R)>;
+def I64 : PatLeaf<(i64 DoubleRegs:$R)>;
+def F32 : PatLeaf<(f32 IntRegs:$R)>;
+def F64 : PatLeaf<(f64 DoubleRegs:$R)>;
+
+// Pattern fragments to extract the low and high subregisters from a
+// 64-bit value.
+def LoReg: OutPatFrag<(ops node:$Rs),
+ (EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>;
+def HiReg: OutPatFrag<(ops node:$Rs),
+ (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>;
+
+// SDNode for converting immediate C to C-1.
+def DEC_CONST_SIGNED : SDNodeXForm<imm, [{
+ // Return the byte immediate const-1 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformSToSM1Imm(imm, SDLoc(N));
+}]>;
+
+// SDNode for converting immediate C to C-2.
+def DEC2_CONST_SIGNED : SDNodeXForm<imm, [{
+ // Return the byte immediate const-2 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformSToSM2Imm(imm, SDLoc(N));
+}]>;
+
+// SDNode for converting immediate C to C-3.
+def DEC3_CONST_SIGNED : SDNodeXForm<imm, [{
+ // Return the byte immediate const-3 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformSToSM3Imm(imm, SDLoc(N));
+}]>;
+
+// SDNode for converting immediate C to C-1.
+def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{
+ // Return the byte immediate const-1 as an SDNode.
+ uint32_t imm = N->getZExtValue();
+ return XformUToUM1Imm(imm, SDLoc(N));
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Compare
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, isCompare = 1, InputType = "imm", isExtendable = 1,
+ opExtendable = 2 in
+class T_CMP <string mnemonic, bits<2> MajOp, bit isNot, Operand ImmOp>
+ : ALU32Inst <(outs PredRegs:$dst),
+ (ins IntRegs:$src1, ImmOp:$src2),
+ "$dst = "#!if(isNot, "!","")#mnemonic#"($src1, #$src2)",
+ [], "",ALU32_2op_tc_2early_SLOT0123 >, ImmRegRel {
+ bits<2> dst;
+ bits<5> src1;
+ bits<10> src2;
+ let CextOpcode = mnemonic;
+ let opExtentBits = !if(!eq(mnemonic, "cmp.gtu"), 9, 10);
+ let isExtentSigned = !if(!eq(mnemonic, "cmp.gtu"), 0, 1);
+
+ let IClass = 0b0111;
+
+ let Inst{27-24} = 0b0101;
+ let Inst{23-22} = MajOp;
+ let Inst{21} = !if(!eq(mnemonic, "cmp.gtu"), 0, src2{9});
+ let Inst{20-16} = src1;
+ let Inst{13-5} = src2{8-0};
+ let Inst{4} = isNot;
+ let Inst{3-2} = 0b00;
+ let Inst{1-0} = dst;
+ }
+
+def C2_cmpeqi : T_CMP <"cmp.eq", 0b00, 0, s10Ext>;
+def C2_cmpgti : T_CMP <"cmp.gt", 0b01, 0, s10Ext>;
+def C2_cmpgtui : T_CMP <"cmp.gtu", 0b10, 0, u9Ext>;
+
+class T_CMP_pat <InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred>
+ : Pat<(i1 (OpNode (i32 IntRegs:$src1), ImmPred:$src2)),
+ (MI IntRegs:$src1, ImmPred:$src2)>;
+
+def : T_CMP_pat <C2_cmpeqi, seteq, s10ImmPred>;
+def : T_CMP_pat <C2_cmpgti, setgt, s10ImmPred>;
+def : T_CMP_pat <C2_cmpgtui, setugt, u9ImmPred>;
+
+//===----------------------------------------------------------------------===//
+// ALU32/ALU +
+//===----------------------------------------------------------------------===//
+// Add.
+
+def SDT_Int32Leaf : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
+def SDT_Int32Unary : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+
+def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>;
+def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>;
+
+let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in
+class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev,
+ bit IsComm>
+ : ALU32_rr<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = "#mnemonic#"($Rs, $Rt)",
+ [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel, PredRel {
+ let isCommutable = IsComm;
+ let BaseOpcode = mnemonic#_rr;
+ let CextOpcode = mnemonic;
+
+ bits<5> Rs;
+ bits<5> Rt;
+ bits<5> Rd;
+
+ let IClass = 0b1111;
+ let Inst{27} = 0b0;
+ let Inst{26-24} = MajOp;
+ let Inst{23-21} = MinOp;
+ let Inst{20-16} = !if(OpsRev,Rt,Rs);
+ let Inst{12-8} = !if(OpsRev,Rs,Rt);
+ let Inst{4-0} = Rd;
+}
+
+let hasSideEffects = 0, hasNewValue = 1 in
+class T_ALU32_3op_pred<string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit OpsRev, bit PredNot, bit PredNew>
+ : ALU32_rr<(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt),
+ "if ("#!if(PredNot,"!","")#"$Pu"#!if(PredNew,".new","")#") "#
+ "$Rd = "#mnemonic#"($Rs, $Rt)",
+ [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel, PredNewRel {
+ let isPredicated = 1;
+ let isPredicatedFalse = PredNot;
+ let isPredicatedNew = PredNew;
+ let BaseOpcode = mnemonic#_rr;
+ let CextOpcode = mnemonic;
+
+ bits<2> Pu;
+ bits<5> Rs;
+ bits<5> Rt;
+ bits<5> Rd;
+
+ let IClass = 0b1111;
+ let Inst{27} = 0b1;
+ let Inst{26-24} = MajOp;
+ let Inst{23-21} = MinOp;
+ let Inst{20-16} = !if(OpsRev,Rt,Rs);
+ let Inst{13} = PredNew;
+ let Inst{12-8} = !if(OpsRev,Rs,Rt);
+ let Inst{7} = PredNot;
+ let Inst{6-5} = Pu;
+ let Inst{4-0} = Rd;
+}
+
+class T_ALU32_combineh<string Op1, string Op2, bits<3> MajOp, bits<3> MinOp,
+ bit OpsRev>
+ : T_ALU32_3op<"", MajOp, MinOp, OpsRev, 0> {
+ let AsmString = "$Rd = combine($Rs"#Op1#", $Rt"#Op2#")";
+}
+
+def A2_combine_hh : T_ALU32_combineh<".h", ".h", 0b011, 0b100, 1>;
+def A2_combine_hl : T_ALU32_combineh<".h", ".l", 0b011, 0b101, 1>;
+def A2_combine_lh : T_ALU32_combineh<".l", ".h", 0b011, 0b110, 1>;
+def A2_combine_ll : T_ALU32_combineh<".l", ".l", 0b011, 0b111, 1>;
+
+class T_ALU32_3op_sfx<string mnemonic, string suffix, bits<3> MajOp,
+ bits<3> MinOp, bit OpsRev, bit IsComm>
+ : T_ALU32_3op<"", MajOp, MinOp, OpsRev, IsComm> {
+ let AsmString = "$Rd = "#mnemonic#"($Rs, $Rt)"#suffix;
+}
+
+def A2_svaddh : T_ALU32_3op<"vaddh", 0b110, 0b000, 0, 1>;
+def A2_svsubh : T_ALU32_3op<"vsubh", 0b110, 0b100, 1, 0>;
+
+let Defs = [USR_OVF], Itinerary = ALU32_3op_tc_2_SLOT0123 in {
+ def A2_svaddhs : T_ALU32_3op_sfx<"vaddh", ":sat", 0b110, 0b001, 0, 1>;
+ def A2_addsat : T_ALU32_3op_sfx<"add", ":sat", 0b110, 0b010, 0, 1>;
+ def A2_svadduhs : T_ALU32_3op_sfx<"vadduh", ":sat", 0b110, 0b011, 0, 1>;
+ def A2_svsubhs : T_ALU32_3op_sfx<"vsubh", ":sat", 0b110, 0b101, 1, 0>;
+ def A2_subsat : T_ALU32_3op_sfx<"sub", ":sat", 0b110, 0b110, 1, 0>;
+ def A2_svsubuhs : T_ALU32_3op_sfx<"vsubuh", ":sat", 0b110, 0b111, 1, 0>;
+}
+
+let Itinerary = ALU32_3op_tc_2_SLOT0123 in
+def A2_svavghs : T_ALU32_3op_sfx<"vavgh", ":rnd", 0b111, 0b001, 0, 1>;
+
+def A2_svavgh : T_ALU32_3op<"vavgh", 0b111, 0b000, 0, 1>;
+def A2_svnavgh : T_ALU32_3op<"vnavgh", 0b111, 0b011, 1, 0>;
+
+multiclass T_ALU32_3op_p<string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit OpsRev> {
+ def t : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 0>;
+ def f : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 1, 0>;
+ def tnew : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 1>;
+ def fnew : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 1, 1>;
+}
+
+multiclass T_ALU32_3op_A2<string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit OpsRev, bit IsComm> {
+ let isPredicable = 1 in
+ def A2_#NAME : T_ALU32_3op <mnemonic, MajOp, MinOp, OpsRev, IsComm>;
+ defm A2_p#NAME : T_ALU32_3op_p<mnemonic, MajOp, MinOp, OpsRev>;
+}
+
+defm add : T_ALU32_3op_A2<"add", 0b011, 0b000, 0, 1>;
+defm and : T_ALU32_3op_A2<"and", 0b001, 0b000, 0, 1>;
+defm or : T_ALU32_3op_A2<"or", 0b001, 0b001, 0, 1>;
+defm sub : T_ALU32_3op_A2<"sub", 0b011, 0b001, 1, 0>;
+defm xor : T_ALU32_3op_A2<"xor", 0b001, 0b011, 0, 1>;
+
+// Pats for instruction selection.
+class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT>
+ : Pat<(ResT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
+ (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>;
+
+def: BinOp32_pat<add, A2_add, i32>;
+def: BinOp32_pat<and, A2_and, i32>;
+def: BinOp32_pat<or, A2_or, i32>;
+def: BinOp32_pat<sub, A2_sub, i32>;
+def: BinOp32_pat<xor, A2_xor, i32>;
+
+// A few special cases producing register pairs:
+let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in {
+ def S2_packhl : T_ALU32_3op <"packhl", 0b101, 0b100, 0, 0>;
+
+ let isPredicable = 1 in
+ def A2_combinew : T_ALU32_3op <"combine", 0b101, 0b000, 0, 0>;
+
+ // Conditional combinew uses "newt/f" instead of "t/fnew".
+ def C2_ccombinewt : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 0, 0>;
+ def C2_ccombinewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 0>;
+ def C2_ccombinewnewt : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 0, 1>;
+ def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>;
+}
+
+def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>;
+def: BinOp32_pat<HexagonPACKHL, S2_packhl, i64>;
+
+let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in
+class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm>
+ : ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Pd = "#mnemonic#"($Rs, $Rt)",
+ [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel {
+ let CextOpcode = mnemonic;
+ let isCommutable = IsComm;
+ bits<5> Rs;
+ bits<5> Rt;
+ bits<2> Pd;
+
+ let IClass = 0b1111;
+ let Inst{27-24} = 0b0010;
+ let Inst{22-21} = MinOp;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{4} = IsNeg;
+ let Inst{3-2} = 0b00;
+ let Inst{1-0} = Pd;
+}
+
+let Itinerary = ALU32_3op_tc_2early_SLOT0123 in {
+ def C2_cmpeq : T_ALU32_3op_cmp< "cmp.eq", 0b00, 0, 1>;
+ def C2_cmpgt : T_ALU32_3op_cmp< "cmp.gt", 0b10, 0, 0>;
+ def C2_cmpgtu : T_ALU32_3op_cmp< "cmp.gtu", 0b11, 0, 0>;
+}
+
+// PatFrag to convert the usual comparison PatFrags (e.g. setlt) to ones
+// that reverse the order of the operands.
+class RevCmp<PatFrag F> : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>;
+
+// Pats for compares. They use PatFrags as operands, not SDNodes,
+// since seteq/setgt/etc. are defined as PatFrags.
+class T_cmp32_rr_pat<InstHexagon MI, PatFrag Op, ValueType VT>
+ : Pat<(VT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
+ (VT (MI IntRegs:$Rs, IntRegs:$Rt))>;
+
+def: T_cmp32_rr_pat<C2_cmpeq, seteq, i1>;
+def: T_cmp32_rr_pat<C2_cmpgt, setgt, i1>;
+def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>;
+
+def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>;
+def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>;
+
+let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1 in
+def C2_mux: ALU32_rr<(outs IntRegs:$Rd),
+ (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = mux($Pu, $Rs, $Rt)", [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel {
+ bits<5> Rd;
+ bits<2> Pu;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let CextOpcode = "mux";
+ let InputType = "reg";
+ let hasSideEffects = 0;
+ let IClass = 0b1111;
+
+ let Inst{27-24} = 0b0100;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{6-5} = Pu;
+ let Inst{4-0} = Rd;
+}
+
+def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
+ (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>;
+
+// Combines two immediates into a double register.
+// The complexity is increased to make it greater than that of any combine
+// that involves a register.
+
+let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+ isExtentSigned = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 1,
+ AddedComplexity = 75 in
+def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8),
+ "$Rdd = combine(#$s8, #$S8)",
+ [(set (i64 DoubleRegs:$Rdd),
+ (i64 (HexagonCOMBINE(i32 s32ImmPred:$s8), (i32 s8ImmPred:$S8))))]> {
+ bits<5> Rdd;
+ bits<8> s8;
+ bits<8> S8;
+
+ let IClass = 0b0111;
+ let Inst{27-23} = 0b11000;
+ let Inst{22-16} = S8{7-1};
+ let Inst{13} = S8{0};
+ let Inst{12-5} = s8;
+ let Inst{4-0} = Rdd;
+ }
+
+//===----------------------------------------------------------------------===//
+// Template class for predicated ADD of a reg and an Immediate value.
+//===----------------------------------------------------------------------===//
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_Addri_Pred <bit PredNot, bit PredNew>
+ : ALU32_ri <(outs IntRegs:$Rd),
+ (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8),
+ !if(PredNot, "if (!$Pu", "if ($Pu")#!if(PredNew,".new) $Rd = ",
+ ") $Rd = ")#"add($Rs, #$s8)"> {
+ bits<5> Rd;
+ bits<2> Pu;
+ bits<5> Rs;
+ bits<8> s8;
+
+ let isPredicatedNew = PredNew;
+ let IClass = 0b0111;
+
+ let Inst{27-24} = 0b0100;
+ let Inst{23} = PredNot;
+ let Inst{22-21} = Pu;
+ let Inst{20-16} = Rs;
+ let Inst{13} = PredNew;
+ let Inst{12-5} = s8;
+ let Inst{4-0} = Rd;
+ }
+
+//===----------------------------------------------------------------------===//
+// A2_addi: Add a signed immediate to a register.
+//===----------------------------------------------------------------------===//
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_Addri <Operand immOp>
+ : ALU32_ri <(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, immOp:$s16),
+ "$Rd = add($Rs, #$s16)", [], "", ALU32_ADDI_tc_1_SLOT0123> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<16> s16;
+
+ let IClass = 0b1011;
+
+ let Inst{27-21} = s16{15-9};
+ let Inst{20-16} = Rs;
+ let Inst{13-5} = s16{8-0};
+ let Inst{4-0} = Rd;
+ }
+
+//===----------------------------------------------------------------------===//
+// Multiclass for ADD of a register and an immediate value.
+//===----------------------------------------------------------------------===//
+multiclass Addri_Pred<string mnemonic, bit PredNot> {
+ let isPredicatedFalse = PredNot in {
+ def NAME : T_Addri_Pred<PredNot, 0>;
+ // Predicate new
+ def NAME#new : T_Addri_Pred<PredNot, 1>;
+ }
+}
+
+let isExtendable = 1, isExtentSigned = 1, InputType = "imm" in
+multiclass Addri_base<string mnemonic, SDNode OpNode> {
+ let CextOpcode = mnemonic, BaseOpcode = mnemonic#_ri in {
+ let opExtendable = 2, opExtentBits = 16, isPredicable = 1 in
+ def A2_#NAME : T_Addri<s16Ext>;
+
+ let opExtendable = 3, opExtentBits = 8, isPredicated = 1 in {
+ defm A2_p#NAME#t : Addri_Pred<mnemonic, 0>;
+ defm A2_p#NAME#f : Addri_Pred<mnemonic, 1>;
+ }
+ }
+}
+
+defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel;
+
+def: Pat<(i32 (add I32:$Rs, s32ImmPred:$s16)),
+ (i32 (A2_addi I32:$Rs, imm:$s16))>;
+
+//===----------------------------------------------------------------------===//
+// Template class used for the following ALU32 instructions.
+// Rd=and(Rs,#s10)
+// Rd=or(Rs,#s10)
+//===----------------------------------------------------------------------===//
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10,
+InputType = "imm", hasNewValue = 1 in
+class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp>
+ : ALU32_ri <(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, s10Ext:$s10),
+ "$Rd = "#mnemonic#"($Rs, #$s10)" ,
+ [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10))]> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<10> s10;
+ let CextOpcode = mnemonic;
+
+ let IClass = 0b0111;
+
+ let Inst{27-24} = 0b0110;
+ let Inst{23-22} = MinOp;
+ let Inst{21} = s10{9};
+ let Inst{20-16} = Rs;
+ let Inst{13-5} = s10{8-0};
+ let Inst{4-0} = Rd;
+ }
+
+def A2_orir : T_ALU32ri_logical<"or", or, 0b10>, ImmRegRel;
+def A2_andir : T_ALU32ri_logical<"and", and, 0b00>, ImmRegRel;
+
+// Subtract register from immediate
+// Rd32=sub(#s10,Rs32)
+let isExtendable = 1, CextOpcode = "sub", opExtendable = 1, isExtentSigned = 1,
+ opExtentBits = 10, InputType = "imm", hasNewValue = 1, hasSideEffects = 0 in
+def A2_subri: ALU32_ri <(outs IntRegs:$Rd), (ins s10Ext:$s10, IntRegs:$Rs),
+ "$Rd = sub(#$s10, $Rs)", []>, ImmRegRel {
+ bits<5> Rd;
+ bits<10> s10;
+ bits<5> Rs;
+
+ let IClass = 0b0111;
+
+ let Inst{27-22} = 0b011001;
+ let Inst{21} = s10{9};
+ let Inst{20-16} = Rs;
+ let Inst{13-5} = s10{8-0};
+ let Inst{4-0} = Rd;
+ }
+
+// Nop.
+let hasSideEffects = 0 in
+def A2_nop: ALU32Inst <(outs), (ins), "nop" > {
+ let IClass = 0b0111;
+ let Inst{27-24} = 0b1111;
+}
+
+def: Pat<(sub s32ImmPred:$s10, IntRegs:$Rs),
+ (A2_subri imm:$s10, IntRegs:$Rs)>;
+
+// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs).
+def: Pat<(not (i32 IntRegs:$src1)),
+ (A2_subri -1, IntRegs:$src1)>;
+
+let hasSideEffects = 0, hasNewValue = 1 in
+class T_tfr16<bit isHi>
+ : ALU32Inst <(outs IntRegs:$Rx), (ins IntRegs:$src1, u16Imm:$u16),
+ "$Rx"#!if(isHi, ".h", ".l")#" = #$u16",
+ [], "$src1 = $Rx" > {
+ bits<5> Rx;
+ bits<16> u16;
+
+ let IClass = 0b0111;
+ let Inst{27-26} = 0b00;
+ let Inst{25-24} = !if(isHi, 0b10, 0b01);
+ let Inst{23-22} = u16{15-14};
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rx;
+ let Inst{13-0} = u16{13-0};
+ }
+
+def A2_tfril: T_tfr16<0>;
+def A2_tfrih: T_tfr16<1>;
+
+// Conditional transfer is an alias to conditional "Rd = add(Rs, #0)".
+let isPredicated = 1, hasNewValue = 1, opNewValue = 0 in
+class T_tfr_pred<bit isPredNot, bit isPredNew>
+ : ALU32Inst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ("#!if(isPredNot, "!", "")#
+ "$src1"#!if(isPredNew, ".new", "")#
+ ") $dst = $src2"> {
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+
+ let isPredicatedFalse = isPredNot;
+ let isPredicatedNew = isPredNew;
+ let IClass = 0b0111;
+
+ let Inst{27-24} = 0b0100;
+ let Inst{23} = isPredNot;
+ let Inst{13} = isPredNew;
+ let Inst{12-5} = 0;
+ let Inst{4-0} = dst;
+ let Inst{22-21} = src1;
+ let Inst{20-16} = src2;
+ }
+
+let isPredicable = 1 in
+class T_tfr : ALU32Inst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ "$dst = $src"> {
+ bits<5> dst;
+ bits<5> src;
+
+ let IClass = 0b0111;
+
+ let Inst{27-21} = 0b0000011;
+ let Inst{20-16} = src;
+ let Inst{13} = 0b0;
+ let Inst{4-0} = dst;
+ }
+
+let InputType = "reg", hasNewValue = 1, hasSideEffects = 0 in
+multiclass tfr_base<string CextOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp in {
+ def NAME : T_tfr;
+
+ // Predicate
+ def t : T_tfr_pred<0, 0>;
+ def f : T_tfr_pred<1, 0>;
+ // Predicate new
+ def tnew : T_tfr_pred<0, 1>;
+ def fnew : T_tfr_pred<1, 1>;
+ }
+}
+
+// Assembler mapped to C2_ccombinew[t|f|newt|newf].
+// Please don't add bits to this instruction as it'll be converted into
+// 'combine' before object code emission.
+let isPredicated = 1 in
+class T_tfrp_pred<bit PredNot, bit PredNew>
+ : ALU32_rr <(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2),
+ "if ("#!if(PredNot, "!", "")#"$src1"
+ #!if(PredNew, ".new", "")#") $dst = $src2" > {
+ let isPredicatedFalse = PredNot;
+ let isPredicatedNew = PredNew;
+ }
+
+// Assembler mapped to A2_combinew.
+// Please don't add bits to this instruction as it'll be converted into
+// 'combine' before object code emission.
+class T_tfrp : ALU32Inst <(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src),
+ "$dst = $src">;
+
+let hasSideEffects = 0 in
+multiclass TFR64_base<string BaseName> {
+ let BaseOpcode = BaseName in {
+ let isPredicable = 1 in
+ def NAME : T_tfrp;
+ // Predicate
+ def t : T_tfrp_pred <0, 0>;
+ def f : T_tfrp_pred <1, 0>;
+ // Predicate new
+ def tnew : T_tfrp_pred <0, 1>;
+ def fnew : T_tfrp_pred <1, 1>;
+ }
+}
+
+let InputType = "imm", isExtendable = 1, isExtentSigned = 1, opExtentBits = 12,
+ isMoveImm = 1, opExtendable = 2, BaseOpcode = "TFRI", CextOpcode = "TFR",
+ hasSideEffects = 0, isPredicated = 1, hasNewValue = 1 in
+class T_TFRI_Pred<bit PredNot, bit PredNew>
+ : ALU32_ri<(outs IntRegs:$Rd), (ins PredRegs:$Pu, s12Ext:$s12),
+ "if ("#!if(PredNot,"!","")#"$Pu"#!if(PredNew,".new","")#") $Rd = #$s12",
+ [], "", ALU32_2op_tc_1_SLOT0123>, ImmRegRel, PredNewRel {
+ let isPredicatedFalse = PredNot;
+ let isPredicatedNew = PredNew;
+
+ bits<5> Rd;
+ bits<2> Pu;
+ bits<12> s12;
+
+ let IClass = 0b0111;
+ let Inst{27-24} = 0b1110;
+ let Inst{23} = PredNot;
+ let Inst{22-21} = Pu;
+ let Inst{20} = 0b0;
+ let Inst{19-16,12-5} = s12;
+ let Inst{13} = PredNew;
+ let Inst{4-0} = Rd;
+}
+
+def C2_cmoveit : T_TFRI_Pred<0, 0>;
+def C2_cmoveif : T_TFRI_Pred<1, 0>;
+def C2_cmovenewit : T_TFRI_Pred<0, 1>;
+def C2_cmovenewif : T_TFRI_Pred<1, 1>;
+
+let InputType = "imm", isExtendable = 1, isExtentSigned = 1,
+ CextOpcode = "TFR", BaseOpcode = "TFRI", hasNewValue = 1, opNewValue = 0,
+ isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1,
+ isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in
+def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16",
+ [(set (i32 IntRegs:$Rd), s32ImmPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>,
+ ImmRegRel, PredRel {
+ bits<5> Rd;
+ bits<16> s16;
+
+ let IClass = 0b0111;
+ let Inst{27-24} = 0b1000;
+ let Inst{23-22,20-16,13-5} = s16;
+ let Inst{4-0} = Rd;
+}
+
+defm A2_tfr : tfr_base<"TFR">, ImmRegRel, PredNewRel;
+let isAsmParserOnly = 1 in
+defm A2_tfrp : TFR64_base<"TFR64">, PredNewRel;
+
+// Assembler mapped
+let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+ isAsmParserOnly = 1 in
+def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
+ "$dst = #$src1",
+ [(set (i64 DoubleRegs:$dst), s8Imm64Pred:$src1)]>;
+
+// TODO: see if this instruction can be deleted.
+let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
+ isAsmParserOnly = 1 in {
+def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64Imm:$src1),
+ "$dst = #$src1">;
+def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst),
+ (ins s8Ext:$src1, s8Imm:$src2),
+ "$dst = combine(##$src1, #$src2)">;
+}
+
+//===----------------------------------------------------------------------===//
+// ALU32/ALU -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PERM +
+//===----------------------------------------------------------------------===//
+// Scalar mux register immediate.
+let hasSideEffects = 0, isExtentSigned = 1, CextOpcode = "MUX",
+ InputType = "imm", hasNewValue = 1, isExtendable = 1, opExtentBits = 8 in
+class T_MUX1 <bit MajOp, dag ins, string AsmStr>
+ : ALU32Inst <(outs IntRegs:$Rd), ins, AsmStr>, ImmRegRel {
+ bits<5> Rd;
+ bits<2> Pu;
+ bits<8> s8;
+ bits<5> Rs;
+
+ let IClass = 0b0111;
+ let Inst{27-24} = 0b0011;
+ let Inst{23} = MajOp;
+ let Inst{22-21} = Pu;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{12-5} = s8;
+ let Inst{4-0} = Rd;
+}
+
+let opExtendable = 2 in
+def C2_muxri : T_MUX1<0b1, (ins PredRegs:$Pu, s8Ext:$s8, IntRegs:$Rs),
+ "$Rd = mux($Pu, #$s8, $Rs)">;
+
+let opExtendable = 3 in
+def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8),
+ "$Rd = mux($Pu, $Rs, #$s8)">;
+
+def : Pat<(i32 (select I1:$Pu, s32ImmPred:$s8, I32:$Rs)),
+ (C2_muxri I1:$Pu, s32ImmPred:$s8, I32:$Rs)>;
+
+def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32ImmPred:$s8)),
+ (C2_muxir I1:$Pu, I32:$Rs, s32ImmPred:$s8)>;
+
+// C2_muxii: Scalar mux immediates.
+let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1,
+ opExtentBits = 8, opExtendable = 2 in
+def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
+ (ins PredRegs:$Pu, s8Ext:$s8, s8Imm:$S8),
+ "$Rd = mux($Pu, #$s8, #$S8)" ,
+ [(set (i32 IntRegs:$Rd),
+ (i32 (select I1:$Pu, s32ImmPred:$s8, s8ImmPred:$S8)))] > {
+ bits<5> Rd;
+ bits<2> Pu;
+ bits<8> s8;
+ bits<8> S8;
+
+ let IClass = 0b0111;
+
+ let Inst{27-25} = 0b101;
+ let Inst{24-23} = Pu;
+ let Inst{22-16} = S8{7-1};
+ let Inst{13} = S8{0};
+ let Inst{12-5} = s8;
+ let Inst{4-0} = Rd;
+ }
+
+let isCodeGenOnly = 1, isPseudo = 1 in
+def MUX64_rr : ALU64_rr<(outs DoubleRegs:$Rd),
+ (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"should not emit\" ", []>;
+
+
+//===----------------------------------------------------------------------===//
+// template class for non-predicated alu32_2op instructions
+// - aslh, asrh, sxtb, sxth, zxth
+//===----------------------------------------------------------------------===//
+let hasNewValue = 1, opNewValue = 0 in
+class T_ALU32_2op <string mnemonic, bits<3> minOp> :
+ ALU32Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs),
+ "$Rd = "#mnemonic#"($Rs)", [] > {
+ bits<5> Rd;
+ bits<5> Rs;
+
+ let IClass = 0b0111;
+
+ let Inst{27-24} = 0b0000;
+ let Inst{23-21} = minOp;
+ let Inst{13} = 0b0;
+ let Inst{4-0} = Rd;
+ let Inst{20-16} = Rs;
+}
+
+//===----------------------------------------------------------------------===//
+// template class for predicated alu32_2op instructions
+// - aslh, asrh, sxtb, sxth, zxtb, zxth
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+class T_ALU32_2op_Pred <string mnemonic, bits<3> minOp, bit isPredNot,
+ bit isPredNew > :
+ ALU32Inst <(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs),
+ !if(isPredNot, "if (!$Pu", "if ($Pu")
+ #!if(isPredNew, ".new) ",") ")#"$Rd = "#mnemonic#"($Rs)"> {
+ bits<5> Rd;
+ bits<2> Pu;
+ bits<5> Rs;
+
+ let IClass = 0b0111;
+
+ let Inst{27-24} = 0b0000;
+ let Inst{23-21} = minOp;
+ let Inst{13} = 0b1;
+ let Inst{11} = isPredNot;
+ let Inst{10} = isPredNew;
+ let Inst{4-0} = Rd;
+ let Inst{9-8} = Pu;
+ let Inst{20-16} = Rs;
+}
+
+multiclass ALU32_2op_Pred<string mnemonic, bits<3> minOp, bit PredNot> {
+ let isPredicatedFalse = PredNot in {
+ def NAME : T_ALU32_2op_Pred<mnemonic, minOp, PredNot, 0>;
+
+ // Predicate new
+ let isPredicatedNew = 1 in
+ def NAME#new : T_ALU32_2op_Pred<mnemonic, minOp, PredNot, 1>;
+ }
+}
+
+multiclass ALU32_2op_base<string mnemonic, bits<3> minOp> {
+ let BaseOpcode = mnemonic in {
+ let isPredicable = 1, hasSideEffects = 0 in
+ def A2_#NAME : T_ALU32_2op<mnemonic, minOp>;
+
+ let isPredicated = 1, hasSideEffects = 0 in {
+ defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
+ defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
+ }
+ }
+}
+
+defm aslh : ALU32_2op_base<"aslh", 0b000>, PredNewRel;
+defm asrh : ALU32_2op_base<"asrh", 0b001>, PredNewRel;
+defm sxtb : ALU32_2op_base<"sxtb", 0b101>, PredNewRel;
+defm sxth : ALU32_2op_base<"sxth", 0b111>, PredNewRel;
+defm zxth : ALU32_2op_base<"zxth", 0b110>, PredNewRel;
+
+// Rd=zxtb(Rs): assembler mapped to Rd=and(Rs,#255).
+// The compiler wants to generate 'zxtb' instead of 'and' because 'zxtb' has
+// predicated forms while 'and' does not. Since the integrated assembler can't
+// handle 'mapped' instructions, we need to encode 'zxtb' the same as 'and'
+// with the immediate operand set to '255'.
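+// For example (register names are arbitrary), "r0 = zxtb(r1)" is emitted with
+// the same encoding as "r0 = and(r1, #255)"; the class below hard-wires the
+// immediate field s10 to 255.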
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_ZXTB: ALU32Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rs),
+ "$Rd = zxtb($Rs)", [] > { // Rd = and(Rs,255)
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<10> s10 = 255;
+
+ let IClass = 0b0111;
+
+ let Inst{27-22} = 0b011000;
+ let Inst{4-0} = Rd;
+ let Inst{20-16} = Rs;
+ let Inst{21} = s10{9};
+ let Inst{13-5} = s10{8-0};
+}
+
+// Rd=zxtb(Rs): assembler mapped to "Rd=and(Rs,#255)".
+multiclass ZXTB_base <string mnemonic, bits<3> minOp> {
+ let BaseOpcode = mnemonic in {
+ let isPredicable = 1, hasSideEffects = 0 in
+ def A2_#NAME : T_ZXTB;
+
+ let isPredicated = 1, hasSideEffects = 0 in {
+ defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
+ defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
+ }
+ }
+}
+
+defm zxtb : ZXTB_base<"zxtb",0b100>, PredNewRel;
+
+def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>;
+def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>;
+def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>;
+def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>;
+
+//===----------------------------------------------------------------------===//
+// Template class for vector add and avg
+//===----------------------------------------------------------------------===//
+
+class T_VectALU_64 <string opc, bits<3> majOp, bits<3> minOp,
+ bit isSat, bit isRnd, bit isCrnd, bit SwapOps >
+ : ALU64_rr < (outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rdd = "#opc#"($Rss, $Rtt)"#!if(isRnd, ":rnd", "")
+ #!if(isCrnd,":crnd","")
+ #!if(isSat, ":sat", ""),
+ [], "", ALU64_tc_2_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1101;
+
+ let Inst{27-24} = 0b0011;
+ let Inst{23-21} = majOp;
+ let Inst{20-16} = !if (SwapOps, Rtt, Rss);
+ let Inst{12-8} = !if (SwapOps, Rss, Rtt);
+ let Inst{7-5} = minOp;
+ let Inst{4-0} = Rdd;
+ }
+
+// ALU64 - Vector add
+// Rdd=vadd[u][bhw](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+ def A2_vaddub : T_VectALU_64 < "vaddub", 0b000, 0b000, 0, 0, 0, 0>;
+ def A2_vaddh : T_VectALU_64 < "vaddh", 0b000, 0b010, 0, 0, 0, 0>;
+ def A2_vaddw : T_VectALU_64 < "vaddw", 0b000, 0b101, 0, 0, 0, 0>;
+}
+
+// Rdd=vadd[u][bhw](Rss,Rtt):sat
+let Defs = [USR_OVF] in {
+ def A2_vaddubs : T_VectALU_64 < "vaddub", 0b000, 0b001, 1, 0, 0, 0>;
+ def A2_vaddhs : T_VectALU_64 < "vaddh", 0b000, 0b011, 1, 0, 0, 0>;
+ def A2_vadduhs : T_VectALU_64 < "vadduh", 0b000, 0b100, 1, 0, 0, 0>;
+ def A2_vaddws : T_VectALU_64 < "vaddw", 0b000, 0b110, 1, 0, 0, 0>;
+}
+
+// ALU64 - Vector average
+// Rdd=vavg[u][bhw](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+ def A2_vavgub : T_VectALU_64 < "vavgub", 0b010, 0b000, 0, 0, 0, 0>;
+ def A2_vavgh : T_VectALU_64 < "vavgh", 0b010, 0b010, 0, 0, 0, 0>;
+ def A2_vavguh : T_VectALU_64 < "vavguh", 0b010, 0b101, 0, 0, 0, 0>;
+ def A2_vavgw : T_VectALU_64 < "vavgw", 0b011, 0b000, 0, 0, 0, 0>;
+ def A2_vavguw : T_VectALU_64 < "vavguw", 0b011, 0b011, 0, 0, 0, 0>;
+}
+
+// Rdd=vavg[u][bhw](Rss,Rtt)[:rnd|:crnd]
+def A2_vavgubr : T_VectALU_64 < "vavgub", 0b010, 0b001, 0, 1, 0, 0>;
+def A2_vavghr : T_VectALU_64 < "vavgh", 0b010, 0b011, 0, 1, 0, 0>;
+def A2_vavghcr : T_VectALU_64 < "vavgh", 0b010, 0b100, 0, 0, 1, 0>;
+def A2_vavguhr : T_VectALU_64 < "vavguh", 0b010, 0b110, 0, 1, 0, 0>;
+
+def A2_vavgwr : T_VectALU_64 < "vavgw", 0b011, 0b001, 0, 1, 0, 0>;
+def A2_vavgwcr : T_VectALU_64 < "vavgw", 0b011, 0b010, 0, 0, 1, 0>;
+def A2_vavguwr : T_VectALU_64 < "vavguw", 0b011, 0b100, 0, 1, 0, 0>;
+
+// Rdd=vnavg[bh](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+ def A2_vnavgh : T_VectALU_64 < "vnavgh", 0b100, 0b000, 0, 0, 0, 1>;
+ def A2_vnavgw : T_VectALU_64 < "vnavgw", 0b100, 0b011, 0, 0, 0, 1>;
+}
+
+// Rdd=vnavg[bh](Rss,Rtt)[:rnd|:crnd]:sat
+let Defs = [USR_OVF] in {
+ def A2_vnavghr : T_VectALU_64 < "vnavgh", 0b100, 0b001, 1, 1, 0, 1>;
+ def A2_vnavghcr : T_VectALU_64 < "vnavgh", 0b100, 0b010, 1, 0, 1, 1>;
+ def A2_vnavgwr : T_VectALU_64 < "vnavgw", 0b100, 0b100, 1, 1, 0, 1>;
+ def A2_vnavgwcr : T_VectALU_64 < "vnavgw", 0b100, 0b110, 1, 0, 1, 1>;
+}
+
+// Rdd=vsub[u][bh](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+ def A2_vsubub : T_VectALU_64 < "vsubub", 0b001, 0b000, 0, 0, 0, 1>;
+ def A2_vsubh : T_VectALU_64 < "vsubh", 0b001, 0b010, 0, 0, 0, 1>;
+ def A2_vsubw : T_VectALU_64 < "vsubw", 0b001, 0b101, 0, 0, 0, 1>;
+}
+
+// Rdd=vsub[u][bh](Rss,Rtt):sat
+let Defs = [USR_OVF] in {
+ def A2_vsububs : T_VectALU_64 < "vsubub", 0b001, 0b001, 1, 0, 0, 1>;
+ def A2_vsubhs : T_VectALU_64 < "vsubh", 0b001, 0b011, 1, 0, 0, 1>;
+ def A2_vsubuhs : T_VectALU_64 < "vsubuh", 0b001, 0b100, 1, 0, 0, 1>;
+ def A2_vsubws : T_VectALU_64 < "vsubw", 0b001, 0b110, 1, 0, 0, 1>;
+}
+
+// Rdd=vmax[u][bhw](Rss,Rtt)
+def A2_vmaxb : T_VectALU_64 < "vmaxb", 0b110, 0b110, 0, 0, 0, 1>;
+def A2_vmaxub : T_VectALU_64 < "vmaxub", 0b110, 0b000, 0, 0, 0, 1>;
+def A2_vmaxh : T_VectALU_64 < "vmaxh", 0b110, 0b001, 0, 0, 0, 1>;
+def A2_vmaxuh : T_VectALU_64 < "vmaxuh", 0b110, 0b010, 0, 0, 0, 1>;
+def A2_vmaxw : T_VectALU_64 < "vmaxw", 0b110, 0b011, 0, 0, 0, 1>;
+def A2_vmaxuw : T_VectALU_64 < "vmaxuw", 0b101, 0b101, 0, 0, 0, 1>;
+
+// Rdd=vmin[u][bhw](Rss,Rtt)
+def A2_vminb : T_VectALU_64 < "vminb", 0b110, 0b111, 0, 0, 0, 1>;
+def A2_vminub : T_VectALU_64 < "vminub", 0b101, 0b000, 0, 0, 0, 1>;
+def A2_vminh : T_VectALU_64 < "vminh", 0b101, 0b001, 0, 0, 0, 1>;
+def A2_vminuh : T_VectALU_64 < "vminuh", 0b101, 0b010, 0, 0, 0, 1>;
+def A2_vminw : T_VectALU_64 < "vminw", 0b101, 0b011, 0, 0, 0, 1>;
+def A2_vminuw : T_VectALU_64 < "vminuw", 0b101, 0b100, 0, 0, 0, 1>;
+
+//===----------------------------------------------------------------------===//
+// Template class for vector compare
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in
+class T_vcmp <string Str, bits<4> minOp>
+ : ALU64_rr <(outs PredRegs:$Pd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Pd = "#Str#"($Rss, $Rtt)", [],
+ "", ALU64_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b00100;
+ let Inst{13} = minOp{3};
+ let Inst{7-5} = minOp{2-0};
+ let Inst{1-0} = Pd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+class T_vcmp_pat<InstHexagon MI, PatFrag Op, ValueType T>
+ : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))),
+ (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>;
+
+// Vector compare bytes
+def A2_vcmpbeq : T_vcmp <"vcmpb.eq", 0b0110>;
+def A2_vcmpbgtu : T_vcmp <"vcmpb.gtu", 0b0111>;
+
+// Vector compare halfwords
+def A2_vcmpheq : T_vcmp <"vcmph.eq", 0b0011>;
+def A2_vcmphgt : T_vcmp <"vcmph.gt", 0b0100>;
+def A2_vcmphgtu : T_vcmp <"vcmph.gtu", 0b0101>;
+
+// Vector compare words
+def A2_vcmpweq : T_vcmp <"vcmpw.eq", 0b0000>;
+def A2_vcmpwgt : T_vcmp <"vcmpw.gt", 0b0001>;
+def A2_vcmpwgtu : T_vcmp <"vcmpw.gtu", 0b0010>;
+
+def: T_vcmp_pat<A2_vcmpbeq, seteq, v8i8>;
+def: T_vcmp_pat<A2_vcmpbgtu, setugt, v8i8>;
+def: T_vcmp_pat<A2_vcmpheq, seteq, v4i16>;
+def: T_vcmp_pat<A2_vcmphgt, setgt, v4i16>;
+def: T_vcmp_pat<A2_vcmphgtu, setugt, v4i16>;
+def: T_vcmp_pat<A2_vcmpweq, seteq, v2i32>;
+def: T_vcmp_pat<A2_vcmpwgt, setgt, v2i32>;
+def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>;
+
+//===----------------------------------------------------------------------===//
+// ALU32/PERM -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PRED +
+//===----------------------------------------------------------------------===//
+// No bits needed. If cmp.ge is found, the assembler parser will
+// transform it to cmp.gt, subtracting 1 from the immediate.
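+// For example (arbitrary register names), "p0 = cmp.ge(r0, #10)" is accepted
+// by the assembler and emitted as "p0 = cmp.gt(r0, #9)".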
+let isPseudo = 1 in {
+def C2_cmpgei: ALU32Inst <
+ (outs PredRegs:$Pd), (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Pd = cmp.ge($Rs, #$s8)">;
+def C2_cmpgeui: ALU32Inst <
+ (outs PredRegs:$Pd), (ins IntRegs:$Rs, u8Ext:$s8),
+ "$Pd = cmp.geu($Rs, #$s8)">;
+}
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PRED -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU +
+//===----------------------------------------------------------------------===//
+// Add.
+//===----------------------------------------------------------------------===//
+// Template Class
+// Add/Subtract halfword
+// Rd=add(Rt.L,Rs.[HL])[:sat]
+// Rd=sub(Rt.L,Rs.[HL])[:sat]
+// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<16]
+// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<16]
+//===----------------------------------------------------------------------===//
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_XTYPE_ADD_SUB <bits<2> LHbits, bit isSat, bit hasShift, bit isSub>
+ : ALU64Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rt, IntRegs:$Rs),
+ "$Rd = "#!if(isSub,"sub","add")#"($Rt."
+ #!if(hasShift, !if(LHbits{1},"h","l"),"l") #", $Rs."
+ #!if(hasShift, !if(LHbits{0},"h)","l)"), !if(LHbits{1},"h)","l)"))
+ #!if(isSat,":sat","")
+ #!if(hasShift,":<<16",""), [], "", ALU64_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rt;
+ bits<5> Rs;
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b01010;
+ let Inst{22} = hasShift;
+ let Inst{21} = isSub;
+ let Inst{7} = isSat;
+ let Inst{6-5} = LHbits;
+ let Inst{4-0} = Rd;
+ let Inst{12-8} = Rt;
+ let Inst{20-16} = Rs;
+ }
+
+//Rd=sub(Rt.L,Rs.[LH])
+def A2_subh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 1>;
+def A2_subh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 1>;
+
+//Rd=add(Rt.L,Rs.[LH])
+def A2_addh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 0>;
+def A2_addh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 0>;
+
+let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in {
+ //Rd=sub(Rt.L,Rs.[LH]):sat
+ def A2_subh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 1>;
+ def A2_subh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 1>;
+
+ //Rd=add(Rt.L,Rs.[LH]):sat
+ def A2_addh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 0>;
+ def A2_addh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 0>;
+}
+
+//Rd=sub(Rt.[LH],Rs.[LH]):<<16
+def A2_subh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 1>;
+def A2_subh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 1>;
+def A2_subh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 1>;
+def A2_subh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 1>;
+
+//Rd=add(Rt.[LH],Rs.[LH]):<<16
+def A2_addh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 0>;
+def A2_addh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 0>;
+def A2_addh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 0>;
+def A2_addh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 0>;
+
+let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in {
+ //Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16
+ def A2_subh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 1>;
+ def A2_subh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 1>;
+ def A2_subh_h16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 1, 1>;
+ def A2_subh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 1>;
+
+ //Rd=add(Rt.[LH],Rs.[LH]):sat:<<16
+ def A2_addh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 0>;
+ def A2_addh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 0>;
+ def A2_addh_h16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 1, 0>;
+ def A2_addh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 0>;
+}
+
+// Add halfword.
+def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16),
+ (A2_addh_l16_ll I32:$src1, I32:$src2)>;
+
+def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)),
+ (A2_addh_l16_hl I32:$src1, I32:$src2)>;
+
+def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)),
+ (A2_addh_h16_ll I32:$src1, I32:$src2)>;
+
+// Subtract halfword.
+def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16),
+ (A2_subh_l16_ll I32:$src1, I32:$src2)>;
+
+def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)),
+ (A2_subh_h16_ll I32:$src1, I32:$src2)>;
+
+let hasSideEffects = 0, hasNewValue = 1 in
+def S2_parityp: ALU64Inst<(outs IntRegs:$Rd),
+ (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
+ "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = 0b0000;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{4-0} = Rd;
+}
+
+let hasNewValue = 1, opNewValue = 0, hasSideEffects = 0 in
+class T_XTYPE_MIN_MAX < bit isMax, bit isUnsigned >
+ : ALU64Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rt, IntRegs:$Rs),
+ "$Rd = "#!if(isMax,"max","min")#!if(isUnsigned,"u","")
+ #"($Rt, $Rs)", [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rt;
+ bits<5> Rs;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b01011;
+ let Inst{22-21} = !if(isMax, 0b10, 0b01);
+ let Inst{7} = isUnsigned;
+ let Inst{4-0} = Rd;
+ let Inst{12-8} = !if(isMax, Rs, Rt);
+ let Inst{20-16} = !if(isMax, Rt, Rs);
+ }
+
+def A2_min : T_XTYPE_MIN_MAX < 0, 0 >;
+def A2_minu : T_XTYPE_MIN_MAX < 0, 1 >;
+def A2_max : T_XTYPE_MIN_MAX < 1, 0 >;
+def A2_maxu : T_XTYPE_MIN_MAX < 1, 1 >;
+
+// Here, depending on which operand is selected, we generate either a min or
+// a max instruction.
+// Ex:
+// (a>b)?a:b --> max(a,b) => The check performed is '>' and the larger of the
+// two values is selected, so the corresponding HexagonInst is passed in 'Inst'.
+// (a>b)?b:a --> min(a,b) => The check performed is '>' but the smaller value
+// is selected, so the corresponding HexagonInst is passed in 'SwapInst'.
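+// Concretely, for Op = setgt over i32 the patterns below give:
+//   (select (setgt a, b), a, b) -> A2_max(a, b)
+//   (select (setgt a, b), b, a) -> A2_min(a, b)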
+
+multiclass T_MinMax_pats <PatFrag Op, RegisterClass RC, ValueType VT,
+ InstHexagon Inst, InstHexagon SwapInst> {
+ def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))),
+ (VT RC:$src1), (VT RC:$src2)),
+ (Inst RC:$src1, RC:$src2)>;
+ def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))),
+ (VT RC:$src2), (VT RC:$src1)),
+ (SwapInst RC:$src1, RC:$src2)>;
+}
+
+
+multiclass MinMax_pats <PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
+ defm: T_MinMax_pats<Op, IntRegs, i32, Inst, SwapInst>;
+
+ def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1),
+ (i32 PositiveHalfWord:$src2))),
+ (i32 PositiveHalfWord:$src1),
+ (i32 PositiveHalfWord:$src2))), i16),
+ (Inst IntRegs:$src1, IntRegs:$src2)>;
+
+ def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1),
+ (i32 PositiveHalfWord:$src2))),
+ (i32 PositiveHalfWord:$src2),
+ (i32 PositiveHalfWord:$src1))), i16),
+ (SwapInst IntRegs:$src1, IntRegs:$src2)>;
+}
+
+let AddedComplexity = 200 in {
+ defm: MinMax_pats<setge, A2_max, A2_min>;
+ defm: MinMax_pats<setgt, A2_max, A2_min>;
+ defm: MinMax_pats<setle, A2_min, A2_max>;
+ defm: MinMax_pats<setlt, A2_min, A2_max>;
+ defm: MinMax_pats<setuge, A2_maxu, A2_minu>;
+ defm: MinMax_pats<setugt, A2_maxu, A2_minu>;
+ defm: MinMax_pats<setule, A2_minu, A2_maxu>;
+ defm: MinMax_pats<setult, A2_minu, A2_maxu>;
+}
+
+class T_cmp64_rr<string mnemonic, bits<3> MinOp, bit IsComm>
+ : ALU64_rr<(outs PredRegs:$Pd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
+ "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", ALU64_tc_2early_SLOT23> {
+ let isCompare = 1;
+ let isCommutable = IsComm;
+ let hasSideEffects = 0;
+
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-21} = 0b0010100;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = MinOp;
+ let Inst{1-0} = Pd;
+}
+
+def C2_cmpeqp : T_cmp64_rr<"cmp.eq", 0b000, 1>;
+def C2_cmpgtp : T_cmp64_rr<"cmp.gt", 0b010, 0>;
+def C2_cmpgtup : T_cmp64_rr<"cmp.gtu", 0b100, 0>;
+
+class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp>
+ : Pat<(i1 (CmpOp (i64 DoubleRegs:$Rs), (i64 DoubleRegs:$Rt))),
+ (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>;
+
+def: T_cmp64_rr_pat<C2_cmpeqp, seteq>;
+def: T_cmp64_rr_pat<C2_cmpgtp, setgt>;
+def: T_cmp64_rr_pat<C2_cmpgtup, setugt>;
+def: T_cmp64_rr_pat<C2_cmpgtp, RevCmp<setlt>>;
+def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>;
+
+def C2_vmux : ALU64_rr<(outs DoubleRegs:$Rd),
+ (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ "$Rd = vmux($Pu, $Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> {
+ let hasSideEffects = 0;
+
+ bits<5> Rd;
+ bits<2> Pu;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = 0b0001;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{6-5} = Pu;
+ let Inst{4-0} = Rd;
+}
+
+class T_ALU64_rr<string mnemonic, string suffix, bits<4> RegType,
+ bits<3> MajOp, bits<3> MinOp, bit OpsRev, bit IsComm,
+ string Op2Pfx>
+ : ALU64_rr<(outs DoubleRegs:$Rd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
+ "$Rd = " #mnemonic# "($Rs, " #Op2Pfx# "$Rt)" #suffix, [],
+ "", ALU64_tc_1_SLOT23> {
+ let hasSideEffects = 0;
+ let isCommutable = IsComm;
+
+ bits<5> Rs;
+ bits<5> Rt;
+ bits<5> Rd;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = RegType;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = !if (OpsRev,Rt,Rs);
+ let Inst{12-8} = !if (OpsRev,Rs,Rt);
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rd;
+}
+
+class T_ALU64_arith<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit IsSat,
+ bit OpsRev, bit IsComm>
+ : T_ALU64_rr<mnemonic, !if(IsSat,":sat",""), 0b0011, MajOp, MinOp, OpsRev,
+ IsComm, "">;
+
+def A2_addp : T_ALU64_arith<"add", 0b000, 0b111, 0, 0, 1>;
+def A2_subp : T_ALU64_arith<"sub", 0b001, 0b111, 0, 1, 0>;
+
+def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>;
+def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>;
+
+class T_ALU64_logical<string mnemonic, bits<3> MinOp, bit OpsRev, bit IsComm,
+ bit IsNeg>
+ : T_ALU64_rr<mnemonic, "", 0b0011, 0b111, MinOp, OpsRev, IsComm,
+ !if(IsNeg,"~","")>;
+
+def A2_andp : T_ALU64_logical<"and", 0b000, 0, 1, 0>;
+def A2_orp : T_ALU64_logical<"or", 0b010, 0, 1, 0>;
+def A2_xorp : T_ALU64_logical<"xor", 0b100, 0, 1, 0>;
+
+def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>;
+def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>;
+def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>;
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/BIT +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/BIT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/PERM +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// CR +
+//===----------------------------------------------------------------------===//
+// Logical reductions on predicates.
+
+// Looping instructions.
+
+// Pipelined looping instructions.
+
+// Logical operations on predicates.
+let hasSideEffects = 0 in
+class T_LOGICAL_1OP<string MnOp, bits<2> OpBits>
+ : CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps),
+ "$Pd = " # MnOp # "($Ps)", [], "", CR_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<2> Ps;
+
+ let IClass = 0b0110;
+ let Inst{27-23} = 0b10111;
+ let Inst{22-21} = OpBits;
+ let Inst{20} = 0b0;
+ let Inst{17-16} = Ps;
+ let Inst{13} = 0b0;
+ let Inst{1-0} = Pd;
+}
+
+def C2_any8 : T_LOGICAL_1OP<"any8", 0b00>;
+def C2_all8 : T_LOGICAL_1OP<"all8", 0b01>;
+def C2_not : T_LOGICAL_1OP<"not", 0b10>;
+
+def: Pat<(i1 (not (i1 PredRegs:$Ps))),
+ (C2_not PredRegs:$Ps)>;
+
+let hasSideEffects = 0 in
+class T_LOGICAL_2OP<string MnOp, bits<3> OpBits, bit IsNeg, bit Rev>
+ : CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps, PredRegs:$Pt),
+ "$Pd = " # MnOp # "($Ps, " # !if (IsNeg,"!","") # "$Pt)",
+ [], "", CR_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<2> Ps;
+ bits<2> Pt;
+
+ let IClass = 0b0110;
+ let Inst{27-24} = 0b1011;
+ let Inst{23-21} = OpBits;
+ let Inst{20} = 0b0;
+ let Inst{17-16} = !if(Rev,Pt,Ps); // Rs and Rt are reversed for some
+ let Inst{13} = 0b0; // instructions.
+ let Inst{9-8} = !if(Rev,Ps,Pt);
+ let Inst{1-0} = Pd;
+}
+
+def C2_and : T_LOGICAL_2OP<"and", 0b000, 0, 1>;
+def C2_or : T_LOGICAL_2OP<"or", 0b001, 0, 1>;
+def C2_xor : T_LOGICAL_2OP<"xor", 0b010, 0, 0>;
+def C2_andn : T_LOGICAL_2OP<"and", 0b011, 1, 1>;
+def C2_orn : T_LOGICAL_2OP<"or", 0b111, 1, 1>;
+
+def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>;
+
+let hasSideEffects = 0, hasNewValue = 1 in
+def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt),
+ "$Rd = vitpack($Ps, $Pt)", [], "", S_2op_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<2> Ps;
+ bits<2> Pt;
+
+ let IClass = 0b1000;
+ let Inst{27-24} = 0b1001;
+ let Inst{22-21} = 0b00;
+ let Inst{17-16} = Ps;
+ let Inst{9-8} = Pt;
+ let Inst{4-0} = Rd;
+}
+
+let hasSideEffects = 0 in
+def C2_mask : SInst<(outs DoubleRegs:$Rd), (ins PredRegs:$Pt),
+ "$Rd = mask($Pt)", [], "", S_2op_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<2> Pt;
+
+ let IClass = 0b1000;
+ let Inst{27-24} = 0b0110;
+ let Inst{9-8} = Pt;
+ let Inst{4-0} = Rd;
+}
+
+// User control register transfer.
+//===----------------------------------------------------------------------===//
+// CR -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// JR +
+//===----------------------------------------------------------------------===//
+
+def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
+
+class CondStr<string CReg, bit True, bit New> {
+ string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") ";
+}
+class JumpOpcStr<string Mnemonic, bit New, bit Taken> {
+ string S = Mnemonic # !if(Taken, ":t", !if(New, ":nt", ""));
+}
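+// For example, CondStr<"$src", 1, 1>.S evaluates to "if ($src.new) " and
+// JumpOpcStr<"jump", 1, 0>.S to "jump:nt", which together produce an asm
+// string like "if ($src.new) jump:nt $dst".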
+
+let isBranch = 1, isBarrier = 1, Defs = [PC], hasSideEffects = 0,
+ isPredicable = 1,
+ isExtendable = 1, opExtendable = 0, isExtentSigned = 1,
+ opExtentBits = 24, opExtentAlign = 2, InputType = "imm" in
+class T_JMP<string ExtStr>
+ : JInst<(outs), (ins brtarget:$dst),
+ "jump " # ExtStr # "$dst",
+ [], "", J_tc_2early_SLOT23> {
+ bits<24> dst;
+ let IClass = 0b0101;
+
+ let Inst{27-25} = 0b100;
+ let Inst{24-16} = dst{23-15};
+ let Inst{13-1} = dst{14-2};
+}
+
+let isBranch = 1, Defs = [PC], hasSideEffects = 0, isPredicated = 1,
+ isExtendable = 1, opExtendable = 1, isExtentSigned = 1,
+ opExtentBits = 17, opExtentAlign = 2, InputType = "imm" in
+class T_JMP_c<bit PredNot, bit isPredNew, bit isTak, string ExtStr>
+ : JInst<(outs), (ins PredRegs:$src, brtarget:$dst),
+ CondStr<"$src", !if(PredNot,0,1), isPredNew>.S #
+ JumpOpcStr<"jump", isPredNew, isTak>.S # " " #
+ ExtStr # "$dst",
+ [], "", J_tc_2early_SLOT23>, ImmRegRel {
+ let isTaken = isTak;
+ let isPredicatedFalse = PredNot;
+ let isPredicatedNew = isPredNew;
+ bits<2> src;
+ bits<17> dst;
+
+ let IClass = 0b0101;
+
+ let Inst{27-24} = 0b1100;
+ let Inst{21} = PredNot;
+ let Inst{12} = isTak;
+ let Inst{11} = isPredNew;
+ let Inst{9-8} = src;
+ let Inst{23-22} = dst{16-15};
+ let Inst{20-16} = dst{14-10};
+ let Inst{13} = dst{9};
+ let Inst{7-1} = dst{8-2};
+ }
+
+multiclass JMP_Pred<bit PredNot, string ExtStr> {
+ def NAME : T_JMP_c<PredNot, 0, 0, ExtStr>; // not taken
+ // Predicate new
+ def NAME#newpt : T_JMP_c<PredNot, 1, 1, ExtStr>; // taken
+ def NAME#new : T_JMP_c<PredNot, 1, 0, ExtStr>; // not taken
+}
+
+multiclass JMP_base<string BaseOp, string ExtStr> {
+ let BaseOpcode = BaseOp in {
+ def NAME : T_JMP<ExtStr>;
+ defm t : JMP_Pred<0, ExtStr>;
+ defm f : JMP_Pred<1, ExtStr>;
+ }
+}
+
+// Jumps to an address stored in a register, JUMPR_MISC.
+// if ([[!]P[.new]]) jumpr[:t/nt] Rs
+let isBranch = 1, isIndirectBranch = 1, isBarrier = 1, Defs = [PC],
+ isPredicable = 1, hasSideEffects = 0, InputType = "reg" in
+class T_JMPr
+ : JRInst<(outs), (ins IntRegs:$dst),
+ "jumpr $dst", [], "", J_tc_2early_SLOT2> {
+ bits<5> dst;
+
+ let IClass = 0b0101;
+ let Inst{27-21} = 0b0010100;
+ let Inst{20-16} = dst;
+}
+
+let isBranch = 1, isIndirectBranch = 1, Defs = [PC], isPredicated = 1,
+ hasSideEffects = 0, InputType = "reg" in
+class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak>
+ : JRInst <(outs), (ins PredRegs:$src, IntRegs:$dst),
+ CondStr<"$src", !if(PredNot,0,1), isPredNew>.S #
+ JumpOpcStr<"jumpr", isPredNew, isTak>.S # " $dst", [],
+ "", J_tc_2early_SLOT2> {
+
+ let isTaken = isTak;
+ let isPredicatedFalse = PredNot;
+ let isPredicatedNew = isPredNew;
+ bits<2> src;
+ bits<5> dst;
+
+ let IClass = 0b0101;
+
+ let Inst{27-22} = 0b001101;
+ let Inst{21} = PredNot;
+ let Inst{20-16} = dst;
+ let Inst{12} = isTak;
+ let Inst{11} = isPredNew;
+ let Inst{9-8} = src;
+}
+
+multiclass JMPR_Pred<bit PredNot> {
+ def NAME : T_JMPr_c<PredNot, 0, 0>; // not taken
+ // Predicate new
+ def NAME#newpt : T_JMPr_c<PredNot, 1, 1>; // taken
+ def NAME#new : T_JMPr_c<PredNot, 1, 0>; // not taken
+}
+
+multiclass JMPR_base<string BaseOp> {
+ let BaseOpcode = BaseOp in {
+ def NAME : T_JMPr;
+ defm t : JMPR_Pred<0>;
+ defm f : JMPR_Pred<1>;
+ }
+}
+
+let isCall = 1, hasSideEffects = 1 in
+class JUMPR_MISC_CALLR<bit isPred, bit isPredNot,
+ dag InputDag = (ins IntRegs:$Rs)>
+ : JRInst<(outs), InputDag,
+ !if(isPred, !if(isPredNot, "if (!$Pu) callr $Rs",
+ "if ($Pu) callr $Rs"),
+ "callr $Rs"),
+ [], "", J_tc_2early_SLOT2> {
+ bits<5> Rs;
+ bits<2> Pu;
+ let isPredicated = isPred;
+ let isPredicatedFalse = isPredNot;
+
+ let IClass = 0b0101;
+ let Inst{27-25} = 0b000;
+ let Inst{24-23} = !if (isPred, 0b10, 0b01);
+ let Inst{22} = 0;
+ let Inst{21} = isPredNot;
+ let Inst{9-8} = !if (isPred, Pu, 0b00);
+ let Inst{20-16} = Rs;
+
+ }
+
+let Defs = VolatileV3.Regs in {
+ def J2_callrt : JUMPR_MISC_CALLR<1, 0, (ins PredRegs:$Pu, IntRegs:$Rs)>;
+ def J2_callrf : JUMPR_MISC_CALLR<1, 1, (ins PredRegs:$Pu, IntRegs:$Rs)>;
+}
+
+let isTerminator = 1, hasSideEffects = 0 in {
+ defm J2_jump : JMP_base<"JMP", "">, PredNewRel;
+
+ // Deal with explicit assembly
+ // - never extend a jump #, always extend a jump ##
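+ // For example, "jump #label" is never emitted with a constant extender,
+ // while "jump ##label" always is; the isAsmParserOnly variants below exist
+ // only to honor that explicit syntax in hand-written assembly.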
+ let isAsmParserOnly = 1 in {
+ defm J2_jump_ext : JMP_base<"JMP", "##">;
+ defm J2_jump_noext : JMP_base<"JMP", "#">;
+ }
+
+ defm J2_jumpr : JMPR_base<"JMPr">, PredNewRel;
+
+ let isReturn = 1, isCodeGenOnly = 1 in
+ defm JMPret : JMPR_base<"JMPret">, PredNewRel;
+}
+
+def: Pat<(br bb:$dst),
+ (J2_jump brtarget:$dst)>;
+def: Pat<(retflag),
+ (JMPret (i32 R31))>;
+def: Pat<(brcond (i1 PredRegs:$src1), bb:$offset),
+ (J2_jumpt PredRegs:$src1, bb:$offset)>;
+
+// A return through builtin_eh_return.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasSideEffects = 0,
+ isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in
+def EH_RETURN_JMPR : T_JMPr;
+
+def: Pat<(eh_return),
+ (EH_RETURN_JMPR (i32 R31))>;
+def: Pat<(brind (i32 IntRegs:$dst)),
+ (J2_jumpr IntRegs:$dst)>;
+
+//===----------------------------------------------------------------------===//
+// JR -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LD +
+//===----------------------------------------------------------------------===//
+
+// Load - Base with Immediate offset addressing mode
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, AddedComplexity = 20 in
+class T_load_io <string mnemonic, RegisterClass RC, bits<4> MajOp,
+ Operand ImmOp>
+ : LDInst<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset),
+ "$dst = "#mnemonic#"($src1 + #$offset)", []>, AddrModeRel {
+ bits<4> name;
+ bits<5> dst;
+ bits<5> src1;
+ bits<14> offset;
+ bits<11> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "s11_3Ext"), offset{13-3},
+ !if (!eq(ImmOpStr, "s11_2Ext"), offset{12-2},
+ !if (!eq(ImmOpStr, "s11_1Ext"), offset{11-1},
+ /* s11_0Ext */ offset{10-0})));
+ let opExtentBits = !if (!eq(ImmOpStr, "s11_3Ext"), 14,
+ !if (!eq(ImmOpStr, "s11_2Ext"), 13,
+ !if (!eq(ImmOpStr, "s11_1Ext"), 12,
+ /* s11_0Ext */ 11)));
+ let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
+
+ let IClass = 0b1001;
+
+ let Inst{27} = 0b0;
+ let Inst{26-25} = offsetBits{10-9};
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13-5} = offsetBits{8-0};
+ let Inst{4-0} = dst;
+ }
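+// As an illustration of the offsetBits selection above: for a doubleword load
+// (ImmOp = s11_3Ext) the byte offset is a multiple of 8 and only offset{13-3},
+// i.e. offset/8, is encoded, so an offset of #40 is encoded as 5.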
+
+let opExtendable = 3, isExtentSigned = 0, isPredicated = 1 in
+class T_pload_io <string mnemonic, RegisterClass RC, bits<4>MajOp,
+ Operand ImmOp, bit isNot, bit isPredNew>
+ : LDInst<(outs RC:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
+ "if ("#!if(isNot, "!$src1", "$src1")
+ #!if(isPredNew, ".new", "")
+ #") $dst = "#mnemonic#"($src2 + #$offset)",
+ [],"", V2LDST_tc_ld_SLOT01> , AddrModeRel {
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+ bits<9> offset;
+ bits<6> offsetBits;
+ string ImmOpStr = !cast<string>(ImmOp);
+
+ let offsetBits = !if (!eq(ImmOpStr, "u6_3Ext"), offset{8-3},
+ !if (!eq(ImmOpStr, "u6_2Ext"), offset{7-2},
+ !if (!eq(ImmOpStr, "u6_1Ext"), offset{6-1},
+ /* u6_0Ext */ offset{5-0})));
+ let opExtentBits = !if (!eq(ImmOpStr, "u6_3Ext"), 9,
+ !if (!eq(ImmOpStr, "u6_2Ext"), 8,
+ !if (!eq(ImmOpStr, "u6_1Ext"), 7,
+ /* u6_0Ext */ 6)));
+ let hasNewValue = !if (!eq(ImmOpStr, "u6_3Ext"), 0, 1);
+ let isPredicatedNew = isPredNew;
+ let isPredicatedFalse = isNot;
+
+ let IClass = 0b0100;
+
+ let Inst{27} = 0b0;
+ let Inst{26} = isNot;
+ let Inst{25} = isPredNew;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13} = 0b0;
+ let Inst{12-11} = src1;
+ let Inst{10-5} = offsetBits;
+ let Inst{4-0} = dst;
+ }
+
+let isExtendable = 1, hasSideEffects = 0, addrMode = BaseImmOffset in
+multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, Operand predImmOp, bits<4>MajOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+ let isPredicable = 1 in
+ def L2_#NAME#_io : T_load_io <mnemonic, RC, MajOp, ImmOp>;
+
+ // Predicated
+ def L2_p#NAME#t_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 0, 0>;
+ def L2_p#NAME#f_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 1, 0>;
+
+ // Predicated new
+ def L2_p#NAME#tnew_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 0, 1>;
+ def L2_p#NAME#fnew_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 1, 1>;
+ }
+}
+
+let accessSize = ByteAccess in {
+ defm loadrb: LD_Idxd <"memb", "LDrib", IntRegs, s11_0Ext, u6_0Ext, 0b1000>;
+ defm loadrub: LD_Idxd <"memub", "LDriub", IntRegs, s11_0Ext, u6_0Ext, 0b1001>;
+}
+
+let accessSize = HalfWordAccess, opExtentAlign = 1 in {
+ defm loadrh: LD_Idxd <"memh", "LDrih", IntRegs, s11_1Ext, u6_1Ext, 0b1010>;
+ defm loadruh: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext, 0b1011>;
+}
+
+let accessSize = WordAccess, opExtentAlign = 2 in
+defm loadri: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext, 0b1100>;
+
+let accessSize = DoubleWordAccess, opExtentAlign = 3 in
+defm loadrd: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext, 0b1110>;
+
+let accessSize = HalfWordAccess, opExtentAlign = 1 in {
+ def L2_loadbsw2_io: T_load_io<"membh", IntRegs, 0b0001, s11_1Ext>;
+ def L2_loadbzw2_io: T_load_io<"memubh", IntRegs, 0b0011, s11_1Ext>;
+}
+
+let accessSize = WordAccess, opExtentAlign = 2 in {
+ def L2_loadbzw4_io: T_load_io<"memubh", DoubleRegs, 0b0101, s11_2Ext>;
+ def L2_loadbsw4_io: T_load_io<"membh", DoubleRegs, 0b0111, s11_2Ext>;
+}
+
+let addrMode = BaseImmOffset, isExtendable = 1, hasSideEffects = 0,
+ opExtendable = 3, isExtentSigned = 1 in
+class T_loadalign_io <string str, bits<4> MajOp, Operand ImmOp>
+ : LDInst<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset),
+ "$dst = "#str#"($src2 + #$offset)", [],
+ "$src1 = $dst">, AddrModeRel {
+ bits<4> name;
+ bits<5> dst;
+ bits<5> src2;
+ bits<12> offset;
+ bits<11> offsetBits;
+
+ let offsetBits = !if (!eq(!cast<string>(ImmOp), "s11_1Ext"), offset{11-1},
+ /* s11_0Ext */ offset{10-0});
+ let IClass = 0b1001;
+
+ let Inst{27} = 0b0;
+ let Inst{26-25} = offsetBits{10-9};
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13-5} = offsetBits{8-0};
+ let Inst{4-0} = dst;
+ }
+
+let accessSize = HalfWordAccess, opExtentBits = 12, opExtentAlign = 1 in
+def L2_loadalignh_io: T_loadalign_io <"memh_fifo", 0b0010, s11_1Ext>;
+
+let accessSize = ByteAccess, opExtentBits = 11 in
+def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>;
+
+// Patterns to select load-indexed (i.e. load from base+offset).
+multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
+ InstHexagon MI> {
+ def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>;
+ def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
+ (VT (MI AddrFI:$fi, imm:$Off))>;
+ def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))),
+ (VT (MI IntRegs:$Rs, imm:$Off))>;
+ def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>;
+}
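+// For example, Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io> maps a DAG
+// like (i32 (load (add R, 8))) to (L2_loadri_io R, 8), and a plain
+// (i32 (load R)) to (L2_loadri_io R, 0).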
+
+let AddedComplexity = 20 in {
+ defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>;
+ defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>;
+ defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>;
+ defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>;
+ defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>;
+
+ defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
+ defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>;
+ defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>;
+ defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
+ // No sextloadi1.
+}
+
+// Sign-extending loads of i1 need to replicate the lowest bit throughout
+// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should
+// do the trick.
+let AddedComplexity = 20 in
+def: Pat<(i32 (sextloadi1 (i32 IntRegs:$Rs))),
+ (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>;
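+// Worked example: a loaded i1 value of 1 gives 0 - 1 = 0xFFFFFFFF (i.e. -1,
+// all bits set), and a loaded 0 gives 0 - 0 = 0, which is exactly the
+// required sign extension.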
+
+//===----------------------------------------------------------------------===//
+// Post increment load
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Template class for non-predicated post increment loads with immediate offset.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, addrMode = PostInc in
+class T_load_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<4> MajOp >
+ : LDInstPI <(outs RC:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, ImmOp:$offset),
+ "$dst = "#mnemonic#"($src1++#$offset)" ,
+ [],
+ "$src1 = $dst2" > ,
+ PredNewRel {
+ bits<5> dst;
+ bits<5> src1;
+ bits<7> offset;
+ bits<4> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
+ !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
+ !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0})));
+ let hasNewValue = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1);
+
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b101;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13-12} = 0b00;
+ let Inst{8-5} = offsetBits;
+ let Inst{4-0} = dst;
+ }
+
+//===----------------------------------------------------------------------===//
+// Template class for predicated post increment loads with immediate offset.
+//===----------------------------------------------------------------------===//
+let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in
+class T_pload_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<4> MajOp, bit isPredNot, bit isPredNew >
+ : LDInst <(outs RC:$dst, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
+ !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($src2++#$offset)",
+ [] ,
+ "$src2 = $dst2" > ,
+ PredNewRel {
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+ bits<7> offset;
+ bits<4> offsetBits;
+
+ let isPredicatedNew = isPredNew;
+ let isPredicatedFalse = isPredNot;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
+ !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
+ !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0})));
+ let hasNewValue = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1);
+
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b101;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13} = 0b1;
+ let Inst{12} = isPredNew;
+ let Inst{11} = isPredNot;
+ let Inst{10-9} = src1;
+ let Inst{8-5} = offsetBits;
+ let Inst{4-0} = dst;
+ }
+
+//===----------------------------------------------------------------------===//
+// Multiclass for post increment loads with immediate offset.
+//===----------------------------------------------------------------------===//
+
+multiclass LD_PostInc <string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp, bits<4> MajOp> {
+ let BaseOpcode = "POST_"#BaseOp in {
+ let isPredicable = 1 in
+ def L2_#NAME#_pi : T_load_pi < mnemonic, RC, ImmOp, MajOp>;
+
+ // Predicated
+ def L2_p#NAME#t_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 0, 0>;
+ def L2_p#NAME#f_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 1, 0>;
+
+ // Predicated new
+ def L2_p#NAME#tnew_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 0, 1>;
+ def L2_p#NAME#fnew_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 1, 1>;
+ }
+}
+
+// post increment byte loads with immediate offset
+let accessSize = ByteAccess in {
+ defm loadrb : LD_PostInc <"memb", "LDrib", IntRegs, s4_0Imm, 0b1000>;
+ defm loadrub : LD_PostInc <"memub", "LDriub", IntRegs, s4_0Imm, 0b1001>;
+}
+
+// post increment halfword loads with immediate offset
+let accessSize = HalfWordAccess, opExtentAlign = 1 in {
+ defm loadrh : LD_PostInc <"memh", "LDrih", IntRegs, s4_1Imm, 0b1010>;
+ defm loadruh : LD_PostInc <"memuh", "LDriuh", IntRegs, s4_1Imm, 0b1011>;
+}
+
+// post increment word loads with immediate offset
+let accessSize = WordAccess, opExtentAlign = 2 in
+defm loadri : LD_PostInc <"memw", "LDriw", IntRegs, s4_2Imm, 0b1100>;
+
+// post increment doubleword loads with immediate offset
+let accessSize = DoubleWordAccess, opExtentAlign = 3 in
+defm loadrd : LD_PostInc <"memd", "LDrid", DoubleRegs, s4_3Imm, 0b1110>;
+
+// Rd=memb[u]h(Rx++#s4:1)
+// Rdd=memb[u]h(Rx++#s4:2)
+let accessSize = HalfWordAccess, opExtentAlign = 1 in {
+ def L2_loadbsw2_pi : T_load_pi <"membh", IntRegs, s4_1Imm, 0b0001>;
+ def L2_loadbzw2_pi : T_load_pi <"memubh", IntRegs, s4_1Imm, 0b0011>;
+}
+let accessSize = WordAccess, opExtentAlign = 2, hasNewValue = 0 in {
+ def L2_loadbsw4_pi : T_load_pi <"membh", DoubleRegs, s4_2Imm, 0b0111>;
+ def L2_loadbzw4_pi : T_load_pi <"memubh", DoubleRegs, s4_2Imm, 0b0101>;
+}
+
+//===----------------------------------------------------------------------===//
+// Template class for post increment fifo loads with immediate offset.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, addrMode = PostInc in
+class T_loadalign_pi <string mnemonic, Operand ImmOp, bits<4> MajOp >
+ : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$dst2),
+ (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset),
+ "$dst = "#mnemonic#"($src2++#$offset)" ,
+ [], "$src2 = $dst2, $src1 = $dst" > ,
+ PredNewRel {
+ bits<5> dst;
+ bits<5> src2;
+ bits<5> offset;
+ bits<4> offsetBits;
+
+ let offsetBits = !if (!eq(!cast<string>(ImmOp), "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0});
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b101;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13-12} = 0b00;
+ let Inst{8-5} = offsetBits;
+ let Inst{4-0} = dst;
+ }
+
+// Ryy=memh_fifo(Rx++#s4:1)
+// Ryy=memb_fifo(Rx++#s4:0)
+let accessSize = ByteAccess in
+def L2_loadalignb_pi : T_loadalign_pi <"memb_fifo", s4_0Imm, 0b0100>;
+
+let accessSize = HalfWordAccess, opExtentAlign = 1 in
+def L2_loadalignh_pi : T_loadalign_pi <"memh_fifo", s4_1Imm, 0b0010>;
+
+//===----------------------------------------------------------------------===//
+// Template class for post increment loads with register offset.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, addrMode = PostInc in
+class T_load_pr <string mnemonic, RegisterClass RC, bits<4> MajOp,
+ MemAccessSize AccessSz>
+ : LDInstPI <(outs RC:$dst, IntRegs:$_dst_),
+ (ins IntRegs:$src1, ModRegs:$src2),
+ "$dst = "#mnemonic#"($src1++$src2)" ,
+ [], "$src1 = $_dst_" > {
+ bits<5> dst;
+ bits<5> src1;
+ bits<1> src2;
+
+ let accessSize = AccessSz;
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b110;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13} = src2;
+ let Inst{12} = 0b0;
+ let Inst{7} = 0b0;
+ let Inst{4-0} = dst;
+ }
+
+let hasNewValue = 1 in {
+ def L2_loadrb_pr : T_load_pr <"memb", IntRegs, 0b1000, ByteAccess>;
+ def L2_loadrub_pr : T_load_pr <"memub", IntRegs, 0b1001, ByteAccess>;
+ def L2_loadrh_pr : T_load_pr <"memh", IntRegs, 0b1010, HalfWordAccess>;
+ def L2_loadruh_pr : T_load_pr <"memuh", IntRegs, 0b1011, HalfWordAccess>;
+ def L2_loadri_pr : T_load_pr <"memw", IntRegs, 0b1100, WordAccess>;
+
+ def L2_loadbzw2_pr : T_load_pr <"memubh", IntRegs, 0b0011, HalfWordAccess>;
+}
+
+def L2_loadrd_pr : T_load_pr <"memd", DoubleRegs, 0b1110, DoubleWordAccess>;
+def L2_loadbzw4_pr : T_load_pr <"memubh", DoubleRegs, 0b0101, WordAccess>;
+
+// Load predicate.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
+def LDriw_pred : LDInst<(outs PredRegs:$dst),
+ (ins IntRegs:$addr, s11_2Ext:$off),
+ ".error \"should not emit\"", []>;
+
+let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0 in
+ def L2_deallocframe : LDInst<(outs), (ins),
+ "deallocframe",
+ []> {
+ let IClass = 0b1001;
+
+ let Inst{27-16} = 0b000000011110;
+ let Inst{13} = 0b0;
+ let Inst{4-0} = 0b11110;
+}
+
+// Load / Post increment circular addressing mode.
+let Uses = [CS], hasSideEffects = 0 in
+class T_load_pcr<string mnemonic, RegisterClass RC, bits<4> MajOp>
+ : LDInst <(outs RC:$dst, IntRegs:$_dst_),
+ (ins IntRegs:$Rz, ModRegs:$Mu),
+ "$dst = "#mnemonic#"($Rz ++ I:circ($Mu))", [],
+ "$Rz = $_dst_" > {
+ bits<5> dst;
+ bits<5> Rz;
+ bit Mu;
+
+ let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b100;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12} = 0b0;
+ let Inst{9} = 0b1;
+ let Inst{7} = 0b0;
+ let Inst{4-0} = dst;
+ }
+
+let accessSize = ByteAccess in {
+ def L2_loadrb_pcr : T_load_pcr <"memb", IntRegs, 0b1000>;
+ def L2_loadrub_pcr : T_load_pcr <"memub", IntRegs, 0b1001>;
+}
+
+let accessSize = HalfWordAccess in {
+ def L2_loadrh_pcr : T_load_pcr <"memh", IntRegs, 0b1010>;
+ def L2_loadruh_pcr : T_load_pcr <"memuh", IntRegs, 0b1011>;
+ def L2_loadbsw2_pcr : T_load_pcr <"membh", IntRegs, 0b0001>;
+ def L2_loadbzw2_pcr : T_load_pcr <"memubh", IntRegs, 0b0011>;
+}
+
+let accessSize = WordAccess in {
+ def L2_loadri_pcr : T_load_pcr <"memw", IntRegs, 0b1100>;
+ let hasNewValue = 0 in {
+ def L2_loadbzw4_pcr : T_load_pcr <"memubh", DoubleRegs, 0b0101>;
+ def L2_loadbsw4_pcr : T_load_pcr <"membh", DoubleRegs, 0b0111>;
+ }
+}
+
+let accessSize = DoubleWordAccess in
+def L2_loadrd_pcr : T_load_pcr <"memd", DoubleRegs, 0b1110>;
+
+// Load / Post increment circular addressing mode.
+let Uses = [CS], hasSideEffects = 0 in
+class T_loadalign_pcr<string mnemonic, bits<4> MajOp, MemAccessSize AccessSz >
+ : LDInst <(outs DoubleRegs:$dst, IntRegs:$_dst_),
+ (ins DoubleRegs:$_src_, IntRegs:$Rz, ModRegs:$Mu),
+ "$dst = "#mnemonic#"($Rz ++ I:circ($Mu))", [],
+ "$Rz = $_dst_, $dst = $_src_" > {
+ bits<5> dst;
+ bits<5> Rz;
+ bit Mu;
+
+ let accessSize = AccessSz;
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b100;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12} = 0b0;
+ let Inst{9} = 0b1;
+ let Inst{7} = 0b0;
+ let Inst{4-0} = dst;
+ }
+
+def L2_loadalignb_pcr : T_loadalign_pcr <"memb_fifo", 0b0100, ByteAccess>;
+def L2_loadalignh_pcr : T_loadalign_pcr <"memh_fifo", 0b0010, HalfWordAccess>;
+
+//===----------------------------------------------------------------------===//
+// Circular loads with immediate offset.
+//===----------------------------------------------------------------------===//
+let Uses = [CS], mayLoad = 1, hasSideEffects = 0 in
+class T_load_pci <string mnemonic, RegisterClass RC,
+ Operand ImmOp, bits<4> MajOp>
+ : LDInstPI<(outs RC:$dst, IntRegs:$_dst_),
+ (ins IntRegs:$Rz, ImmOp:$offset, ModRegs:$Mu),
+ "$dst = "#mnemonic#"($Rz ++ #$offset:circ($Mu))", [],
+ "$Rz = $_dst_"> {
+ bits<5> dst;
+ bits<5> Rz;
+ bits<1> Mu;
+ bits<7> offset;
+ bits<4> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
+ let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
+ !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
+ !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0})));
+ let IClass = 0b1001;
+ let Inst{27-25} = 0b100;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12} = 0b0;
+ let Inst{9} = 0b0;
+ let Inst{8-5} = offsetBits;
+ let Inst{4-0} = dst;
+ }
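+
+// Offset encoding sketch (derived from the !if chain above): the offset must
+// be a multiple of the access size, so only the bits above the implied
+// alignment are encoded. For example, with s4_1Imm an offset of #6 yields
+// offsetBits = offset{4-1} = 0b0011, and with s4_2Imm an offset of #8 yields
+// offsetBits = offset{5-2} = 0b0010.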
+
+// Byte variants of circ load
+let accessSize = ByteAccess in {
+ def L2_loadrb_pci : T_load_pci <"memb", IntRegs, s4_0Imm, 0b1000>;
+ def L2_loadrub_pci : T_load_pci <"memub", IntRegs, s4_0Imm, 0b1001>;
+}
+
+// Half word variants of circ load
+let accessSize = HalfWordAccess in {
+ def L2_loadrh_pci : T_load_pci <"memh", IntRegs, s4_1Imm, 0b1010>;
+ def L2_loadruh_pci : T_load_pci <"memuh", IntRegs, s4_1Imm, 0b1011>;
+ def L2_loadbzw2_pci : T_load_pci <"memubh", IntRegs, s4_1Imm, 0b0011>;
+ def L2_loadbsw2_pci : T_load_pci <"membh", IntRegs, s4_1Imm, 0b0001>;
+}
+
+// Word variants of circ load
+let accessSize = WordAccess in
+def L2_loadri_pci : T_load_pci <"memw", IntRegs, s4_2Imm, 0b1100>;
+
+let accessSize = WordAccess, hasNewValue = 0 in {
+ def L2_loadbzw4_pci : T_load_pci <"memubh", DoubleRegs, s4_2Imm, 0b0101>;
+ def L2_loadbsw4_pci : T_load_pci <"membh", DoubleRegs, s4_2Imm, 0b0111>;
+}
+
+let accessSize = DoubleWordAccess, hasNewValue = 0 in
+def L2_loadrd_pci : T_load_pci <"memd", DoubleRegs, s4_3Imm, 0b1110>;
+
+//===----------------------------------------------------------------------===//
+// Circular loads - Pseudo
+//
+// Please note that the input operand order in the pseudo instructions
+// doesn't match that of the real instructions. The pseudo instructions'
+// operand order mimics the ordering in the intrinsics. Also, 'src2' doesn't
+// appear in the AsmString because it's the same as 'dst'.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in
+class T_load_pci_pseudo <string opc, RegisterClass RC>
+ : LDInstPI<(outs IntRegs:$_dst_, RC:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4Imm:$src4),
+ ".error \"$dst = "#opc#"($src1++#$src4:circ($src3))\"",
+ [], "$src1 = $_dst_">;
+
+def L2_loadrb_pci_pseudo : T_load_pci_pseudo <"memb", IntRegs>;
+def L2_loadrub_pci_pseudo : T_load_pci_pseudo <"memub", IntRegs>;
+def L2_loadrh_pci_pseudo : T_load_pci_pseudo <"memh", IntRegs>;
+def L2_loadruh_pci_pseudo : T_load_pci_pseudo <"memuh", IntRegs>;
+def L2_loadri_pci_pseudo : T_load_pci_pseudo <"memw", IntRegs>;
+def L2_loadrd_pci_pseudo : T_load_pci_pseudo <"memd", DoubleRegs>;
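+
+// Operand layout sketch for these pseudos (see the note above): $src1 is the
+// base pointer (tied to the updated-base output $_dst_), $src2 mirrors the
+// destination and is therefore omitted from the AsmString, $src3 is the
+// modifier register and $src4 the immediate increment -- i.e. the intrinsic
+// operand order, rather than the (base, offset, modifier) order used by
+// T_load_pci above.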
+
+
+// TODO: memb_fifo and memh_fifo must take destination register as input.
+// One-off circ loads - not enough in common to break into a class.
+let accessSize = ByteAccess in
+def L2_loadalignb_pci : T_load_pci <"memb_fifo", DoubleRegs, s4_0Imm, 0b0100>;
+
+let accessSize = HalfWordAccess, opExtentAlign = 1 in
+def L2_loadalignh_pci : T_load_pci <"memh_fifo", DoubleRegs, s4_1Imm, 0b0010>;
+
+// L[24]_load[wd]_locked: Load word/double with lock.
+let isSoloAX = 1 in
+class T_load_locked <string mnemonic, RegisterClass RC>
+ : LD0Inst <(outs RC:$dst),
+ (ins IntRegs:$src),
+ "$dst = "#mnemonic#"($src)"> {
+ bits<5> dst;
+ bits<5> src;
+ let IClass = 0b1001;
+ let Inst{27-21} = 0b0010000;
+ let Inst{20-16} = src;
+ let Inst{13-12} = !if (!eq(mnemonic, "memd_locked"), 0b01, 0b00);
+ let Inst{5} = 0;
+ let Inst{4-0} = dst;
+}
+let hasNewValue = 1, accessSize = WordAccess, opNewValue = 0 in
+ def L2_loadw_locked : T_load_locked <"memw_locked", IntRegs>;
+let accessSize = DoubleWordAccess in
+ def L4_loadd_locked : T_load_locked <"memd_locked", DoubleRegs>;
+
+// S[24]_store[wd]_locked: Store word/double conditionally.
+let isSoloAX = 1, isPredicateLate = 1 in
+class T_store_locked <string mnemonic, RegisterClass RC>
+ : ST0Inst <(outs PredRegs:$Pd), (ins IntRegs:$Rs, RC:$Rt),
+ mnemonic#"($Rs, $Pd) = $Rt"> {
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1010;
+ let Inst{27-23} = 0b00001;
+ let Inst{22} = !if (!eq(mnemonic, "memw_locked"), 0b0, 0b1);
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{1-0} = Pd;
+}
+
+let accessSize = WordAccess in
+def S2_storew_locked : T_store_locked <"memw_locked", IntRegs>;
+
+let accessSize = DoubleWordAccess in
+def S4_stored_locked : T_store_locked <"memd_locked", DoubleRegs>;
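+
+// Usage sketch (illustrative only, not generated from these defs): the locked
+// load and the conditional store are typically paired to form an atomic
+// read-modify-write loop, roughly:
+//   loop:
+//     r2 = memw_locked(r0)          // L2_loadw_locked
+//     r2 = add(r2, #1)              // modify the loaded value
+//     memw_locked(r0, p0) = r2      // S2_storew_locked; p0 true on success
+//     if (!p0) jump loop            // retry if the reservation was lost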
+
+//===----------------------------------------------------------------------===//
+// Bit-reversed loads with auto-increment register
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in
+class T_load_pbr<string mnemonic, RegisterClass RC,
+ MemAccessSize addrSize, bits<4> majOp>
+ : LDInst
+ <(outs RC:$dst, IntRegs:$_dst_),
+ (ins IntRegs:$Rz, ModRegs:$Mu),
+ "$dst = "#mnemonic#"($Rz ++ $Mu:brev)" ,
+ [] , "$Rz = $_dst_" > {
+
+ let accessSize = addrSize;
+
+ bits<5> dst;
+ bits<5> Rz;
+ bits<1> Mu;
+
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b111;
+ let Inst{24-21} = majOp;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12} = 0b0;
+ let Inst{7} = 0b0;
+ let Inst{4-0} = dst;
+ }
+
+let hasNewValue = 1, opNewValue = 0 in {
+ def L2_loadrb_pbr : T_load_pbr <"memb", IntRegs, ByteAccess, 0b1000>;
+ def L2_loadrub_pbr : T_load_pbr <"memub", IntRegs, ByteAccess, 0b1001>;
+ def L2_loadrh_pbr : T_load_pbr <"memh", IntRegs, HalfWordAccess, 0b1010>;
+ def L2_loadruh_pbr : T_load_pbr <"memuh", IntRegs, HalfWordAccess, 0b1011>;
+ def L2_loadbsw2_pbr : T_load_pbr <"membh", IntRegs, HalfWordAccess, 0b0001>;
+ def L2_loadbzw2_pbr : T_load_pbr <"memubh", IntRegs, HalfWordAccess, 0b0011>;
+ def L2_loadri_pbr : T_load_pbr <"memw", IntRegs, WordAccess, 0b1100>;
+}
+
+def L2_loadbzw4_pbr : T_load_pbr <"memubh", DoubleRegs, WordAccess, 0b0101>;
+def L2_loadbsw4_pbr : T_load_pbr <"membh", DoubleRegs, WordAccess, 0b0111>;
+def L2_loadrd_pbr : T_load_pbr <"memd", DoubleRegs, DoubleWordAccess, 0b1110>;
+
+def L2_loadalignb_pbr :T_load_pbr <"memb_fifo", DoubleRegs, ByteAccess, 0b0100>;
+def L2_loadalignh_pbr :T_load_pbr <"memh_fifo", DoubleRegs,
+ HalfWordAccess, 0b0010>;
+
+//===----------------------------------------------------------------------===//
+// Bit-reversed loads - Pseudo
+//
+// Please note that 'src2' doesn't appear in the AsmString because
+// it's the same as 'dst'.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in
+class T_load_pbr_pseudo <string opc, RegisterClass RC>
+ : LDInstPI<(outs IntRegs:$_dst_, RC:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ ".error \"$dst = "#opc#"($src1++$src3:brev)\"",
+ [], "$src1 = $_dst_">;
+
+def L2_loadrb_pbr_pseudo : T_load_pbr_pseudo <"memb", IntRegs>;
+def L2_loadrub_pbr_pseudo : T_load_pbr_pseudo <"memub", IntRegs>;
+def L2_loadrh_pbr_pseudo : T_load_pbr_pseudo <"memh", IntRegs>;
+def L2_loadruh_pbr_pseudo : T_load_pbr_pseudo <"memuh", IntRegs>;
+def L2_loadri_pbr_pseudo : T_load_pbr_pseudo <"memw", IntRegs>;
+def L2_loadrd_pbr_pseudo : T_load_pbr_pseudo <"memd", DoubleRegs>;
+
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/ALU +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/COMPLEX +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/COMPLEX -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYH +
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Template Class
+// MPYS / Multiply signed/unsigned halfwords
+//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
+//===----------------------------------------------------------------------===//
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_M2_mpy < bits<2> LHbits, bit isSat, bit isRnd,
+ bit hasShift, bit isUnsigned>
+ : MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = "#!if(isUnsigned,"mpyu","mpy")#"($Rs."#!if(LHbits{1},"h","l")
+ #", $Rt."#!if(LHbits{0},"h)","l)")
+ #!if(hasShift,":<<1","")
+ #!if(isRnd,":rnd","")
+ #!if(isSat,":sat",""),
+ [], "", M_tc_3x_SLOT23 > {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1100;
+ let Inst{23} = hasShift;
+ let Inst{22} = isUnsigned;
+ let Inst{21} = isRnd;
+ let Inst{7} = isSat;
+ let Inst{6-5} = LHbits;
+ let Inst{4-0} = Rd;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ }
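+
+// Naming/asm sketch (derived from the template string above): the ll/lh/hl/hh
+// suffixes pick the low/high halfword of $Rs and $Rt via LHbits, and s0/s1
+// select hasShift. For example, M2_mpy_hl_s1 below prints as
+// "Rd = mpy(Rs.h, Rt.l):<<1".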
+
+//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]
+def M2_mpy_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 0>;
+def M2_mpy_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 0>;
+def M2_mpy_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 0>;
+def M2_mpy_lh_s0: T_M2_mpy<0b01, 0, 0, 0, 0>;
+def M2_mpy_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 0>;
+def M2_mpy_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 0>;
+def M2_mpy_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 0>;
+def M2_mpy_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 0>;
+
+//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
+def M2_mpyu_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 1>;
+def M2_mpyu_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 1>;
+def M2_mpyu_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 1>;
+def M2_mpyu_lh_s0: T_M2_mpy<0b01, 0, 0, 0, 1>;
+def M2_mpyu_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 1>;
+def M2_mpyu_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 1>;
+def M2_mpyu_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 1>;
+def M2_mpyu_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 1>;
+
+//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]:rnd
+def M2_mpy_rnd_ll_s1: T_M2_mpy <0b00, 0, 1, 1, 0>;
+def M2_mpy_rnd_ll_s0: T_M2_mpy <0b00, 0, 1, 0, 0>;
+def M2_mpy_rnd_lh_s1: T_M2_mpy <0b01, 0, 1, 1, 0>;
+def M2_mpy_rnd_lh_s0: T_M2_mpy <0b01, 0, 1, 0, 0>;
+def M2_mpy_rnd_hl_s1: T_M2_mpy <0b10, 0, 1, 1, 0>;
+def M2_mpy_rnd_hl_s0: T_M2_mpy <0b10, 0, 1, 0, 0>;
+def M2_mpy_rnd_hh_s1: T_M2_mpy <0b11, 0, 1, 1, 0>;
+def M2_mpy_rnd_hh_s0: T_M2_mpy <0b11, 0, 1, 0, 0>;
+
+//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:sat]
+//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
+let Defs = [USR_OVF] in {
+ def M2_mpy_sat_ll_s1: T_M2_mpy <0b00, 1, 0, 1, 0>;
+ def M2_mpy_sat_ll_s0: T_M2_mpy <0b00, 1, 0, 0, 0>;
+ def M2_mpy_sat_lh_s1: T_M2_mpy <0b01, 1, 0, 1, 0>;
+ def M2_mpy_sat_lh_s0: T_M2_mpy <0b01, 1, 0, 0, 0>;
+ def M2_mpy_sat_hl_s1: T_M2_mpy <0b10, 1, 0, 1, 0>;
+ def M2_mpy_sat_hl_s0: T_M2_mpy <0b10, 1, 0, 0, 0>;
+ def M2_mpy_sat_hh_s1: T_M2_mpy <0b11, 1, 0, 1, 0>;
+ def M2_mpy_sat_hh_s0: T_M2_mpy <0b11, 1, 0, 0, 0>;
+
+ def M2_mpy_sat_rnd_ll_s1: T_M2_mpy <0b00, 1, 1, 1, 0>;
+ def M2_mpy_sat_rnd_ll_s0: T_M2_mpy <0b00, 1, 1, 0, 0>;
+ def M2_mpy_sat_rnd_lh_s1: T_M2_mpy <0b01, 1, 1, 1, 0>;
+ def M2_mpy_sat_rnd_lh_s0: T_M2_mpy <0b01, 1, 1, 0, 0>;
+ def M2_mpy_sat_rnd_hl_s1: T_M2_mpy <0b10, 1, 1, 1, 0>;
+ def M2_mpy_sat_rnd_hl_s0: T_M2_mpy <0b10, 1, 1, 0, 0>;
+ def M2_mpy_sat_rnd_hh_s1: T_M2_mpy <0b11, 1, 1, 1, 0>;
+ def M2_mpy_sat_rnd_hh_s0: T_M2_mpy <0b11, 1, 1, 0, 0>;
+}
+
+//===----------------------------------------------------------------------===//
+// Template Class
+// MPYS / Multiply signed/unsigned halfwords and add/subtract the
+// result from the accumulator.
+//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
+//===----------------------------------------------------------------------===//
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_M2_mpy_acc < bits<2> LHbits, bit isSat, bit isNac,
+ bit hasShift, bit isUnsigned >
+ : MInst_acc<(outs IntRegs:$Rx), (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
+ "$Rx "#!if(isNac,"-= ","+= ")#!if(isUnsigned,"mpyu","mpy")
+ #"($Rs."#!if(LHbits{1},"h","l")
+ #", $Rt."#!if(LHbits{0},"h)","l)")
+ #!if(hasShift,":<<1","")
+ #!if(isSat,":sat",""),
+ [], "$dst2 = $Rx", M_tc_3x_SLOT23 > {
+ bits<5> Rx;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+ let Inst{27-24} = 0b1110;
+ let Inst{23} = hasShift;
+ let Inst{22} = isUnsigned;
+ let Inst{21} = isNac;
+ let Inst{7} = isSat;
+ let Inst{6-5} = LHbits;
+ let Inst{4-0} = Rx;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ }
+
+//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]
+def M2_mpy_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 0>;
+def M2_mpy_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 0>;
+def M2_mpy_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 0>;
+def M2_mpy_acc_lh_s0: T_M2_mpy_acc <0b01, 0, 0, 0, 0>;
+def M2_mpy_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 0>;
+def M2_mpy_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 0>;
+def M2_mpy_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 0>;
+def M2_mpy_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 0>;
+
+//Rx += mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
+def M2_mpyu_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 1>;
+def M2_mpyu_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 1>;
+def M2_mpyu_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 1>;
+def M2_mpyu_acc_lh_s0: T_M2_mpy_acc <0b01, 0, 0, 0, 1>;
+def M2_mpyu_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 1>;
+def M2_mpyu_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 1>;
+def M2_mpyu_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 1>;
+def M2_mpyu_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 1>;
+
+//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]
+def M2_mpy_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 0>;
+def M2_mpy_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 0>;
+def M2_mpy_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 0>;
+def M2_mpy_nac_lh_s0: T_M2_mpy_acc <0b01, 0, 1, 0, 0>;
+def M2_mpy_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 0>;
+def M2_mpy_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 0>;
+def M2_mpy_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 0>;
+def M2_mpy_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 0>;
+
+//Rx -= mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
+def M2_mpyu_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 1>;
+def M2_mpyu_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 1>;
+def M2_mpyu_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 1>;
+def M2_mpyu_nac_lh_s0: T_M2_mpy_acc <0b01, 0, 1, 0, 1>;
+def M2_mpyu_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 1>;
+def M2_mpyu_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 1>;
+def M2_mpyu_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 1>;
+def M2_mpyu_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 1>;
+
+//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat
+def M2_mpy_acc_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 0, 1, 0>;
+def M2_mpy_acc_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 0, 0, 0>;
+def M2_mpy_acc_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 0, 1, 0>;
+def M2_mpy_acc_sat_lh_s0: T_M2_mpy_acc <0b01, 1, 0, 0, 0>;
+def M2_mpy_acc_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 0, 1, 0>;
+def M2_mpy_acc_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 0, 0, 0>;
+def M2_mpy_acc_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 0, 1, 0>;
+def M2_mpy_acc_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 0, 0, 0>;
+
+//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat
+def M2_mpy_nac_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 1, 1, 0>;
+def M2_mpy_nac_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 1, 0, 0>;
+def M2_mpy_nac_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 1, 1, 0>;
+def M2_mpy_nac_sat_lh_s0: T_M2_mpy_acc <0b01, 1, 1, 0, 0>;
+def M2_mpy_nac_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 1, 1, 0>;
+def M2_mpy_nac_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 1, 0, 0>;
+def M2_mpy_nac_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 1, 1, 0>;
+def M2_mpy_nac_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 1, 0, 0>;
+
+//===----------------------------------------------------------------------===//
+// Template Class
+// MPYS / Multiply signed/unsigned halfwords and add/subtract the
+// result from the 64-bit destination register.
+//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
+//===----------------------------------------------------------------------===//
+
+class T_M2_mpyd_acc < bits<2> LHbits, bit isNac, bit hasShift, bit isUnsigned>
+ : MInst_acc<(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
+ "$Rxx "#!if(isNac,"-= ","+= ")#!if(isUnsigned,"mpyu","mpy")
+ #"($Rs."#!if(LHbits{1},"h","l")
+ #", $Rt."#!if(LHbits{0},"h)","l)")
+ #!if(hasShift,":<<1",""),
+ [], "$dst2 = $Rxx", M_tc_3x_SLOT23 > {
+ bits<5> Rxx;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b0110;
+ let Inst{23} = hasShift;
+ let Inst{22} = isUnsigned;
+ let Inst{21} = isNac;
+ let Inst{7} = 0;
+ let Inst{6-5} = LHbits;
+ let Inst{4-0} = Rxx;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ }
+
+def M2_mpyd_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 0>;
+def M2_mpyd_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 0>;
+def M2_mpyd_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 0>;
+def M2_mpyd_acc_ll_s0: T_M2_mpyd_acc <0b00, 0, 0, 0>;
+
+def M2_mpyd_acc_hh_s1: T_M2_mpyd_acc <0b11, 0, 1, 0>;
+def M2_mpyd_acc_hl_s1: T_M2_mpyd_acc <0b10, 0, 1, 0>;
+def M2_mpyd_acc_lh_s1: T_M2_mpyd_acc <0b01, 0, 1, 0>;
+def M2_mpyd_acc_ll_s1: T_M2_mpyd_acc <0b00, 0, 1, 0>;
+
+def M2_mpyd_nac_hh_s0: T_M2_mpyd_acc <0b11, 1, 0, 0>;
+def M2_mpyd_nac_hl_s0: T_M2_mpyd_acc <0b10, 1, 0, 0>;
+def M2_mpyd_nac_lh_s0: T_M2_mpyd_acc <0b01, 1, 0, 0>;
+def M2_mpyd_nac_ll_s0: T_M2_mpyd_acc <0b00, 1, 0, 0>;
+
+def M2_mpyd_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 0>;
+def M2_mpyd_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 0>;
+def M2_mpyd_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 0>;
+def M2_mpyd_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 0>;
+
+def M2_mpyud_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 1>;
+def M2_mpyud_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 1>;
+def M2_mpyud_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 1>;
+def M2_mpyud_acc_ll_s0: T_M2_mpyd_acc <0b00, 0, 0, 1>;
+
+def M2_mpyud_acc_hh_s1: T_M2_mpyd_acc <0b11, 0, 1, 1>;
+def M2_mpyud_acc_hl_s1: T_M2_mpyd_acc <0b10, 0, 1, 1>;
+def M2_mpyud_acc_lh_s1: T_M2_mpyd_acc <0b01, 0, 1, 1>;
+def M2_mpyud_acc_ll_s1: T_M2_mpyd_acc <0b00, 0, 1, 1>;
+
+def M2_mpyud_nac_hh_s0: T_M2_mpyd_acc <0b11, 1, 0, 1>;
+def M2_mpyud_nac_hl_s0: T_M2_mpyd_acc <0b10, 1, 0, 1>;
+def M2_mpyud_nac_lh_s0: T_M2_mpyd_acc <0b01, 1, 0, 1>;
+def M2_mpyud_nac_ll_s0: T_M2_mpyd_acc <0b00, 1, 0, 1>;
+
+def M2_mpyud_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 1>;
+def M2_mpyud_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 1>;
+def M2_mpyud_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 1>;
+def M2_mpyud_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 1>;
+
+//===----------------------------------------------------------------------===//
+// Template Class -- Vector Multiply
+// Used for complex multiply real or imaginary, dual multiply and even halfwords
+//===----------------------------------------------------------------------===//
+class T_M2_vmpy < string opc, bits<3> MajOp, bits<3> MinOp, bit hasShift,
+ bit isRnd, bit isSat >
+ : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rdd = "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
+ #!if(isRnd,":rnd","")
+ #!if(isSat,":sat",""),
+ [] > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1000;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rdd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat
+let Defs = [USR_OVF] in {
+def M2_vcmpy_s1_sat_i: T_M2_vmpy <"vcmpyi", 0b110, 0b110, 1, 0, 1>;
+def M2_vcmpy_s0_sat_i: T_M2_vmpy <"vcmpyi", 0b010, 0b110, 0, 0, 1>;
+
+// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat
+def M2_vcmpy_s1_sat_r: T_M2_vmpy <"vcmpyr", 0b101, 0b110, 1, 0, 1>;
+def M2_vcmpy_s0_sat_r: T_M2_vmpy <"vcmpyr", 0b001, 0b110, 0, 0, 1>;
+
+// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat
+def M2_vdmpys_s1: T_M2_vmpy <"vdmpy", 0b100, 0b100, 1, 0, 1>;
+def M2_vdmpys_s0: T_M2_vmpy <"vdmpy", 0b000, 0b100, 0, 0, 1>;
+
+// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat
+def M2_vmpy2es_s1: T_M2_vmpy <"vmpyeh", 0b100, 0b110, 1, 0, 1>;
+def M2_vmpy2es_s0: T_M2_vmpy <"vmpyeh", 0b000, 0b110, 0, 0, 1>;
+
+//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmpyh_s0: T_M2_vmpy <"vmpywoh", 0b000, 0b111, 0, 0, 1>;
+def M2_mmpyh_s1: T_M2_vmpy <"vmpywoh", 0b100, 0b111, 1, 0, 1>;
+def M2_mmpyh_rs0: T_M2_vmpy <"vmpywoh", 0b001, 0b111, 0, 1, 1>;
+def M2_mmpyh_rs1: T_M2_vmpy <"vmpywoh", 0b101, 0b111, 1, 1, 1>;
+
+//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmpyl_s0: T_M2_vmpy <"vmpyweh", 0b000, 0b101, 0, 0, 1>;
+def M2_mmpyl_s1: T_M2_vmpy <"vmpyweh", 0b100, 0b101, 1, 0, 1>;
+def M2_mmpyl_rs0: T_M2_vmpy <"vmpyweh", 0b001, 0b101, 0, 1, 1>;
+def M2_mmpyl_rs1: T_M2_vmpy <"vmpyweh", 0b101, 0b101, 1, 1, 1>;
+
+//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmpyuh_s0: T_M2_vmpy <"vmpywouh", 0b010, 0b111, 0, 0, 1>;
+def M2_mmpyuh_s1: T_M2_vmpy <"vmpywouh", 0b110, 0b111, 1, 0, 1>;
+def M2_mmpyuh_rs0: T_M2_vmpy <"vmpywouh", 0b011, 0b111, 0, 1, 1>;
+def M2_mmpyuh_rs1: T_M2_vmpy <"vmpywouh", 0b111, 0b111, 1, 1, 1>;
+
+//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmpyul_s0: T_M2_vmpy <"vmpyweuh", 0b010, 0b101, 0, 0, 1>;
+def M2_mmpyul_s1: T_M2_vmpy <"vmpyweuh", 0b110, 0b101, 1, 0, 1>;
+def M2_mmpyul_rs0: T_M2_vmpy <"vmpyweuh", 0b011, 0b101, 0, 1, 1>;
+def M2_mmpyul_rs1: T_M2_vmpy <"vmpyweuh", 0b111, 0b101, 1, 1, 1>;
+}
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_MType_mpy <string mnemonic, bits<4> RegTyBits, RegisterClass RC,
+ bits<3> MajOp, bits<3> MinOp, bit isSat = 0, bit isRnd = 0,
+ string op2Suffix = "", bit isRaw = 0, bit isHi = 0 >
+ : MInst <(outs IntRegs:$dst), (ins RC:$src1, RC:$src2),
+ "$dst = "#mnemonic
+ #"($src1, $src2"#op2Suffix#")"
+ #!if(MajOp{2}, ":<<1", "")
+ #!if(isRnd, ":rnd", "")
+ #!if(isSat, ":sat", "")
+ #!if(isRaw, !if(isHi, ":raw:hi", ":raw:lo"), ""), [] > {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = RegTyBits;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = src2;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = dst;
+ }
+
+class T_MType_vrcmpy <string mnemonic, bits<3> MajOp, bits<3> MinOp, bit isHi>
+ : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, 1, 1, "", 1, isHi>;
+
+class T_MType_dd <string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit isSat = 0, bit isRnd = 0 >
+ : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, isSat, isRnd>;
+
+class T_MType_rr1 <string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit isSat = 0, bit isRnd = 0 >
+ : T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd>;
+
+class T_MType_rr2 <string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit isSat = 0, bit isRnd = 0, string op2str = "" >
+ : T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd, op2str>;
+
+def M2_vradduh : T_MType_dd <"vradduh", 0b000, 0b001, 0, 0>;
+def M2_vdmpyrs_s0 : T_MType_dd <"vdmpy", 0b000, 0b000, 1, 1>;
+def M2_vdmpyrs_s1 : T_MType_dd <"vdmpy", 0b100, 0b000, 1, 1>;
+
+let CextOpcode = "mpyi", InputType = "reg" in
+def M2_mpyi : T_MType_rr1 <"mpyi", 0b000, 0b000>, ImmRegRel;
+
+def M2_mpy_up : T_MType_rr1 <"mpy", 0b000, 0b001>;
+def M2_mpyu_up : T_MType_rr1 <"mpyu", 0b010, 0b001>;
+
+def M2_dpmpyss_rnd_s0 : T_MType_rr1 <"mpy", 0b001, 0b001, 0, 1>;
+
+def M2_vmpy2s_s0pack : T_MType_rr1 <"vmpyh", 0b001, 0b111, 1, 1>;
+def M2_vmpy2s_s1pack : T_MType_rr1 <"vmpyh", 0b101, 0b111, 1, 1>;
+
+def M2_hmmpyh_rs1 : T_MType_rr2 <"mpy", 0b101, 0b100, 1, 1, ".h">;
+def M2_hmmpyl_rs1 : T_MType_rr2 <"mpy", 0b111, 0b100, 1, 1, ".l">;
+
+def M2_cmpyrs_s0 : T_MType_rr2 <"cmpy", 0b001, 0b110, 1, 1>;
+def M2_cmpyrs_s1 : T_MType_rr2 <"cmpy", 0b101, 0b110, 1, 1>;
+def M2_cmpyrsc_s0 : T_MType_rr2 <"cmpy", 0b011, 0b110, 1, 1, "*">;
+def M2_cmpyrsc_s1 : T_MType_rr2 <"cmpy", 0b111, 0b110, 1, 1, "*">;
+
+// V4 Instructions
+def M2_vraddh : T_MType_dd <"vraddh", 0b001, 0b111, 0>;
+def M2_mpysu_up : T_MType_rr1 <"mpysu", 0b011, 0b001, 0>;
+def M2_mpy_up_s1 : T_MType_rr1 <"mpy", 0b101, 0b010, 0>;
+def M2_mpy_up_s1_sat : T_MType_rr1 <"mpy", 0b111, 0b000, 1>;
+
+def M2_hmmpyh_s1 : T_MType_rr2 <"mpy", 0b101, 0b000, 1, 0, ".h">;
+def M2_hmmpyl_s1 : T_MType_rr2 <"mpy", 0b101, 0b001, 1, 0, ".l">;
+
+def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>;
+def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>;
+def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>;
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern>
+ : MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, ImmOp:$u8),
+ "$Rd ="#!if(isNeg, "- ", "+ ")#"mpyi($Rs, #$u8)" ,
+ pattern, "", M_tc_3x_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<8> u8;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b0000;
+ let Inst{23} = isNeg;
+ let Inst{13} = 0b0;
+ let Inst{4-0} = Rd;
+ let Inst{20-16} = Rs;
+ let Inst{12-5} = u8;
+ }
+
+let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in
+def M2_mpysip : T_MType_mpy_ri <0, u8Ext,
+ [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u32ImmPred:$u8))]>;
+
+def M2_mpysin : T_MType_mpy_ri <1, u8Imm,
+ [(set (i32 IntRegs:$Rd), (ineg (mul IntRegs:$Rs,
+ u8ImmPred:$u8)))]>;
+
+// Assembler mapped to M2_mpyi
+let isAsmParserOnly = 1 in
+def M2_mpyui : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyui($src1, $src2)">;
+
+// Rd=mpyi(Rs,#m9)
+// s9 is NOT the same as m9 - but it works... so far.
+// Assembler maps to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8)
+// depending on the value of m9. See Arch Spec.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9,
+ CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1,
+ isAsmParserOnly = 1 in
+def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2),
+ "$dst = mpyi($src1, #$src2)",
+ [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
+ s32ImmPred:$src2))]>, ImmRegRel;
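+
+// Example of the mapping described above (assembler behaviour, not a new
+// def): "r0 = mpyi(r1, #-3)" is accepted via M2_mpysmi and should be emitted
+// as the negative form "r0 =- mpyi(r1, #3)" (M2_mpysin encoding), while a
+// non-negative immediate maps to the "+mpyi" form (M2_mpysip).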
+
+let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3,
+ InputType = "imm" in
+class T_MType_acc_ri <string mnemonic, bits<3> MajOp, Operand ImmOp,
+ list<dag> pattern = []>
+ : MInst < (outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, ImmOp:$src3),
+ "$dst "#mnemonic#"($src2, #$src3)",
+ pattern, "$src1 = $dst", M_tc_2_SLOT23> {
+ bits<5> dst;
+ bits<5> src2;
+ bits<8> src3;
+
+ let IClass = 0b1110;
+
+ let Inst{27-26} = 0b00;
+ let Inst{25-23} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13} = 0b0;
+ let Inst{12-5} = src3;
+ let Inst{4-0} = dst;
+ }
+
+let InputType = "reg", hasNewValue = 1 in
+class T_MType_acc_rr <string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit isSwap = 0, list<dag> pattern = [], bit hasNot = 0,
+ bit isSat = 0, bit isShift = 0>
+ : MInst < (outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst "#mnemonic#"($src2, "#!if(hasNot, "~$src3)","$src3)")
+ #!if(isShift, ":<<1", "")
+ #!if(isSat, ":sat", ""),
+ pattern, "$src1 = $dst", M_tc_2_SLOT23 > {
+ bits<5> dst;
+ bits<5> src2;
+ bits<5> src3;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1111;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = !if(isSwap, src3, src2);
+ let Inst{13} = 0b0;
+ let Inst{12-8} = !if(isSwap, src2, src3);
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = dst;
+ }
+
+let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in {
+ def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8Ext,
+ [(set (i32 IntRegs:$dst),
+ (add (mul IntRegs:$src2, u32ImmPred:$src3),
+ IntRegs:$src1))]>, ImmRegRel;
+
+ def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0,
+ [(set (i32 IntRegs:$dst),
+ (add (mul IntRegs:$src2, IntRegs:$src3),
+ IntRegs:$src1))]>, ImmRegRel;
+}
+
+let CextOpcode = "ADD_acc" in {
+ let isExtentSigned = 1 in
+ def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext,
+ [(set (i32 IntRegs:$dst),
+ (add (add (i32 IntRegs:$src2), s32ImmPred:$src3),
+ (i32 IntRegs:$src1)))]>, ImmRegRel;
+
+ def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0,
+ [(set (i32 IntRegs:$dst),
+ (add (add (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
+ (i32 IntRegs:$src1)))]>, ImmRegRel;
+}
+
+let CextOpcode = "SUB_acc" in {
+ let isExtentSigned = 1 in
+ def M2_naccii : T_MType_acc_ri <"-= add", 0b101, s8Ext>, ImmRegRel;
+
+ def M2_nacci : T_MType_acc_rr <"-= add", 0b100, 0b001, 0>, ImmRegRel;
+}
+
+let Itinerary = M_tc_3x_SLOT23 in
+def M2_macsin : T_MType_acc_ri <"-= mpyi", 0b011, u8Ext>;
+
+def M2_xor_xacc : T_MType_acc_rr < "^= xor", 0b100, 0b011, 0>;
+def M2_subacc : T_MType_acc_rr <"+= sub", 0b000, 0b011, 1>;
+
+class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp,
+ PatLeaf ImmPred>
+ : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)),
+ (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>;
+
+class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp>
+ : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))),
+ (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
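+
+// For illustration, T_MType_acc_pat1<M2_macsin, mul, sub, u32ImmPred> (used
+// below) matches the DAG "sub $src1, (mul $src2, imm)" and selects M2_macsin,
+// i.e. the "-= mpyi" accumulating form defined above.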
+
+def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>;
+def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32ImmPred>;
+
+def : T_MType_acc_pat1 <M2_naccii, add, sub, s32ImmPred>;
+def : T_MType_acc_pat2 <M2_nacci, add, sub>;
+
+//===----------------------------------------------------------------------===//
+// Template Class -- XType Vector Instructions
+//===----------------------------------------------------------------------===//
+class T_XTYPE_Vect < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj >
+ : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rdd = "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"),
+ [] > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1000;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rdd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+class T_XTYPE_Vect_acc < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj >
+ : MInst <(outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rdd += "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"),
+ [], "$dst2 = $Rdd",M_tc_3x_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1010;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rdd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+class T_XTYPE_Vect_diff < bits<3> MajOp, string opc >
+ : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rtt, DoubleRegs:$Rss),
+ "$Rdd = "#opc#"($Rtt, $Rss)",
+ [], "",M_tc_2_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1000;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = 0b000;
+ let Inst{4-0} = Rdd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+// Vector reduce add unsigned bytes: Rdd32=vrmpybu(Rss32,Rtt32)
+def A2_vraddub: T_XTYPE_Vect <"vraddub", 0b010, 0b001, 0>;
+def A2_vraddub_acc: T_XTYPE_Vect_acc <"vraddub", 0b010, 0b001, 0>;
+
+// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt)
+def A2_vrsadub: T_XTYPE_Vect <"vrsadub", 0b010, 0b010, 0>;
+def A2_vrsadub_acc: T_XTYPE_Vect_acc <"vrsadub", 0b010, 0b010, 0>;
+
+// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss)
+def M2_vabsdiffh: T_XTYPE_Vect_diff<0b011, "vabsdiffh">;
+
+// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss)
+def M2_vabsdiffw: T_XTYPE_Vect_diff<0b001, "vabsdiffw">;
+
+// Vector reduce complex multiply real or imaginary:
+// Rdd[+]=vrcmpy[ir](Rss,Rtt[*])
+def M2_vrcmpyi_s0: T_XTYPE_Vect <"vrcmpyi", 0b000, 0b000, 0>;
+def M2_vrcmpyi_s0c: T_XTYPE_Vect <"vrcmpyi", 0b010, 0b000, 1>;
+def M2_vrcmaci_s0: T_XTYPE_Vect_acc <"vrcmpyi", 0b000, 0b000, 0>;
+def M2_vrcmaci_s0c: T_XTYPE_Vect_acc <"vrcmpyi", 0b010, 0b000, 1>;
+
+def M2_vrcmpyr_s0: T_XTYPE_Vect <"vrcmpyr", 0b000, 0b001, 0>;
+def M2_vrcmpyr_s0c: T_XTYPE_Vect <"vrcmpyr", 0b011, 0b001, 1>;
+def M2_vrcmacr_s0: T_XTYPE_Vect_acc <"vrcmpyr", 0b000, 0b001, 0>;
+def M2_vrcmacr_s0c: T_XTYPE_Vect_acc <"vrcmpyr", 0b011, 0b001, 1>;
+
+// Vector reduce halfwords:
+// Rdd[+]=vrmpyh(Rss,Rtt)
+def M2_vrmpy_s0: T_XTYPE_Vect <"vrmpyh", 0b000, 0b010, 0>;
+def M2_vrmac_s0: T_XTYPE_Vect_acc <"vrmpyh", 0b000, 0b010, 0>;
+
+//===----------------------------------------------------------------------===//
+// Template Class -- Vector Multiply with accumulation.
+// Used for complex multiply real or imaginary, dual multiply and even halfwords
+//===----------------------------------------------------------------------===//
+let Defs = [USR_OVF] in
+class T_M2_vmpy_acc_sat < string opc, bits<3> MajOp, bits<3> MinOp,
+ bit hasShift, bit isRnd >
+ : MInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
+ #!if(isRnd,":rnd","")#":sat",
+ [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1010;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rxx;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+class T_M2_vmpy_acc < string opc, bits<3> MajOp, bits<3> MinOp,
+ bit hasShift, bit isRnd >
+ : MInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
+ #!if(isRnd,":rnd",""),
+ [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1010;
+ let Inst{23-21} = MajOp;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rxx;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+// Vector multiply word by signed half with accumulation
+// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmacls_s1: T_M2_vmpy_acc_sat <"vmpyweh", 0b100, 0b101, 1, 0>;
+def M2_mmacls_s0: T_M2_vmpy_acc_sat <"vmpyweh", 0b000, 0b101, 0, 0>;
+def M2_mmacls_rs1: T_M2_vmpy_acc_sat <"vmpyweh", 0b101, 0b101, 1, 1>;
+def M2_mmacls_rs0: T_M2_vmpy_acc_sat <"vmpyweh", 0b001, 0b101, 0, 1>;
+
+def M2_mmachs_s1: T_M2_vmpy_acc_sat <"vmpywoh", 0b100, 0b111, 1, 0>;
+def M2_mmachs_s0: T_M2_vmpy_acc_sat <"vmpywoh", 0b000, 0b111, 0, 0>;
+def M2_mmachs_rs1: T_M2_vmpy_acc_sat <"vmpywoh", 0b101, 0b111, 1, 1>;
+def M2_mmachs_rs0: T_M2_vmpy_acc_sat <"vmpywoh", 0b001, 0b111, 0, 1>;
+
+// Vector multiply word by unsigned half with accumulation
+// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat
+def M2_mmaculs_s1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b110, 0b101, 1, 0>;
+def M2_mmaculs_s0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b010, 0b101, 0, 0>;
+def M2_mmaculs_rs1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b111, 0b101, 1, 1>;
+def M2_mmaculs_rs0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b011, 0b101, 0, 1>;
+
+def M2_mmacuhs_s1: T_M2_vmpy_acc_sat <"vmpywouh", 0b110, 0b111, 1, 0>;
+def M2_mmacuhs_s0: T_M2_vmpy_acc_sat <"vmpywouh", 0b010, 0b111, 0, 0>;
+def M2_mmacuhs_rs1: T_M2_vmpy_acc_sat <"vmpywouh", 0b111, 0b111, 1, 1>;
+def M2_mmacuhs_rs0: T_M2_vmpy_acc_sat <"vmpywouh", 0b011, 0b111, 0, 1>;
+
+// Vector multiply even halfwords with accumulation
+// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat]
+def M2_vmac2es: T_M2_vmpy_acc <"vmpyeh", 0b001, 0b010, 0, 0>;
+def M2_vmac2es_s1: T_M2_vmpy_acc_sat <"vmpyeh", 0b100, 0b110, 1, 0>;
+def M2_vmac2es_s0: T_M2_vmpy_acc_sat <"vmpyeh", 0b000, 0b110, 0, 0>;
+
+// Vector dual multiply with accumulation
+// Rxx+=vdmpy(Rss,Rtt)[:sat]
+def M2_vdmacs_s1: T_M2_vmpy_acc_sat <"vdmpy", 0b100, 0b100, 1, 0>;
+def M2_vdmacs_s0: T_M2_vmpy_acc_sat <"vdmpy", 0b000, 0b100, 0, 0>;
+
+// Vector complex multiply real or imaginary with accumulation
+// Rxx+=vcmpy[ir](Rss,Rtt):sat
+def M2_vcmac_s0_sat_r: T_M2_vmpy_acc_sat <"vcmpyr", 0b001, 0b100, 0, 0>;
+def M2_vcmac_s0_sat_i: T_M2_vmpy_acc_sat <"vcmpyi", 0b010, 0b100, 0, 0>;
+
+//===----------------------------------------------------------------------===//
+// Template Class -- Multiply signed/unsigned halfwords with and without
+// saturation and rounding
+//===----------------------------------------------------------------------===//
+class T_M2_mpyd < bits<2> LHbits, bit isRnd, bit hasShift, bit isUnsigned >
+ : MInst < (outs DoubleRegs:$Rdd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rdd = "#!if(isUnsigned,"mpyu","mpy")#"($Rs."#!if(LHbits{1},"h","l")
+ #", $Rt."#!if(LHbits{0},"h)","l)")
+ #!if(hasShift,":<<1","")
+ #!if(isRnd,":rnd",""),
+ [] > {
+ bits<5> Rdd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b0100;
+ let Inst{23} = hasShift;
+ let Inst{22} = isUnsigned;
+ let Inst{21} = isRnd;
+ let Inst{6-5} = LHbits;
+ let Inst{4-0} = Rdd;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+}
+
+def M2_mpyd_hh_s0: T_M2_mpyd<0b11, 0, 0, 0>;
+def M2_mpyd_hl_s0: T_M2_mpyd<0b10, 0, 0, 0>;
+def M2_mpyd_lh_s0: T_M2_mpyd<0b01, 0, 0, 0>;
+def M2_mpyd_ll_s0: T_M2_mpyd<0b00, 0, 0, 0>;
+
+def M2_mpyd_hh_s1: T_M2_mpyd<0b11, 0, 1, 0>;
+def M2_mpyd_hl_s1: T_M2_mpyd<0b10, 0, 1, 0>;
+def M2_mpyd_lh_s1: T_M2_mpyd<0b01, 0, 1, 0>;
+def M2_mpyd_ll_s1: T_M2_mpyd<0b00, 0, 1, 0>;
+
+def M2_mpyd_rnd_hh_s0: T_M2_mpyd<0b11, 1, 0, 0>;
+def M2_mpyd_rnd_hl_s0: T_M2_mpyd<0b10, 1, 0, 0>;
+def M2_mpyd_rnd_lh_s0: T_M2_mpyd<0b01, 1, 0, 0>;
+def M2_mpyd_rnd_ll_s0: T_M2_mpyd<0b00, 1, 0, 0>;
+
+def M2_mpyd_rnd_hh_s1: T_M2_mpyd<0b11, 1, 1, 0>;
+def M2_mpyd_rnd_hl_s1: T_M2_mpyd<0b10, 1, 1, 0>;
+def M2_mpyd_rnd_lh_s1: T_M2_mpyd<0b01, 1, 1, 0>;
+def M2_mpyd_rnd_ll_s1: T_M2_mpyd<0b00, 1, 1, 0>;
+
+//Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
+def M2_mpyud_hh_s0: T_M2_mpyd<0b11, 0, 0, 1>;
+def M2_mpyud_hl_s0: T_M2_mpyd<0b10, 0, 0, 1>;
+def M2_mpyud_lh_s0: T_M2_mpyd<0b01, 0, 0, 1>;
+def M2_mpyud_ll_s0: T_M2_mpyd<0b00, 0, 0, 1>;
+
+def M2_mpyud_hh_s1: T_M2_mpyd<0b11, 0, 1, 1>;
+def M2_mpyud_hl_s1: T_M2_mpyd<0b10, 0, 1, 1>;
+def M2_mpyud_lh_s1: T_M2_mpyd<0b01, 0, 1, 1>;
+def M2_mpyud_ll_s1: T_M2_mpyd<0b00, 0, 1, 1>;
+
+//===----------------------------------------------------------------------===//
+// Template Class for xtype mpy:
+// Vector multiply
+// Complex multiply
+// Multiply 32x32 and use full result
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in
+class T_XTYPE_mpy64 <string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit isSat, bit hasShift, bit isConj>
+ : MInst <(outs DoubleRegs:$Rdd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rdd = "#mnemonic#"($Rs, $Rt"#!if(isConj,"*)",")")
+ #!if(hasShift,":<<1","")
+ #!if(isSat,":sat",""),
+ [] > {
+ bits<5> Rdd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b0101;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rdd;
+ }
+
+//===----------------------------------------------------------------------===//
+// Template Class for xtype mpy with accumulation into 64-bit:
+// Vector multiply
+// Complex multiply
+// Multiply 32x32 and use full result
+//===----------------------------------------------------------------------===//
+class T_XTYPE_mpy64_acc <string op1, string op2, bits<3> MajOp, bits<3> MinOp,
+ bit isSat, bit hasShift, bit isConj>
+ : MInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
+ "$Rxx "#op2#"= "#op1#"($Rs, $Rt"#!if(isConj,"*)",")")
+ #!if(hasShift,":<<1","")
+              #!if(isSat,":sat",""),
+            [], "$dst2 = $Rxx" > {
+ bits<5> Rxx;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b0111;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rxx;
+ }
+
+// MPY - Multiply and use full result
+// Rdd = mpy[u](Rs,Rt)
+def M2_dpmpyss_s0 : T_XTYPE_mpy64 < "mpy", 0b000, 0b000, 0, 0, 0>;
+def M2_dpmpyuu_s0 : T_XTYPE_mpy64 < "mpyu", 0b010, 0b000, 0, 0, 0>;
+
+// Rxx[+-]= mpy[u](Rs,Rt)
+def M2_dpmpyss_acc_s0 : T_XTYPE_mpy64_acc < "mpy", "+", 0b000, 0b000, 0, 0, 0>;
+def M2_dpmpyss_nac_s0 : T_XTYPE_mpy64_acc < "mpy", "-", 0b001, 0b000, 0, 0, 0>;
+def M2_dpmpyuu_acc_s0 : T_XTYPE_mpy64_acc < "mpyu", "+", 0b010, 0b000, 0, 0, 0>;
+def M2_dpmpyuu_nac_s0 : T_XTYPE_mpy64_acc < "mpyu", "-", 0b011, 0b000, 0, 0, 0>;
+
+// Complex multiply real or imaginary
+// Rxx=cmpy[ir](Rs,Rt)
+def M2_cmpyi_s0 : T_XTYPE_mpy64 < "cmpyi", 0b000, 0b001, 0, 0, 0>;
+def M2_cmpyr_s0 : T_XTYPE_mpy64 < "cmpyr", 0b000, 0b010, 0, 0, 0>;
+
+// Rxx+=cmpy[ir](Rs,Rt)
+def M2_cmaci_s0 : T_XTYPE_mpy64_acc < "cmpyi", "+", 0b000, 0b001, 0, 0, 0>;
+def M2_cmacr_s0 : T_XTYPE_mpy64_acc < "cmpyr", "+", 0b000, 0b010, 0, 0, 0>;
+
+// Complex multiply
+// Rdd=cmpy(Rs,Rt)[:<<1]:sat
+def M2_cmpys_s0 : T_XTYPE_mpy64 < "cmpy", 0b000, 0b110, 1, 0, 0>;
+def M2_cmpys_s1 : T_XTYPE_mpy64 < "cmpy", 0b100, 0b110, 1, 1, 0>;
+
+// Rdd=cmpy(Rs,Rt*)[:<<1]:sat
+def M2_cmpysc_s0 : T_XTYPE_mpy64 < "cmpy", 0b010, 0b110, 1, 0, 1>;
+def M2_cmpysc_s1 : T_XTYPE_mpy64 < "cmpy", 0b110, 0b110, 1, 1, 1>;
+
+// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat
+def M2_cmacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b000, 0b110, 1, 0, 0>;
+def M2_cnacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b000, 0b111, 1, 0, 0>;
+def M2_cmacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b100, 0b110, 1, 1, 0>;
+def M2_cnacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b100, 0b111, 1, 1, 0>;
+
+// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat
+def M2_cmacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b010, 0b110, 1, 0, 1>;
+def M2_cnacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b010, 0b111, 1, 0, 1>;
+def M2_cmacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b110, 0b110, 1, 1, 1>;
+def M2_cnacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b110, 0b111, 1, 1, 1>;
+
+// Vector multiply halfwords
+// Rdd=vmpyh(Rs,Rt)[:<<1]:sat
+//let Defs = [USR_OVF] in {
+ def M2_vmpy2s_s1 : T_XTYPE_mpy64 < "vmpyh", 0b100, 0b101, 1, 1, 0>;
+ def M2_vmpy2s_s0 : T_XTYPE_mpy64 < "vmpyh", 0b000, 0b101, 1, 0, 0>;
+//}
+
+// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat]
+def M2_vmac2 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b001, 0b001, 0, 0, 0>;
+def M2_vmac2s_s1 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b100, 0b101, 1, 1, 0>;
+def M2_vmac2s_s0 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b000, 0b101, 1, 0, 0>;
+
+def: Pat<(i64 (mul (i64 (anyext (i32 IntRegs:$src1))),
+ (i64 (anyext (i32 IntRegs:$src2))))),
+ (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>;
+
+def: Pat<(i64 (mul (i64 (sext (i32 IntRegs:$src1))),
+ (i64 (sext (i32 IntRegs:$src2))))),
+ (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>;
+
+def: Pat<(i64 (mul (is_sext_i32:$src1),
+ (is_sext_i32:$src2))),
+ (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>;
+
+// Multiply and accumulate, use full result.
+// Rxx[+-]=mpy(Rs,Rt)
+
+def: Pat<(i64 (add (i64 DoubleRegs:$src1),
+ (mul (i64 (sext (i32 IntRegs:$src2))),
+ (i64 (sext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
+ (mul (i64 (sext (i32 IntRegs:$src2))),
+ (i64 (sext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(i64 (add (i64 DoubleRegs:$src1),
+ (mul (i64 (anyext (i32 IntRegs:$src2))),
+ (i64 (anyext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(i64 (add (i64 DoubleRegs:$src1),
+ (mul (i64 (zext (i32 IntRegs:$src2))),
+ (i64 (zext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
+ (mul (i64 (anyext (i32 IntRegs:$src2))),
+ (i64 (anyext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
+ (mul (i64 (zext (i32 IntRegs:$src2))),
+ (i64 (zext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
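+
+// These accumulate patterns correspond to the usual widening multiply-
+// accumulate idiom; e.g. (assuming 32-bit int and 64-bit long long) C code
+// such as
+//   long long acc;  int a, b;
+//   acc += (long long)a * (long long)b;
+// is selected to M2_dpmpyss_acc_s0, and the zero-extended (unsigned) variant
+// to M2_dpmpyuu_acc_s0.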
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VB +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VB -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VH +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+//
+// Store doubleword.
+//===----------------------------------------------------------------------===//
+// Template class for non-predicated post increment stores with immediate
+// offset.
+//===----------------------------------------------------------------------===//
+let isPredicable = 1, hasSideEffects = 0, addrMode = PostInc in
+class T_store_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<4> MajOp, bit isHalf >
+ : STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ImmOp:$offset, RC:$src2),
+ mnemonic#"($src1++#$offset) = $src2"#!if(isHalf, ".h", ""),
+ [], "$src1 = $_dst_" >,
+ AddrModeRel {
+ bits<5> src1;
+ bits<5> src2;
+ bits<7> offset;
+ bits<4> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
+ !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
+ !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0})));
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, !if(isHalf,0,1));
+
+ let IClass = 0b1010;
+
+ let Inst{27-25} = 0b101;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = src2;
+ let Inst{7} = 0b0;
+ let Inst{6-3} = offsetBits;
+ let Inst{1} = 0b0;
+ }
+
+//===----------------------------------------------------------------------===//
+// Template class for predicated post increment stores with immediate offset
+//===----------------------------------------------------------------------===//
+let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in
+class T_pstore_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<4> MajOp, bit isHalf, bit isPredNot, bit isPredNew>
+ : STInst <(outs IntRegs:$_dst_),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
+ !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2++#$offset) = $src3"#!if(isHalf, ".h", ""),
+ [], "$src2 = $_dst_" >,
+ AddrModeRel {
+ bits<2> src1;
+ bits<5> src2;
+ bits<7> offset;
+ bits<5> src3;
+ bits<4> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
+ !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
+ !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0})));
+
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, !if(isHalf,0,1));
+ let isPredicatedNew = isPredNew;
+ let isPredicatedFalse = isPredNot;
+
+ let IClass = 0b1010;
+
+ let Inst{27-25} = 0b101;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13} = 0b1;
+ let Inst{12-8} = src3;
+ let Inst{7} = isPredNew;
+ let Inst{6-3} = offsetBits;
+ let Inst{2} = isPredNot;
+ let Inst{1-0} = src1;
+ }
+
+multiclass ST_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp, bits<4> MajOp, bit isHalf = 0 > {
+
+ let BaseOpcode = "POST_"#BaseOp in {
+ def S2_#NAME#_pi : T_store_pi <mnemonic, RC, ImmOp, MajOp, isHalf>;
+
+ // Predicated
+ def S2_p#NAME#t_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, isHalf, 0, 0>;
+ def S2_p#NAME#f_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, isHalf, 1, 0>;
+
+ // Predicated new
+ def S2_p#NAME#tnew_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp,
+ isHalf, 0, 1>;
+ def S2_p#NAME#fnew_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp,
+ isHalf, 1, 1>;
+ }
+}
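+
+// Expansion sketch: each "defm storerX" below produces one base def plus four
+// predicated variants via NAME substitution. For example, "defm storerb"
+// yields S2_storerb_pi, S2_pstorerbt_pi, S2_pstorerbf_pi, S2_pstorerbtnew_pi
+// and S2_pstorerbfnew_pi, all sharing BaseOpcode "POST_STrib".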
+
+let accessSize = ByteAccess in
+defm storerb: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm, 0b1000>;
+
+let accessSize = HalfWordAccess in
+defm storerh: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm, 0b1010>;
+
+let accessSize = WordAccess in
+defm storeri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm, 0b1100>;
+
+let accessSize = DoubleWordAccess in
+defm storerd: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm, 0b1110>;
+
+let accessSize = HalfWordAccess, isNVStorable = 0 in
+defm storerf: ST_PostInc <"memh", "STrih_H", IntRegs, s4_1Imm, 0b1011, 1>;
+
+class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset,
+ InstHexagon MI>
+ : Pat<(Store Value:$src1, I32:$src2, Offset:$offset),
+ (MI I32:$src2, imm:$offset, Value:$src1)>;
+
+def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred, S2_storerb_pi>;
+def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>;
+def: Storepi_pat<post_store, I32, s4_2ImmPred, S2_storeri_pi>;
+def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>;
+
+//===----------------------------------------------------------------------===//
+// Template class for post increment stores with register offset.
+//===----------------------------------------------------------------------===//
+class T_store_pr <string mnemonic, RegisterClass RC, bits<3> MajOp,
+ MemAccessSize AccessSz, bit isHalf = 0>
+ : STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ModRegs:$src2, RC:$src3),
+ mnemonic#"($src1++$src2) = $src3"#!if(isHalf, ".h", ""),
+ [], "$src1 = $_dst_" > {
+ bits<5> src1;
+ bits<1> src2;
+ bits<5> src3;
+ let accessSize = AccessSz;
+
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if(!eq(mnemonic,"memd"), 0, !if(isHalf,0,1));
+
+ let IClass = 0b1010;
+
+ let Inst{27-24} = 0b1101;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13} = src2;
+ let Inst{12-8} = src3;
+ let Inst{7} = 0b0;
+ }
+
+def S2_storerb_pr : T_store_pr<"memb", IntRegs, 0b000, ByteAccess>;
+def S2_storerh_pr : T_store_pr<"memh", IntRegs, 0b010, HalfWordAccess>;
+def S2_storeri_pr : T_store_pr<"memw", IntRegs, 0b100, WordAccess>;
+def S2_storerd_pr : T_store_pr<"memd", DoubleRegs, 0b110, DoubleWordAccess>;
+def S2_storerf_pr : T_store_pr<"memh", IntRegs, 0b011, HalfWordAccess, 1>;
+
+let opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in
+class T_store_io <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<3> MajOp, bit isH = 0>
+ : STInst <(outs),
+ (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ mnemonic#"($src1+#$src2) = $src3"#!if(isH,".h","")>,
+ AddrModeRel, ImmRegRel {
+ bits<5> src1;
+ bits<14> src2; // Actual address offset
+ bits<5> src3;
+ bits<11> offsetBits; // Represents offset encoding
+
+ string ImmOpStr = !cast<string>(ImmOp);
+
+ let opExtentBits = !if (!eq(ImmOpStr, "s11_3Ext"), 14,
+ !if (!eq(ImmOpStr, "s11_2Ext"), 13,
+ !if (!eq(ImmOpStr, "s11_1Ext"), 12,
+ /* s11_0Ext */ 11)));
+ let offsetBits = !if (!eq(ImmOpStr, "s11_3Ext"), src2{13-3},
+ !if (!eq(ImmOpStr, "s11_2Ext"), src2{12-2},
+ !if (!eq(ImmOpStr, "s11_1Ext"), src2{11-1},
+ /* s11_0Ext */ src2{10-0})));
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
+ let IClass = 0b1010;
+
+ let Inst{27} = 0b0;
+ let Inst{26-25} = offsetBits{10-9};
+ let Inst{24} = 0b1;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13} = offsetBits{8};
+ let Inst{12-8} = src3;
+ let Inst{7-0} = offsetBits{7-0};
+ }
+
+let opExtendable = 2, isPredicated = 1 in
+class T_pstore_io <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<3>MajOp, bit PredNot, bit isPredNew, bit isH = 0>
+ : STInst <(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ !if(PredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+#$src3) = $src4"#!if(isH,".h",""),
+ [],"",V2LDST_tc_st_SLOT01 >,
+ AddrModeRel, ImmRegRel {
+ bits<2> src1;
+ bits<5> src2;
+ bits<9> src3; // Actual address offset
+ bits<5> src4;
+ bits<6> offsetBits; // Represents offset encoding
+
+ let isPredicatedNew = isPredNew;
+ let isPredicatedFalse = PredNot;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let opExtentBits = !if (!eq(ImmOpStr, "u6_3Ext"), 9,
+ !if (!eq(ImmOpStr, "u6_2Ext"), 8,
+ !if (!eq(ImmOpStr, "u6_1Ext"), 7,
+ /* u6_0Ext */ 6)));
+ let offsetBits = !if (!eq(ImmOpStr, "u6_3Ext"), src3{8-3},
+ !if (!eq(ImmOpStr, "u6_2Ext"), src3{7-2},
+ !if (!eq(ImmOpStr, "u6_1Ext"), src3{6-1},
+ /* u6_0Ext */ src3{5-0})));
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
+
+ let IClass = 0b0100;
+
+ let Inst{27} = 0b0;
+ let Inst{26} = PredNot;
+ let Inst{25} = isPredNew;
+ let Inst{24} = 0b0;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13} = offsetBits{5};
+ let Inst{12-8} = src4;
+ let Inst{7-3} = offsetBits{4-0};
+ let Inst{1-0} = src1;
+ }
+
+let isExtendable = 1, hasSideEffects = 0 in
+multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, Operand predImmOp, bits<3> MajOp, bit isH = 0> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+ def S2_#NAME#_io : T_store_io <mnemonic, RC, ImmOp, MajOp, isH>;
+
+ // Predicated
+ def S2_p#NAME#t_io : T_pstore_io<mnemonic, RC, predImmOp, MajOp, 0, 0, isH>;
+ def S2_p#NAME#f_io : T_pstore_io<mnemonic, RC, predImmOp, MajOp, 1, 0, isH>;
+
+ // Predicated new
+ def S4_p#NAME#tnew_io : T_pstore_io <mnemonic, RC, predImmOp,
+ MajOp, 0, 1, isH>;
+ def S4_p#NAME#fnew_io : T_pstore_io <mnemonic, RC, predImmOp,
+ MajOp, 1, 1, isH>;
+ }
+}
+
+let addrMode = BaseImmOffset, InputType = "imm" in {
+ let accessSize = ByteAccess in
+ defm storerb: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext, u6_0Ext, 0b000>;
+
+ let accessSize = HalfWordAccess, opExtentAlign = 1 in
+ defm storerh: ST_Idxd < "memh", "STrih", IntRegs, s11_1Ext, u6_1Ext, 0b010>;
+
+ let accessSize = WordAccess, opExtentAlign = 2 in
+ defm storeri: ST_Idxd < "memw", "STriw", IntRegs, s11_2Ext, u6_2Ext, 0b100>;
+
+ let accessSize = DoubleWordAccess, isNVStorable = 0, opExtentAlign = 3 in
+ defm storerd: ST_Idxd < "memd", "STrid", DoubleRegs, s11_3Ext,
+ u6_3Ext, 0b110>;
+
+ let accessSize = HalfWordAccess, opExtentAlign = 1 in
+ defm storerf: ST_Idxd < "memh", "STrif", IntRegs, s11_1Ext,
+ u6_1Ext, 0b011, 1>;
+}
+
+// Patterns for generating stores, where the address takes different forms:
+// - frameindex,
+// - frameindex + offset,
+// - base + offset,
+// - simple (base address without offset).
+// These would usually be used together (via Storex_pat defined below), but
+// in some cases one may want to apply different properties (such as
+// AddedComplexity) to the individual patterns.
+class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>;
+class Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
+class Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
+ (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
+class Storex_simple_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)),
+ (MI IntRegs:$Rs, 0, Value:$Rt)>;
+
+// Patterns for generating stores, where the address takes different forms,
+// and where the value being stored is transformed through the value modifier
+// ValueMod. The address forms are the same as above.
+class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rs, AddrFI:$fi),
+ (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>;
+class Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ PatFrag ValueMod, InstHexagon MI>
+ : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
+class Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ PatFrag ValueMod, InstHexagon MI>
+ : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
+ (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
+class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)),
+ (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>;
+
+multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
+ InstHexagon MI> {
+ def: Storex_fi_pat <Store, Value, MI>;
+ def: Storex_fi_add_pat <Store, Value, ImmPred, MI>;
+ def: Storex_add_pat <Store, Value, ImmPred, MI>;
+}
+
+multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
+ PatFrag ValueMod, InstHexagon MI> {
+ def: Storexm_fi_pat <Store, Value, ValueMod, MI>;
+ def: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>;
+ def: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>;
+}
+
+// Regular stores in the DAG have two operands: value and address.
+// Atomic stores also have two, but they are reversed: address, value.
+// To use atomic stores with the patterns, they need to have their operands
+// swapped. This relies on the knowledge that the F.Fragment uses names
+// "ptr" and "val".
+class SwapSt<PatFrag F>
+ : PatFrag<(ops node:$val, node:$ptr), F.Fragment>;
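+// For illustration: a regular store fragment has the shape (st $val, $ptr),
+// while an atomic store fragment has (st $ptr, $val). Wrapping the atomic
+// fragment in SwapSt presents it to the Storex_pat/Storexm_pat patterns in
+// the same (value, address) order as a regular store.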
+
+let AddedComplexity = 20 in {
+ defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>;
+ defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>;
+ defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>;
+ defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>;
+
+ defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>;
+ defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>;
+ defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>;
+ defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>;
+}
+
+// Simple patterns should be tried with the lowest priority.
+def: Storex_simple_pat<truncstorei8, I32, S2_storerb_io>;
+def: Storex_simple_pat<truncstorei16, I32, S2_storerh_io>;
+def: Storex_simple_pat<store, I32, S2_storeri_io>;
+def: Storex_simple_pat<store, I64, S2_storerd_io>;
+
+def: Storex_simple_pat<SwapSt<atomic_store_8>, I32, S2_storerb_io>;
+def: Storex_simple_pat<SwapSt<atomic_store_16>, I32, S2_storerh_io>;
+def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>;
+def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>;
+
+let AddedComplexity = 20 in {
+ defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>;
+ defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>;
+ defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>;
+}
+
+def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
+def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
+def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
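+// In the Storexm patterns above, LoReg is the value modifier: a truncating
+// store of a 64-bit register pair only needs its low 32-bit subregister, so
+// the selected store writes (LoReg $Rt) instead of the full pair.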
+
+// Store predicate.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
+def STriw_pred : STInst<(outs),
+ (ins IntRegs:$addr, s11_2Ext:$off, PredRegs:$src1),
+ ".error \"should not emit\"", []>;
+
+// S2_allocframe: Allocate stack frame.
+let Defs = [R29, R30], Uses = [R29, R31, R30],
+ hasSideEffects = 0, accessSize = DoubleWordAccess in
+def S2_allocframe: ST0Inst <
+ (outs), (ins u11_3Imm:$u11_3),
+ "allocframe(#$u11_3)" > {
+ bits<14> u11_3;
+
+ let IClass = 0b1010;
+ let Inst{27-16} = 0b000010011101;
+ let Inst{13-11} = 0b000;
+ let Inst{10-0} = u11_3{13-3};
+ }
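+// Note on the encoding above: the allocframe operand is a byte count that
+// must be doubleword aligned; only bits 13-3 of the immediate are encoded,
+// so the low three bits are implicitly zero.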
+
+// S2_storer[bhwdf]_pci: Store byte/half/word/double.
+// S2_storer[bhwdf]_pci -> S2_storerbnew_pci
+let Uses = [CS] in
+class T_store_pci <string mnemonic, RegisterClass RC,
+ Operand Imm, bits<4>MajOp,
+ MemAccessSize AlignSize, string RegSrc = "Rt">
+ : STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$Rz, Imm:$offset, ModRegs:$Mu, RC:$Rt),
+ #mnemonic#"($Rz ++ #$offset:circ($Mu)) = $"#RegSrc#"",
+ [] ,
+ "$Rz = $_dst_" > {
+ bits<5> Rz;
+ bits<7> offset;
+ bits<1> Mu;
+ bits<5> Rt;
+ let accessSize = AlignSize;
+ let isNVStorable = !if(!eq(mnemonic,"memd"), 0,
+ !if(!eq(RegSrc,"Rt.h"), 0, 1));
+
+ let IClass = 0b1010;
+ let Inst{27-25} = 0b100;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12-8} = Rt;
+ let Inst{7} = 0b0;
+ let Inst{6-3} =
+ !if (!eq(!cast<string>(AlignSize), "DoubleWordAccess"), offset{6-3},
+ !if (!eq(!cast<string>(AlignSize), "WordAccess"), offset{5-2},
+ !if (!eq(!cast<string>(AlignSize), "HalfWordAccess"), offset{4-1},
+ /* ByteAccess */ offset{3-0})));
+ let Inst{1} = 0b0;
+ }
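+// Note on the offset encoding above: the four-bit field Inst{6-3} holds the
+// byte offset scaled by the access size, i.e. offset{6-3} for doubleword,
+// offset{5-2} for word, offset{4-1} for halfword and offset{3-0} for byte
+// accesses.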
+
+def S2_storerb_pci : T_store_pci<"memb", IntRegs, s4_0Imm, 0b1000,
+ ByteAccess>;
+def S2_storerh_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1010,
+ HalfWordAccess>;
+def S2_storerf_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1011,
+ HalfWordAccess, "Rt.h">;
+def S2_storeri_pci : T_store_pci<"memw", IntRegs, s4_2Imm, 0b1100,
+ WordAccess>;
+def S2_storerd_pci : T_store_pci<"memd", DoubleRegs, s4_3Imm, 0b1110,
+ DoubleWordAccess>;
+
+let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 4 in
+class T_storenew_pci <string mnemonic, Operand Imm,
+ bits<2>MajOp, MemAccessSize AlignSize>
+ : NVInst < (outs IntRegs:$_dst_),
+ (ins IntRegs:$Rz, Imm:$offset, ModRegs:$Mu, IntRegs:$Nt),
+ #mnemonic#"($Rz ++ #$offset:circ($Mu)) = $Nt.new",
+ [],
+ "$Rz = $_dst_"> {
+ bits<5> Rz;
+ bits<6> offset;
+ bits<1> Mu;
+ bits<3> Nt;
+
+ let accessSize = AlignSize;
+
+ let IClass = 0b1010;
+ let Inst{27-21} = 0b1001101;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = Nt;
+ let Inst{7} = 0b0;
+ let Inst{6-3} =
+ !if (!eq(!cast<string>(AlignSize), "WordAccess"), offset{5-2},
+ !if (!eq(!cast<string>(AlignSize), "HalfWordAccess"), offset{4-1},
+ /* ByteAccess */ offset{3-0}));
+ let Inst{1} = 0b0;
+ }
+
+def S2_storerbnew_pci : T_storenew_pci <"memb", s4_0Imm, 0b00, ByteAccess>;
+def S2_storerhnew_pci : T_storenew_pci <"memh", s4_1Imm, 0b01, HalfWordAccess>;
+def S2_storerinew_pci : T_storenew_pci <"memw", s4_2Imm, 0b10, WordAccess>;
+
+//===----------------------------------------------------------------------===//
+// Circular stores - Pseudo
+//
+// Note that the input operand order of the pseudo instructions does not
+// match that of the real instructions; the pseudo instructions' operand
+// order mimics the ordering used by the intrinsics.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in
+class T_store_pci_pseudo <string opc, RegisterClass RC>
+ : STInstPI<(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, RC:$src2, IntRegs:$src3, s4Imm:$src4),
+ ".error \""#opc#"($src1++#$src4:circ($src3)) = $src2\"",
+ [], "$_dst_ = $src1">;
+
+def S2_storerb_pci_pseudo : T_store_pci_pseudo <"memb", IntRegs>;
+def S2_storerh_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>;
+def S2_storerf_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>;
+def S2_storeri_pci_pseudo : T_store_pci_pseudo <"memw", IntRegs>;
+def S2_storerd_pci_pseudo : T_store_pci_pseudo <"memd", DoubleRegs>;
+
+//===----------------------------------------------------------------------===//
+// Circular stores with auto-increment register
+//===----------------------------------------------------------------------===//
+let Uses = [CS] in
+class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp,
+ MemAccessSize AlignSize, string RegSrc = "Rt">
+ : STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$Rz, ModRegs:$Mu, RC:$Rt),
+ #mnemonic#"($Rz ++ I:circ($Mu)) = $"#RegSrc#"",
+ [],
+ "$Rz = $_dst_" > {
+ bits<5> Rz;
+ bits<1> Mu;
+ bits<5> Rt;
+
+ let accessSize = AlignSize;
+ let isNVStorable = !if(!eq(mnemonic,"memd"), 0,
+ !if(!eq(RegSrc,"Rt.h"), 0, 1));
+
+ let IClass = 0b1010;
+ let Inst{27-25} = 0b100;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12-8} = Rt;
+ let Inst{7} = 0b0;
+ let Inst{1} = 0b1;
+ }
+
+def S2_storerb_pcr : T_store_pcr<"memb", IntRegs, 0b1000, ByteAccess>;
+def S2_storerh_pcr : T_store_pcr<"memh", IntRegs, 0b1010, HalfWordAccess>;
+def S2_storeri_pcr : T_store_pcr<"memw", IntRegs, 0b1100, WordAccess>;
+def S2_storerd_pcr : T_store_pcr<"memd", DoubleRegs, 0b1110, DoubleWordAccess>;
+def S2_storerf_pcr : T_store_pcr<"memh", IntRegs, 0b1011,
+ HalfWordAccess, "Rt.h">;
+
+//===----------------------------------------------------------------------===//
+// Circular .new stores with auto-increment register
+//===----------------------------------------------------------------------===//
+let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3 in
+class T_storenew_pcr <string mnemonic, bits<2>MajOp,
+ MemAccessSize AlignSize>
+ : NVInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt),
+ #mnemonic#"($Rz ++ I:circ($Mu)) = $Nt.new" ,
+ [] ,
+ "$Rz = $_dst_"> {
+ bits<5> Rz;
+ bits<1> Mu;
+ bits<3> Nt;
+
+ let accessSize = AlignSize;
+
+ let IClass = 0b1010;
+ let Inst{27-21} = 0b1001101;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = Nt;
+ let Inst{7} = 0b0;
+ let Inst{1} = 0b1;
+ }
+
+def S2_storerbnew_pcr : T_storenew_pcr <"memb", 0b00, ByteAccess>;
+def S2_storerhnew_pcr : T_storenew_pcr <"memh", 0b01, HalfWordAccess>;
+def S2_storerinew_pcr : T_storenew_pcr <"memw", 0b10, WordAccess>;
+
+//===----------------------------------------------------------------------===//
+// Bit-reversed stores with auto-increment register
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in
+class T_store_pbr<string mnemonic, RegisterClass RC,
+ MemAccessSize addrSize, bits<3> majOp,
+ bit isHalf = 0>
+ : STInst
+ <(outs IntRegs:$_dst_),
+ (ins IntRegs:$Rz, ModRegs:$Mu, RC:$src),
+ #mnemonic#"($Rz ++ $Mu:brev) = $src"#!if (!eq(isHalf, 1), ".h", ""),
+ [], "$Rz = $_dst_" > {
+
+ let accessSize = addrSize;
+
+ bits<5> Rz;
+ bits<1> Mu;
+ bits<5> src;
+
+ let IClass = 0b1010;
+
+ let Inst{27-24} = 0b1111;
+ let Inst{23-21} = majOp;
+ let Inst{7} = 0b0;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{12-8} = src;
+ }
+
+let isNVStorable = 1 in {
+ let BaseOpcode = "S2_storerb_pbr" in
+ def S2_storerb_pbr : T_store_pbr<"memb", IntRegs, ByteAccess,
+ 0b000>, NewValueRel;
+ let BaseOpcode = "S2_storerh_pbr" in
+ def S2_storerh_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess,
+ 0b010>, NewValueRel;
+ let BaseOpcode = "S2_storeri_pbr" in
+ def S2_storeri_pbr : T_store_pbr<"memw", IntRegs, WordAccess,
+ 0b100>, NewValueRel;
+}
+
+def S2_storerf_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess, 0b011, 1>;
+def S2_storerd_pbr : T_store_pbr<"memd", DoubleRegs, DoubleWordAccess, 0b110>;
+
+//===----------------------------------------------------------------------===//
+// Bit-reversed .new stores with auto-increment register
+//===----------------------------------------------------------------------===//
+let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3,
+ hasSideEffects = 0 in
+class T_storenew_pbr<string mnemonic, MemAccessSize addrSize, bits<2> majOp>
+ : NVInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt),
+ #mnemonic#"($Rz ++ $Mu:brev) = $Nt.new", [],
+ "$Rz = $_dst_">, NewValueRel {
+ let accessSize = addrSize;
+ bits<5> Rz;
+ bits<1> Mu;
+ bits<3> Nt;
+
+ let IClass = 0b1010;
+
+ let Inst{27-21} = 0b1111101;
+ let Inst{12-11} = majOp;
+ let Inst{7} = 0b0;
+ let Inst{20-16} = Rz;
+ let Inst{13} = Mu;
+ let Inst{10-8} = Nt;
+ }
+
+let BaseOpcode = "S2_storerb_pbr" in
+def S2_storerbnew_pbr : T_storenew_pbr<"memb", ByteAccess, 0b00>;
+
+let BaseOpcode = "S2_storerh_pbr" in
+def S2_storerhnew_pbr : T_storenew_pbr<"memh", HalfWordAccess, 0b01>;
+
+let BaseOpcode = "S2_storeri_pbr" in
+def S2_storerinew_pbr : T_storenew_pbr<"memw", WordAccess, 0b10>;
+
+//===----------------------------------------------------------------------===//
+// Bit-reversed stores - Pseudo
+//
+// Note that the input operand order of the pseudo instructions does not
+// match that of the real instructions; the pseudo instructions' operand
+// order mimics the ordering used by the intrinsics.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in
+class T_store_pbr_pseudo <string opc, RegisterClass RC>
+ : STInstPI<(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, RC:$src2, IntRegs:$src3),
+ ".error \""#opc#"($src1++$src3:brev) = $src2\"",
+ [], "$_dst_ = $src1">;
+
+def S2_storerb_pbr_pseudo : T_store_pbr_pseudo <"memb", IntRegs>;
+def S2_storerh_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>;
+def S2_storeri_pbr_pseudo : T_store_pbr_pseudo <"memw", IntRegs>;
+def S2_storerf_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>;
+def S2_storerd_pbr_pseudo : T_store_pbr_pseudo <"memd", DoubleRegs>;
+
+//===----------------------------------------------------------------------===//
+// ST -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Template class for S_2op instructions.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in
+class T_S2op_1 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut,
+ RegisterClass RCIn, bits<2> MajOp, bits<3> MinOp, bit isSat>
+ : SInst <(outs RCOut:$dst), (ins RCIn:$src),
+ "$dst = "#mnemonic#"($src)"#!if(isSat, ":sat", ""),
+ [], "", S_2op_tc_1_SLOT23 > {
+ bits<5> dst;
+ bits<5> src;
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = RegTyBits;
+ let Inst{23-22} = MajOp;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = src;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = dst;
+ }
+
+class T_S2op_1_di <string mnemonic, bits<2> MajOp, bits<3> MinOp>
+ : T_S2op_1 <mnemonic, 0b0100, DoubleRegs, IntRegs, MajOp, MinOp, 0>;
+
+let hasNewValue = 1 in
+class T_S2op_1_id <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0>
+ : T_S2op_1 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, isSat>;
+
+let hasNewValue = 1 in
+class T_S2op_1_ii <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0>
+ : T_S2op_1 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp, isSat>;
+
+// Vector sign/zero extend
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def S2_vsxtbh : T_S2op_1_di <"vsxtbh", 0b00, 0b000>;
+ def S2_vsxthw : T_S2op_1_di <"vsxthw", 0b00, 0b100>;
+ def S2_vzxtbh : T_S2op_1_di <"vzxtbh", 0b00, 0b010>;
+ def S2_vzxthw : T_S2op_1_di <"vzxthw", 0b00, 0b110>;
+}
+
+// Vector splat bytes/halfwords
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def S2_vsplatrb : T_S2op_1_ii <"vsplatb", 0b01, 0b111>;
+ def S2_vsplatrh : T_S2op_1_di <"vsplath", 0b01, 0b010>;
+}
+
+// Sign extend word to doubleword
+def A2_sxtw : T_S2op_1_di <"sxtw", 0b01, 0b000>;
+
+def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>;
+
+// Vector saturate and pack
+let Defs = [USR_OVF] in {
+ def S2_svsathb : T_S2op_1_ii <"vsathb", 0b10, 0b000>;
+ def S2_svsathub : T_S2op_1_ii <"vsathub", 0b10, 0b010>;
+ def S2_vsathb : T_S2op_1_id <"vsathb", 0b00, 0b110>;
+ def S2_vsathub : T_S2op_1_id <"vsathub", 0b00, 0b000>;
+ def S2_vsatwh : T_S2op_1_id <"vsatwh", 0b00, 0b010>;
+ def S2_vsatwuh : T_S2op_1_id <"vsatwuh", 0b00, 0b100>;
+}
+
+// Vector truncate
+def S2_vtrunohb : T_S2op_1_id <"vtrunohb", 0b10, 0b000>;
+def S2_vtrunehb : T_S2op_1_id <"vtrunehb", 0b10, 0b010>;
+
+// Swizzle the bytes of a word
+def A2_swiz : T_S2op_1_ii <"swiz", 0b10, 0b111>;
+
+// Saturate
+let Defs = [USR_OVF] in {
+ def A2_sat : T_S2op_1_id <"sat", 0b11, 0b000>;
+ def A2_satb : T_S2op_1_ii <"satb", 0b11, 0b111>;
+ def A2_satub : T_S2op_1_ii <"satub", 0b11, 0b110>;
+ def A2_sath : T_S2op_1_ii <"sath", 0b11, 0b100>;
+ def A2_satuh : T_S2op_1_ii <"satuh", 0b11, 0b101>;
+ def A2_roundsat : T_S2op_1_id <"round", 0b11, 0b001, 0b1>;
+}
+
+let Itinerary = S_2op_tc_2_SLOT23 in {
+ // Vector round and pack
+ def S2_vrndpackwh : T_S2op_1_id <"vrndwh", 0b10, 0b100>;
+
+ let Defs = [USR_OVF] in
+ def S2_vrndpackwhs : T_S2op_1_id <"vrndwh", 0b10, 0b110, 1>;
+
+ // Bit reverse
+ def S2_brev : T_S2op_1_ii <"brev", 0b01, 0b110>;
+
+ // Absolute value word
+ def A2_abs : T_S2op_1_ii <"abs", 0b10, 0b100>;
+
+ let Defs = [USR_OVF] in
+ def A2_abssat : T_S2op_1_ii <"abs", 0b10, 0b101, 1>;
+
+ // Negate with saturation
+ let Defs = [USR_OVF] in
+ def A2_negsat : T_S2op_1_ii <"neg", 0b10, 0b110, 1>;
+}
+
+def: Pat<(i32 (select (i1 (setlt (i32 IntRegs:$src), 0)),
+ (i32 (sub 0, (i32 IntRegs:$src))),
+ (i32 IntRegs:$src))),
+ (A2_abs IntRegs:$src)>;
+
+let AddedComplexity = 50 in
+def: Pat<(i32 (xor (add (sra (i32 IntRegs:$src), (i32 31)),
+ (i32 IntRegs:$src)),
+ (sra (i32 IntRegs:$src), (i32 31)))),
+ (A2_abs IntRegs:$src)>;
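+// The pattern above matches the branchless absolute-value idiom
+// (x + (x >> 31)) ^ (x >> 31), which optimizers commonly produce in place of
+// a compare-and-select, and maps it to A2_abs as well.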
+
+class T_S2op_2 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut,
+ RegisterClass RCIn, bits<3> MajOp, bits<3> MinOp,
+ bit isSat, bit isRnd, list<dag> pattern = []>
+ : SInst <(outs RCOut:$dst),
+ (ins RCIn:$src, u5Imm:$u5),
+ "$dst = "#mnemonic#"($src, #$u5)"#!if(isSat, ":sat", "")
+ #!if(isRnd, ":rnd", ""),
+ pattern, "", S_2op_tc_2_SLOT23> {
+ bits<5> dst;
+ bits<5> src;
+ bits<5> u5;
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = RegTyBits;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = u5;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = dst;
+ }
+
+class T_S2op_2_di <string mnemonic, bits<3> MajOp, bits<3> MinOp>
+ : T_S2op_2 <mnemonic, 0b1000, DoubleRegs, IntRegs, MajOp, MinOp, 0, 0>;
+
+let hasNewValue = 1 in
+class T_S2op_2_id <string mnemonic, bits<3> MajOp, bits<3> MinOp>
+ : T_S2op_2 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, 0, 0>;
+
+let hasNewValue = 1 in
+class T_S2op_2_ii <string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit isSat = 0, bit isRnd = 0, list<dag> pattern = []>
+ : T_S2op_2 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp,
+ isSat, isRnd, pattern>;
+
+class T_S2op_shift <string mnemonic, bits<3> MajOp, bits<3> MinOp, SDNode OpNd>
+ : T_S2op_2_ii <mnemonic, MajOp, MinOp, 0, 0,
+ [(set (i32 IntRegs:$dst), (OpNd (i32 IntRegs:$src),
+ (u5ImmPred:$u5)))]>;
+
+// Vector arithmetic shift right by immediate with truncate and pack
+def S2_asr_i_svw_trun : T_S2op_2_id <"vasrw", 0b110, 0b010>;
+
+// Arithmetic/logical shift right/left by immediate
+let Itinerary = S_2op_tc_1_SLOT23 in {
+ def S2_asr_i_r : T_S2op_shift <"asr", 0b000, 0b000, sra>;
+ def S2_lsr_i_r : T_S2op_shift <"lsr", 0b000, 0b001, srl>;
+ def S2_asl_i_r : T_S2op_shift <"asl", 0b000, 0b010, shl>;
+}
+
+// Shift left by immediate with saturation
+let Defs = [USR_OVF] in
+def S2_asl_i_r_sat : T_S2op_2_ii <"asl", 0b010, 0b010, 1>;
+
+// Shift right with round
+def S2_asr_i_r_rnd : T_S2op_2_ii <"asr", 0b010, 0b000, 0, 1>;
+
+let isAsmParserOnly = 1 in
+def S2_asr_i_r_rnd_goodsyntax
+ : SInst <(outs IntRegs:$dst), (ins IntRegs:$src, u5Imm:$u5),
+ "$dst = asrrnd($src, #$u5)",
+ [], "", S_2op_tc_1_SLOT23>;
+
+let isAsmParserOnly = 1 in
+def A2_not: ALU32_rr<(outs IntRegs:$dst),(ins IntRegs:$src),
+ "$dst = not($src)">;
+
+def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5ImmPred:$src2)),
+ (i32 1))),
+ (i32 1))),
+ (S2_asr_i_r_rnd IntRegs:$src1, u5ImmPred:$src2)>;
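+// The pattern above matches ((x >> #u5) + 1) >> 1, i.e. an arithmetic shift
+// right with rounding, and selects the :rnd form directly.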
+
+class T_S2op_3<string opc, bits<2>MajOp, bits<3>minOp, bits<1> sat = 0>
+ : SInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss),
+ "$Rdd = "#opc#"($Rss)"#!if(!eq(sat, 1),":sat","")> {
+ bits<5> Rss;
+ bits<5> Rdd;
+ let IClass = 0b1000;
+ let Inst{27-24} = 0;
+ let Inst{23-22} = MajOp;
+ let Inst{20-16} = Rss;
+ let Inst{7-5} = minOp;
+ let Inst{4-0} = Rdd;
+}
+
+def A2_absp : T_S2op_3 <"abs", 0b10, 0b110>;
+def A2_negp : T_S2op_3 <"neg", 0b10, 0b101>;
+def A2_notp : T_S2op_3 <"not", 0b10, 0b100>;
+
+// Interleave/deinterleave
+def S2_interleave : T_S2op_3 <"interleave", 0b11, 0b101>;
+def S2_deinterleave : T_S2op_3 <"deinterleave", 0b11, 0b100>;
+
+// Vector Complex conjugate
+def A2_vconj : T_S2op_3 <"vconj", 0b10, 0b111, 1>;
+
+// Vector saturate without pack
+def S2_vsathb_nopack : T_S2op_3 <"vsathb", 0b00, 0b111>;
+def S2_vsathub_nopack : T_S2op_3 <"vsathub", 0b00, 0b100>;
+def S2_vsatwh_nopack : T_S2op_3 <"vsatwh", 0b00, 0b110>;
+def S2_vsatwuh_nopack : T_S2op_3 <"vsatwuh", 0b00, 0b101>;
+
+// Vector absolute value halfwords with and without saturation
+// Rdd64=vabsh(Rss64)[:sat]
+def A2_vabsh : T_S2op_3 <"vabsh", 0b01, 0b100>;
+def A2_vabshsat : T_S2op_3 <"vabsh", 0b01, 0b101, 1>;
+
+// Vector absolute value words with and without saturation
+def A2_vabsw : T_S2op_3 <"vabsw", 0b01, 0b110>;
+def A2_vabswsat : T_S2op_3 <"vabsw", 0b01, 0b111, 1>;
+
+def : Pat<(not (i64 DoubleRegs:$src1)),
+ (A2_notp DoubleRegs:$src1)>;
+
+//===----------------------------------------------------------------------===//
+// STYPE/BIT +
+//===----------------------------------------------------------------------===//
+// Bit count
+
+let hasSideEffects = 0, hasNewValue = 1 in
+class T_COUNT_LEADING<string MnOp, bits<3> MajOp, bits<3> MinOp, bit Is32,
+ dag Out, dag Inp>
+ : SInst<Out, Inp, "$Rd = "#MnOp#"($Rs)", [], "", S_2op_tc_1_SLOT23> {
+ bits<5> Rs;
+ bits<5> Rd;
+ let IClass = 0b1000;
+ let Inst{27} = 0b1;
+ let Inst{26} = Is32;
+ let Inst{25-24} = 0b00;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rd;
+}
+
+class T_COUNT_LEADING_32<string MnOp, bits<3> MajOp, bits<3> MinOp>
+ : T_COUNT_LEADING<MnOp, MajOp, MinOp, 0b1,
+ (outs IntRegs:$Rd), (ins IntRegs:$Rs)>;
+
+class T_COUNT_LEADING_64<string MnOp, bits<3> MajOp, bits<3> MinOp>
+ : T_COUNT_LEADING<MnOp, MajOp, MinOp, 0b0,
+ (outs IntRegs:$Rd), (ins DoubleRegs:$Rs)>;
+
+def S2_cl0 : T_COUNT_LEADING_32<"cl0", 0b000, 0b101>;
+def S2_cl1 : T_COUNT_LEADING_32<"cl1", 0b000, 0b110>;
+def S2_ct0 : T_COUNT_LEADING_32<"ct0", 0b010, 0b100>;
+def S2_ct1 : T_COUNT_LEADING_32<"ct1", 0b010, 0b101>;
+def S2_cl0p : T_COUNT_LEADING_64<"cl0", 0b010, 0b010>;
+def S2_cl1p : T_COUNT_LEADING_64<"cl1", 0b010, 0b100>;
+def S2_clb : T_COUNT_LEADING_32<"clb", 0b000, 0b100>;
+def S2_clbp : T_COUNT_LEADING_64<"clb", 0b010, 0b000>;
+def S2_clbnorm : T_COUNT_LEADING_32<"normamt", 0b000, 0b111>;
+
+// Count leading zeros.
+def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
+def: Pat<(i32 (ctlz_zero_undef I32:$Rs)), (S2_cl0 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz_zero_undef I64:$Rss))), (S2_cl0p I64:$Rss)>;
+
+// Count trailing zeros: 32-bit.
+def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
+def: Pat<(i32 (cttz_zero_undef I32:$Rs)), (S2_ct0 I32:$Rs)>;
+
+// Count leading ones.
+def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
+def: Pat<(i32 (ctlz_zero_undef (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz_zero_undef (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
+
+// Count trailing ones: 32-bit.
+def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
+def: Pat<(i32 (cttz_zero_undef (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
+
+// The 64-bit count leading/trailing operations are defined in HexagonInstrInfoV4.td.
+
+// Bit set/clear/toggle
+
+let hasSideEffects = 0, hasNewValue = 1 in
+class T_SCT_BIT_IMM<string MnOp, bits<3> MinOp>
+ : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, u5Imm:$u5),
+ "$Rd = "#MnOp#"($Rs, #$u5)", [], "", S_2op_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> u5;
+ let IClass = 0b1000;
+ let Inst{27-21} = 0b1100110;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = u5;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rd;
+}
+
+let hasSideEffects = 0, hasNewValue = 1 in
+class T_SCT_BIT_REG<string MnOp, bits<2> MinOp>
+ : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = "#MnOp#"($Rs, $Rt)", [], "", S_3op_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+ let IClass = 0b1100;
+ let Inst{27-22} = 0b011010;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{7-6} = MinOp;
+ let Inst{4-0} = Rd;
+}
+
+def S2_clrbit_i : T_SCT_BIT_IMM<"clrbit", 0b001>;
+def S2_setbit_i : T_SCT_BIT_IMM<"setbit", 0b000>;
+def S2_togglebit_i : T_SCT_BIT_IMM<"togglebit", 0b010>;
+def S2_clrbit_r : T_SCT_BIT_REG<"clrbit", 0b01>;
+def S2_setbit_r : T_SCT_BIT_REG<"setbit", 0b00>;
+def S2_togglebit_r : T_SCT_BIT_REG<"togglebit", 0b10>;
+
+def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, u5ImmPred:$u5)))),
+ (S2_clrbit_i IntRegs:$Rs, u5ImmPred:$u5)>;
+def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, u5ImmPred:$u5))),
+ (S2_setbit_i IntRegs:$Rs, u5ImmPred:$u5)>;
+def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, u5ImmPred:$u5))),
+ (S2_togglebit_i IntRegs:$Rs, u5ImmPred:$u5)>;
+def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, (i32 IntRegs:$Rt))))),
+ (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))),
+ (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))),
+ (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>;
+
+// Bit test
+
+let hasSideEffects = 0 in
+class T_TEST_BIT_IMM<string MnOp, bits<3> MajOp>
+ : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u5Imm:$u5),
+ "$Pd = "#MnOp#"($Rs, #$u5)",
+ [], "", S_2op_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<5> u5;
+ let IClass = 0b1000;
+ let Inst{27-24} = 0b0101;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0;
+ let Inst{12-8} = u5;
+ let Inst{1-0} = Pd;
+}
+
+let hasSideEffects = 0 in
+class T_TEST_BIT_REG<string MnOp, bit IsNeg>
+ : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Pd = "#MnOp#"($Rs, $Rt)",
+ [], "", S_3op_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<5> Rt;
+ let IClass = 0b1100;
+ let Inst{27-22} = 0b011100;
+ let Inst{21} = IsNeg;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{1-0} = Pd;
+}
+
+def S2_tstbit_i : T_TEST_BIT_IMM<"tstbit", 0b000>;
+def S2_tstbit_r : T_TEST_BIT_REG<"tstbit", 0>;
+
+let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
+ def: Pat<(i1 (setne (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)),
+ (S2_tstbit_i IntRegs:$Rs, u5ImmPred:$u5)>;
+ def: Pat<(i1 (setne (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)),
+ (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>;
+ def: Pat<(i1 (trunc (i32 IntRegs:$Rs))),
+ (S2_tstbit_i IntRegs:$Rs, 0)>;
+ def: Pat<(i1 (trunc (i64 DoubleRegs:$Rs))),
+ (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>;
+}
+
+let hasSideEffects = 0 in
+class T_TEST_BITS_IMM<string MnOp, bits<2> MajOp, bit IsNeg>
+ : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u6Imm:$u6),
+ "$Pd = "#MnOp#"($Rs, #$u6)",
+ [], "", S_2op_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<6> u6;
+ let IClass = 0b1000;
+ let Inst{27-24} = 0b0101;
+ let Inst{23-22} = MajOp;
+ let Inst{21} = IsNeg;
+ let Inst{20-16} = Rs;
+ let Inst{13-8} = u6;
+ let Inst{1-0} = Pd;
+}
+
+let hasSideEffects = 0 in
+class T_TEST_BITS_REG<string MnOp, bits<2> MajOp, bit IsNeg>
+ : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Pd = "#MnOp#"($Rs, $Rt)",
+ [], "", S_3op_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<5> Rt;
+ let IClass = 0b1100;
+ let Inst{27-24} = 0b0111;
+ let Inst{23-22} = MajOp;
+ let Inst{21} = IsNeg;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{1-0} = Pd;
+}
+
+def C2_bitsclri : T_TEST_BITS_IMM<"bitsclr", 0b10, 0>;
+def C2_bitsclr : T_TEST_BITS_REG<"bitsclr", 0b10, 0>;
+def C2_bitsset : T_TEST_BITS_REG<"bitsset", 0b01, 0>;
+
+let AddedComplexity = 20 in { // Complexity greater than compare reg-imm.
+ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), u6ImmPred:$u6), 0)),
+ (C2_bitsclri IntRegs:$Rs, u6ImmPred:$u6)>;
+ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), 0)),
+ (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>;
+}
+
+let AddedComplexity = 10 in // Complexity greater than compare reg-reg.
+def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)),
+ (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>;
+
+//===----------------------------------------------------------------------===//
+// STYPE/BIT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/COMPLEX +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/COMPLEX -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PERM +
+//===----------------------------------------------------------------------===//
+
+def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))),
+ (i32 8)),
+ (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))),
+ (i32 16)),
+ (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))),
+ (zextloadi8 (i32 IntRegs:$b))),
+ (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
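+// The pattern above recognizes a 32-bit value assembled byte by byte in
+// reversed order from four adjacent byte loads and selects a single word
+// load followed by A2_swiz (byte swap) instead.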
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/PRED +
+//===----------------------------------------------------------------------===//
+
+// Predicate transfer.
+let hasSideEffects = 0, hasNewValue = 1 in
+def C2_tfrpr : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps),
+ "$Rd = $Ps", [], "", S_2op_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<2> Ps;
+
+ let IClass = 0b1000;
+ let Inst{27-24} = 0b1001;
+ let Inst{22} = 0b1;
+ let Inst{17-16} = Ps;
+ let Inst{4-0} = Rd;
+}
+
+// Transfer general register to predicate.
+let hasSideEffects = 0 in
+def C2_tfrrp: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs),
+ "$Pd = $Rs", [], "", S_2op_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rs;
+
+ let IClass = 0b1000;
+ let Inst{27-21} = 0b0101010;
+ let Inst{20-16} = Rs;
+ let Inst{1-0} = Pd;
+}
+
+let hasSideEffects = 0, isCodeGenOnly = 1 in
+def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src),
+ "$dst = $src">;
+
+
+// Patterns for loads of i1:
+def: Pat<(i1 (load AddrFI:$fi)),
+ (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>;
+def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32ImmPred:$Off))),
+ (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>;
+def: Pat<(i1 (load (i32 IntRegs:$Rs))),
+ (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
+
+def I1toI32: OutPatFrag<(ops node:$Rs),
+ (C2_muxii (i1 $Rs), 1, 0)>;
+
+def I32toI1: OutPatFrag<(ops node:$Rs),
+ (i1 (C2_tfrrp (i32 $Rs)))>;
+
+defm: Storexm_pat<store, I1, s32ImmPred, I1toI32, S2_storerb_io>;
+def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>;
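+// Predicate (i1) values are stored through a general register: I1toI32
+// materializes 0 or 1 with C2_muxii, and the byte store writes that value,
+// so an i1 in memory occupies a single byte holding 0 or 1.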
+
+//===----------------------------------------------------------------------===//
+// STYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/SHIFT +
+//===----------------------------------------------------------------------===//
+class S_2OpInstImm<string Mnemonic, bits<3>MajOp, bits<3>MinOp,
+ Operand Imm, list<dag> pattern = [], bit isRnd = 0>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, Imm:$src2),
+ "$dst = "#Mnemonic#"($src1, #$src2)"#!if(isRnd, ":rnd", ""),
+ pattern> {
+ bits<5> src1;
+ bits<5> dst;
+ let IClass = 0b1000;
+ let Inst{27-24} = 0;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = dst;
+}
+
+class S_2OpInstImmI6<string Mnemonic, SDNode OpNode, bits<3>MinOp>
+ : S_2OpInstImm<Mnemonic, 0b000, MinOp, u6Imm,
+ [(set (i64 DoubleRegs:$dst), (OpNode (i64 DoubleRegs:$src1),
+ u6ImmPred:$src2))]> {
+ bits<6> src2;
+ let Inst{13-8} = src2;
+}
+
+// Shift by immediate.
+def S2_asr_i_p : S_2OpInstImmI6<"asr", sra, 0b000>;
+def S2_asl_i_p : S_2OpInstImmI6<"asl", shl, 0b010>;
+def S2_lsr_i_p : S_2OpInstImmI6<"lsr", srl, 0b001>;
+
+// Shift left by small amount and add.
+let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0 in
+def S2_addasl_rrri: SInst <(outs IntRegs:$Rd),
+ (ins IntRegs:$Rt, IntRegs:$Rs, u3Imm:$u3),
+ "$Rd = addasl($Rt, $Rs, #$u3)" ,
+ [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rt),
+ (shl (i32 IntRegs:$Rs), u3ImmPred:$u3)))],
+ "", S_3op_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rt;
+ bits<5> Rs;
+ bits<3> u3;
+
+ let IClass = 0b1100;
+
+ let Inst{27-21} = 0b0100000;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = u3;
+ let Inst{4-0} = Rd;
+ }
+
+//===----------------------------------------------------------------------===//
+// STYPE/SHIFT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/VH +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/VW +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/VW -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/SUPER +
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/USER +
+//===----------------------------------------------------------------------===//
+def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
+
+let hasSideEffects = 1, isSoloAX = 1 in
+def Y2_barrier : SYSInst<(outs), (ins),
+ "barrier",
+ [(HexagonBARRIER)],"",ST_tc_st_SLOT0> {
+ let Inst{31-28} = 0b1010;
+ let Inst{27-21} = 0b1000000;
+}
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/SUPER -
+//===----------------------------------------------------------------------===//
+
+// Generate frameindex addresses. The main reason for the offset operand is
+// that every instruction that is allowed to have frame index as an operand
+// will then have that operand followed by an immediate operand (the offset).
+// This simplifies the frame-index elimination code.
+//
+let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1,
+ isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in {
+ def TFR_FI : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$fi, s32Imm:$off), "">;
+ def TFR_FIA : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$fi, s32Imm:$off), "">;
+}
+
+//===----------------------------------------------------------------------===//
+// CRUSER - Type.
+//===----------------------------------------------------------------------===//
+// HW loop
+let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2,
+ opExtendable = 0, hasSideEffects = 0 in
+class LOOP_iBase<string mnemonic, Operand brOp, bit mustExtend = 0>
+ : CRInst<(outs), (ins brOp:$offset, u10Imm:$src2),
+ #mnemonic#"($offset, #$src2)",
+ [], "" , CR_tc_3x_SLOT3> {
+ bits<9> offset;
+ bits<10> src2;
+
+ let IClass = 0b0110;
+
+ let Inst{27-22} = 0b100100;
+ let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1);
+ let Inst{20-16} = src2{9-5};
+ let Inst{12-8} = offset{8-4};
+ let Inst{7-5} = src2{4-2};
+ let Inst{4-3} = offset{3-2};
+ let Inst{1-0} = src2{1-0};
+}
+
+let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2,
+ opExtendable = 0, hasSideEffects = 0 in
+class LOOP_rBase<string mnemonic, Operand brOp, bit mustExtend = 0>
+ : CRInst<(outs), (ins brOp:$offset, IntRegs:$src2),
+ #mnemonic#"($offset, $src2)",
+ [], "" ,CR_tc_3x_SLOT3> {
+ bits<9> offset;
+ bits<5> src2;
+
+ let IClass = 0b0110;
+
+ let Inst{27-22} = 0b000000;
+ let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1);
+ let Inst{20-16} = src2;
+ let Inst{12-8} = offset{8-4};
+ let Inst{4-3} = offset{3-2};
+ }
+
+multiclass LOOP_ri<string mnemonic> {
+ def i : LOOP_iBase<mnemonic, brtarget>;
+ def r : LOOP_rBase<mnemonic, brtarget>;
+
+ let isCodeGenOnly = 1, isExtended = 1, opExtendable = 0 in {
+ def iext: LOOP_iBase<mnemonic, brtargetExt, 1>;
+ def rext: LOOP_rBase<mnemonic, brtargetExt, 1>;
+ }
+}
+
+
+let Defs = [SA0, LC0, USR] in
+defm J2_loop0 : LOOP_ri<"loop0">;
+
+// Interestingly, only loop0 instructions appear to set usr.lpcfg.
+let Defs = [SA1, LC1] in
+defm J2_loop1 : LOOP_ri<"loop1">;
+
+let isBranch = 1, isTerminator = 1, hasSideEffects = 0,
+ Defs = [PC, LC0], Uses = [SA0, LC0] in {
+def ENDLOOP0 : Endloop<(outs), (ins brtarget:$offset),
+ ":endloop0",
+ []>;
+}
+
+let isBranch = 1, isTerminator = 1, hasSideEffects = 0,
+ Defs = [PC, LC1], Uses = [SA1, LC1] in {
+def ENDLOOP1 : Endloop<(outs), (ins brtarget:$offset),
+ ":endloop1",
+ []>;
+}
+
+// Pipelined loop instructions, sp[123]loop0
+let Defs = [LC0, SA0, P3, USR], hasSideEffects = 0,
+ isExtentSigned = 1, isExtendable = 1, opExtentBits = 9, opExtentAlign = 2,
+ opExtendable = 0, isPredicateLate = 1 in
+class SPLOOP_iBase<string SP, bits<2> op>
+ : CRInst <(outs), (ins brtarget:$r7_2, u10Imm:$U10),
+ "p3 = sp"#SP#"loop0($r7_2, #$U10)" > {
+ bits<9> r7_2;
+ bits<10> U10;
+
+ let IClass = 0b0110;
+
+ let Inst{22-21} = op;
+ let Inst{27-23} = 0b10011;
+ let Inst{20-16} = U10{9-5};
+ let Inst{12-8} = r7_2{8-4};
+ let Inst{7-5} = U10{4-2};
+ let Inst{4-3} = r7_2{3-2};
+ let Inst{1-0} = U10{1-0};
+ }
+
+let Defs = [LC0, SA0, P3, USR], hasSideEffects = 0,
+ isExtentSigned = 1, isExtendable = 1, opExtentBits = 9, opExtentAlign = 2,
+ opExtendable = 0, isPredicateLate = 1 in
+class SPLOOP_rBase<string SP, bits<2> op>
+ : CRInst <(outs), (ins brtarget:$r7_2, IntRegs:$Rs),
+ "p3 = sp"#SP#"loop0($r7_2, $Rs)" > {
+ bits<9> r7_2;
+ bits<5> Rs;
+
+ let IClass = 0b0110;
+
+ let Inst{22-21} = op;
+ let Inst{27-23} = 0b00001;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = r7_2{8-4};
+ let Inst{4-3} = r7_2{3-2};
+ }
+
+multiclass SPLOOP_ri<string mnemonic, bits<2> op> {
+ def i : SPLOOP_iBase<mnemonic, op>;
+ def r : SPLOOP_rBase<mnemonic, op>;
+}
+
+defm J2_ploop1s : SPLOOP_ri<"1", 0b01>;
+defm J2_ploop2s : SPLOOP_ri<"2", 0b10>;
+defm J2_ploop3s : SPLOOP_ri<"3", 0b11>;
+
+// if (Rs[!>=<]=#0) jump:[t/nt]
+let Defs = [PC], isPredicated = 1, isBranch = 1, hasSideEffects = 0,
+ hasSideEffects = 0 in
+class J2_jump_0_Base<string compare, bit isTak, bits<2> op>
+ : CRInst <(outs), (ins IntRegs:$Rs, brtarget:$r13_2),
+ "if ($Rs"#compare#"#0) jump"#!if(isTak, ":t", ":nt")#" $r13_2" > {
+ bits<5> Rs;
+ bits<15> r13_2;
+
+ let IClass = 0b0110;
+
+ let Inst{27-24} = 0b0001;
+ let Inst{23-22} = op;
+ let Inst{12} = isTak;
+ let Inst{21} = r13_2{14};
+ let Inst{20-16} = Rs;
+ let Inst{11-1} = r13_2{12-2};
+ let Inst{13} = r13_2{13};
+ }
+
+multiclass J2_jump_compare_0<string compare, bits<2> op> {
+ def NAME : J2_jump_0_Base<compare, 0, op>;
+ def NAME#pt : J2_jump_0_Base<compare, 1, op>;
+}
+
+defm J2_jumprz : J2_jump_compare_0<"!=", 0b00>;
+defm J2_jumprgtez : J2_jump_compare_0<">=", 0b01>;
+defm J2_jumprnz : J2_jump_compare_0<"==", 0b10>;
+defm J2_jumprltez : J2_jump_compare_0<"<=", 0b11>;
+
+// Transfer to/from Control/GPR Guest/GPR
+let hasSideEffects = 0 in
+class TFR_CR_RS_base<RegisterClass CTRC, RegisterClass RC, bit isDouble>
+ : CRInst <(outs CTRC:$dst), (ins RC:$src),
+ "$dst = $src", [], "", CR_tc_3x_SLOT3> {
+ bits<5> dst;
+ bits<5> src;
+
+ let IClass = 0b0110;
+
+ let Inst{27-25} = 0b001;
+ let Inst{24} = isDouble;
+ let Inst{23-21} = 0b001;
+ let Inst{20-16} = src;
+ let Inst{4-0} = dst;
+ }
+
+def A2_tfrrcr : TFR_CR_RS_base<CtrRegs, IntRegs, 0b0>;
+def A4_tfrpcp : TFR_CR_RS_base<CtrRegs64, DoubleRegs, 0b1>;
+def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>;
+def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>;
+
+let hasSideEffects = 0 in
+class TFR_RD_CR_base<RegisterClass RC, RegisterClass CTRC, bit isSingle>
+ : CRInst <(outs RC:$dst), (ins CTRC:$src),
+ "$dst = $src", [], "", CR_tc_3x_SLOT3> {
+ bits<5> dst;
+ bits<5> src;
+
+ let IClass = 0b0110;
+
+ let Inst{27-26} = 0b10;
+ let Inst{25} = isSingle;
+ let Inst{24-21} = 0b0000;
+ let Inst{20-16} = src;
+ let Inst{4-0} = dst;
+ }
+
+let hasNewValue = 1, opNewValue = 0 in
+def A2_tfrcrr : TFR_RD_CR_base<IntRegs, CtrRegs, 1>;
+def A4_tfrcpp : TFR_RD_CR_base<DoubleRegs, CtrRegs64, 0>;
+def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>;
+def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>;
+
+// Y4_trace: Send value to ETM trace.
+let isSoloAX = 1, hasSideEffects = 0 in
+def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs),
+ "trace($Rs)"> {
+ bits<5> Rs;
+
+ let IClass = 0b0110;
+ let Inst{27-21} = 0b0010010;
+ let Inst{20-16} = Rs;
+ }
+
+// Support for generating global addresses.
+// Taken from X86InstrInfo.td.
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
+ SDTCisPtrTy<0>]>;
+def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
+def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
+
+// HI/LO Instructions
+let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
+ hasNewValue = 1, opNewValue = 0 in
+class REG_IMMED<string RegHalf, string Op, bit Rs, bits<3> MajOp, bit MinOp>
+ : ALU32_ri<(outs IntRegs:$dst),
+ (ins i32imm:$imm_value),
+ "$dst"#RegHalf#" = #"#Op#"($imm_value)", []> {
+ bits<5> dst;
+ bits<32> imm_value;
+ let IClass = 0b0111;
+
+ let Inst{27} = Rs;
+ let Inst{26-24} = MajOp;
+ let Inst{21} = MinOp;
+ let Inst{20-16} = dst;
+ let Inst{23-22} = !if (!eq(Op, "LO"), imm_value{15-14}, imm_value{31-30});
+ let Inst{13-0} = !if (!eq(Op, "LO"), imm_value{13-0}, imm_value{29-16});
+}
+
+let isAsmParserOnly = 1 in {
+ def LO : REG_IMMED<".l", "LO", 0b0, 0b001, 0b1>;
+ def LO_H : REG_IMMED<".l", "HI", 0b0, 0b001, 0b1>;
+ def HI : REG_IMMED<".h", "HI", 0b0, 0b010, 0b1>;
+ def HI_L : REG_IMMED<".h", "LO", 0b0, 0b010, 0b1>;
+}
+
+let isMoveImm = 1, isCodeGenOnly = 1 in
+def LO_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
+ "$dst.l = #LO($label@GOTREL)",
+ []>;
+
+let isMoveImm = 1, isCodeGenOnly = 1 in
+def HI_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
+ "$dst.h = #HI($label@GOTREL)",
+ []>;
+
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def HI_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.h = #HI($global@GOT)",
+ []>;
+
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def LO_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.l = #LO($global@GOT)",
+ []>;
+
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def HI_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.h = #HI($global@GOTREL)",
+ []>;
+
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def LO_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.l = #LO($global@GOTREL)",
+ []>;
+
+// This pattern is incorrect. When we add small data, we should change
+// this pattern to use memw(#foo).
+// This is for sdata.
+let isMoveImm = 1, isAsmParserOnly = 1 in
+def CONST32 : CONSTLDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set (i32 IntRegs:$dst),
+ (load (HexagonCONST32 tglobaltlsaddr:$global)))]>;
+
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
+def CONST32_Int_Real : CONSTLDInst<(outs IntRegs:$dst), (ins i32imm:$global),
+ "$dst = CONST32(#$global)",
+ [(set (i32 IntRegs:$dst), imm:$global) ]>;
+
+// Map TLS addresses to a CONST32 instruction.
+def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16Ext:$addr)>;
+def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16Ext:$label)>;
+
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
+def CONST64_Int_Real : CONSTLDInst<(outs DoubleRegs:$dst), (ins i64imm:$global),
+ "$dst = CONST64(#$global)",
+ [(set (i64 DoubleRegs:$dst), imm:$global)]>;
+
+let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
+ isCodeGenOnly = 1 in
+def TFR_PdTrue : SInst<(outs PredRegs:$dst), (ins), "",
+ [(set (i1 PredRegs:$dst), 1)]>;
+
+let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
+ isCodeGenOnly = 1 in
+def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins), "$dst = xor($dst, $dst)",
+ [(set (i1 PredRegs:$dst), 0)]>;
+
+// Pseudo instructions.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+// For tail calls, a HexagonTCRet SDNode has three SDNode properties: a chain,
+// an optional glue, and variadic arguments.
+// Its single operand has pointer type.
+def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+let Defs = [R29, R30], Uses = [R31, R30, R29], isPseudo = 1 in
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ ".error \"should not emit\" ",
+ [(callseq_start timm:$amt)]>;
+
+let Defs = [R29, R30, R31], Uses = [R29], isPseudo = 1 in
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ ".error \"should not emit\" ",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+
+// Call subroutine indirectly.
+let Defs = VolatileV3.Regs in
+def J2_callr : JUMPR_MISC_CALLR<0, 1>;
+
+// Indirect tail-call.
+let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
+ isTerminator = 1, isCodeGenOnly = 1 in
+def TCRETURNr : T_JMPr;
+
+// Direct tail-calls.
+let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
+ isTerminator = 1, isCodeGenOnly = 1 in
+def TCRETURNi : JInst<(outs), (ins calltarget:$dst), "", []>;
+
+//Tail calls.
+def: Pat<(HexagonTCRet tglobaladdr:$dst),
+ (TCRETURNi tglobaladdr:$dst)>;
+def: Pat<(HexagonTCRet texternalsym:$dst),
+ (TCRETURNi texternalsym:$dst)>;
+def: Pat<(HexagonTCRet (i32 IntRegs:$dst)),
+ (TCRETURNr IntRegs:$dst)>;
+
+// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
+def: Pat<(and (i32 IntRegs:$src1), 65535),
+ (A2_zxth IntRegs:$src1)>;
+
+// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
+def: Pat<(and (i32 IntRegs:$src1), 255),
+ (A2_zxtb IntRegs:$src1)>;
+
+// Map Add(p1, true) to p1 = not(p1).
+// Add(p1, false) should never be produced;
+// if it is, it has to be mapped to a NOP.
+def: Pat<(add (i1 PredRegs:$src1), -1),
+ (C2_not PredRegs:$src1)>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
+def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s32ImmPred:$src3),
+ (C2_muxii PredRegs:$src1, s32ImmPred:$src3, s8ImmPred:$src2)>;
+
+// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
+// => r0 = C2_muxir(p0, r1, #i)
+def: Pat<(select (not (i1 PredRegs:$src1)), s32ImmPred:$src2,
+ (i32 IntRegs:$src3)),
+ (C2_muxir PredRegs:$src1, IntRegs:$src3, s32ImmPred:$src2)>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
+// => r0 = C2_muxri (p0, #i, r1)
+def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32ImmPred:$src3),
+ (C2_muxri PredRegs:$src1, s32ImmPred:$src3, IntRegs:$src2)>;
+
+// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
+def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset),
+ (J2_jumpf PredRegs:$src1, bb:$offset)>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo).
+def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)),
+ (A2_sxtw (LoReg DoubleRegs:$src1))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)).
+def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)),
+ (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)).
+def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)),
+ (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>;
+
+// We want to avoid emitting pnots as much as possible.
+// Map brcond with an unsupported setcc to a J2_jumpf.
+def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ bb:$offset),
+ (J2_jumpf (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+ bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)),
+ bb:$offset),
+ (J2_jumpf (C2_cmpeqi (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>;
+
+def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset),
+ (J2_jumpf PredRegs:$src1, bb:$offset)>;
+
+def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset),
+ (J2_jumpt PredRegs:$src1, bb:$offset)>;
+
+// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1)
+def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), bb:$offset),
+ (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ImmPred:$src2)),
+ bb:$offset)>;
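+// For example, a conditional branch on (Rs < #5) is selected as a false
+// branch on cmp.gt(Rs, #4); DEC_CONST_SIGNED folds the Imm-1 adjustment.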
+
+// Map from a 64-bit select to an emulated 64-bit mux.
+// Hexagon does not support 64-bit MUXes, so emulate them with combines.
+def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src3)),
+ (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2),
+ (HiReg DoubleRegs:$src3)),
+ (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2),
+ (LoReg DoubleRegs:$src3)))>;
+
+// Map from a 1-bit select to logical ops.
+// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
+def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)),
+ (C2_or (C2_and PredRegs:$src1, PredRegs:$src2),
+ (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>;
+
+// Map truncation of 64-bit values to 32-bit values.
+def: Pat<(i32 (trunc (i64 DoubleRegs:$src))),
+ (LoReg DoubleRegs:$src)>;
+
+// Map truncation of i64 values to i1 values.
+def: Pat<(i1 (trunc (i64 DoubleRegs:$src))),
+ (C2_tfrrp (LoReg DoubleRegs:$src))>;
+
+// rs <= rt -> !(rs > rt).
+let AddedComplexity = 30 in
+def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>;
+
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (C2_not (C2_cmpgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
+
+// Rss <= Rtt -> !(Rss > Rtt).
+def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Map cmpne -> cmpeq.
+// Hexagon_TODO: We should improve on this.
+// rs != rt -> !(rs == rt).
+let AddedComplexity = 30 in
+def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpeqi IntRegs:$src1, s32ImmPred:$src2))>;
+
+// Convert setne back to xor for Hexagon, since we compute with predicate registers.
+def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))),
+ (C2_xor PredRegs:$src1, PredRegs:$src2)>;
+
+// Map cmpne(Rss) -> !cmpew(Rss).
+// rs != rt -> !(rs == rt).
+def: Pat<(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (C2_not (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>;
+
+// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
+let AddedComplexity = 30 in
+def: Pat<(i1 (setge (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>;
+
+// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
+// rss >= rtt -> !(rtt > rss).
+def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
+// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1).
+// rs < rt -> !(rs >= rt).
+let AddedComplexity = 30 in
+def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1,
+ (DEC_CONST_SIGNED s32ImmPred:$src2)))>;
+
+// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs)
+def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)),
+ (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>;
+
+// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1)
+def: Pat<(i1 (setuge (i32 IntRegs:$src1), u32ImmPred:$src2)),
+ (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32ImmPred:$src2))>;
+
+// Generate cmpgtu(Rs, #u9)
+def: Pat<(i1 (setugt (i32 IntRegs:$src1), u32ImmPred:$src2)),
+ (C2_cmpgtui IntRegs:$src1, u32ImmPred:$src2)>;
+
+// Map cmpgeu(Rss, Rtt) -> !cmpgtu(Rtt, Rss).
+// rs >= rt -> !(rt > rs).
+def: Pat<(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def: Pat<(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Sign extends.
+// i1 -> i32
+def: Pat<(i32 (sext (i1 PredRegs:$src1))),
+ (C2_muxii PredRegs:$src1, -1, 0)>;
+
+// i1 -> i64
+def: Pat<(i64 (sext (i1 PredRegs:$src1))),
+ (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>;
+
+// Zero extends.
+// i1 -> i32
+def: Pat<(i32 (zext (i1 PredRegs:$src1))),
+ (C2_muxii PredRegs:$src1, 1, 0)>;
+
+// Map from Rd = Pd to Rd = mux(Pd, #1, #0).
+def: Pat<(i32 (anyext (i1 PredRegs:$src1))),
+ (C2_muxii PredRegs:$src1, 1, 0)>;
+
+// Map from Rdd = Pd to Rdd = sxtw(mux(Pd, #1, #0)).
+def: Pat<(i64 (anyext (i1 PredRegs:$src1))),
+ (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>;
+
+// Multiply 64-bit unsigned and use upper result.
+def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
+ (A2_addp
+ (M2_dpmpyuu_acc_s0
+ (S2_lsr_i_p
+ (A2_addp
+ (M2_dpmpyuu_acc_s0
+ (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (LoReg $src2)), 32),
+ (HiReg $src1),
+ (LoReg $src2)),
+ (A2_combinew (A2_tfrsi 0),
+ (LoReg (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2))))),
+ 32),
+ (HiReg $src1),
+ (HiReg $src2)),
+ (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2)), 32)
+)>;
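+// The expansion above computes the upper 64 bits with 32x32->64 partial
+// products (M2_dpmpyuu). Writing a = a_hi:a_lo and b = b_hi:b_lo, the result
+// is a_hi*b_hi + hi32(a_hi*b_lo + hi32(a_lo*b_lo) + lo32(a_lo*b_hi))
+// + hi32(a_lo*b_hi), where hi32/lo32 take the upper/lower 32 bits of a
+// 64-bit partial product.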
+
+// Hexagon specific ISD nodes.
+def SDTHexagonALLOCA : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA,
+ [SDNPHasChain]>;
+
+// The reason for the custom inserter is to record all ALLOCA instructions
+// in MachineFunctionInfo.
+let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1,
+ usesCustomInserter = 1 in
+def ALLOCA: ALU32Inst<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, u32Imm:$A), "",
+ [(set (i32 IntRegs:$Rd),
+ (HexagonALLOCA (i32 IntRegs:$Rs), (i32 imm:$A)))]>;
+
+let isCodeGenOnly = 1, isPseudo = 1, Uses = [R30], hasSideEffects = 0 in
+def ALIGNA : ALU32Inst<(outs IntRegs:$Rd), (ins u32Imm:$A), "", []>;
+
+def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
+def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>;
+let isCodeGenOnly = 1 in
+def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ [(set (i32 IntRegs:$dst),
+ (Hexagon_ARGEXTEND (i32 IntRegs:$src1)))]>;
+
+let AddedComplexity = 100 in
+def: Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)),
+ (i32 IntRegs:$src1)>;
+
+def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>;
+def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>;
+
+def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi s16Ext:$dst)>;
+def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi s16Ext:$dst)>;
+
+// XTYPE/SHIFT
+//
+//===----------------------------------------------------------------------===//
+// Template Class
+// Shift by immediate/register and accumulate/logical
+//===----------------------------------------------------------------------===//
+
+// Rx[+-&|]=asr(Rs,#u5)
+// Rx[+-&|^]=lsr(Rs,#u5)
+// Rx[+-&|^]=asl(Rs,#u5)
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_shift_imm_acc_r <string opc1, string opc2, SDNode OpNode1,
+ SDNode OpNode2, bits<3> majOp, bits<2> minOp>
+ : SInst_acc<(outs IntRegs:$Rx),
+ (ins IntRegs:$src1, IntRegs:$Rs, u5Imm:$u5),
+ "$Rx "#opc2#opc1#"($Rs, #$u5)",
+ [(set (i32 IntRegs:$Rx),
+ (OpNode2 (i32 IntRegs:$src1),
+ (OpNode1 (i32 IntRegs:$Rs), u5ImmPred:$u5)))],
+ "$src1 = $Rx", S_2op_tc_2_SLOT23> {
+ bits<5> Rx;
+ bits<5> Rs;
+ bits<5> u5;
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = 0b1110;
+ let Inst{23-22} = majOp{2-1};
+ let Inst{13} = 0b0;
+ let Inst{7} = majOp{0};
+ let Inst{6-5} = minOp;
+ let Inst{4-0} = Rx;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = u5;
+ }
+
+// Rx[+-&|]=asr(Rs,Rt)
+// Rx[+-&|^]=lsr(Rs,Rt)
+// Rx[+-&|^]=asl(Rs,Rt)
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_shift_reg_acc_r <string opc1, string opc2, SDNode OpNode1,
+ SDNode OpNode2, bits<2> majOp, bits<2> minOp>
+ : SInst_acc<(outs IntRegs:$Rx),
+ (ins IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt),
+ "$Rx "#opc2#opc1#"($Rs, $Rt)",
+ [(set (i32 IntRegs:$Rx),
+ (OpNode2 (i32 IntRegs:$src1),
+ (OpNode1 (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))],
+ "$src1 = $Rx", S_3op_tc_2_SLOT23 > {
+ bits<5> Rx;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b1100;
+ let Inst{23-22} = majOp;
+ let Inst{7-6} = minOp;
+ let Inst{4-0} = Rx;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ }
+
+// Rxx[+-&|]=asr(Rss,#u6)
+// Rxx[+-&|^]=lsr(Rss,#u6)
+// Rxx[+-&|^]=asl(Rss,#u6)
+
+class T_shift_imm_acc_p <string opc1, string opc2, SDNode OpNode1,
+ SDNode OpNode2, bits<3> majOp, bits<2> minOp>
+ : SInst_acc<(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$src1, DoubleRegs:$Rss, u6Imm:$u6),
+ "$Rxx "#opc2#opc1#"($Rss, #$u6)",
+ [(set (i64 DoubleRegs:$Rxx),
+ (OpNode2 (i64 DoubleRegs:$src1),
+ (OpNode1 (i64 DoubleRegs:$Rss), u6ImmPred:$u6)))],
+ "$src1 = $Rxx", S_2op_tc_2_SLOT23> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<6> u6;
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = 0b0010;
+ let Inst{23-22} = majOp{2-1};
+ let Inst{7} = majOp{0};
+ let Inst{6-5} = minOp;
+ let Inst{4-0} = Rxx;
+ let Inst{20-16} = Rss;
+ let Inst{13-8} = u6;
+ }
+
+
+// Rxx[+-&|]=asr(Rss,Rt)
+// Rxx[+-&|^]=lsr(Rss,Rt)
+// Rxx[+-&|^]=asl(Rss,Rt)
+// Rxx[+-&|^]=lsl(Rss,Rt)
+
+class T_shift_reg_acc_p <string opc1, string opc2, SDNode OpNode1,
+ SDNode OpNode2, bits<3> majOp, bits<2> minOp>
+ : SInst_acc<(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$src1, DoubleRegs:$Rss, IntRegs:$Rt),
+ "$Rxx "#opc2#opc1#"($Rss, $Rt)",
+ [(set (i64 DoubleRegs:$Rxx),
+ (OpNode2 (i64 DoubleRegs:$src1),
+ (OpNode1 (i64 DoubleRegs:$Rss), (i32 IntRegs:$Rt))))],
+ "$src1 = $Rxx", S_3op_tc_2_SLOT23> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rt;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b1011;
+ let Inst{23-21} = majOp;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rt;
+ let Inst{7-6} = minOp;
+ let Inst{4-0} = Rxx;
+ }
+
+//===----------------------------------------------------------------------===//
+// Multi-class for the shift instructions with logical/arithmetic operators.
+//===----------------------------------------------------------------------===//
+
+multiclass xtype_imm_base<string OpcStr1, string OpcStr2, SDNode OpNode1,
+ SDNode OpNode2, bits<3> majOp, bits<2> minOp > {
+ def _i_r#NAME : T_shift_imm_acc_r< OpcStr1, OpcStr2, OpNode1,
+ OpNode2, majOp, minOp >;
+ def _i_p#NAME : T_shift_imm_acc_p< OpcStr1, OpcStr2, OpNode1,
+ OpNode2, majOp, minOp >;
+}
+
+multiclass xtype_imm_acc<string opc1, SDNode OpNode, bits<2>minOp> {
+ let AddedComplexity = 100 in
+ defm _acc : xtype_imm_base< opc1, "+= ", OpNode, add, 0b001, minOp>;
+
+ defm _nac : xtype_imm_base< opc1, "-= ", OpNode, sub, 0b000, minOp>;
+ defm _and : xtype_imm_base< opc1, "&= ", OpNode, and, 0b010, minOp>;
+ defm _or : xtype_imm_base< opc1, "|= ", OpNode, or, 0b011, minOp>;
+}
+
+multiclass xtype_xor_imm_acc<string opc1, SDNode OpNode, bits<2>minOp> {
+let AddedComplexity = 100 in
+ defm _xacc : xtype_imm_base< opc1, "^= ", OpNode, xor, 0b100, minOp>;
+}
+
+defm S2_asr : xtype_imm_acc<"asr", sra, 0b00>;
+
+defm S2_lsr : xtype_imm_acc<"lsr", srl, 0b01>,
+ xtype_xor_imm_acc<"lsr", srl, 0b01>;
+
+defm S2_asl : xtype_imm_acc<"asl", shl, 0b10>,
+ xtype_xor_imm_acc<"asl", shl, 0b10>;
+
+multiclass xtype_reg_acc_r<string opc1, SDNode OpNode, bits<2>minOp> {
+ let AddedComplexity = 100 in
+ def _acc : T_shift_reg_acc_r <opc1, "+= ", OpNode, add, 0b11, minOp>;
+
+ def _nac : T_shift_reg_acc_r <opc1, "-= ", OpNode, sub, 0b10, minOp>;
+ def _and : T_shift_reg_acc_r <opc1, "&= ", OpNode, and, 0b01, minOp>;
+ def _or : T_shift_reg_acc_r <opc1, "|= ", OpNode, or, 0b00, minOp>;
+}
+
+multiclass xtype_reg_acc_p<string opc1, SDNode OpNode, bits<2>minOp> {
+ let AddedComplexity = 100 in
+ def _acc : T_shift_reg_acc_p <opc1, "+= ", OpNode, add, 0b110, minOp>;
+
+ def _nac : T_shift_reg_acc_p <opc1, "-= ", OpNode, sub, 0b100, minOp>;
+ def _and : T_shift_reg_acc_p <opc1, "&= ", OpNode, and, 0b010, minOp>;
+ def _or : T_shift_reg_acc_p <opc1, "|= ", OpNode, or, 0b000, minOp>;
+ def _xor : T_shift_reg_acc_p <opc1, "^= ", OpNode, xor, 0b011, minOp>;
+}
+
+multiclass xtype_reg_acc<string OpcStr, SDNode OpNode, bits<2> minOp > {
+ defm _r_r : xtype_reg_acc_r <OpcStr, OpNode, minOp>;
+ defm _r_p : xtype_reg_acc_p <OpcStr, OpNode, minOp>;
+}
+
+defm S2_asl : xtype_reg_acc<"asl", shl, 0b10>;
+defm S2_asr : xtype_reg_acc<"asr", sra, 0b00>;
+defm S2_lsr : xtype_reg_acc<"lsr", srl, 0b01>;
+defm S2_lsl : xtype_reg_acc<"lsl", shl, 0b11>;
+
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in
+class T_S3op_1 <string mnemonic, RegisterClass RC, bits<2> MajOp, bits<3> MinOp,
+ bit SwapOps, bit isSat = 0, bit isRnd = 0, bit hasShift = 0>
+ : SInst <(outs RC:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = "#mnemonic#"($src1, $src2)"#!if(isRnd, ":rnd", "")
+ #!if(hasShift,":>>1","")
+ #!if(isSat, ":sat", ""),
+ [], "", S_3op_tc_2_SLOT23 > {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b0001;
+ let Inst{23-22} = MajOp;
+ let Inst{20-16} = !if (SwapOps, src2, src1);
+ let Inst{12-8} = !if (SwapOps, src1, src2);
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = dst;
+ }
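+// When SwapOps is set, the two source operands trade places in the encoding:
+// $src2 is placed in bits 20-16 and $src1 in bits 12-8 (and vice versa when
+// it is clear).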
+
+class T_S3op_64 <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit SwapOps,
+ bit isSat = 0, bit isRnd = 0, bit hasShift = 0 >
+ : T_S3op_1 <mnemonic, DoubleRegs, MajOp, MinOp, SwapOps,
+ isSat, isRnd, hasShift>;
+
+let Itinerary = S_3op_tc_1_SLOT23 in {
+ def S2_shuffeb : T_S3op_64 < "shuffeb", 0b00, 0b010, 0>;
+ def S2_shuffeh : T_S3op_64 < "shuffeh", 0b00, 0b110, 0>;
+ def S2_shuffob : T_S3op_64 < "shuffob", 0b00, 0b100, 1>;
+ def S2_shuffoh : T_S3op_64 < "shuffoh", 0b10, 0b000, 1>;
+
+ def S2_vtrunewh : T_S3op_64 < "vtrunewh", 0b10, 0b010, 0>;
+ def S2_vtrunowh : T_S3op_64 < "vtrunowh", 0b10, 0b100, 0>;
+}
+
+def S2_lfsp : T_S3op_64 < "lfs", 0b10, 0b110, 0>;
+
+let hasSideEffects = 0 in
+class T_S3op_2 <string mnemonic, bits<3> MajOp, bit SwapOps>
+ : SInst < (outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, PredRegs:$Pu),
+ "$Rdd = "#mnemonic#"($Rss, $Rtt, $Pu)",
+ [], "", S_3op_tc_1_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+ bits<2> Pu;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b0010;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = !if (SwapOps, Rtt, Rss);
+ let Inst{12-8} = !if (SwapOps, Rss, Rtt);
+ let Inst{6-5} = Pu;
+ let Inst{4-0} = Rdd;
+ }
+
+def S2_valignrb : T_S3op_2 < "valignb", 0b000, 1>;
+def S2_vsplicerb : T_S3op_2 < "vspliceb", 0b100, 0>;
+
+//===----------------------------------------------------------------------===//
+// Template class used by vector shift, vector rotate, vector neg,
+// 32-bit shift, 64-bit shifts, etc.
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0 in
+class T_S3op_3 <string mnemonic, RegisterClass RC, bits<2> MajOp,
+ bits<2> MinOp, bit isSat = 0, list<dag> pattern = [] >
+ : SInst <(outs RC:$dst),
+ (ins RC:$src1, IntRegs:$src2),
+ "$dst = "#mnemonic#"($src1, $src2)"#!if(isSat, ":sat", ""),
+ pattern, "", S_3op_tc_1_SLOT23> {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = !if(!eq(!cast<string>(RC), "IntRegs"), 0b0110, 0b0011);
+ let Inst{23-22} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{12-8} = src2;
+ let Inst{7-6} = MinOp;
+ let Inst{4-0} = dst;
+ }
+
+let hasNewValue = 1 in
+class T_S3op_shift32 <string mnemonic, SDNode OpNode, bits<2> MinOp>
+ : T_S3op_3 <mnemonic, IntRegs, 0b01, MinOp, 0,
+ [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in
+class T_S3op_shift32_Sat <string mnemonic, bits<2> MinOp>
+ : T_S3op_3 <mnemonic, IntRegs, 0b00, MinOp, 1, []>;
+
+
+class T_S3op_shift64 <string mnemonic, SDNode OpNode, bits<2> MinOp>
+ : T_S3op_3 <mnemonic, DoubleRegs, 0b10, MinOp, 0,
+ [(set (i64 DoubleRegs:$dst), (OpNode (i64 DoubleRegs:$src1),
+ (i32 IntRegs:$src2)))]>;
+
+
+class T_S3op_shiftVect <string mnemonic, bits<2> MajOp, bits<2> MinOp>
+ : T_S3op_3 <mnemonic, DoubleRegs, MajOp, MinOp, 0, []>;
+
+
+// Shift by register
+// Rdd=[asr|lsr|asl|lsl](Rss,Rt)
+
+def S2_asr_r_p : T_S3op_shift64 < "asr", sra, 0b00>;
+def S2_lsr_r_p : T_S3op_shift64 < "lsr", srl, 0b01>;
+def S2_asl_r_p : T_S3op_shift64 < "asl", shl, 0b10>;
+def S2_lsl_r_p : T_S3op_shift64 < "lsl", shl, 0b11>;
+
+// Rd=[asr|lsr|asl|lsl](Rs,Rt)
+
+def S2_asr_r_r : T_S3op_shift32<"asr", sra, 0b00>;
+def S2_lsr_r_r : T_S3op_shift32<"lsr", srl, 0b01>;
+def S2_asl_r_r : T_S3op_shift32<"asl", shl, 0b10>;
+def S2_lsl_r_r : T_S3op_shift32<"lsl", shl, 0b11>;
+
+// Shift by register with saturation
+// Rd=asr(Rs,Rt):sat
+// Rd=asl(Rs,Rt):sat
+
+let Defs = [USR_OVF] in {
+ def S2_asr_r_r_sat : T_S3op_shift32_Sat<"asr", 0b00>;
+ def S2_asl_r_r_sat : T_S3op_shift32_Sat<"asl", 0b10>;
+}
+
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_S3op_8 <string opc, bits<3> MinOp, bit isSat, bit isRnd, bit hasShift, bit hasSplat = 0>
+ : SInst < (outs IntRegs:$Rd),
+ (ins DoubleRegs:$Rss, IntRegs:$Rt),
+ "$Rd = "#opc#"($Rss, $Rt"#!if(hasSplat, "*", "")#")"
+ #!if(hasShift, ":<<1", "")
+ #!if(isRnd, ":rnd", "")
+ #!if(isSat, ":sat", ""),
+ [], "", S_3op_tc_1_SLOT23 > {
+ bits<5> Rd;
+ bits<5> Rss;
+ bits<5> Rt;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b0101;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rd;
+ }
+
+def S2_asr_r_svw_trun : T_S3op_8<"vasrw", 0b010, 0, 0, 0>;
+
+let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in
+def S2_vcrotate : T_S3op_shiftVect < "vcrotate", 0b11, 0b00>;
+
+let hasSideEffects = 0 in
+class T_S3op_7 <string mnemonic, bit MajOp >
+ : SInst <(outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, u3Imm:$u3),
+ "$Rdd = "#mnemonic#"($Rss, $Rtt, #$u3)" ,
+ [], "", S_3op_tc_1_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+ bits<3> u3;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b0000;
+ let Inst{23} = MajOp;
+ let Inst{20-16} = !if(MajOp, Rss, Rtt);
+ let Inst{12-8} = !if(MajOp, Rtt, Rss);
+ let Inst{7-5} = u3;
+ let Inst{4-0} = Rdd;
+ }
+
+def S2_valignib : T_S3op_7 < "valignb", 0>;
+def S2_vspliceib : T_S3op_7 < "vspliceb", 1>;
+
+//===----------------------------------------------------------------------===//
+// Template class for 'insert bitfield' instructions
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in
+class T_S3op_insert <string mnemonic, RegisterClass RC>
+ : SInst <(outs RC:$dst),
+ (ins RC:$src1, RC:$src2, DoubleRegs:$src3),
+ "$dst = "#mnemonic#"($src2, $src3)" ,
+ [], "$src1 = $dst", S_3op_tc_1_SLOT23 > {
+ bits<5> dst;
+ bits<5> src2;
+ bits<5> src3;
+
+ let IClass = 0b1100;
+
+ let Inst{27-26} = 0b10;
+ let Inst{25-24} = !if(!eq(!cast<string>(RC), "IntRegs"), 0b00, 0b10);
+ let Inst{23} = 0b0;
+ let Inst{20-16} = src2;
+ let Inst{12-8} = src3;
+ let Inst{4-0} = dst;
+ }
+
+let hasSideEffects = 0 in
+class T_S2op_insert <bits<4> RegTyBits, RegisterClass RC, Operand ImmOp>
+ : SInst <(outs RC:$dst), (ins RC:$dst2, RC:$src1, ImmOp:$src2, ImmOp:$src3),
+ "$dst = insert($src1, #$src2, #$src3)",
+ [], "$dst2 = $dst", S_2op_tc_2_SLOT23> {
+ bits<5> dst;
+ bits<5> src1;
+ bits<6> src2;
+ bits<6> src3;
+ bit bit23;
+ bit bit13;
+ string ImmOpStr = !cast<string>(ImmOp);
+
+ let bit23 = !if (!eq(ImmOpStr, "u6Imm"), src3{5}, 0);
+ let bit13 = !if (!eq(ImmOpStr, "u6Imm"), src2{5}, 0);
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = RegTyBits;
+ let Inst{23} = bit23;
+ let Inst{22-21} = src3{4-3};
+ let Inst{20-16} = src1;
+ let Inst{13} = bit13;
+ let Inst{12-8} = src2{4-0};
+ let Inst{7-5} = src3{2-0};
+ let Inst{4-0} = dst;
+ }
+
+// Rx=insert(Rs,Rtt)
+// Rx=insert(Rs,#u5,#U5)
+let hasNewValue = 1 in {
+ def S2_insert_rp : T_S3op_insert <"insert", IntRegs>;
+ def S2_insert : T_S2op_insert <0b1111, IntRegs, u5Imm>;
+}
+
+// Rxx=insert(Rss,Rtt)
+// Rxx=insert(Rss,#u6,#U6)
+def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>;
+def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6Imm>;
+
+
+def SDTHexagonINSERT:
+ SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
+def SDTHexagonINSERTRP:
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisVT<3, i64>]>;
+
+def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>;
+def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>;
+
+def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2),
+ (S2_insert I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2)>;
+def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2),
+ (S2_insertp I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2)>;
+def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru),
+ (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>;
+def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru),
+ (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>;
+
+let AddedComplexity = 100 in
+def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))),
+ (i32 (extloadi8 (add I32:$b, 3))),
+ 24, 8),
+ (i32 16)),
+ (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
+ (zextloadi8 I32:$b)),
+ (A2_swiz (L2_loadri_io I32:$b, 0))>;
+
+
+//===----------------------------------------------------------------------===//
+// Template class for 'extract bitfield' instructions
+//===----------------------------------------------------------------------===//
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_S3op_extract <string mnemonic, bits<2> MinOp>
+ : SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs, DoubleRegs:$Rtt),
+ "$Rd = "#mnemonic#"($Rs, $Rtt)",
+ [], "", S_3op_tc_2_SLOT23 > {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rtt;
+
+ let IClass = 0b1100;
+
+ let Inst{27-22} = 0b100100;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rtt;
+ let Inst{7-6} = MinOp;
+ let Inst{4-0} = Rd;
+ }
+
+let hasSideEffects = 0 in
+class T_S2op_extract <string mnemonic, bits<4> RegTyBits,
+ RegisterClass RC, Operand ImmOp>
+ : SInst <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2, ImmOp:$src3),
+ "$dst = "#mnemonic#"($src1, #$src2, #$src3)",
+ [], "", S_2op_tc_2_SLOT23> {
+ bits<5> dst;
+ bits<5> src1;
+ bits<6> src2;
+ bits<6> src3;
+ bit bit23;
+ bit bit13;
+ string ImmOpStr = !cast<string>(ImmOp);
+
+ let bit23 = !if (!eq(ImmOpStr, "u6Imm"), src3{5},
+ !if (!eq(mnemonic, "extractu"), 0, 1));
+
+ let bit13 = !if (!eq(ImmOpStr, "u6Imm"), src2{5}, 0);
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = RegTyBits;
+ let Inst{23} = bit23;
+ let Inst{22-21} = src3{4-3};
+ let Inst{20-16} = src1;
+ let Inst{13} = bit13;
+ let Inst{12-8} = src2{4-0};
+ let Inst{7-5} = src3{2-0};
+ let Inst{4-0} = dst;
+ }
+
+// Extract bitfield
+
+// Rdd=extractu(Rss,Rtt)
+// Rdd=extractu(Rss,#u6,#U6)
+def S2_extractup_rp : T_S3op_64 < "extractu", 0b00, 0b000, 0>;
+def S2_extractup : T_S2op_extract <"extractu", 0b0001, DoubleRegs, u6Imm>;
+
+// Rd=extractu(Rs,Rtt)
+// Rd=extractu(Rs,#u5,#U5)
+let hasNewValue = 1 in {
+ def S2_extractu_rp : T_S3op_extract<"extractu", 0b00>;
+ def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5Imm>;
+}
+
+def SDTHexagonEXTRACTU:
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def SDTHexagonEXTRACTURP:
+ SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i64>]>;
+
+def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>;
+def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>;
+
+def: Pat<(HexagonEXTRACTU I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3),
+ (S2_extractu I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3)>;
+def: Pat<(HexagonEXTRACTU I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3),
+ (S2_extractup I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3)>;
+def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2),
+ (S2_extractu_rp I32:$src1, I64:$src2)>;
+def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2),
+ (S2_extractup_rp I64:$src1, I64:$src2)>;
+
+// Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
+def: Pat<(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)),
+ (M2_mpysin IntRegs:$src1, u8ImmPred:$src2)>;
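+// M2_mpysin computes Rd = -(Rs * #u8), so a multiply by a small negative
+// constant (e.g. x * -3) can be encoded as -mpyi(x, #3) with the immediate's
+// sign flipped.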
+
+//===----------------------------------------------------------------------===//
+// :raw form of tableidx[bdhw] insns
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+class tableidxRaw<string OpStr, bits<2>MinOp>
+ : SInst <(outs IntRegs:$Rx),
+ (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, s6Imm:$S6),
+ "$Rx = "#OpStr#"($Rs, #$u4, #$S6):raw",
+ [], "$Rx = $_dst_" > {
+ bits<5> Rx;
+ bits<5> Rs;
+ bits<4> u4;
+ bits<6> S6;
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = 0b0111;
+ let Inst{23-22} = MinOp;
+ let Inst{21} = u4{3};
+ let Inst{20-16} = Rs;
+ let Inst{13-8} = S6;
+ let Inst{7-5} = u4{2-0};
+ let Inst{4-0} = Rx;
+ }
+
+def S2_tableidxb : tableidxRaw<"tableidxb", 0b00>;
+def S2_tableidxh : tableidxRaw<"tableidxh", 0b01>;
+def S2_tableidxw : tableidxRaw<"tableidxw", 0b10>;
+def S2_tableidxd : tableidxRaw<"tableidxd", 0b11>;
+
+//===----------------------------------------------------------------------===//
+// Template class for 'table index' instructions which are assembler mapped
+// to their :raw format.
+//===----------------------------------------------------------------------===//
+let isPseudo = 1 in
+class tableidx_goodsyntax <string mnemonic>
+ : SInst <(outs IntRegs:$Rx),
+ (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, u5Imm:$u5),
+ "$Rx = "#mnemonic#"($Rs, #$u4, #$u5)",
+ [], "$Rx = $_dst_" >;
+
+def S2_tableidxb_goodsyntax : tableidx_goodsyntax<"tableidxb">;
+def S2_tableidxh_goodsyntax : tableidx_goodsyntax<"tableidxh">;
+def S2_tableidxw_goodsyntax : tableidx_goodsyntax<"tableidxw">;
+def S2_tableidxd_goodsyntax : tableidx_goodsyntax<"tableidxd">;
+
+//===----------------------------------------------------------------------===//
+// V3 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV3.td"
+
+//===----------------------------------------------------------------------===//
+// V3 Instructions -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V4 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Instructions -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V5 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV5.td"
+
+//===----------------------------------------------------------------------===//
+// V5 Instructions -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V60 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV60.td"
+
+//===----------------------------------------------------------------------===//
+// V60 Instructions -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU32/64/Vector +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoVector.td"
+
+include "HexagonInstrAlias.td"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td
new file mode 100644
index 0000000..84d035d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td
@@ -0,0 +1,266 @@
+//=- HexagonInstrInfoV3.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V3 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+def callv3 : SDNode<"HexagonISD::CALLv3", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+def callv3nr : SDNode<"HexagonISD::CALLv3nr", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+//===----------------------------------------------------------------------===//
+// J +
+//===----------------------------------------------------------------------===//
+// Call subroutine.
+let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, isPredicable = 1,
+ isExtended = 0, isExtendable = 1, opExtendable = 0,
+ isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2 in
+class T_Call<string ExtStr>
+ : JInst<(outs), (ins calltarget:$dst),
+ "call " # ExtStr # "$dst", [], "", J_tc_2early_SLOT23> {
+ let BaseOpcode = "call";
+ bits<24> dst;
+
+ let IClass = 0b0101;
+ let Inst{27-25} = 0b101;
+ let Inst{24-16,13-1} = dst{23-2};
+ let Inst{0} = 0b0;
+}
+
+let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, isPredicated = 1,
+ isExtended = 0, isExtendable = 1, opExtendable = 1,
+ isExtentSigned = 1, opExtentBits = 17, opExtentAlign = 2 in
+class T_CallPred<bit IfTrue, string ExtStr>
+ : JInst<(outs), (ins PredRegs:$Pu, calltarget:$dst),
+ CondStr<"$Pu", IfTrue, 0>.S # "call " # ExtStr # "$dst",
+ [], "", J_tc_2early_SLOT23> {
+ let BaseOpcode = "call";
+ let isPredicatedFalse = !if(IfTrue,0,1);
+ bits<2> Pu;
+ bits<17> dst;
+
+ let IClass = 0b0101;
+ let Inst{27-24} = 0b1101;
+ let Inst{23-22,20-16,13,7-1} = dst{16-2};
+ let Inst{21} = !if(IfTrue,0,1);
+ let Inst{11} = 0b0;
+ let Inst{9-8} = Pu;
+}
+
+multiclass T_Calls<string ExtStr> {
+ def NAME : T_Call<ExtStr>;
+ def t : T_CallPred<1, ExtStr>;
+ def f : T_CallPred<0, ExtStr>;
+}
+
+defm J2_call: T_Calls<"">, PredRel;
+
+let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs in
+def CALLv3nr : T_Call<"">, PredRel;
+
+//===----------------------------------------------------------------------===//
+// J -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// JR +
+//===----------------------------------------------------------------------===//
+// Call subroutine from register.
+
+let isCodeGenOnly = 1, Defs = VolatileV3.Regs in {
+ def CALLRv3nr : JUMPR_MISC_CALLR<0, 1>; // Call, no return.
+}
+
+//===----------------------------------------------------------------------===//
+// JR -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU +
+//===----------------------------------------------------------------------===//
+
+let Defs = [USR_OVF], Itinerary = ALU64_tc_2_SLOT23 in
+def A2_addpsat : T_ALU64_arith<"add", 0b011, 0b101, 1, 0, 1>;
+
+class T_ALU64_addsp_hl<string suffix, bits<3> MinOp>
+ : T_ALU64_rr<"add", suffix, 0b0011, 0b011, MinOp, 0, 0, "">;
+
+def A2_addspl : T_ALU64_addsp_hl<":raw:lo", 0b110>;
+def A2_addsph : T_ALU64_addsp_hl<":raw:hi", 0b111>;
+
+let hasSideEffects = 0, isAsmParserOnly = 1 in
+def A2_addsp : ALU64_rr<(outs DoubleRegs:$Rd),
+ (ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)",
+ [(set (i64 DoubleRegs:$Rd), (i64 (add (i64 (sext (i32 IntRegs:$Rs))),
+ (i64 DoubleRegs:$Rt))))],
+ "", ALU64_tc_1_SLOT23>;
+
+
+let hasSideEffects = 0 in
+class T_XTYPE_MIN_MAX_P<bit isMax, bit isUnsigned>
+ : ALU64Inst<(outs DoubleRegs:$Rd), (ins DoubleRegs:$Rt, DoubleRegs:$Rs),
+ "$Rd = "#!if(isMax,"max","min")#!if(isUnsigned,"u","")
+ #"($Rt, $Rs)", [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b00111;
+ let Inst{22-21} = !if(isMax, 0b10, 0b01);
+ let Inst{20-16} = !if(isMax, Rt, Rs);
+ let Inst{12-8} = !if(isMax, Rs, Rt);
+ let Inst{7} = 0b1;
+ let Inst{6} = !if(isMax, 0b0, 0b1);
+ let Inst{5} = isUnsigned;
+ let Inst{4-0} = Rd;
+}
+
+def A2_minp : T_XTYPE_MIN_MAX_P<0, 0>;
+def A2_minup : T_XTYPE_MIN_MAX_P<0, 1>;
+def A2_maxp : T_XTYPE_MIN_MAX_P<1, 0>;
+def A2_maxup : T_XTYPE_MIN_MAX_P<1, 1>;
+
+multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
+ defm: T_MinMax_pats<Op, DoubleRegs, i64, Inst, SwapInst>;
+}
+
+let AddedComplexity = 200 in {
+ defm: MinMax_pats_p<setge, A2_maxp, A2_minp>;
+ defm: MinMax_pats_p<setgt, A2_maxp, A2_minp>;
+ defm: MinMax_pats_p<setle, A2_minp, A2_maxp>;
+ defm: MinMax_pats_p<setlt, A2_minp, A2_maxp>;
+ defm: MinMax_pats_p<setuge, A2_maxup, A2_minup>;
+ defm: MinMax_pats_p<setugt, A2_maxup, A2_minup>;
+ defm: MinMax_pats_p<setule, A2_minup, A2_maxup>;
+ defm: MinMax_pats_p<setult, A2_minup, A2_maxup>;
+}
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU -
+//===----------------------------------------------------------------------===//
+
+
+
+
+//def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>;
+
+//def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>;
+
+//def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>;
+
+//def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>;
+
+//def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset),
+// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>;
+
+// Map call instruction
+def : Pat<(callv3 (i32 IntRegs:$dst)),
+ (J2_callr (i32 IntRegs:$dst))>;
+def : Pat<(callv3 tglobaladdr:$dst),
+ (J2_call tglobaladdr:$dst)>;
+def : Pat<(callv3 texternalsym:$dst),
+ (J2_call texternalsym:$dst)>;
+def : Pat<(callv3 tglobaltlsaddr:$dst),
+ (J2_call tglobaltlsaddr:$dst)>;
+
+def : Pat<(callv3nr (i32 IntRegs:$dst)),
+ (CALLRv3nr (i32 IntRegs:$dst))>;
+def : Pat<(callv3nr tglobaladdr:$dst),
+ (CALLv3nr tglobaladdr:$dst)>;
+def : Pat<(callv3nr texternalsym:$dst),
+ (CALLv3nr texternalsym:$dst)>;
+
+//===----------------------------------------------------------------------===//
+// :raw form of vrcmpys:hi/lo insns
+//===----------------------------------------------------------------------===//
+// Vector reduce complex multiply by scalar.
+let Defs = [USR_OVF], hasSideEffects = 0 in
+class T_vrcmpRaw<string HiLo, bits<3>MajOp>:
+ MInst<(outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rdd = vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, []> {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1000;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ let Inst{7-5} = 0b100;
+ let Inst{4-0} = Rdd;
+}
+
+def M2_vrcmpys_s1_h: T_vrcmpRaw<"hi", 0b101>;
+def M2_vrcmpys_s1_l: T_vrcmpRaw<"lo", 0b111>;
+
+// Assembler mapped to M2_vrcmpys_s1_h or M2_vrcmpys_s1_l
+let hasSideEffects = 0, isAsmParserOnly = 1 in
+def M2_vrcmpys_s1
+ : MInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, IntRegs:$Rt),
+ "$Rdd=vrcmpys($Rss,$Rt):<<1:sat">;
+
+// Vector reduce complex multiply by scalar with accumulation.
+let Defs = [USR_OVF], hasSideEffects = 0 in
+class T_vrcmpys_acc<string HiLo, bits<3>MajOp>:
+ MInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$_src_, DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rxx += vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, [],
+ "$Rxx = $_src_"> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1010;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ let Inst{7-5} = 0b100;
+ let Inst{4-0} = Rxx;
+ }
+
+def M2_vrcmpys_acc_s1_h: T_vrcmpys_acc<"hi", 0b101>;
+def M2_vrcmpys_acc_s1_l: T_vrcmpys_acc<"lo", 0b111>;
+
+// Assembler mapped to M2_vrcmpys_acc_s1_h or M2_vrcmpys_acc_s1_l
+
+let isAsmParserOnly = 1 in
+def M2_vrcmpys_acc_s1
+ : MInst <(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2),
+ "$dst += vrcmpys($src1, $src2):<<1:sat", [],
+ "$dst2 = $dst">;
+
+def M2_vrcmpys_s1rp_h : T_MType_vrcmpy <"vrcmpys", 0b101, 0b110, 1>;
+def M2_vrcmpys_s1rp_l : T_MType_vrcmpy <"vrcmpys", 0b101, 0b111, 0>;
+
+// Assembler mapped to M2_vrcmpys_s1rp_h or M2_vrcmpys_s1rp_l
+let isAsmParserOnly = 1 in
+def M2_vrcmpys_s1rp
+ : MInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, IntRegs:$Rt),
+ "$Rd=vrcmpys($Rss,$Rt):<<1:rnd:sat">;
+
+
+// S2_cabacdecbin: CABAC decode bin.
+let Defs = [P0], isPredicateLate = 1, Itinerary = S_3op_tc_1_SLOT23 in
+def S2_cabacdecbin : T_S3op_64 < "decbin", 0b11, 0b110, 0>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
new file mode 100644
index 0000000..87d6b35
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -0,0 +1,4251 @@
+//=- HexagonInstrInfoV4.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V4 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+def DuplexIClass0: InstDuplex < 0 >;
+def DuplexIClass1: InstDuplex < 1 >;
+def DuplexIClass2: InstDuplex < 2 >;
+let isExtendable = 1 in {
+ def DuplexIClass3: InstDuplex < 3 >;
+ def DuplexIClass4: InstDuplex < 4 >;
+ def DuplexIClass5: InstDuplex < 5 >;
+ def DuplexIClass6: InstDuplex < 6 >;
+ def DuplexIClass7: InstDuplex < 7 >;
+}
+def DuplexIClass8: InstDuplex < 8 >;
+def DuplexIClass9: InstDuplex < 9 >;
+def DuplexIClassA: InstDuplex < 0xA >;
+def DuplexIClassB: InstDuplex < 0xB >;
+def DuplexIClassC: InstDuplex < 0xC >;
+def DuplexIClassD: InstDuplex < 0xD >;
+def DuplexIClassE: InstDuplex < 0xE >;
+def DuplexIClassF: InstDuplex < 0xF >;
+
+def addrga: PatLeaf<(i32 AddrGA:$Addr)>;
+def addrgp: PatLeaf<(i32 AddrGP:$Addr)>;
+
+let hasSideEffects = 0 in
+class T_Immext<Operand ImmType>
+ : EXTENDERInst<(outs), (ins ImmType:$imm),
+ "immext(#$imm)", []> {
+ bits<32> imm;
+ let IClass = 0b0000;
+
+ let Inst{27-16} = imm{31-20};
+ let Inst{13-0} = imm{19-6};
+ }
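+// The extender word supplies the upper 26 bits of a 32-bit immediate
+// (imm{31-6}), split across Inst{27-16} and Inst{13-0}; the extended
+// instruction that follows provides the remaining low 6 bits.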
+
+def A4_ext : T_Immext<u26_6Imm>;
+let isCodeGenOnly = 1 in {
+ let isBranch = 1 in
+ def A4_ext_b : T_Immext<brtarget>;
+ let isCall = 1 in
+ def A4_ext_c : T_Immext<calltarget>;
+ def A4_ext_g : T_Immext<globaladdress>;
+}
+
+def BITPOS32 : SDNodeXForm<imm, [{
+ // Return the bit position we will set [0-31].
+ // As an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformMskToBitPosU5Imm(imm, SDLoc(N));
+}]>;
+
+
+// The Hexagon V4 architecture spec defines the following instruction classes:
+// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM (SYSTEM is not implemented in
+// the compiler)
+
+// LD Instructions:
+// ========================================
+// Loads (8/16/32/64 bit)
+// Deallocframe
+
+// ST Instructions:
+// ========================================
+// Stores (8/16/32/64 bit)
+// Allocframe
+
+// ALU32 Instructions:
+// ========================================
+// Arithmetic / Logical (32 bit)
+// Vector Halfword
+
+// XTYPE Instructions (32/64 bit):
+// ========================================
+// Arithmetic, Logical, Bit Manipulation
+// Multiply (Integer, Fractional, Complex)
+// Permute / Vector Permute Operations
+// Predicate Operations
+// Shift / Shift with Add/Sub/Logical
+// Vector Byte ALU
+// Vector Halfword (ALU, Shift, Multiply)
+// Vector Word (ALU, Shift)
+
+// J Instructions:
+// ========================================
+// Jump/Call PC-relative
+
+// JR Instructions:
+// ========================================
+// Jump/Call Register
+
+// MEMOP Instructions:
+// ========================================
+// Operation on memory (8/16/32 bit)
+
+// NV Instructions:
+// ========================================
+// New-value Jumps
+// New-value Stores
+
+// CR Instructions:
+// ========================================
+// Control-Register Transfers
+// Hardware Loop Setup
+// Predicate Logicals & Reductions
+
+// SYSTEM Instructions (not implemented in the compiler):
+// ========================================
+// Prefetch
+// Cache Maintenance
+// Bus Operations
+
+
+//===----------------------------------------------------------------------===//
+// ALU32 +
+//===----------------------------------------------------------------------===//
+
+class T_ALU32_3op_not<string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit OpsRev>
+ : T_ALU32_3op<mnemonic, MajOp, MinOp, OpsRev, 0> {
+ let AsmString = "$Rd = "#mnemonic#"($Rs, ~$Rt)";
+}
+
+let BaseOpcode = "andn_rr", CextOpcode = "andn" in
+def A4_andn : T_ALU32_3op_not<"and", 0b001, 0b100, 1>;
+let BaseOpcode = "orn_rr", CextOpcode = "orn" in
+def A4_orn : T_ALU32_3op_not<"or", 0b001, 0b101, 1>;
+
+let CextOpcode = "rcmp.eq" in
+def A4_rcmpeq : T_ALU32_3op<"cmp.eq", 0b011, 0b010, 0, 1>;
+let CextOpcode = "!rcmp.eq" in
+def A4_rcmpneq : T_ALU32_3op<"!cmp.eq", 0b011, 0b011, 0, 1>;
+
+def C4_cmpneq : T_ALU32_3op_cmp<"!cmp.eq", 0b00, 1, 1>;
+def C4_cmplte : T_ALU32_3op_cmp<"!cmp.gt", 0b10, 1, 0>;
+def C4_cmplteu : T_ALU32_3op_cmp<"!cmp.gtu", 0b11, 1, 0>;
+
+// Pats for instruction selection.
+
+// A class to embed the usual comparison patfrags within a zext to i32.
+// The seteq/setne frags use "lhs" and "rhs" as operands, so use the same
+// names, or else the frag's "body" won't match the operands.
+class CmpInReg<PatFrag Op>
+ : PatFrag<(ops node:$lhs, node:$rhs),(i32 (zext (i1 Op.Fragment)))>;
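+// For example, CmpInReg<seteq> matches a seteq of $lhs and $rhs whose i1
+// result is zero-extended to i32; T_cmp32_rr_pat below then maps it onto
+// A4_rcmpeq.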
+
+def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>;
+def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>;
+
+def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>;
+def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>;
+
+def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>;
+
+class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm>
+ : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", S_3op_tc_2early_SLOT23>,
+ ImmRegRel {
+ let InputType = "reg";
+ let CextOpcode = mnemonic;
+ let isCompare = 1;
+ let isCommutable = IsComm;
+ let hasSideEffects = 0;
+
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1100;
+ let Inst{27-21} = 0b0111110;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = MinOp;
+ let Inst{1-0} = Pd;
+}
+
+def A4_cmpbeq : T_CMP_rrbh<"cmpb.eq", 0b110, 1>;
+def A4_cmpbgt : T_CMP_rrbh<"cmpb.gt", 0b010, 0>;
+def A4_cmpbgtu : T_CMP_rrbh<"cmpb.gtu", 0b111, 0>;
+def A4_cmpheq : T_CMP_rrbh<"cmph.eq", 0b011, 1>;
+def A4_cmphgt : T_CMP_rrbh<"cmph.gt", 0b100, 0>;
+def A4_cmphgtu : T_CMP_rrbh<"cmph.gtu", 0b101, 0>;
+
+let AddedComplexity = 100 in {
+ def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
+ 255), 0)),
+ (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>;
+ def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
+ 255), 0)),
+ (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>;
+ def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
+ 65535), 0)),
+ (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>;
+ def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
+ 65535), 0)),
+ (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>;
+}
+
+class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm,
+ Operand ImmType, bit IsImmExt, bit IsImmSigned, int ImmBits>
+ : ALU64Inst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, ImmType:$Imm),
+ "$Pd = "#mnemonic#"($Rs, #$Imm)", [], "", ALU64_tc_2early_SLOT23>,
+ ImmRegRel {
+ let InputType = "imm";
+ let CextOpcode = mnemonic;
+ let isCompare = 1;
+ let isCommutable = IsComm;
+ let hasSideEffects = 0;
+ let isExtendable = IsImmExt;
+ let opExtendable = !if (IsImmExt, 2, 0);
+ let isExtentSigned = IsImmSigned;
+ let opExtentBits = ImmBits;
+
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<8> Imm;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = 0b1101;
+ let Inst{22-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{12-5} = Imm;
+ let Inst{4} = 0b0;
+ let Inst{3} = IsHalf;
+ let Inst{1-0} = Pd;
+}
+
+def A4_cmpbeqi : T_CMP_ribh<"cmpb.eq", 0b00, 0, 1, u8Imm, 0, 0, 8>;
+def A4_cmpbgti : T_CMP_ribh<"cmpb.gt", 0b01, 0, 0, s8Imm, 0, 1, 8>;
+def A4_cmpbgtui : T_CMP_ribh<"cmpb.gtu", 0b10, 0, 0, u7Ext, 1, 0, 7>;
+def A4_cmpheqi : T_CMP_ribh<"cmph.eq", 0b00, 1, 1, s8Ext, 1, 1, 8>;
+def A4_cmphgti : T_CMP_ribh<"cmph.gt", 0b01, 1, 0, s8Ext, 1, 1, 8>;
+def A4_cmphgtui : T_CMP_ribh<"cmph.gtu", 0b10, 1, 0, u7Ext, 1, 0, 7>;
+
+class T_RCMP_EQ_ri<string mnemonic, bit IsNeg>
+ : ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Rd = "#mnemonic#"($Rs, #$s8)", [], "", ALU32_2op_tc_1_SLOT0123>,
+ ImmRegRel {
+ let InputType = "imm";
+ let CextOpcode = !if (IsNeg, "!rcmp.eq", "rcmp.eq");
+ let isExtendable = 1;
+ let opExtendable = 2;
+ let isExtentSigned = 1;
+ let opExtentBits = 8;
+ let hasNewValue = 1;
+
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<8> s8;
+
+ let IClass = 0b0111;
+ let Inst{27-24} = 0b0011;
+ let Inst{22} = 0b1;
+ let Inst{21} = IsNeg;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b1;
+ let Inst{12-5} = s8;
+ let Inst{4-0} = Rd;
+}
+
+def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>;
+def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>;
+
+def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32ImmPred:$s8)))),
+ (A4_rcmpeqi IntRegs:$Rs, s32ImmPred:$s8)>;
+def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32ImmPred:$s8)))),
+ (A4_rcmpneqi IntRegs:$Rs, s32ImmPred:$s8)>;
+
+// Preserve the S2_tstbit_r generation
+def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))),
+ (i32 IntRegs:$src1))), 0)))),
+ (C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>;
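+// The pattern matches zext(i1 (((1 << n) & x) != 0)) and selects tstbit of
+// bit n in x, followed by a mux that materializes the 0/1 result in a
+// register.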
+
+//===----------------------------------------------------------------------===//
+// ALU32 -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PERM +
+//===----------------------------------------------------------------------===//
+
+// Combine a word and an immediate into a register pair.
+let hasSideEffects = 0, isExtentSigned = 1, isExtendable = 1,
+ opExtentBits = 8 in
+class T_Combine1 <bits<2> MajOp, dag ins, string AsmStr>
+ : ALU32Inst <(outs DoubleRegs:$Rdd), ins, AsmStr> {
+ bits<5> Rdd;
+ bits<5> Rs;
+ bits<8> s8;
+
+ let IClass = 0b0111;
+ let Inst{27-24} = 0b0011;
+ let Inst{22-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b1;
+ let Inst{12-5} = s8;
+ let Inst{4-0} = Rdd;
+ }
+
+let opExtendable = 2 in
+def A4_combineri : T_Combine1<0b00, (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Rdd = combine($Rs, #$s8)">;
+
+let opExtendable = 1 in
+def A4_combineir : T_Combine1<0b01, (ins s8Ext:$s8, IntRegs:$Rs),
+ "$Rdd = combine(#$s8, $Rs)">;
+
+// The complexity of the combines involving immediates should be greater
+// than the complexity of the combine with two registers.
+let AddedComplexity = 50 in {
+def: Pat<(HexagonCOMBINE IntRegs:$r, s32ImmPred:$i),
+ (A4_combineri IntRegs:$r, s32ImmPred:$i)>;
+
+def: Pat<(HexagonCOMBINE s32ImmPred:$i, IntRegs:$r),
+ (A4_combineir s32ImmPred:$i, IntRegs:$r)>;
+}
+
+// A4_combineii: Set two small immediates.
+let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in
+def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6),
+ "$Rdd = combine(#$s8, #$U6)"> {
+ bits<5> Rdd;
+ bits<8> s8;
+ bits<6> U6;
+
+ let IClass = 0b0111;
+ let Inst{27-23} = 0b11001;
+ let Inst{20-16} = U6{5-1};
+ let Inst{13} = U6{0};
+ let Inst{12-5} = s8;
+ let Inst{4-0} = Rdd;
+ }
+
+// The complexity of the combine with two immediates should be greater than
+// the complexity of a combine involving a register.
+let AddedComplexity = 75 in
+def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u32ImmPred:$u6),
+ (A4_combineii imm:$s8, imm:$u6)>;
+
+//===----------------------------------------------------------------------===//
+// ALU32/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LD +
+//===----------------------------------------------------------------------===//
+
+def Zext64: OutPatFrag<(ops node:$Rs),
+ (i64 (A4_combineir 0, (i32 $Rs)))>;
+def Sext64: OutPatFrag<(ops node:$Rs),
+ (i64 (A2_sxtw (i32 $Rs)))>;
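+// Zext64 zero-extends a 32-bit value by pairing it with #0 in the upper word
+// (combine(#0, Rs)); Sext64 sign-extends it with sxtw. Both are output
+// fragments used by the extending-load patterns below.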
+
+// Patterns to generate indexed loads with different forms of the address:
+// - frameindex,
+// - base + offset,
+// - base (without offset).
+multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
+ PatLeaf ImmPred, InstHexagon MI> {
+ def: Pat<(VT (Load AddrFI:$fi)),
+ (VT (ValueMod (MI AddrFI:$fi, 0)))>;
+ def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))),
+ (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>;
+ def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))),
+ (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>;
+ def: Pat<(VT (Load (i32 IntRegs:$Rs))),
+ (VT (ValueMod (MI IntRegs:$Rs, 0)))>;
+}
+
+defm: Loadxm_pat<extloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<extloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<extloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
+defm: Loadxm_pat<zextloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<zextloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<zextloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
+defm: Loadxm_pat<sextloadi8, i64, Sext64, s32_0ImmPred, L2_loadrb_io>;
+defm: Loadxm_pat<sextloadi16, i64, Sext64, s31_1ImmPred, L2_loadrh_io>;
+
+// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs).
+def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>;
+
+//===----------------------------------------------------------------------===//
+// Template class for load instructions with Absolute set addressing mode.
+//===----------------------------------------------------------------------===//
+let isExtended = 1, opExtendable = 2, opExtentBits = 6, addrMode = AbsoluteSet,
+ hasSideEffects = 0 in
+class T_LD_abs_set<string mnemonic, RegisterClass RC, bits<4>MajOp>:
+ LDInst<(outs RC:$dst1, IntRegs:$dst2),
+ (ins u6Ext:$addr),
+ "$dst1 = "#mnemonic#"($dst2 = #$addr)",
+ []> {
+ bits<7> name;
+ bits<5> dst1;
+ bits<5> dst2;
+ bits<6> addr;
+
+ let IClass = 0b1001;
+ let Inst{27-25} = 0b101;
+ let Inst{24-21} = MajOp;
+ let Inst{13-12} = 0b01;
+ let Inst{4-0} = dst1;
+ let Inst{20-16} = dst2;
+ let Inst{11-8} = addr{5-2};
+ let Inst{6-5} = addr{1-0};
+}
+
+let accessSize = ByteAccess, hasNewValue = 1 in {
+ def L4_loadrb_ap : T_LD_abs_set <"memb", IntRegs, 0b1000>;
+ def L4_loadrub_ap : T_LD_abs_set <"memub", IntRegs, 0b1001>;
+}
+
+let accessSize = HalfWordAccess, hasNewValue = 1 in {
+ def L4_loadrh_ap : T_LD_abs_set <"memh", IntRegs, 0b1010>;
+ def L4_loadruh_ap : T_LD_abs_set <"memuh", IntRegs, 0b1011>;
+ def L4_loadbsw2_ap : T_LD_abs_set <"membh", IntRegs, 0b0001>;
+ def L4_loadbzw2_ap : T_LD_abs_set <"memubh", IntRegs, 0b0011>;
+}
+
+let accessSize = WordAccess, hasNewValue = 1 in
+ def L4_loadri_ap : T_LD_abs_set <"memw", IntRegs, 0b1100>;
+
+let accessSize = WordAccess in {
+ def L4_loadbzw4_ap : T_LD_abs_set <"memubh", DoubleRegs, 0b0101>;
+ def L4_loadbsw4_ap : T_LD_abs_set <"membh", DoubleRegs, 0b0111>;
+}
+
+let accessSize = DoubleWordAccess in
+def L4_loadrd_ap : T_LD_abs_set <"memd", DoubleRegs, 0b1110>;
+
+let accessSize = ByteAccess in
+ def L4_loadalignb_ap : T_LD_abs_set <"memb_fifo", DoubleRegs, 0b0100>;
+
+let accessSize = HalfWordAccess in
+def L4_loadalignh_ap : T_LD_abs_set <"memh_fifo", DoubleRegs, 0b0010>;
+
+// Load - Indirect with long offset
+let InputType = "imm", addrMode = BaseLongOffset, isExtended = 1,
+opExtentBits = 6, opExtendable = 3 in
+class T_LoadAbsReg <string mnemonic, string CextOp, RegisterClass RC,
+ bits<4> MajOp>
+ : LDInst <(outs RC:$dst), (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3),
+ "$dst = "#mnemonic#"($src1<<#$src2 + #$src3)",
+ [] >, ImmRegShl {
+ bits<5> dst;
+ bits<5> src1;
+ bits<2> src2;
+ bits<6> src3;
+ let CextOpcode = CextOp;
+ let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
+
+ let IClass = 0b1001;
+ let Inst{27-25} = 0b110;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13} = src2{1};
+ let Inst{12} = 0b1;
+ let Inst{11-8} = src3{5-2};
+ let Inst{7} = src2{0};
+ let Inst{6-5} = src3{1-0};
+ let Inst{4-0} = dst;
+ }
+
+let accessSize = ByteAccess in {
+ def L4_loadrb_ur : T_LoadAbsReg<"memb", "LDrib", IntRegs, 0b1000>;
+ def L4_loadrub_ur : T_LoadAbsReg<"memub", "LDriub", IntRegs, 0b1001>;
+ def L4_loadalignb_ur : T_LoadAbsReg<"memb_fifo", "LDrib_fifo",
+ DoubleRegs, 0b0100>;
+}
+
+let accessSize = HalfWordAccess in {
+ def L4_loadrh_ur : T_LoadAbsReg<"memh", "LDrih", IntRegs, 0b1010>;
+ def L4_loadruh_ur : T_LoadAbsReg<"memuh", "LDriuh", IntRegs, 0b1011>;
+ def L4_loadbsw2_ur : T_LoadAbsReg<"membh", "LDribh2", IntRegs, 0b0001>;
+ def L4_loadbzw2_ur : T_LoadAbsReg<"memubh", "LDriubh2", IntRegs, 0b0011>;
+ def L4_loadalignh_ur : T_LoadAbsReg<"memh_fifo", "LDrih_fifo",
+ DoubleRegs, 0b0010>;
+}
+
+let accessSize = WordAccess in {
+ def L4_loadri_ur : T_LoadAbsReg<"memw", "LDriw", IntRegs, 0b1100>;
+ def L4_loadbsw4_ur : T_LoadAbsReg<"membh", "LDribh4", DoubleRegs, 0b0111>;
+ def L4_loadbzw4_ur : T_LoadAbsReg<"memubh", "LDriubh4", DoubleRegs, 0b0101>;
+}
+
+let accessSize = DoubleWordAccess in
+def L4_loadrd_ur : T_LoadAbsReg<"memd", "LDrid", DoubleRegs, 0b1110>;
+
+
+multiclass T_LoadAbsReg_Pat <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> {
+ def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3)))),
+ (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3)>;
+ def : Pat <(VT (ldOp (add IntRegs:$src1,
+ (HexagonCONST32 tglobaladdr:$src2)))),
+ (MI IntRegs:$src1, 0, tglobaladdr:$src2)>;
+
+ def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tconstpool:$src3)))),
+ (MI IntRegs:$src1, u2ImmPred:$src2, tconstpool:$src3)>;
+ def : Pat <(VT (ldOp (add IntRegs:$src1,
+ (HexagonCONST32 tconstpool:$src2)))),
+ (MI IntRegs:$src1, 0, tconstpool:$src2)>;
+
+ def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tjumptable:$src3)))),
+ (MI IntRegs:$src1, u2ImmPred:$src2, tjumptable:$src3)>;
+ def : Pat <(VT (ldOp (add IntRegs:$src1,
+ (HexagonCONST32 tjumptable:$src2)))),
+ (MI IntRegs:$src1, 0, tjumptable:$src2)>;
+}
+
+let AddedComplexity = 60 in {
+defm : T_LoadAbsReg_Pat <sextloadi8, L4_loadrb_ur>;
+defm : T_LoadAbsReg_Pat <zextloadi8, L4_loadrub_ur>;
+defm : T_LoadAbsReg_Pat <extloadi8, L4_loadrub_ur>;
+
+defm : T_LoadAbsReg_Pat <sextloadi16, L4_loadrh_ur>;
+defm : T_LoadAbsReg_Pat <zextloadi16, L4_loadruh_ur>;
+defm : T_LoadAbsReg_Pat <extloadi16, L4_loadruh_ur>;
+
+defm : T_LoadAbsReg_Pat <load, L4_loadri_ur>;
+defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, i64>;
+}
+
+//===----------------------------------------------------------------------===//
+// Template classes for the non-predicated load instructions with
+// base + register offset addressing mode
+//===----------------------------------------------------------------------===//
+class T_load_rr <string mnemonic, RegisterClass RC, bits<3> MajOp>:
+ LDInst<(outs RC:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$u2),
+ "$dst = "#mnemonic#"($src1 + $src2<<#$u2)",
+ [], "", V4LDST_tc_ld_SLOT01>, ImmRegShl, AddrModeRel {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<2> u2;
+
+ let IClass = 0b0011;
+
+ let Inst{27-24} = 0b1010;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{12-8} = src2;
+ let Inst{13} = u2{1};
+ let Inst{7} = u2{0};
+ let Inst{4-0} = dst;
+ }
+
+//===----------------------------------------------------------------------===//
+// Template classes for the predicated load instructions with
+// base + register offset addressing mode
+//===----------------------------------------------------------------------===//
+let isPredicated = 1 in
+class T_pload_rr <string mnemonic, RegisterClass RC, bits<3> MajOp,
+ bit isNot, bit isPredNew>:
+ LDInst <(outs RC:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$u2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($src2+$src3<<#$u2)",
+ [], "", V4LDST_tc_ld_SLOT01>, AddrModeRel {
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+ bits<5> src3;
+ bits<2> u2;
+
+ let isPredicatedFalse = isNot;
+ let isPredicatedNew = isPredNew;
+
+ let IClass = 0b0011;
+
+ let Inst{27-26} = 0b00;
+ let Inst{25} = isPredNew;
+ let Inst{24} = isNot;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{12-8} = src3;
+ let Inst{13} = u2{1};
+ let Inst{7} = u2{0};
+ let Inst{6-5} = src1;
+ let Inst{4-0} = dst;
+ }
+
+//===----------------------------------------------------------------------===//
+// multiclass for load instructions with base + register offset
+// addressing mode
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, addrMode = BaseRegOffset in
+multiclass ld_idxd_shl <string mnemonic, string CextOp, RegisterClass RC,
+ bits<3> MajOp > {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl,
+ InputType = "reg" in {
+ let isPredicable = 1 in
+ def L4_#NAME#_rr : T_load_rr <mnemonic, RC, MajOp>;
+
+ // Predicated
+ def L4_p#NAME#t_rr : T_pload_rr <mnemonic, RC, MajOp, 0, 0>;
+ def L4_p#NAME#f_rr : T_pload_rr <mnemonic, RC, MajOp, 1, 0>;
+
+ // Predicated new
+ def L4_p#NAME#tnew_rr : T_pload_rr <mnemonic, RC, MajOp, 0, 1>;
+ def L4_p#NAME#fnew_rr : T_pload_rr <mnemonic, RC, MajOp, 1, 1>;
+ }
+}
+
+let hasNewValue = 1, accessSize = ByteAccess in {
+ defm loadrb : ld_idxd_shl<"memb", "LDrib", IntRegs, 0b000>;
+ defm loadrub : ld_idxd_shl<"memub", "LDriub", IntRegs, 0b001>;
+}
+
+let hasNewValue = 1, accessSize = HalfWordAccess in {
+ defm loadrh : ld_idxd_shl<"memh", "LDrih", IntRegs, 0b010>;
+ defm loadruh : ld_idxd_shl<"memuh", "LDriuh", IntRegs, 0b011>;
+}
+
+let hasNewValue = 1, accessSize = WordAccess in
+defm loadri : ld_idxd_shl<"memw", "LDriw", IntRegs, 0b100>;
+
+let accessSize = DoubleWordAccess in
+defm loadrd : ld_idxd_shl<"memd", "LDrid", DoubleRegs, 0b110>;
+
+// 'def pats' for load instructions with base + register offset and a
+// non-zero immediate value. The immediate value is used to left-shift the
+// second register operand.
+class Loadxs_pat<PatFrag Load, ValueType VT, InstHexagon MI>
+ : Pat<(VT (Load (add (i32 IntRegs:$Rs),
+ (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2))))),
+ (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>;
+
+let AddedComplexity = 40 in {
+ def: Loadxs_pat<extloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_pat<zextloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_pat<sextloadi8, i32, L4_loadrb_rr>;
+ def: Loadxs_pat<extloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_pat<zextloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_pat<sextloadi16, i32, L4_loadrh_rr>;
+ def: Loadxs_pat<load, i32, L4_loadri_rr>;
+ def: Loadxs_pat<load, i64, L4_loadrd_rr>;
+}
+
+// 'def pats' for load instructions with base + register offset and a zero
+// immediate value.
+class Loadxs_simple_pat<PatFrag Load, ValueType VT, InstHexagon MI>
+ : Pat<(VT (Load (add (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)))),
+ (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>;
+
+let AddedComplexity = 20 in {
+ def: Loadxs_simple_pat<extloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_simple_pat<zextloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_simple_pat<sextloadi8, i32, L4_loadrb_rr>;
+ def: Loadxs_simple_pat<extloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_simple_pat<zextloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_simple_pat<sextloadi16, i32, L4_loadrh_rr>;
+ def: Loadxs_simple_pat<load, i32, L4_loadri_rr>;
+ def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>;
+}
+
+// zext i1->i64
+def: Pat<(i64 (zext (i1 PredRegs:$src1))),
+ (Zext64 (C2_muxii PredRegs:$src1, 1, 0))>;
+
+// zext i32->i64
+def: Pat<(i64 (zext (i32 IntRegs:$src1))),
+ (Zext64 IntRegs:$src1)>;
+
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+///
+//===----------------------------------------------------------------------===//
+// Template class for store instructions with Absolute set addressing mode.
+//===----------------------------------------------------------------------===//
+let isExtended = 1, opExtendable = 1, opExtentBits = 6,
+ addrMode = AbsoluteSet in
+class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC,
+ bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0>
+ : STInst<(outs IntRegs:$dst),
+ (ins u6Ext:$addr, RC:$src),
+ mnemonic#"($dst = #$addr) = $src"#!if(isHalf, ".h","")>, NewValueRel {
+ bits<5> dst;
+ bits<6> addr;
+ bits<5> src;
+ let accessSize = AccessSz;
+ let BaseOpcode = BaseOp#"_AbsSet";
+
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
+
+ let IClass = 0b1010;
+
+ let Inst{27-24} = 0b1011;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = dst;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = src;
+ let Inst{7} = 0b1;
+ let Inst{5-0} = addr;
+ }
+
+def S4_storerb_ap : T_ST_absset <"memb", "STrib", IntRegs, 0b000, ByteAccess>;
+def S4_storerh_ap : T_ST_absset <"memh", "STrih", IntRegs, 0b010,
+ HalfWordAccess>;
+def S4_storeri_ap : T_ST_absset <"memw", "STriw", IntRegs, 0b100, WordAccess>;
+
+let isNVStorable = 0 in {
+ def S4_storerf_ap : T_ST_absset <"memh", "STrif", IntRegs,
+ 0b011, HalfWordAccess, 1>;
+ def S4_storerd_ap : T_ST_absset <"memd", "STrid", DoubleRegs,
+ 0b110, DoubleWordAccess>;
+}
+
+let opExtendable = 1, isNewValue = 1, isNVStore = 1, opNewValue = 2,
+isExtended = 1, opExtentBits= 6 in
+class T_ST_absset_nv <string mnemonic, string BaseOp, bits<2> MajOp,
+ MemAccessSize AccessSz >
+ : NVInst <(outs IntRegs:$dst),
+ (ins u6Ext:$addr, IntRegs:$src),
+ mnemonic#"($dst = #$addr) = $src.new">, NewValueRel {
+ bits<5> dst;
+ bits<6> addr;
+ bits<3> src;
+ let accessSize = AccessSz;
+ let BaseOpcode = BaseOp#"_AbsSet";
+
+ let IClass = 0b1010;
+
+ let Inst{27-21} = 0b1011101;
+ let Inst{20-16} = dst;
+ let Inst{13-11} = 0b000;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src;
+ let Inst{7} = 0b1;
+ let Inst{5-0} = addr;
+ }
+
+let mayStore = 1, addrMode = AbsoluteSet in {
+ def S4_storerbnew_ap : T_ST_absset_nv <"memb", "STrib", 0b00, ByteAccess>;
+ def S4_storerhnew_ap : T_ST_absset_nv <"memh", "STrih", 0b01, HalfWordAccess>;
+ def S4_storerinew_ap : T_ST_absset_nv <"memw", "STriw", 0b10, WordAccess>;
+}
+
+let isExtended = 1, opExtendable = 2, opExtentBits = 6, InputType = "imm",
+ addrMode = BaseLongOffset, AddedComplexity = 40 in
+class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC,
+ bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0>
+ : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, RC:$src4),
+ mnemonic#"($src1<<#$src2 + #$src3) = $src4"#!if(isHalf, ".h",""),
+ []>, ImmRegShl, NewValueRel {
+
+ bits<5> src1;
+ bits<2> src2;
+ bits<6> src3;
+ bits<5> src4;
+
+ let accessSize = AccessSz;
+ let CextOpcode = CextOp;
+ let BaseOpcode = CextOp#"_shl";
+
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
+
+ let IClass = 0b1010;
+
+ let Inst{27-24} =0b1101;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = src1;
+ let Inst{13} = src2{1};
+ let Inst{12-8} = src4;
+ let Inst{7} = 0b1;
+ let Inst{6} = src2{0};
+ let Inst{5-0} = src3;
+}
+
+def S4_storerb_ur : T_StoreAbsReg <"memb", "STrib", IntRegs, 0b000, ByteAccess>;
+def S4_storerh_ur : T_StoreAbsReg <"memh", "STrih", IntRegs, 0b010,
+ HalfWordAccess>;
+def S4_storerf_ur : T_StoreAbsReg <"memh", "STrif", IntRegs, 0b011,
+ HalfWordAccess, 1>;
+def S4_storeri_ur : T_StoreAbsReg <"memw", "STriw", IntRegs, 0b100, WordAccess>;
+def S4_storerd_ur : T_StoreAbsReg <"memd", "STrid", DoubleRegs, 0b110,
+ DoubleWordAccess>;
+
+let AddedComplexity = 40 in
+multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT,
+ PatFrag stOp> {
+ def : Pat<(stOp (VT RC:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u32ImmPred:$src3)),
+ (MI IntRegs:$src1, u2ImmPred:$src2, u32ImmPred:$src3, RC:$src4)>;
+
+ def : Pat<(stOp (VT RC:$src4),
+ (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3))),
+ (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>;
+
+ def : Pat<(stOp (VT RC:$src4),
+ (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))),
+ (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>;
+}
+
+defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, i64, store>;
+defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, i32, store>;
+defm : T_StoreAbsReg_Pats <S4_storerb_ur, IntRegs, i32, truncstorei8>;
+defm : T_StoreAbsReg_Pats <S4_storerh_ur, IntRegs, i32, truncstorei16>;
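+
+// Illustrative use of the patterns above (with a hypothetical global @g):
+// a word store to the address "(Rs << #2) + @g" matches the tglobaladdr form
+// and is selected as S4_storeri_ur, i.e. "memw(Rs<<#2 + #g) = Rt".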
+
+let mayStore = 1, isNVStore = 1, isExtended = 1, addrMode = BaseLongOffset,
+ opExtentBits = 6, isNewValue = 1, opNewValue = 3, opExtendable = 2 in
+class T_StoreAbsRegNV <string mnemonic, string CextOp, bits<2> MajOp,
+ MemAccessSize AccessSz>
+ : NVInst <(outs ),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, IntRegs:$src4),
+ mnemonic#"($src1<<#$src2 + #$src3) = $src4.new">, NewValueRel {
+ bits<5> src1;
+ bits<2> src2;
+ bits<6> src3;
+ bits<3> src4;
+
+ let CextOpcode = CextOp;
+ let BaseOpcode = CextOp#"_shl";
+ let IClass = 0b1010;
+
+ let Inst{27-21} = 0b1101101;
+ let Inst{12-11} = 0b00;
+ let Inst{7} = 0b1;
+ let Inst{20-16} = src1;
+ let Inst{13} = src2{1};
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src4;
+ let Inst{6} = src2{0};
+ let Inst{5-0} = src3;
+ }
+
+def S4_storerbnew_ur : T_StoreAbsRegNV <"memb", "STrib", 0b00, ByteAccess>;
+def S4_storerhnew_ur : T_StoreAbsRegNV <"memh", "STrih", 0b01, HalfWordAccess>;
+def S4_storerinew_ur : T_StoreAbsRegNV <"memw", "STriw", 0b10, WordAccess>;
+
+//===----------------------------------------------------------------------===//
+// Template classes for the non-predicated store instructions with
+// base + register offset addressing mode
+//===----------------------------------------------------------------------===//
+let isPredicable = 1 in
+class T_store_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isH>
+ : STInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, RC:$Rt),
+ mnemonic#"($Rs + $Ru<<#$u2) = $Rt"#!if(isH, ".h",""),
+ [],"",V4LDST_tc_st_SLOT01>, ImmRegShl, AddrModeRel {
+
+ bits<5> Rs;
+ bits<5> Ru;
+ bits<2> u2;
+ bits<5> Rt;
+
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
+
+ let IClass = 0b0011;
+
+ let Inst{27-24} = 0b1011;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Ru;
+ let Inst{13} = u2{1};
+ let Inst{7} = u2{0};
+ let Inst{4-0} = Rt;
+ }
+
+//===----------------------------------------------------------------------===//
+// Template classes for the predicated store instructions with
+// base + register offset addressing mode
+//===----------------------------------------------------------------------===//
+let isPredicated = 1 in
+class T_pstore_rr <string mnemonic, RegisterClass RC, bits<3> MajOp,
+ bit isNot, bit isPredNew, bit isH>
+ : STInst <(outs),
+ (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, RC:$Rt),
+
+ !if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Rt"#!if(isH, ".h",""),
+ [], "", V4LDST_tc_st_SLOT01> , AddrModeRel{
+ bits<2> Pv;
+ bits<5> Rs;
+ bits<5> Ru;
+ bits<2> u2;
+ bits<5> Rt;
+
+ let isPredicatedFalse = isNot;
+ let isPredicatedNew = isPredNew;
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
+
+ let IClass = 0b0011;
+
+ let Inst{27-26} = 0b01;
+ let Inst{25} = isPredNew;
+ let Inst{24} = isNot;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Ru;
+ let Inst{13} = u2{1};
+ let Inst{7} = u2{0};
+ let Inst{6-5} = Pv;
+ let Inst{4-0} = Rt;
+ }
+
+//===----------------------------------------------------------------------===//
+// Template classes for the new-value store instructions with
+// base + register offset addressing mode
+//===----------------------------------------------------------------------===//
+let isPredicable = 1, isNewValue = 1, opNewValue = 3 in
+class T_store_new_rr <string mnemonic, bits<2> MajOp> :
+ NVInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, IntRegs:$Nt),
+ mnemonic#"($Rs + $Ru<<#$u2) = $Nt.new",
+ [],"",V4LDST_tc_st_SLOT0>, ImmRegShl, AddrModeRel {
+
+ bits<5> Rs;
+ bits<5> Ru;
+ bits<2> u2;
+ bits<3> Nt;
+
+ let IClass = 0b0011;
+
+ let Inst{27-21} = 0b1011101;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Ru;
+ let Inst{13} = u2{1};
+ let Inst{7} = u2{0};
+ let Inst{4-3} = MajOp;
+ let Inst{2-0} = Nt;
+ }
+
+//===----------------------------------------------------------------------===//
+// Template classes for the predicated new-value store instructions with
+// base + register offset addressing mode
+//===----------------------------------------------------------------------===//
+let isPredicated = 1, isNewValue = 1, opNewValue = 4 in
+class T_pstore_new_rr <string mnemonic, bits<2> MajOp, bit isNot, bit isPredNew>
+ : NVInst<(outs),
+ (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, IntRegs:$Nt),
+ !if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Nt.new",
+ [], "", V4LDST_tc_st_SLOT0>, AddrModeRel {
+ bits<2> Pv;
+ bits<5> Rs;
+ bits<5> Ru;
+ bits<2> u2;
+ bits<3> Nt;
+
+ let isPredicatedFalse = isNot;
+ let isPredicatedNew = isPredNew;
+
+ let IClass = 0b0011;
+ let Inst{27-26} = 0b01;
+ let Inst{25} = isPredNew;
+ let Inst{24} = isNot;
+ let Inst{23-21} = 0b101;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Ru;
+ let Inst{13} = u2{1};
+ let Inst{7} = u2{0};
+ let Inst{6-5} = Pv;
+ let Inst{4-3} = MajOp;
+ let Inst{2-0} = Nt;
+ }
+
+//===----------------------------------------------------------------------===//
+// multiclass for store instructions with base + register offset addressing
+// mode
+//===----------------------------------------------------------------------===//
+let isNVStorable = 1 in
+multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC,
+ bits<3> MajOp, bit isH = 0> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
+ def S4_#NAME#_rr : T_store_rr <mnemonic, RC, MajOp, isH>;
+
+ // Predicated
+ def S4_p#NAME#t_rr : T_pstore_rr <mnemonic, RC, MajOp, 0, 0, isH>;
+ def S4_p#NAME#f_rr : T_pstore_rr <mnemonic, RC, MajOp, 1, 0, isH>;
+
+ // Predicated new
+ def S4_p#NAME#tnew_rr : T_pstore_rr <mnemonic, RC, MajOp, 0, 1, isH>;
+ def S4_p#NAME#fnew_rr : T_pstore_rr <mnemonic, RC, MajOp, 1, 1, isH>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// multiclass for new-value store instructions with base + register offset
+// addressing mode.
+//===----------------------------------------------------------------------===//
+let mayStore = 1, isNVStore = 1 in
+multiclass ST_Idxd_shl_nv <string mnemonic, string CextOp, RegisterClass RC,
+ bits<2> MajOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
+ def S4_#NAME#new_rr : T_store_new_rr<mnemonic, MajOp>;
+
+ // Predicated
+ def S4_p#NAME#newt_rr : T_pstore_new_rr <mnemonic, MajOp, 0, 0>;
+ def S4_p#NAME#newf_rr : T_pstore_new_rr <mnemonic, MajOp, 1, 0>;
+
+ // Predicated new
+ def S4_p#NAME#newtnew_rr : T_pstore_new_rr <mnemonic, MajOp, 0, 1>;
+ def S4_p#NAME#newfnew_rr : T_pstore_new_rr <mnemonic, MajOp, 1, 1>;
+ }
+}
+
+let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0 in {
+ let accessSize = ByteAccess in
+ defm storerb: ST_Idxd_shl<"memb", "STrib", IntRegs, 0b000>,
+ ST_Idxd_shl_nv<"memb", "STrib", IntRegs, 0b00>;
+
+ let accessSize = HalfWordAccess in
+ defm storerh: ST_Idxd_shl<"memh", "STrih", IntRegs, 0b010>,
+ ST_Idxd_shl_nv<"memh", "STrih", IntRegs, 0b01>;
+
+ let accessSize = WordAccess in
+ defm storeri: ST_Idxd_shl<"memw", "STriw", IntRegs, 0b100>,
+ ST_Idxd_shl_nv<"memw", "STriw", IntRegs, 0b10>;
+
+ let isNVStorable = 0, accessSize = DoubleWordAccess in
+ defm storerd: ST_Idxd_shl<"memd", "STrid", DoubleRegs, 0b110>;
+
+ let isNVStorable = 0, accessSize = HalfWordAccess in
+ defm storerf: ST_Idxd_shl<"memh", "STrif", IntRegs, 0b011, 1>;
+}
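+
+// Illustrative expansion of the defms above: "defm storerb" instantiates both
+// multiclasses, producing S4_storerb_rr, the predicated forms
+// S4_pstorerb{t,f}[new]_rr, and the new-value forms S4_storerbnew_rr and
+// S4_pstorerbnew{t,f}[new]_rr.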
+
+class Storexs_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Ru, (add (i32 IntRegs:$Rs),
+ (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2)))),
+ (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>;
+
+let AddedComplexity = 40 in {
+ def: Storexs_pat<truncstorei8, I32, S4_storerb_rr>;
+ def: Storexs_pat<truncstorei16, I32, S4_storerh_rr>;
+ def: Storexs_pat<store, I32, S4_storeri_rr>;
+ def: Storexs_pat<store, I64, S4_storerd_rr>;
+}
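+
+// For example (illustrative): "store i32 %v, (add %base, (shl %idx, 2))"
+// matches Storexs_pat<store, I32, S4_storeri_rr> and is selected to the
+// base + (register << #u2) word store defined above.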
+
+// memd(Rx++#s4:3)=Rtt
+// memd(Rx++#s4:3:circ(Mu))=Rtt
+// memd(Rx++I:circ(Mu))=Rtt
+// memd(Rx++Mu)=Rtt
+// memd(Rx++Mu:brev)=Rtt
+// memd(gp+#u16:3)=Rtt
+
+// Store doubleword conditionally.
+// if ([!]Pv[.new]) memd(#u6)=Rtt
+// TODO: needs to be implemented.
+
+//===----------------------------------------------------------------------===//
+// Template class
+//===----------------------------------------------------------------------===//
+let isPredicable = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 8,
+ opExtendable = 2 in
+class T_StoreImm <string mnemonic, Operand OffsetOp, bits<2> MajOp >
+ : STInst <(outs ), (ins IntRegs:$Rs, OffsetOp:$offset, s8Ext:$S8),
+ mnemonic#"($Rs+#$offset)=#$S8",
+ [], "", V4LDST_tc_st_SLOT01>,
+ ImmRegRel, PredNewRel {
+ bits<5> Rs;
+ bits<8> S8;
+ bits<8> offset;
+ bits<6> offsetBits;
+
+ string OffsetOpStr = !cast<string>(OffsetOp);
+ let offsetBits = !if (!eq(OffsetOpStr, "u6_2Imm"), offset{7-2},
+ !if (!eq(OffsetOpStr, "u6_1Imm"), offset{6-1},
+ /* u6_0Imm */ offset{5-0}));
+
+ let IClass = 0b0011;
+
+ let Inst{27-25} = 0b110;
+ let Inst{22-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{12-7} = offsetBits;
+ let Inst{13} = S8{7};
+ let Inst{6-0} = S8{6-0};
+ }
+
+let isPredicated = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 6,
+ opExtendable = 3 in
+class T_StoreImm_pred <string mnemonic, Operand OffsetOp, bits<2> MajOp,
+ bit isPredNot, bit isPredNew >
+ : STInst <(outs ),
+ (ins PredRegs:$Pv, IntRegs:$Rs, OffsetOp:$offset, s6Ext:$S6),
+ !if(isPredNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($Rs+#$offset)=#$S6",
+ [], "", V4LDST_tc_st_SLOT01>,
+ ImmRegRel, PredNewRel {
+ bits<2> Pv;
+ bits<5> Rs;
+ bits<6> S6;
+ bits<8> offset;
+ bits<6> offsetBits;
+
+ string OffsetOpStr = !cast<string>(OffsetOp);
+ let offsetBits = !if (!eq(OffsetOpStr, "u6_2Imm"), offset{7-2},
+ !if (!eq(OffsetOpStr, "u6_1Imm"), offset{6-1},
+ /* u6_0Imm */ offset{5-0}));
+ let isPredicatedNew = isPredNew;
+ let isPredicatedFalse = isPredNot;
+
+ let IClass = 0b0011;
+
+ let Inst{27-25} = 0b100;
+ let Inst{24} = isPredNew;
+ let Inst{23} = isPredNot;
+ let Inst{22-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{13} = S6{5};
+ let Inst{12-7} = offsetBits;
+ let Inst{6-5} = Pv;
+ let Inst{4-0} = S6{4-0};
+ }
+
+
+//===----------------------------------------------------------------------===//
+// multiclass for store instructions with base + immediate offset
+// addressing mode and immediate stored value.
+// mem[bhw](Rx++#s4:3)=#s8
+// if ([!]Pv[.new]) mem[bhw](Rx++#s4:3)=#s6
+//===----------------------------------------------------------------------===//
+
+multiclass ST_Imm_Pred <string mnemonic, Operand OffsetOp, bits<2> MajOp,
+ bit PredNot> {
+ def _io : T_StoreImm_pred <mnemonic, OffsetOp, MajOp, PredNot, 0>;
+ // Predicate new
+ def new_io : T_StoreImm_pred <mnemonic, OffsetOp, MajOp, PredNot, 1>;
+}
+
+multiclass ST_Imm <string mnemonic, string CextOp, Operand OffsetOp,
+ bits<2> MajOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_imm in {
+ def _io : T_StoreImm <mnemonic, OffsetOp, MajOp>;
+
+ defm t : ST_Imm_Pred <mnemonic, OffsetOp, MajOp, 0>;
+ defm f : ST_Imm_Pred <mnemonic, OffsetOp, MajOp, 1>;
+ }
+}
+
+let hasSideEffects = 0, addrMode = BaseImmOffset,
+ InputType = "imm" in {
+ let accessSize = ByteAccess in
+ defm S4_storeirb : ST_Imm<"memb", "STrib", u6_0Imm, 0b00>;
+
+ let accessSize = HalfWordAccess in
+ defm S4_storeirh : ST_Imm<"memh", "STrih", u6_1Imm, 0b01>;
+
+ let accessSize = WordAccess in
+ defm S4_storeiri : ST_Imm<"memw", "STriw", u6_2Imm, 0b10>;
+}
+
+def IMM_BYTE : SDNodeXForm<imm, [{
+ // -1 etc. is represented as 255 etc.; assigning to a byte restores the
+ // desired signed value.
+ int8_t imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
+
+def IMM_HALF : SDNodeXForm<imm, [{
+ // -1 etc. is represented as 65535 etc.; assigning to a short restores the
+ // desired signed value.
+ int16_t imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
+
+def IMM_WORD : SDNodeXForm<imm, [{
+ // -1 etc. can be represented as 4294967295 etc. The DAG does not currently
+ // do this, but some optimization might convert -1 into a large positive
+ // number; assigning to a word restores the desired signed value.
+ int32_t imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
+
+def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>;
+def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>;
+def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>;
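+
+// Worked example (illustrative): for a truncstorei8 of the i32 constant 255,
+// IMM_BYTE computes (int8_t)255 == -1, so S4_storeirb_io prints
+// "memb(Rs+#off) = #-1", which stores the same byte pattern 0xFF.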
+
+let AddedComplexity = 40 in {
+ // Not using frame-index patterns for these stores, because the offset
+ // is not extendable. This could cause problems during frame-index
+ // elimination, since the offset with respect to R29/R30 may not fit in
+ // the u6 field.
+ def: Storexm_add_pat<truncstorei8, s32ImmPred, u6_0ImmPred, ToImmByte,
+ S4_storeirb_io>;
+ def: Storexm_add_pat<truncstorei16, s32ImmPred, u6_1ImmPred, ToImmHalf,
+ S4_storeirh_io>;
+ def: Storexm_add_pat<store, s32ImmPred, u6_2ImmPred, ToImmWord,
+ S4_storeiri_io>;
+}
+
+def: Storexm_simple_pat<truncstorei8, s32ImmPred, ToImmByte, S4_storeirb_io>;
+def: Storexm_simple_pat<truncstorei16, s32ImmPred, ToImmHalf, S4_storeirh_io>;
+def: Storexm_simple_pat<store, s32ImmPred, ToImmWord, S4_storeiri_io>;
+
+// memb(Rx++#s4:0:circ(Mu))=Rt
+// memb(Rx++I:circ(Mu))=Rt
+// memb(Rx++Mu)=Rt
+// memb(Rx++Mu:brev)=Rt
+// memb(gp+#u16:0)=Rt
+
+// Store halfword.
+// TODO: needs to be implemented
+// memh(Re=#U6)=Rt.H
+// memh(Rs+#s11:1)=Rt.H
+// memh(Rs+Ru<<#u2)=Rt.H
+// TODO: needs to be implemented.
+
+// memh(Ru<<#u2+#U6)=Rt.H
+// memh(Rx++#s4:1:circ(Mu))=Rt.H
+// memh(Rx++#s4:1:circ(Mu))=Rt
+// memh(Rx++I:circ(Mu))=Rt.H
+// memh(Rx++I:circ(Mu))=Rt
+// memh(Rx++Mu)=Rt.H
+// memh(Rx++Mu)=Rt
+// memh(Rx++Mu:brev)=Rt.H
+// memh(Rx++Mu:brev)=Rt
+// memh(gp+#u16:1)=Rt
+// if ([!]Pv[.new]) memh(#u6)=Rt.H
+// if ([!]Pv[.new]) memh(#u6)=Rt
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H
+// TODO: needs to be implemented.
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H
+// TODO: Needs to be implemented.
+
+// Store word.
+// memw(Re=#U6)=Rt
+// TODO: Needs to be implemented.
+// memw(Rx++#s4:2)=Rt
+// memw(Rx++#s4:2:circ(Mu))=Rt
+// memw(Rx++I:circ(Mu))=Rt
+// memw(Rx++Mu)=Rt
+// memw(Rx++Mu:brev)=Rt
+
+//===----------------------------------------------------------------------===//
+// ST -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// NV/ST +
+//===----------------------------------------------------------------------===//
+
+let opNewValue = 2, opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in
+class T_store_io_nv <string mnemonic, RegisterClass RC,
+ Operand ImmOp, bits<2>MajOp>
+ : NVInst_V4 <(outs),
+ (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ mnemonic#"($src1+#$src2) = $src3.new",
+ [],"",ST_tc_st_SLOT0> {
+ bits<5> src1;
+ bits<13> src2; // Actual address offset
+ bits<3> src3;
+ bits<11> offsetBits; // Represents offset encoding
+
+ let opExtentBits = !if (!eq(mnemonic, "memb"), 11,
+ !if (!eq(mnemonic, "memh"), 12,
+ !if (!eq(mnemonic, "memw"), 13, 0)));
+
+ let opExtentAlign = !if (!eq(mnemonic, "memb"), 0,
+ !if (!eq(mnemonic, "memh"), 1,
+ !if (!eq(mnemonic, "memw"), 2, 0)));
+
+ let offsetBits = !if (!eq(mnemonic, "memb"), src2{10-0},
+ !if (!eq(mnemonic, "memh"), src2{11-1},
+ !if (!eq(mnemonic, "memw"), src2{12-2}, 0)));
+
+ let IClass = 0b1010;
+
+ let Inst{27} = 0b0;
+ let Inst{26-25} = offsetBits{10-9};
+ let Inst{24-21} = 0b1101;
+ let Inst{20-16} = src1;
+ let Inst{13} = offsetBits{8};
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src3;
+ let Inst{7-0} = offsetBits{7-0};
+ }
+
+let opExtendable = 2, opNewValue = 3, isPredicated = 1 in
+class T_pstore_io_nv <string mnemonic, RegisterClass RC, Operand predImmOp,
+ bits<2>MajOp, bit PredNot, bit isPredNew>
+ : NVInst_V4 <(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC:$src4),
+ !if(PredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+#$src3) = $src4.new",
+ [],"",V2LDST_tc_st_SLOT0> {
+ bits<2> src1;
+ bits<5> src2;
+ bits<9> src3;
+ bits<3> src4;
+ bits<6> offsetBits; // Represents offset encoding
+
+ let isPredicatedNew = isPredNew;
+ let isPredicatedFalse = PredNot;
+ let opExtentBits = !if (!eq(mnemonic, "memb"), 6,
+ !if (!eq(mnemonic, "memh"), 7,
+ !if (!eq(mnemonic, "memw"), 8, 0)));
+
+ let opExtentAlign = !if (!eq(mnemonic, "memb"), 0,
+ !if (!eq(mnemonic, "memh"), 1,
+ !if (!eq(mnemonic, "memw"), 2, 0)));
+
+ let offsetBits = !if (!eq(mnemonic, "memb"), src3{5-0},
+ !if (!eq(mnemonic, "memh"), src3{6-1},
+ !if (!eq(mnemonic, "memw"), src3{7-2}, 0)));
+
+ let IClass = 0b0100;
+
+ let Inst{27} = 0b0;
+ let Inst{26} = PredNot;
+ let Inst{25} = isPredNew;
+ let Inst{24-21} = 0b0101;
+ let Inst{20-16} = src2;
+ let Inst{13} = offsetBits{5};
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src4;
+ let Inst{7-3} = offsetBits{4-0};
+ let Inst{2} = 0b0;
+ let Inst{1-0} = src1;
+ }
+
+// multiclass for new-value store instructions with base + immediate offset.
+//
+let mayStore = 1, isNVStore = 1, isNewValue = 1, hasSideEffects = 0,
+ isExtendable = 1 in
+multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, Operand predImmOp, bits<2> MajOp> {
+
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+ def S2_#NAME#new_io : T_store_io_nv <mnemonic, RC, ImmOp, MajOp>;
+ // Predicated
+ def S2_p#NAME#newt_io :T_pstore_io_nv <mnemonic, RC, predImmOp, MajOp, 0, 0>;
+ def S2_p#NAME#newf_io :T_pstore_io_nv <mnemonic, RC, predImmOp, MajOp, 1, 0>;
+ // Predicated new
+ def S4_p#NAME#newtnew_io :T_pstore_io_nv <mnemonic, RC, predImmOp,
+ MajOp, 0, 1>;
+ def S4_p#NAME#newfnew_io :T_pstore_io_nv <mnemonic, RC, predImmOp,
+ MajOp, 1, 1>;
+ }
+}
+
+let addrMode = BaseImmOffset, InputType = "imm" in {
+ let accessSize = ByteAccess in
+ defm storerb: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext,
+ u6_0Ext, 0b00>, AddrModeRel;
+
+ let accessSize = HalfWordAccess, opExtentAlign = 1 in
+ defm storerh: ST_Idxd_nv<"memh", "STrih", IntRegs, s11_1Ext,
+ u6_1Ext, 0b01>, AddrModeRel;
+
+ let accessSize = WordAccess, opExtentAlign = 2 in
+ defm storeri: ST_Idxd_nv<"memw", "STriw", IntRegs, s11_2Ext,
+ u6_2Ext, 0b10>, AddrModeRel;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment loads with register offset.
+//===----------------------------------------------------------------------===//
+
+let hasNewValue = 1 in
+def L2_loadbsw2_pr : T_load_pr <"membh", IntRegs, 0b0001, HalfWordAccess>;
+
+def L2_loadbsw4_pr : T_load_pr <"membh", DoubleRegs, 0b0111, WordAccess>;
+
+let hasSideEffects = 0, addrMode = PostInc in
+class T_loadalign_pr <string mnemonic, bits<4> MajOp, MemAccessSize AccessSz>
+ : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$_dst_),
+ (ins DoubleRegs:$src1, IntRegs:$src2, ModRegs:$src3),
+ "$dst = "#mnemonic#"($src2++$src3)", [],
+ "$src1 = $dst, $src2 = $_dst_"> {
+ bits<5> dst;
+ bits<5> src2;
+ bits<1> src3;
+
+ let accessSize = AccessSz;
+ let IClass = 0b1001;
+
+ let Inst{27-25} = 0b110;
+ let Inst{24-21} = MajOp;
+ let Inst{20-16} = src2;
+ let Inst{13} = src3;
+ let Inst{12} = 0b0;
+ let Inst{7} = 0b0;
+ let Inst{4-0} = dst;
+ }
+
+def L2_loadalignb_pr : T_loadalign_pr <"memb_fifo", 0b0100, ByteAccess>;
+def L2_loadalignh_pr : T_loadalign_pr <"memh_fifo", 0b0010, HalfWordAccess>;
+
+//===----------------------------------------------------------------------===//
+// Template class for non-predicated post increment .new stores
+// mem[bhwd](Rx++#s4:[0123])=Nt.new
+//===----------------------------------------------------------------------===//
+let isPredicable = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1,
+ isNewValue = 1, opNewValue = 3 in
+class T_StorePI_nv <string mnemonic, Operand ImmOp, bits<2> MajOp >
+ : NVInstPI_V4 <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ImmOp:$offset, IntRegs:$src2),
+ mnemonic#"($src1++#$offset) = $src2.new",
+ [], "$src1 = $_dst_">,
+ AddrModeRel {
+ bits<5> src1;
+ bits<3> src2;
+ bits<7> offset;
+ bits<4> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
+ !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0}));
+ let IClass = 0b1010;
+
+ let Inst{27-21} = 0b1011101;
+ let Inst{20-16} = src1;
+ let Inst{13} = 0b0;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src2;
+ let Inst{7} = 0b0;
+ let Inst{6-3} = offsetBits;
+ let Inst{1} = 0b0;
+ }
+
+//===----------------------------------------------------------------------===//
+// Template class for predicated post increment .new stores
+// if([!]Pv[.new]) mem[bhwd](Rx++#s4:[0123])=Nt.new
+//===----------------------------------------------------------------------===//
+let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1,
+ isNewValue = 1, opNewValue = 4 in
+class T_StorePI_nv_pred <string mnemonic, Operand ImmOp,
+ bits<2> MajOp, bit isPredNot, bit isPredNew >
+ : NVInstPI_V4 <(outs IntRegs:$_dst_),
+ (ins PredRegs:$src1, IntRegs:$src2,
+ ImmOp:$offset, IntRegs:$src3),
+ !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2++#$offset) = $src3.new",
+ [], "$src2 = $_dst_">,
+ AddrModeRel {
+ bits<2> src1;
+ bits<5> src2;
+ bits<3> src3;
+ bits<7> offset;
+ bits<4> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
+ !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
+ /* s4_0Imm */ offset{3-0}));
+ let isPredicatedNew = isPredNew;
+ let isPredicatedFalse = isPredNot;
+
+ let IClass = 0b1010;
+
+ let Inst{27-21} = 0b1011101;
+ let Inst{20-16} = src2;
+ let Inst{13} = 0b1;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src3;
+ let Inst{7} = isPredNew;
+ let Inst{6-3} = offsetBits;
+ let Inst{2} = isPredNot;
+ let Inst{1-0} = src1;
+ }
+
+multiclass ST_PostInc_Pred_nv<string mnemonic, Operand ImmOp,
+ bits<2> MajOp, bit PredNot> {
+ def _pi : T_StorePI_nv_pred <mnemonic, ImmOp, MajOp, PredNot, 0>;
+
+ // Predicate new
+ def new_pi : T_StorePI_nv_pred <mnemonic, ImmOp, MajOp, PredNot, 1>;
+}
+
+multiclass ST_PostInc_nv<string mnemonic, string BaseOp, Operand ImmOp,
+ bits<2> MajOp> {
+ let BaseOpcode = "POST_"#BaseOp in {
+ def S2_#NAME#_pi : T_StorePI_nv <mnemonic, ImmOp, MajOp>;
+
+ // Predicated
+ defm S2_p#NAME#t : ST_PostInc_Pred_nv <mnemonic, ImmOp, MajOp, 0>;
+ defm S2_p#NAME#f : ST_PostInc_Pred_nv <mnemonic, ImmOp, MajOp, 1>;
+ }
+}
+
+let accessSize = ByteAccess in
+defm storerbnew: ST_PostInc_nv <"memb", "STrib", s4_0Imm, 0b00>;
+
+let accessSize = HalfWordAccess in
+defm storerhnew: ST_PostInc_nv <"memh", "STrih", s4_1Imm, 0b01>;
+
+let accessSize = WordAccess in
+defm storerinew: ST_PostInc_nv <"memw", "STriw", s4_2Imm, 0b10>;
+
+//===----------------------------------------------------------------------===//
+// Template class for post increment .new stores with register offset
+//===----------------------------------------------------------------------===//
+let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3 in
+class T_StorePI_RegNV <string mnemonic, bits<2> MajOp, MemAccessSize AccessSz>
+ : NVInstPI_V4 <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ModRegs:$src2, IntRegs:$src3),
+ mnemonic#"($src1++$src2) = $src3.new",
+ [], "$src1 = $_dst_"> {
+ bits<5> src1;
+ bits<1> src2;
+ bits<3> src3;
+ let accessSize = AccessSz;
+
+ let IClass = 0b1010;
+
+ let Inst{27-21} = 0b1101101;
+ let Inst{20-16} = src1;
+ let Inst{13} = src2;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src3;
+ let Inst{7} = 0b0;
+ }
+
+def S2_storerbnew_pr : T_StorePI_RegNV<"memb", 0b00, ByteAccess>;
+def S2_storerhnew_pr : T_StorePI_RegNV<"memh", 0b01, HalfWordAccess>;
+def S2_storerinew_pr : T_StorePI_RegNV<"memw", 0b10, WordAccess>;
+
+// memb(Rx++#s4:0:circ(Mu))=Nt.new
+// memb(Rx++I:circ(Mu))=Nt.new
+// memb(Rx++Mu:brev)=Nt.new
+// memh(Rx++#s4:1:circ(Mu))=Nt.new
+// memh(Rx++I:circ(Mu))=Nt.new
+// memh(Rx++Mu)=Nt.new
+// memh(Rx++Mu:brev)=Nt.new
+
+// memw(Rx++#s4:2:circ(Mu))=Nt.new
+// memw(Rx++I:circ(Mu))=Nt.new
+// memw(Rx++Mu)=Nt.new
+// memw(Rx++Mu:brev)=Nt.new
+
+//===----------------------------------------------------------------------===//
+// NV/ST -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// NV/J +
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// multiclass/template class for the new-value compare jump instructions with
+// register operands.
+//===----------------------------------------------------------------------===//
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11,
+ opExtentAlign = 2 in
+class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum,
+ bit isNegCond, bit isTak>
+ : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
+ "if ("#!if(isNegCond, "!","")#mnemonic#
+ "($src1"#!if(!eq(NvOpNum, 0),".new, ",", ")#
+ "$src2"#!if(!eq(NvOpNum, 1),".new))","))")#" jump:"
+ #!if(isTak, "t","nt")#" $offset", []> {
+
+ bits<5> src1;
+ bits<5> src2;
+ bits<3> Ns; // New-Value Operand
+ bits<5> RegOp; // Non-New-Value Operand
+ bits<11> offset;
+
+ let isTaken = isTak;
+ let isPredicatedFalse = isNegCond;
+ let opNewValue{0} = NvOpNum;
+
+ let Ns = !if(!eq(NvOpNum, 0), src1{2-0}, src2{2-0});
+ let RegOp = !if(!eq(NvOpNum, 0), src2, src1);
+
+ let IClass = 0b0010;
+ let Inst{27-26} = 0b00;
+ let Inst{25-23} = majOp;
+ let Inst{22} = isNegCond;
+ let Inst{18-16} = Ns;
+ let Inst{13} = isTak;
+ let Inst{12-8} = RegOp;
+ let Inst{21-20} = offset{10-9};
+ let Inst{7-1} = offset{8-2};
+}
+
+
+multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit NvOpNum,
+ bit isNegCond> {
+ // Branch not taken:
+ def _nt: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>;
+ // Branch taken:
+ def _t : NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>;
+}
+
+// NvOpNum = 0 -> First Operand is a new-value Register
+// NvOpNum = 1 -> Second Operand is a new-value Register
+
+multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp,
+ bit NvOpNum> {
+ let BaseOpcode = BaseOp#_NVJ in {
+ defm _t_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond
+ defm _f_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond
+ }
+}
+
+// if ([!]cmp.eq(Ns.new,Rt)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Ns.new,Rt)) jump:[n]t #r9:2
+// if ([!]cmp.gtu(Ns.new,Rt)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Rt,Ns.new)) jump:[n]t #r9:2
+// if ([!]cmp.gtu(Rt,Ns.new)) jump:[n]t #r9:2
+
+let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
+ Defs = [PC], hasSideEffects = 0 in {
+ defm J4_cmpeq : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel;
+ defm J4_cmpgt : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel;
+ defm J4_cmpgtu : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel;
+ defm J4_cmplt : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel;
+ defm J4_cmpltu : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel;
+}
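+
+// Illustrative expansion: "defm J4_cmpeq" above yields the four defs
+// J4_cmpeq_{t,f}_jumpnv_{t,nt}, e.g.
+//   if (cmp.eq(Ns.new, Rt)) jump:t #offset    (J4_cmpeq_t_jumpnv_t)
+//   if (!cmp.eq(Ns.new, Rt)) jump:nt #offset  (J4_cmpeq_f_jumpnv_nt)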
+
+//===----------------------------------------------------------------------===//
+// multiclass/template class for the new-value compare jump instructions
+// with a register and an unsigned immediate (U5) operand.
+//===----------------------------------------------------------------------===//
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11,
+ opExtentAlign = 2 in
+class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond,
+ bit isTak>
+ : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
+ "if ("#!if(isNegCond, "!","")#mnemonic#"($src1.new, #$src2)) jump:"
+ #!if(isTak, "t","nt")#" $offset", []> {
+
+ let isTaken = isTak;
+ let isPredicatedFalse = isNegCond;
+
+ bits<3> src1;
+ bits<5> src2;
+ bits<11> offset;
+
+ let IClass = 0b0010;
+ let Inst{26} = 0b1;
+ let Inst{25-23} = majOp;
+ let Inst{22} = isNegCond;
+ let Inst{18-16} = src1;
+ let Inst{13} = isTak;
+ let Inst{12-8} = src2;
+ let Inst{21-20} = offset{10-9};
+ let Inst{7-1} = offset{8-2};
+}
+
+multiclass NVJri_cond<string mnemonic, bits<3> majOp, bit isNegCond> {
+ // Branch not taken:
+ def _nt: NVJri_template<mnemonic, majOp, isNegCond, 0>;
+ // Branch taken:
+ def _t : NVJri_template<mnemonic, majOp, isNegCond, 1>;
+}
+
+multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> {
+ let BaseOpcode = BaseOp#_NVJri in {
+ defm _t_jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond
+ defm _f_jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond
+ }
+}
+
+// if ([!]cmp.eq(Ns.new,#U5)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Ns.new,#U5)) jump:[n]t #r9:2
+// if ([!]cmp.gtu(Ns.new,#U5)) jump:[n]t #r9:2
+
+let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
+ Defs = [PC], hasSideEffects = 0 in {
+ defm J4_cmpeqi : NVJri_base<"cmp.eq", "CMPEQ", 0b000>, PredRel;
+ defm J4_cmpgti : NVJri_base<"cmp.gt", "CMPGT", 0b001>, PredRel;
+ defm J4_cmpgtui : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel;
+}
+
+//===----------------------------------------------------------------------===//
+// multiclass/template class for the new-value compare jump instructions
+// with a register and a hardcoded 0/-1 immediate value.
+//===----------------------------------------------------------------------===//
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 11,
+ opExtentAlign = 2 in
+class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal,
+ bit isNegCond, bit isTak>
+ : NVInst_V4<(outs),
+ (ins IntRegs:$src1, brtarget:$offset),
+ "if ("#!if(isNegCond, "!","")#mnemonic
+ #"($src1.new, #"#ImmVal#")) jump:"
+ #!if(isTak, "t","nt")#" $offset", []> {
+
+ let isTaken = isTak;
+ let isPredicatedFalse = isNegCond;
+
+ bits<3> src1;
+ bits<11> offset;
+ let IClass = 0b0010;
+ let Inst{26} = 0b1;
+ let Inst{25-23} = majOp;
+ let Inst{22} = isNegCond;
+ let Inst{18-16} = src1;
+ let Inst{13} = isTak;
+ let Inst{21-20} = offset{10-9};
+ let Inst{7-1} = offset{8-2};
+}
+
+multiclass NVJ_ConstImm_cond<string mnemonic, bits<3> majOp, string ImmVal,
+ bit isNegCond> {
+ // Branch not taken:
+ def _nt: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>;
+ // Branch taken:
+ def _t : NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>;
+}
+
+multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp,
+ string ImmVal> {
+ let BaseOpcode = BaseOp#_NVJ_ConstImm in {
+ defm _t_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True
+ defm _f_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False
+ }
+}
+
+// if ([!]tstbit(Ns.new,#0)) jump:[n]t #r9:2
+// if ([!]cmp.eq(Ns.new,#-1)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Ns.new,#-1)) jump:[n]t #r9:2
+
+let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator=1,
+ Defs = [PC], hasSideEffects = 0 in {
+ defm J4_tstbit0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel;
+ defm J4_cmpeqn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel;
+ defm J4_cmpgtn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel;
+}
+
+// J4_hintjumpr: Hint indirect conditional jump.
+let isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
+def J4_hintjumpr: JRInst <
+ (outs),
+ (ins IntRegs:$Rs),
+ "hintjr($Rs)"> {
+ bits<5> Rs;
+ let IClass = 0b0101;
+ let Inst{27-21} = 0b0010101;
+ let Inst{20-16} = Rs;
+ }
+
+//===----------------------------------------------------------------------===//
+// NV/J -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// CR +
+//===----------------------------------------------------------------------===//
+
+// PC-relative add
+let hasNewValue = 1, isExtendable = 1, opExtendable = 1,
+ isExtentSigned = 0, opExtentBits = 6, hasSideEffects = 0, Uses = [PC] in
+def C4_addipc : CRInst <(outs IntRegs:$Rd), (ins u6Ext:$u6),
+ "$Rd = add(pc, #$u6)", [], "", CR_tc_2_SLOT3 > {
+ bits<5> Rd;
+ bits<6> u6;
+
+ let IClass = 0b0110;
+ let Inst{27-16} = 0b101001001001;
+ let Inst{12-7} = u6;
+ let Inst{4-0} = Rd;
+ }
+
+
+
+let hasSideEffects = 0 in
+class T_LOGICAL_3OP<string MnOp1, string MnOp2, bits<2> OpBits, bit IsNeg>
+ : CRInst<(outs PredRegs:$Pd),
+ (ins PredRegs:$Ps, PredRegs:$Pt, PredRegs:$Pu),
+ "$Pd = " # MnOp1 # "($Ps, " # MnOp2 # "($Pt, " #
+ !if (IsNeg,"!","") # "$Pu))",
+ [], "", CR_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<2> Ps;
+ bits<2> Pt;
+ bits<2> Pu;
+
+ let IClass = 0b0110;
+ let Inst{27-24} = 0b1011;
+ let Inst{23} = IsNeg;
+ let Inst{22-21} = OpBits;
+ let Inst{20} = 0b1;
+ let Inst{17-16} = Ps;
+ let Inst{13} = 0b0;
+ let Inst{9-8} = Pt;
+ let Inst{7-6} = Pu;
+ let Inst{1-0} = Pd;
+}
+
+def C4_and_and : T_LOGICAL_3OP<"and", "and", 0b00, 0>;
+def C4_and_or : T_LOGICAL_3OP<"and", "or", 0b01, 0>;
+def C4_or_and : T_LOGICAL_3OP<"or", "and", 0b10, 0>;
+def C4_or_or : T_LOGICAL_3OP<"or", "or", 0b11, 0>;
+def C4_and_andn : T_LOGICAL_3OP<"and", "and", 0b00, 1>;
+def C4_and_orn : T_LOGICAL_3OP<"and", "or", 0b01, 1>;
+def C4_or_andn : T_LOGICAL_3OP<"or", "and", 0b10, 1>;
+def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>;
+
+// op(Ps, op(Pt, Pu))
+class LogLog_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
+ : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))),
+ (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
+
+// op(Ps, op(Pt, ~Pu))
+class LogLogNot_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
+ : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))),
+ (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
+
+def: LogLog_pat<and, and, C4_and_and>;
+def: LogLog_pat<and, or, C4_and_or>;
+def: LogLog_pat<or, and, C4_or_and>;
+def: LogLog_pat<or, or, C4_or_or>;
+
+def: LogLogNot_pat<and, and, C4_and_andn>;
+def: LogLogNot_pat<and, or, C4_and_orn>;
+def: LogLogNot_pat<or, and, C4_or_andn>;
+def: LogLogNot_pat<or, or, C4_or_orn>;
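+
+// For example (illustrative), the DAG "(and Ps, (or Pt, (not Pu)))" matches
+// LogLogNot_pat<and, or, C4_and_orn> and is selected as
+// "Pd = and(Ps, or(Pt, !Pu))".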
+
+//===----------------------------------------------------------------------===//
+// PIC: Support for PIC compilations. The patterns and SD nodes defined
+// below are needed to support code generation for PIC.
+//===----------------------------------------------------------------------===//
+
+def SDT_HexagonAtGot
+ : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
+def SDT_HexagonAtPcrel
+ : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+// AT_GOT address-of-GOT, address-of-global, offset-in-global
+def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>;
+// AT_PCREL address-of-global
+def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>;
+
+def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)),
+ (L2_loadri_io I32:$got, imm:$addr)>;
+def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off),
+ (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>;
+def: Pat<(HexagonAtPcrel I32:$addr),
+ (C4_addipc imm:$addr)>;
+
+//===----------------------------------------------------------------------===//
+// CR -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU +
+//===----------------------------------------------------------------------===//
+
+// Logical with-not instructions.
+def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>;
+def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>;
+
+def: Pat<(i64 (and (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))),
+ (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+def: Pat<(i64 (or (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))),
+ (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+let hasNewValue = 1, hasSideEffects = 0 in
+def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-21} = 0b0101111;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{4-0} = Rd;
+}
+
+// Add and accumulate.
+// Rd=add(Rs,add(Ru,#s6))
+let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 6,
+ opExtendable = 3 in
+def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6),
+ "$Rd = add($Rs, add($Ru, #$s6))" ,
+ [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs),
+ (add (i32 IntRegs:$Ru), s32ImmPred:$s6)))],
+ "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Ru;
+ bits<6> s6;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b10110;
+ let Inst{22-21} = s6{5-4};
+ let Inst{20-16} = Rs;
+ let Inst{13} = s6{3};
+ let Inst{12-8} = Rd;
+ let Inst{7-5} = s6{2-0};
+ let Inst{4-0} = Ru;
+ }
+
+let isExtentSigned = 1, hasSideEffects = 0, hasNewValue = 1, isExtendable = 1,
+ opExtentBits = 6, opExtendable = 2 in
+def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, s6Ext:$s6, IntRegs:$Ru),
+ "$Rd = add($Rs, sub(#$s6, $Ru))",
+ [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<6> s6;
+ bits<5> Ru;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b10111;
+ let Inst{22-21} = s6{5-4};
+ let Inst{20-16} = Rs;
+ let Inst{13} = s6{3};
+ let Inst{12-8} = Rd;
+ let Inst{7-5} = s6{2-0};
+ let Inst{4-0} = Ru;
+ }
+
+// Rd=add(Rs,sub(#s6,Ru))
+def: Pat<(add (i32 IntRegs:$src1), (sub s32ImmPred:$src2,
+ (i32 IntRegs:$src3))),
+ (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
+
+// Rd=sub(add(Rs,#s6),Ru)
+def: Pat<(sub (add (i32 IntRegs:$src1), s32ImmPred:$src2),
+ (i32 IntRegs:$src3)),
+ (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
+
+// Rd=add(sub(Rs,Ru),#s6)
+def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)),
+ (s32ImmPred:$src2)),
+ (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
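+
+// All three patterns above compute the same value, Rs - Ru + #s6:
+//   add(Rs, sub(#s6, Ru)) == sub(add(Rs, #s6), Ru) == add(sub(Rs, Ru), #s6),
+// so each form is selected to the single S4_subaddi instruction.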
+
+
+// Add or subtract doublewords with carry.
+//TODO:
+// Rdd=add(Rss,Rtt,Px):carry
+//TODO:
+// Rdd=sub(Rss,Rtt,Px):carry
+
+// Extract bitfield
+// Rdd=extract(Rss,#u6,#U6)
+// Rdd=extract(Rss,Rtt)
+// Rd=extract(Rs,Rtt)
+// Rd=extract(Rs,#u5,#U5)
+
+def S4_extractp_rp : T_S3op_64 < "extract", 0b11, 0b100, 0>;
+def S4_extractp : T_S2op_extract <"extract", 0b1010, DoubleRegs, u6Imm>;
+
+let hasNewValue = 1 in {
+ def S4_extract_rp : T_S3op_extract<"extract", 0b01>;
+ def S4_extract : T_S2op_extract <"extract", 0b1101, IntRegs, u5Imm>;
+}
+
+// Complex add/sub halfwords/words
+let Defs = [USR_OVF] in {
+ def S4_vxaddsubh : T_S3op_64 < "vxaddsubh", 0b01, 0b100, 0, 1>;
+ def S4_vxaddsubw : T_S3op_64 < "vxaddsubw", 0b01, 0b000, 0, 1>;
+ def S4_vxsubaddh : T_S3op_64 < "vxsubaddh", 0b01, 0b110, 0, 1>;
+ def S4_vxsubaddw : T_S3op_64 < "vxsubaddw", 0b01, 0b010, 0, 1>;
+}
+
+let Defs = [USR_OVF] in {
+ def S4_vxaddsubhr : T_S3op_64 < "vxaddsubh", 0b11, 0b000, 0, 1, 1, 1>;
+ def S4_vxsubaddhr : T_S3op_64 < "vxsubaddh", 0b11, 0b010, 0, 1, 1, 1>;
+}
+
+let Itinerary = M_tc_3x_SLOT23, Defs = [USR_OVF] in {
+ def M4_mac_up_s1_sat: T_MType_acc_rr<"+= mpy", 0b011, 0b000, 0, [], 0, 1, 1>;
+ def M4_nac_up_s1_sat: T_MType_acc_rr<"-= mpy", 0b011, 0b001, 0, [], 0, 1, 1>;
+}
+
+// Logical xor with xor accumulation.
+// Rxx^=xor(Rss,Rtt)
+let hasSideEffects = 0 in
+def M4_xor_xacc
+ : SInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Rxx ^= xor($Rss, $Rtt)",
+ [(set (i64 DoubleRegs:$Rxx),
+ (xor (i64 DoubleRegs:$dst2), (xor (i64 DoubleRegs:$Rss),
+ (i64 DoubleRegs:$Rtt))))],
+ "$dst2 = $Rxx", S_3op_tc_1_SLOT23> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1100;
+
+ let Inst{27-22} = 0b101010;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ let Inst{7-5} = 0b000;
+ let Inst{4-0} = Rxx;
+ }
+
+// Rotate and reduce bytes
+// Rdd=vrcrotate(Rss,Rt,#u2)
+let hasSideEffects = 0 in
+def S4_vrcrotate
+ : SInst <(outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2),
+ "$Rdd = vrcrotate($Rss, $Rt, #$u2)",
+ [], "", S_3op_tc_3x_SLOT23> {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rt;
+ bits<2> u2;
+
+ let IClass = 0b1100;
+
+ let Inst{27-22} = 0b001111;
+ let Inst{20-16} = Rss;
+ let Inst{13} = u2{1};
+ let Inst{12-8} = Rt;
+ let Inst{7-6} = 0b11;
+ let Inst{5} = u2{0};
+ let Inst{4-0} = Rdd;
+ }
+
+// Rotate and reduce bytes with accumulation
+// Rxx+=vrcrotate(Rss,Rt,#u2)
+let hasSideEffects = 0 in
+def S4_vrcrotate_acc
+ : SInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2),
+ "$Rxx += vrcrotate($Rss, $Rt, #$u2)", [],
+ "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rt;
+ bits<2> u2;
+
+ let IClass = 0b1100;
+
+ let Inst{27-21} = 0b1011101;
+ let Inst{20-16} = Rss;
+ let Inst{13} = u2{1};
+ let Inst{12-8} = Rt;
+ let Inst{5} = u2{0};
+ let Inst{4-0} = Rxx;
+ }
+
+// Vector reduce conditional negate halfwords
+let hasSideEffects = 0 in
+def S2_vrcnegh
+ : SInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt),
+ "$Rxx += vrcnegh($Rss, $Rt)", [],
+ "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Rt;
+
+ let IClass = 0b1100;
+
+ let Inst{27-21} = 0b1011001;
+ let Inst{20-16} = Rss;
+ let Inst{13} = 0b1;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = 0b111;
+ let Inst{4-0} = Rxx;
+ }
+
+// Split bitfield
+def A4_bitspliti : T_S2op_2_di <"bitsplit", 0b110, 0b100>;
+
+// Arithmetic/Convergent round
+def A4_cround_ri : T_S2op_2_ii <"cround", 0b111, 0b000>;
+
+def A4_round_ri : T_S2op_2_ii <"round", 0b111, 0b100>;
+
+let Defs = [USR_OVF] in
+def A4_round_ri_sat : T_S2op_2_ii <"round", 0b111, 0b110, 1>;
+
+// Logical-logical words.
+// Compound or-and -- Rx=or(Ru,and(Rx,#s10))
+let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 10,
+ opExtendable = 3 in
+def S4_or_andix:
+ ALU64Inst<(outs IntRegs:$Rx),
+ (ins IntRegs:$Ru, IntRegs:$_src_, s10Ext:$s10),
+ "$Rx = or($Ru, and($_src_, #$s10))" ,
+ [(set (i32 IntRegs:$Rx),
+ (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32ImmPred:$s10)))] ,
+ "$_src_ = $Rx", ALU64_tc_2_SLOT23> {
+ bits<5> Rx;
+ bits<5> Ru;
+ bits<10> s10;
+
+ let IClass = 0b1101;
+
+ let Inst{27-22} = 0b101001;
+ let Inst{20-16} = Rx;
+ let Inst{21} = s10{9};
+ let Inst{13-5} = s10{8-0};
+ let Inst{4-0} = Ru;
+ }
+
+// Miscellaneous ALU64 instructions.
+//
+let hasNewValue = 1, hasSideEffects = 0 in
+def A4_modwrapu: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = modwrap($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-21} = 0b0011111;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = 0b111;
+ let Inst{4-0} = Rd;
+}
+
+let hasSideEffects = 0 in
+def A4_bitsplit: ALU64Inst<(outs DoubleRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = bitsplit($Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = 0b0100;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{4-0} = Rd;
+}
+
+let hasSideEffects = 0 in
+def dep_S2_packhl: ALU64Inst<(outs DoubleRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = packhl($Rs, $Rt):deprecated", [], "", ALU64_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = 0b0100;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{4-0} = Rd;
+}
+
+let hasNewValue = 1, hasSideEffects = 0 in
+def dep_A2_addsat: ALU64Inst<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = add($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-21} = 0b0101100;
+ let Inst{20-16} = Rs;
+ let Inst{12-8} = Rt;
+ let Inst{7} = 0b0;
+ let Inst{4-0} = Rd;
+}
+
+let hasNewValue = 1, hasSideEffects = 0 in
+def dep_A2_subsat: ALU64Inst<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = sub($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-21} = 0b0101100;
+ let Inst{20-16} = Rt;
+ let Inst{12-8} = Rs;
+ let Inst{7} = 0b1;
+ let Inst{4-0} = Rd;
+}
+
+// Rx[&|]=xor(Rs,Rt)
+def M4_or_xor : T_MType_acc_rr < "|= xor", 0b110, 0b001, 0>;
+def M4_and_xor : T_MType_acc_rr < "&= xor", 0b010, 0b010, 0>;
+
+// Rx[&|^]=or(Rs,Rt)
+def M4_xor_or : T_MType_acc_rr < "^= or", 0b110, 0b011, 0>;
+
+let CextOpcode = "ORr_ORr" in
+def M4_or_or : T_MType_acc_rr < "|= or", 0b110, 0b000, 0>;
+def M4_and_or : T_MType_acc_rr < "&= or", 0b010, 0b001, 0>;
+
+// Rx[&|^]=and(Rs,Rt)
+def M4_xor_and : T_MType_acc_rr < "^= and", 0b110, 0b010, 0>;
+
+let CextOpcode = "ORr_ANDr" in
+def M4_or_and : T_MType_acc_rr < "|= and", 0b010, 0b011, 0>;
+def M4_and_and : T_MType_acc_rr < "&= and", 0b010, 0b000, 0>;
+
+// Rx[&|^]=and(Rs,~Rt)
+def M4_xor_andn : T_MType_acc_rr < "^= and", 0b001, 0b010, 0, [], 1>;
+def M4_or_andn : T_MType_acc_rr < "|= and", 0b001, 0b000, 0, [], 1>;
+def M4_and_andn : T_MType_acc_rr < "&= and", 0b001, 0b001, 0, [], 1>;
+
+def: T_MType_acc_pat2 <M4_or_xor, xor, or>;
+def: T_MType_acc_pat2 <M4_and_xor, xor, and>;
+def: T_MType_acc_pat2 <M4_or_and, and, or>;
+def: T_MType_acc_pat2 <M4_and_and, and, and>;
+def: T_MType_acc_pat2 <M4_xor_and, and, xor>;
+def: T_MType_acc_pat2 <M4_or_or, or, or>;
+def: T_MType_acc_pat2 <M4_and_or, or, and>;
+def: T_MType_acc_pat2 <M4_xor_or, or, xor>;
+
+class T_MType_acc_pat3 <InstHexagon MI, SDNode firstOp, SDNode secOp>
+ : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2,
+ (not IntRegs:$src3)))),
+ (i32 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3))>;
+
+def: T_MType_acc_pat3 <M4_or_andn, and, or>;
+def: T_MType_acc_pat3 <M4_and_andn, and, and>;
+def: T_MType_acc_pat3 <M4_xor_andn, and, xor>;
+
+// Compound or-or and or-and
+let isExtentSigned = 1, InputType = "imm", hasNewValue = 1, isExtendable = 1,
+ opExtentBits = 10, opExtendable = 3 in
+class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode>
+ : MInst_acc <(outs IntRegs:$Rx),
+ (ins IntRegs:$src1, IntRegs:$Rs, s10Ext:$s10),
+ "$Rx |= "#mnemonic#"($Rs, #$s10)",
+ [(set (i32 IntRegs:$Rx), (or (i32 IntRegs:$src1),
+ (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10)))],
+ "$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel {
+ bits<5> Rx;
+ bits<5> Rs;
+ bits<10> s10;
+
+ let IClass = 0b1101;
+
+ let Inst{27-24} = 0b1010;
+ let Inst{23-22} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{21} = s10{9};
+ let Inst{13-5} = s10{8-0};
+ let Inst{4-0} = Rx;
+ }
+
+let CextOpcode = "ORr_ANDr" in
+def S4_or_andi : T_CompOR <"and", 0b00, and>;
+
+let CextOpcode = "ORr_ORr" in
+def S4_or_ori : T_CompOR <"or", 0b10, or>;
+
+// Modulo wrap
+// Rd=modwrap(Rs,Rt)
+// Round
+// Rd=cround(Rs,#u5)
+// Rd=cround(Rs,Rt)
+// Rd=round(Rs,#u5)[:sat]
+// Rd=round(Rs,Rt)[:sat]
+// Vector reduce add unsigned halfwords
+// Rd=vraddh(Rss,Rtt)
+// Vector add bytes
+// Rdd=vaddb(Rss,Rtt)
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+// Rxx+=vrcnegh(Rss,Rt)
+// Vector maximum bytes
+// Rdd=vmaxb(Rtt,Rss)
+// Vector reduce maximum halfwords
+// Rxx=vrmaxh(Rss,Ru)
+// Rxx=vrmaxuh(Rss,Ru)
+// Vector reduce maximum words
+// Rxx=vrmaxuw(Rss,Ru)
+// Rxx=vrmaxw(Rss,Ru)
+// Vector minimum bytes
+// Rdd=vminb(Rtt,Rss)
+// Vector reduce minimum halfwords
+// Rxx=vrminh(Rss,Ru)
+// Rxx=vrminuh(Rss,Ru)
+// Vector reduce minimum words
+// Rxx=vrminuw(Rss,Ru)
+// Rxx=vrminw(Rss,Ru)
+// Vector subtract bytes
+// Rdd=vsubb(Rss,Rtt)
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XTYPE/BIT +
+//===----------------------------------------------------------------------===//
+
+// Bit reverse
+def S2_brevp : T_S2op_3 <"brev", 0b11, 0b110>;
+
+// Bit count
+def S2_ct0p : T_COUNT_LEADING_64<"ct0", 0b111, 0b010>;
+def S2_ct1p : T_COUNT_LEADING_64<"ct1", 0b111, 0b100>;
+def S4_clbpnorm : T_COUNT_LEADING_64<"normamt", 0b011, 0b000>;
+
+// Count trailing zeros: 64-bit.
+def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>;
+def: Pat<(i32 (trunc (cttz_zero_undef I64:$Rss))), (S2_ct0p I64:$Rss)>;
+
+// Count trailing ones: 64-bit.
+def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
+def: Pat<(i32 (trunc (cttz_zero_undef (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
+
+// Define leading/trailing patterns that require zero-extensions to 64 bits.
+def: Pat<(i64 (ctlz I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>;
+def: Pat<(i64 (ctlz_zero_undef I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>;
+def: Pat<(i64 (cttz I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>;
+def: Pat<(i64 (cttz_zero_undef I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>;
+def: Pat<(i64 (ctlz (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>;
+def: Pat<(i64 (ctlz_zero_undef (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>;
+def: Pat<(i64 (cttz (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>;
+def: Pat<(i64 (cttz_zero_undef (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>;
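+
+// These follow the identities ct1(x) == cttz(~x) and cl1(x) == ctlz(~x); the
+// i64 results are formed by zero-extending the 32-bit count via Zext64.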
+
+
+let hasSideEffects = 0, hasNewValue = 1 in
+def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6Imm:$s6),
+ "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> {
+ bits<5> Rs;
+ bits<5> Rd;
+ bits<6> s6;
+ let IClass = 0b1000;
+ let Inst{27-24} = 0b1100;
+ let Inst{23-21} = 0b001;
+ let Inst{20-16} = Rs;
+ let Inst{13-8} = s6;
+ let Inst{7-5} = 0b000;
+ let Inst{4-0} = Rd;
+}
+
+let hasSideEffects = 0, hasNewValue = 1 in
+def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6Imm:$s6),
+ "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> {
+ bits<5> Rs;
+ bits<5> Rd;
+ bits<6> s6;
+ let IClass = 0b1000;
+ let Inst{27-24} = 0b1000;
+ let Inst{23-21} = 0b011;
+ let Inst{20-16} = Rs;
+ let Inst{13-8} = s6;
+ let Inst{7-5} = 0b010;
+ let Inst{4-0} = Rd;
+}
+
+
+// Bit test/set/clear
+def S4_ntstbit_i : T_TEST_BIT_IMM<"!tstbit", 0b001>;
+def S4_ntstbit_r : T_TEST_BIT_REG<"!tstbit", 1>;
+
+let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
+ def: Pat<(i1 (seteq (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)),
+ (S4_ntstbit_i (i32 IntRegs:$Rs), u5ImmPred:$u5)>;
+ def: Pat<(i1 (seteq (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)),
+ (S4_ntstbit_r (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))>;
+}
+
+// Add extra complexity to prefer these instructions over bitsset/bitsclr.
+// The reason is that tstbit/ntstbit can be folded into a compound instruction:
+// if ([!]tstbit(...)) jump ...
+let AddedComplexity = 100 in
+def: Pat<(i1 (setne (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))),
+ (S2_tstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>;
+
+let AddedComplexity = 100 in
+def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))),
+ (S4_ntstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>;
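+
+// Illustrative example (assuming Set5ImmPred matches constants with exactly
+// one bit set and BITPOS32 yields that bit's position): "(x & 0x20) == 0" is
+// selected via the second pattern above as "Pd = !tstbit(Rs, #5)".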
+
+def C4_nbitsset : T_TEST_BITS_REG<"!bitsset", 0b01, 1>;
+def C4_nbitsclr : T_TEST_BITS_REG<"!bitsclr", 0b10, 1>;
+def C4_nbitsclri : T_TEST_BITS_IMM<"!bitsclr", 0b10, 1>;
+
+// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be
+// represented as a compare against "value & 0xFF", which is an exact match
+// for cmpb (same for cmph). The patterns below do not contain any additional
+// complexity that would make them preferable, and if they were actually used
+// instead of cmpb/cmph, they would result in a compare against a register
+// that is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF).
+def: Pat<(i1 (setne (and I32:$Rs, u6ImmPred:$u6), 0)),
+ (C4_nbitsclri I32:$Rs, u6ImmPred:$u6)>;
+def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)),
+ (C4_nbitsclr I32:$Rs, I32:$Rt)>;
+def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
+ (C4_nbitsset I32:$Rs, I32:$Rt)>;
+
+//===----------------------------------------------------------------------===//
+// XTYPE/BIT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY +
+//===----------------------------------------------------------------------===//
+
+// Rd=add(#u6,mpyi(Rs,#U6)) -- Multiply by immed and add immed.
+
+let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1 in
+def M4_mpyri_addi : MInst<(outs IntRegs:$Rd),
+ (ins u6Ext:$u6, IntRegs:$Rs, u6Imm:$U6),
+ "$Rd = add(#$u6, mpyi($Rs, #$U6))" ,
+ [(set (i32 IntRegs:$Rd),
+ (add (mul (i32 IntRegs:$Rs), u6ImmPred:$U6),
+ u32ImmPred:$u6))] ,"",ALU64_tc_3x_SLOT23> {
+ bits<5> Rd;
+ bits<6> u6;
+ bits<5> Rs;
+ bits<6> U6;
+
+ let IClass = 0b1101;
+
+ let Inst{27-24} = 0b1000;
+ let Inst{23} = U6{5};
+ let Inst{22-21} = u6{5-4};
+ let Inst{20-16} = Rs;
+ let Inst{13} = u6{3};
+ let Inst{12-8} = Rd;
+ let Inst{7-5} = u6{2-0};
+ let Inst{4-0} = U6{4-0};
+ }
+
+// Rd=add(#u6,mpyi(Rs,Rt))
+let CextOpcode = "ADD_MPY", InputType = "imm", hasNewValue = 1,
+ isExtendable = 1, opExtentBits = 6, opExtendable = 1 in
+def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd),
+ (ins u6Ext:$u6, IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = add(#$u6, mpyi($Rs, $Rt))" ,
+ [(set (i32 IntRegs:$Rd),
+ (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32ImmPred:$u6))],
+ "", ALU64_tc_3x_SLOT23>, ImmRegRel {
+ bits<5> Rd;
+ bits<6> u6;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b01110;
+ let Inst{22-21} = u6{5-4};
+ let Inst{20-16} = Rs;
+ let Inst{13} = u6{3};
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = u6{2-0};
+ let Inst{4-0} = Rd;
+ }
+
+let hasNewValue = 1 in
+class T_AddMpy <bit MajOp, PatLeaf ImmPred, dag ins>
+ : ALU64Inst <(outs IntRegs:$dst), ins,
+ "$dst = add($src1, mpyi("#!if(MajOp,"$src3, #$src2))",
+ "#$src2, $src3))"),
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), ImmPred:$src2)))],
+ "", ALU64_tc_3x_SLOT23> {
+ bits<5> dst;
+ bits<5> src1;
+ bits<8> src2;
+ bits<5> src3;
+
+ let IClass = 0b1101;
+
+ bits<6> ImmValue = !if(MajOp, src2{5-0}, src2{7-2});
+
+ let Inst{27-24} = 0b1111;
+ let Inst{23} = MajOp;
+ let Inst{22-21} = ImmValue{5-4};
+ let Inst{20-16} = src3;
+ let Inst{13} = ImmValue{3};
+ let Inst{12-8} = dst;
+ let Inst{7-5} = ImmValue{2-0};
+ let Inst{4-0} = src1;
+ }
+
+def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred,
+ (ins IntRegs:$src1, u6_2Imm:$src2, IntRegs:$src3)>;
+
+let isExtendable = 1, opExtentBits = 6, opExtendable = 3,
+ CextOpcode = "ADD_MPY", InputType = "imm" in
+def M4_mpyri_addr : T_AddMpy<0b1, u32ImmPred,
+ (ins IntRegs:$src1, IntRegs:$src3, u6Ext:$src2)>, ImmRegRel;
+
+// Rx=add(Ru,mpyi(Rx,Rs))
+let CextOpcode = "ADD_MPY", InputType = "reg", hasNewValue = 1 in
+def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx),
+ (ins IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs),
+ "$Rx = add($Ru, mpyi($_src_, $Rs))",
+ [(set (i32 IntRegs:$Rx), (add (i32 IntRegs:$Ru),
+ (mul (i32 IntRegs:$_src_), (i32 IntRegs:$Rs))))],
+ "$_src_ = $Rx", M_tc_3x_SLOT23>, ImmRegRel {
+ bits<5> Rx;
+ bits<5> Ru;
+ bits<5> Rs;
+
+ let IClass = 0b1110;
+
+ let Inst{27-21} = 0b0011000;
+ let Inst{12-8} = Rx;
+ let Inst{4-0} = Ru;
+ let Inst{20-16} = Rs;
+ }
+
+
+// Vector reduce multiply word by signed half (32x16)
+//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+def M4_vrmpyeh_s0 : T_M2_vmpy<"vrmpyweh", 0b010, 0b100, 0, 0, 0>;
+def M4_vrmpyeh_s1 : T_M2_vmpy<"vrmpyweh", 0b110, 0b100, 1, 0, 0>;
+
+//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+def M4_vrmpyoh_s0 : T_M2_vmpy<"vrmpywoh", 0b001, 0b010, 0, 0, 0>;
+def M4_vrmpyoh_s1 : T_M2_vmpy<"vrmpywoh", 0b101, 0b010, 1, 0, 0>;
+
+//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
+def M4_vrmpyeh_acc_s0: T_M2_vmpy_acc<"vrmpyweh", 0b001, 0b110, 0, 0>;
+def M4_vrmpyeh_acc_s1: T_M2_vmpy_acc<"vrmpyweh", 0b101, 0b110, 1, 0>;
+
+//Rdd+=vrmpywoh(Rss,Rtt)[:<<1]
+def M4_vrmpyoh_acc_s0: T_M2_vmpy_acc<"vrmpywoh", 0b011, 0b110, 0, 0>;
+def M4_vrmpyoh_acc_s1: T_M2_vmpy_acc<"vrmpywoh", 0b111, 0b110, 1, 0>;
+
+// Vector multiply halfwords, signed by unsigned
+// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
+def M2_vmpy2su_s0 : T_XTYPE_mpy64 < "vmpyhsu", 0b000, 0b111, 1, 0, 0>;
+def M2_vmpy2su_s1 : T_XTYPE_mpy64 < "vmpyhsu", 0b100, 0b111, 1, 1, 0>;
+
+// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
+def M2_vmac2su_s0 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b011, 0b101, 1, 0, 0>;
+def M2_vmac2su_s1 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b111, 0b101, 1, 1, 0>;
+
+// Vector polynomial multiply halfwords
+// Rdd=vpmpyh(Rs,Rt)
+def M4_vpmpyh : T_XTYPE_mpy64 < "vpmpyh", 0b110, 0b111, 0, 0, 0>;
+
+// Rxx^=vpmpyh(Rs,Rt)
+def M4_vpmpyh_acc : T_XTYPE_mpy64_acc < "vpmpyh", "^", 0b101, 0b111, 0, 0, 0>;
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+def M4_pmpyw : T_XTYPE_mpy64 < "pmpyw", 0b010, 0b111, 0, 0, 0>;
+
+// Rxx^=pmpyw(Rs,Rt)
+def M4_pmpyw_acc : T_XTYPE_mpy64_acc < "pmpyw", "^", 0b001, 0b111, 0, 0, 0>;
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/Vector compare
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Template class for vector compare
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0 in
+class T_vcmpImm <string Str, bits<2> cmpOp, bits<2> minOp, Operand ImmOprnd>
+ : ALU64_rr <(outs PredRegs:$Pd),
+ (ins DoubleRegs:$Rss, ImmOprnd:$Imm),
+ "$Pd = "#Str#"($Rss, #$Imm)",
+ [], "", ALU64_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rss;
+ bits<32> Imm;
+ bits<8> ImmBits;
+ let ImmBits{6-0} = Imm{6-0};
+ let ImmBits{7} = !if (!eq(cmpOp,0b10), 0b0, Imm{7}); // 0 for vcmp[bhw].gtu
+
+ let IClass = 0b1101;
+
+ let Inst{27-24} = 0b1100;
+ let Inst{22-21} = cmpOp;
+ let Inst{20-16} = Rss;
+ let Inst{12-5} = ImmBits;
+ let Inst{4-3} = minOp;
+ let Inst{1-0} = Pd;
+ }
+
+// Vector compare bytes
+def A4_vcmpbgt : T_vcmp <"vcmpb.gt", 0b1010>;
+def: T_vcmp_pat<A4_vcmpbgt, setgt, v8i8>;
+
+let AsmString = "$Pd = any8(vcmpb.eq($Rss, $Rtt))" in
+def A4_vcmpbeq_any : T_vcmp <"any8(vcmpb.gt", 0b1000>;
+
+def A4_vcmpbeqi : T_vcmpImm <"vcmpb.eq", 0b00, 0b00, u8Imm>;
+def A4_vcmpbgti : T_vcmpImm <"vcmpb.gt", 0b01, 0b00, s8Imm>;
+def A4_vcmpbgtui : T_vcmpImm <"vcmpb.gtu", 0b10, 0b00, u7Imm>;
+
+// Vector compare halfwords
+def A4_vcmpheqi : T_vcmpImm <"vcmph.eq", 0b00, 0b01, s8Imm>;
+def A4_vcmphgti : T_vcmpImm <"vcmph.gt", 0b01, 0b01, s8Imm>;
+def A4_vcmphgtui : T_vcmpImm <"vcmph.gtu", 0b10, 0b01, u7Imm>;
+
+// Vector compare words
+def A4_vcmpweqi : T_vcmpImm <"vcmpw.eq", 0b00, 0b10, s8Imm>;
+def A4_vcmpwgti : T_vcmpImm <"vcmpw.gt", 0b01, 0b10, s8Imm>;
+def A4_vcmpwgtui : T_vcmpImm <"vcmpw.gtu", 0b10, 0b10, u7Imm>;
+
+//===----------------------------------------------------------------------===//
+// XTYPE/SHIFT +
+//===----------------------------------------------------------------------===//
+// Shift by immediate and accumulate/logical.
+// Rx=add(#u8,asl(Rx,#U5)) Rx=add(#u8,lsr(Rx,#U5))
+// Rx=sub(#u8,asl(Rx,#U5)) Rx=sub(#u8,lsr(Rx,#U5))
+// Rx=and(#u8,asl(Rx,#U5)) Rx=and(#u8,lsr(Rx,#U5))
+// Rx=or(#u8,asl(Rx,#U5)) Rx=or(#u8,lsr(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+ hasNewValue = 1, opNewValue = 0 in
+class T_S4_ShiftOperate<string MnOp, string MnSh, SDNode Op, SDNode Sh,
+ bit asl_lsr, bits<2> MajOp, InstrItinClass Itin>
+ : MInst_acc<(outs IntRegs:$Rd), (ins u8Ext:$u8, IntRegs:$Rx, u5Imm:$U5),
+ "$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))",
+ [(set (i32 IntRegs:$Rd),
+ (Op (Sh I32:$Rx, u5ImmPred:$U5), u32ImmPred:$u8))],
+ "$Rd = $Rx", Itin> {
+
+ bits<5> Rd;
+ bits<8> u8;
+ bits<5> Rx;
+ bits<5> U5;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = 0b1110;
+ let Inst{23-21} = u8{7-5};
+ let Inst{20-16} = Rd;
+ let Inst{13} = u8{4};
+ let Inst{12-8} = U5;
+ let Inst{7-5} = u8{3-1};
+ let Inst{4} = asl_lsr;
+ let Inst{3} = u8{0};
+ let Inst{2-1} = MajOp;
+}
+
+multiclass T_ShiftOperate<string mnemonic, SDNode Op, bits<2> MajOp,
+ InstrItinClass Itin> {
+ def _asl_ri : T_S4_ShiftOperate<mnemonic, "asl", Op, shl, 0, MajOp, Itin>;
+ def _lsr_ri : T_S4_ShiftOperate<mnemonic, "lsr", Op, srl, 1, MajOp, Itin>;
+}
+
+let AddedComplexity = 200 in {
+ defm S4_addi : T_ShiftOperate<"add", add, 0b10, ALU64_tc_2_SLOT23>;
+ defm S4_andi : T_ShiftOperate<"and", and, 0b00, ALU64_tc_2_SLOT23>;
+}
+
+let AddedComplexity = 30 in
+defm S4_ori : T_ShiftOperate<"or", or, 0b01, ALU64_tc_1_SLOT23>;
+
+defm S4_subi : T_ShiftOperate<"sub", sub, 0b11, ALU64_tc_1_SLOT23>;
+
+let AddedComplexity = 200 in {
+ def: Pat<(add addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)),
+ (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>;
+ def: Pat<(add addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)),
+ (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>;
+ def: Pat<(sub addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)),
+ (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>;
+ def: Pat<(sub addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)),
+ (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>;
+}
+
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in
+def S2_vcnegh : T_S3op_shiftVect < "vcnegh", 0b11, 0b01>;
+
+// Rd=[cround|round](Rs,Rt)
+let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in {
+ def A4_cround_rr : T_S3op_3 < "cround", IntRegs, 0b11, 0b00>;
+ def A4_round_rr : T_S3op_3 < "round", IntRegs, 0b11, 0b10>;
+}
+
+// Rd=round(Rs,Rt):sat
+let hasNewValue = 1, Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in
+def A4_round_rr_sat : T_S3op_3 < "round", IntRegs, 0b11, 0b11, 1>;
+
+// Rd=[cmpyiwh|cmpyrwh](Rss,Rt):<<1:rnd:sat
+let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in {
+ def M4_cmpyi_wh : T_S3op_8<"cmpyiwh", 0b100, 1, 1, 1>;
+ def M4_cmpyr_wh : T_S3op_8<"cmpyrwh", 0b110, 1, 1, 1>;
+}
+
+// Rdd=[add|sub](Rss,Rtt,Px):carry
+let isPredicateLate = 1, hasSideEffects = 0 in
+class T_S3op_carry <string mnemonic, bits<3> MajOp>
+ : SInst < (outs DoubleRegs:$Rdd, PredRegs:$Px),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, PredRegs:$Pu),
+ "$Rdd = "#mnemonic#"($Rss, $Rtt, $Pu):carry",
+ [], "$Px = $Pu", S_3op_tc_1_SLOT23 > {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<5> Rtt;
+ bits<2> Pu;
+
+ let IClass = 0b1100;
+
+ let Inst{27-24} = 0b0010;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ let Inst{6-5} = Pu;
+ let Inst{4-0} = Rdd;
+ }
+
+def A4_addp_c : T_S3op_carry < "add", 0b110 >;
+def A4_subp_c : T_S3op_carry < "sub", 0b111 >;
+
+let Itinerary = S_3op_tc_3_SLOT23, hasSideEffects = 0 in
+class T_S3op_6 <string mnemonic, bits<3> MinOp, bit isUnsigned>
+ : SInst <(outs DoubleRegs:$Rxx),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Ru),
+ "$Rxx = "#mnemonic#"($Rss, $Ru)" ,
+ [] , "$dst2 = $Rxx"> {
+ bits<5> Rxx;
+ bits<5> Rss;
+ bits<5> Ru;
+
+ let IClass = 0b1100;
+
+ let Inst{27-21} = 0b1011001;
+ let Inst{20-16} = Rss;
+ let Inst{13} = isUnsigned;
+ let Inst{12-8} = Rxx;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Ru;
+ }
+
+// Vector reduce maximum halfwords
+// Rxx=vrmax[u]h(Rss,Ru)
+def A4_vrmaxh : T_S3op_6 < "vrmaxh", 0b001, 0>;
+def A4_vrmaxuh : T_S3op_6 < "vrmaxuh", 0b001, 1>;
+
+// Vector reduce maximum words
+// Rxx=vrmax[u]w(Rss,Ru)
+def A4_vrmaxw : T_S3op_6 < "vrmaxw", 0b010, 0>;
+def A4_vrmaxuw : T_S3op_6 < "vrmaxuw", 0b010, 1>;
+
+// Vector reduce minimum halfwords
+// Rxx=vrmin[u]h(Rss,Ru)
+def A4_vrminh : T_S3op_6 < "vrminh", 0b101, 0>;
+def A4_vrminuh : T_S3op_6 < "vrminuh", 0b101, 1>;
+
+// Vector reduce minimum words
+// Rxx=vrmin[u]w(Rss,Ru)
+def A4_vrminw : T_S3op_6 < "vrminw", 0b110, 0>;
+def A4_vrminuw : T_S3op_6 < "vrminuw", 0b110, 1>;
+
+// Shift an immediate left by register amount.
+let hasNewValue = 1, hasSideEffects = 0 in
+def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6Imm:$s6, IntRegs:$Rt),
+ "$Rd = lsl(#$s6, $Rt)" ,
+ [(set (i32 IntRegs:$Rd), (shl s6ImmPred:$s6,
+ (i32 IntRegs:$Rt)))],
+ "", S_3op_tc_1_SLOT23> {
+ bits<5> Rd;
+ bits<6> s6;
+ bits<5> Rt;
+
+ let IClass = 0b1100;
+
+ let Inst{27-22} = 0b011010;
+ let Inst{20-16} = s6{5-1};
+ let Inst{12-8} = Rt;
+ let Inst{7-6} = 0b11;
+ let Inst{4-0} = Rd;
+ let Inst{5} = s6{0};
+ }
+
+//===----------------------------------------------------------------------===//
+// XTYPE/SHIFT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Word, Half, Byte
+//===----------------------------------------------------------------------===//
+
+def MEMOPIMM : SDNodeXForm<imm, [{
+ // Call the transformation function XformM5ToU5Imm to get the negative
+ // immediate's positive counterpart.
+ int32_t imm = N->getSExtValue();
+ return XformM5ToU5Imm(imm, SDLoc(N));
+}]>;
+
+def MEMOPIMM_HALF : SDNodeXForm<imm, [{
+ // -1 .. -31 are represented as 65535..65505;
+ // assigning to a short restores the desired signed value.
+ // Call the transformation function XformM5ToU5Imm to get the negative
+ // immediate's positive counterpart.
+ int16_t imm = N->getSExtValue();
+ return XformM5ToU5Imm(imm, SDLoc(N));
+}]>;
+
+def MEMOPIMM_BYTE : SDNodeXForm<imm, [{
+ // -1 .. -31 are represented as 255..225;
+ // assigning to a char restores the desired signed value.
+ // Call the transformation function XformM5ToU5Imm to get the negative
+ // immediate's positive counterpart.
+ int8_t imm = N->getSExtValue();
+ return XformM5ToU5Imm(imm, SDLoc(N));
+}]>;
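+
+// Illustrative example for the three transforms above (concrete values are
+// assumed, not taken from a real DAG dump): C code like
+//   counter -= 7;   // counter is a char
+// arrives with an addend of -7 which, per the comment above, may be seen as
+// 249 (0xF9); assigning it to int8_t restores -7, and XformM5ToU5Imm returns
+// the positive counterpart 7, so the pattern can emit the subtract form
+// memb(Rs+#0) -= #7 (L4_isub_memopb_io, defined further below).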
+
+def SETMEMIMM : SDNodeXForm<imm, [{
+ // Return the bit position we will set [0-31], as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformMskToBitPosU5Imm(imm, SDLoc(N));
+}]>;
+
+def CLRMEMIMM : SDNodeXForm<imm, [{
+ // Return the bit position we will clear [0-31], as an SDNode.
+ // We bit-negate the value first.
+ int32_t imm = ~(N->getSExtValue());
+ return XformMskToBitPosU5Imm(imm, SDLoc(N));
+}]>;
+
+def SETMEMIMM_SHORT : SDNodeXForm<imm, [{
+ // Return the bit position we will set [0-15], as an SDNode.
+ int16_t imm = N->getSExtValue();
+ return XformMskToBitPosU4Imm(imm, SDLoc(N));
+}]>;
+
+def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{
+ // Return the bit position we will clear [0-15], as an SDNode.
+ // We bit-negate the value first.
+ int16_t imm = ~(N->getSExtValue());
+ return XformMskToBitPosU4Imm(imm, SDLoc(N));
+}]>;
+
+def SETMEMIMM_BYTE : SDNodeXForm<imm, [{
+ // Return the bit position we will set [0-7], as an SDNode.
+ int8_t imm = N->getSExtValue();
+ return XformMskToBitPosU3Imm(imm, SDLoc(N));
+}]>;
+
+def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{
+ // Return the bit position we will clear [0-7], as an SDNode.
+ // We bit-negate the value first.
+ int8_t imm = ~(N->getSExtValue());
+ return XformMskToBitPosU3Imm(imm, SDLoc(N));
+}]>;
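+
+// Worked example for the clrbit/setbit transforms (values assumed): clearing
+// bit 2 of a byte is written in C as
+//   flags &= ~4;    // i.e. an AND with 0xFB
+// CLRMEMIMM_BYTE bit-negates the mask back to 0x04 and XformMskToBitPosU3Imm
+// returns position 2, so the store can become memb(Rs+#0) = clrbit(#2).
+// The SETMEMIMM* variants do the same without the negation (an OR with 0x04
+// yields setbit(#2)).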
+
+//===----------------------------------------------------------------------===//
+// Template class for MemOp instructions with the register value.
+//===----------------------------------------------------------------------===//
+class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp,
+ string memOp, bits<2> memOpBits> :
+ MEMInst_V4<(outs),
+ (ins IntRegs:$base, ImmOp:$offset, IntRegs:$delta),
+ opc#"($base+#$offset)"#memOp#"$delta",
+ []>,
+ Requires<[UseMEMOP]> {
+
+ bits<5> base;
+ bits<5> delta;
+ bits<32> offset;
+ bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2
+
+ let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
+ !if (!eq(opcBits, 0b01), offset{6-1},
+ !if (!eq(opcBits, 0b10), offset{7-2},0)));
+
+ let opExtentAlign = opcBits;
+ let IClass = 0b0011;
+ let Inst{27-24} = 0b1110;
+ let Inst{22-21} = opcBits;
+ let Inst{20-16} = base;
+ let Inst{13} = 0b0;
+ let Inst{12-7} = offsetBits;
+ let Inst{6-5} = memOpBits;
+ let Inst{4-0} = delta;
+}
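+
+// Note on the offset encoding above (example values assumed): the memory
+// offset is scaled by the access size, so for a word mem-op (opcBits == 0b10)
+// a byte offset of 24 satisfies u6:2 and is encoded as offsetBits =
+// offset{7-2} = 6; byte and halfword forms use offset{5-0} and offset{6-1}
+// respectively, as selected by the !if chain.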
+
+//===----------------------------------------------------------------------===//
+// Template class for MemOp instructions with the immediate value.
+//===----------------------------------------------------------------------===//
+class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp,
+ string memOp, bits<2> memOpBits> :
+ MEMInst_V4 <(outs),
+ (ins IntRegs:$base, ImmOp:$offset, u5Imm:$delta),
+ opc#"($base+#$offset)"#memOp#"#$delta"
+ #!if(memOpBits{1},")", ""), // clrbit, setbit - include ')'
+ []>,
+ Requires<[UseMEMOP]> {
+
+ bits<5> base;
+ bits<5> delta;
+ bits<32> offset;
+ bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2
+
+ let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
+ !if (!eq(opcBits, 0b01), offset{6-1},
+ !if (!eq(opcBits, 0b10), offset{7-2},0)));
+
+ let opExtentAlign = opcBits;
+ let IClass = 0b0011;
+ let Inst{27-24} = 0b1111;
+ let Inst{22-21} = opcBits;
+ let Inst{20-16} = base;
+ let Inst{13} = 0b0;
+ let Inst{12-7} = offsetBits;
+ let Inst{6-5} = memOpBits;
+ let Inst{4-0} = delta;
+}
+
+// Multiclass to define MemOp instructions with a register operand.
+multiclass MemOp_rr<string opc, bits<2> opcBits, Operand ImmOp> {
+ def L4_add#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add
+ def L4_sub#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub
+ def L4_and#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and
+ def L4_or#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or
+}
+
+// Multiclass to define MemOp instructions with an immediate operand.
+multiclass MemOp_ri<string opc, bits<2> opcBits, Operand ImmOp> {
+ def L4_iadd#NAME : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >;
+ def L4_isub#NAME : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >;
+ def L4_iand#NAME : MemOp_ri_base<opc, opcBits, ImmOp, " = clrbit(", 0b10>;
+ def L4_ior#NAME : MemOp_ri_base<opc, opcBits, ImmOp, " = setbit(", 0b11>;
+}
+
+multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> {
+ defm _#NAME : MemOp_rr <opc, opcBits, ImmOp>;
+ defm _#NAME : MemOp_ri <opc, opcBits, ImmOp>;
+}
+
+// Define MemOp instructions.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in {
+ let opExtentBits = 6, accessSize = ByteAccess in
+ defm memopb_io : MemOp_base <"memb", 0b00, u6_0Ext>;
+
+ let opExtentBits = 7, accessSize = HalfWordAccess in
+ defm memoph_io : MemOp_base <"memh", 0b01, u6_1Ext>;
+
+ let opExtentBits = 8, accessSize = WordAccess in
+ defm memopw_io : MemOp_base <"memw", 0b10, u6_2Ext>;
+}
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for ALU operations on memory,
+// where the value used for the ALU operation is an immediate value.
+// mem[bh](Rs+#0) += #U5
+// mem[bh](Rs+#u6) += #U5
+//===----------------------------------------------------------------------===//
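+
+// For instance (shape shown for illustration only), a DAG of the form
+//   store (add (load (add Rs, #off)), #5), (add Rs, #off)
+// is folded by the patterns below into a single mem-op such as
+//   memw(Rs+#off) += #5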
+
+multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred,
+ InstHexagon MI, SDNode OpNode> {
+ let AddedComplexity = 180 in
+ def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend),
+ IntRegs:$addr),
+ (MI IntRegs:$addr, 0, u5ImmPred:$addend)>;
+
+ let AddedComplexity = 190 in
+ def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, ImmPred:$offset)),
+ u5ImmPred:$addend),
+ (add IntRegs:$base, ImmPred:$offset)),
+ (MI IntRegs:$base, ImmPred:$offset, u5ImmPred:$addend)>;
+}
+
+multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred,
+ InstHexagon addMI, InstHexagon subMI> {
+ defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, addMI, add>;
+ defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, subMI, sub>;
+}
+
+multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Half Word
+ defm: MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred,
+ L4_iadd_memoph_io, L4_isub_memoph_io>;
+ // Byte
+ defm: MemOpi_u5ALUOp <ldOpByte, truncstorei8, u32ImmPred,
+ L4_iadd_memopb_io, L4_isub_memopb_io>;
+}
+
+let Predicates = [UseMEMOP] in {
+ defm: MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm: MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm: MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend
+
+ // Word
+ defm: MemOpi_u5ALUOp <load, store, u30_2ImmPred, L4_iadd_memopw_io,
+ L4_isub_memopw_io>;
+}
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for ALU operations on memory,
+// where the value used for the ALU operation is a negative value.
+// mem[bh](Rs+#0) += #m5
+// mem[bh](Rs+#u6) += #m5
+//===----------------------------------------------------------------------===//
+
+multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred,
+ PatLeaf immPred, SDNodeXForm xformFunc,
+ InstHexagon MI> {
+ let AddedComplexity = 190 in
+ def: Pat<(stOp (add (ldOp IntRegs:$addr), immPred:$subend), IntRegs:$addr),
+ (MI IntRegs:$addr, 0, (xformFunc immPred:$subend))>;
+
+ let AddedComplexity = 195 in
+ def: Pat<(stOp (add (ldOp (add IntRegs:$base, ImmPred:$offset)),
+ immPred:$subend),
+ (add IntRegs:$base, ImmPred:$offset)),
+ (MI IntRegs:$base, ImmPred:$offset, (xformFunc immPred:$subend))>;
+}
+
+multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Half Word
+ defm: MemOpi_m5Pats <ldOpHalf, truncstorei16, u31_1ImmPred, m5HImmPred,
+ MEMOPIMM_HALF, L4_isub_memoph_io>;
+ // Byte
+ defm: MemOpi_m5Pats <ldOpByte, truncstorei8, u32ImmPred, m5BImmPred,
+ MEMOPIMM_BYTE, L4_isub_memopb_io>;
+}
+
+let Predicates = [UseMEMOP] in {
+ defm: MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm: MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm: MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend
+
+ // Word
+ defm: MemOpi_m5Pats <load, store, u30_2ImmPred, m5ImmPred,
+ MEMOPIMM, L4_isub_memopw_io>;
+}
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'def Pats' for bit operations on memory.
+// mem[bhw](Rs+#0) = [clrbit|setbit](#U5)
+// mem[bhw](Rs+#u6) = [clrbit|setbit](#U5)
+//===----------------------------------------------------------------------===//
+
+multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred,
+ PatLeaf extPred, SDNodeXForm xformFunc, InstHexagon MI,
+ SDNode OpNode> {
+
+ // mem[bhw](Rs+#u6:[012]) = [clrbit|setbit](#U5)
+ let AddedComplexity = 250 in
+ def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+ immPred:$bitend),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>;
+
+ // mem[bhw](Rs+#0) = [clrbit|setbit](#U5)
+ let AddedComplexity = 225 in
+ def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), immPred:$bitend), IntRegs:$addr),
+ (MI IntRegs:$addr, 0, (xformFunc immPred:$bitend))>;
+}
+
+multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf> {
+ // Byte - clrbit
+ defm: MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u32ImmPred,
+ CLRMEMIMM_BYTE, L4_iand_memopb_io, and>;
+ // Byte - setbit
+ defm: MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u32ImmPred,
+ SETMEMIMM_BYTE, L4_ior_memopb_io, or>;
+ // Half Word - clrbit
+ defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u31_1ImmPred,
+ CLRMEMIMM_SHORT, L4_iand_memoph_io, and>;
+ // Half Word - setbit
+ defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u31_1ImmPred,
+ SETMEMIMM_SHORT, L4_ior_memoph_io, or>;
+}
+
+let Predicates = [UseMEMOP] in {
+ // mem[bh](Rs+#0) = [clrbit|setbit](#U5)
+ // mem[bh](Rs+#u6:[01]) = [clrbit|setbit](#U5)
+ defm: MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend
+ defm: MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend
+ defm: MemOpi_bitExtType<extloadi8, extloadi16>; // any extend
+
+ // memw(Rs+#0) = [clrbit|setbit](#U5)
+ // memw(Rs+#u6:2) = [clrbit|setbit](#U5)
+ defm: MemOpi_bitPats<load, store, Clr5ImmPred, u30_2ImmPred, CLRMEMIMM,
+ L4_iand_memopw_io, and>;
+ defm: MemOpi_bitPats<load, store, Set5ImmPred, u30_2ImmPred, SETMEMIMM,
+ L4_ior_memopw_io, or>;
+}
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'def Pats' for ALU operations on memory
+// where the addend is a register.
+// mem[bhw](Rs+#0) [+-&|]= Rt
+// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
+//===----------------------------------------------------------------------===//
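+
+// Illustrative shape (not from a real dump): with the operand in Rt,
+//   store (or (load (add Rs, #off)), Rt), (add Rs, #off)
+// maps onto memw(Rs+#off) |= Rt via the patterns below.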
+
+multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
+ InstHexagon MI, SDNode OpNode> {
+ let AddedComplexity = 141 in
+ // mem[bhw](Rs+#0) [+-&|]= Rt
+ def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), (i32 IntRegs:$addend)),
+ IntRegs:$addr),
+ (MI IntRegs:$addr, 0, (i32 IntRegs:$addend))>;
+
+ // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
+ let AddedComplexity = 150 in
+ def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+ (i32 IntRegs:$orend)),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend))>;
+}
+
+multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
+ InstHexagon addMI, InstHexagon subMI,
+ InstHexagon andMI, InstHexagon orMI> {
+ defm: MemOpr_Pats <ldOp, stOp, extPred, addMI, add>;
+ defm: MemOpr_Pats <ldOp, stOp, extPred, subMI, sub>;
+ defm: MemOpr_Pats <ldOp, stOp, extPred, andMI, and>;
+ defm: MemOpr_Pats <ldOp, stOp, extPred, orMI, or>;
+}
+
+multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Half Word
+ defm: MemOPr_ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred,
+ L4_add_memoph_io, L4_sub_memoph_io,
+ L4_and_memoph_io, L4_or_memoph_io>;
+ // Byte
+ defm: MemOPr_ALUOp <ldOpByte, truncstorei8, u32ImmPred,
+ L4_add_memopb_io, L4_sub_memopb_io,
+ L4_and_memopb_io, L4_or_memopb_io>;
+}
+
+// Define 'def Pats' for MemOps with register addend.
+let Predicates = [UseMEMOP] in {
+ // Byte, Half Word
+ defm: MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm: MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm: MemOPr_ExtType<extloadi8, extloadi16>; // any extend
+ // Word
+ defm: MemOPr_ALUOp <load, store, u30_2ImmPred, L4_add_memopw_io,
+ L4_sub_memopw_io, L4_and_memopw_io, L4_or_memopw_io>;
+}
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED +
+//===----------------------------------------------------------------------===//
+
+// Hexagon V4 only supports these flavors of byte/half compare instructions:
+// EQ/GT/GTU. Other flavors such as GE/GEU/LT/LTU/LE/LEU are not supported by
+// the hardware. However, the compiler can still implement them by combining
+// the patterns that are implemented; for example, LE can be handled as the
+// negation of GT (the approach used by C4_cmpltei below for word compares).
+// Implemented patterns: EQ/GT/GTU.
+// Missing patterns: GE/GEU/LT/LTU/LE/LEU.
+
+// The following instruction is not extended, since doing so would result in
+// incorrect code for negative numbers.
+// Pd=cmpb.eq(Rs,#u8)
+
+// p=!cmp.eq(r1,#s10)
+def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10Ext>;
+def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10Ext>;
+def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9Ext>;
+
+def : T_CMP_pat <C4_cmpneqi, setne, s32ImmPred>;
+def : T_CMP_pat <C4_cmpltei, setle, s32ImmPred>;
+def : T_CMP_pat <C4_cmplteui, setule, u9ImmPred>;
+
+// rs <= rt -> !(rs > rt).
+/*
+def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>;
+// (C4_cmpltei IntRegs:$src1, s32ImmPred:$src2)>;
+*/
+// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1).
+def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>;
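+
+// For example (illustrative): the test r0 < #5 is selected as
+// p0 = !cmp.gt(r0, #4), i.e. C4_cmpltei with the decremented immediate.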
+
+// rs != rt -> !(rs == rt).
+def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C4_cmpneqi IntRegs:$src1, s32ImmPred:$src2)>;
+
+// SDNode for converting immediate C to C-1.
+def DEC_CONST_BYTE : SDNodeXForm<imm, [{
+ // Return the byte immediate const-1 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformU7ToU7M1Imm(imm, SDLoc(N));
+}]>;
+
+// For the sequence
+//   zext(setult(and(Rs, 255), u8))
+// use the isdigit transformation below.
+
+// Generate code of the form 'C2_muxii(cmpbgtui(Rs, C-1), 0, 1)'
+// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;
+// The isdigit transformation relies on two 'clever' aspects:
+// 1) The data type is unsigned, which allows us to eliminate a zero test
+//    after biasing the expression by 48. We are depending on the
+//    representation and semantics of the unsigned types.
+// 2) The front end has converted <= 9 into < 10 on entry to LLVM.
+//
+// For the C code:
+//   retval = ((c>='0') & (c<='9')) ? 1 : 0;
+// the code is transformed upstream of LLVM into
+//   retval = (c-48) < 10 ? 1 : 0;
+let AddedComplexity = 139 in
+def: Pat<(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)),
+ u7StrictPosImmPred:$src2)))),
+ (C2_muxii (A4_cmpbgtui IntRegs:$src1,
+ (DEC_CONST_BYTE u7StrictPosImmPred:$src2)),
+ 0, 1)>;
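+
+// A worked instance of the pattern above (register names assumed, with Rs
+// already holding c-48 from an earlier subtract): for c = '7' (55) the biased
+// value is 7, so
+//   p0 = cmpb.gtu(Rs, #9)      // #9 is DEC_CONST_BYTE applied to 10
+//   Rd = mux(p0, #0, #1)
+// produces 1; for c = 'a' (97) the biased value 49 exceeds 9 and the result
+// is 0.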
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Multiclass for DeallocReturn
+//===----------------------------------------------------------------------===//
+class L4_RETURN<string mnemonic, bit isNot, bit isPredNew, bit isTak>
+ : LD0Inst<(outs), (ins PredRegs:$src),
+ !if(isNot, "if (!$src", "if ($src")#
+ !if(isPredNew, ".new) ", ") ")#mnemonic#
+ !if(isPredNew, #!if(isTak,":t", ":nt"),""),
+ [], "", LD_tc_3or4stall_SLOT0> {
+
+ bits<2> src;
+ let BaseOpcode = "L4_RETURN";
+ let isPredicatedFalse = isNot;
+ let isPredicatedNew = isPredNew;
+ let isTaken = isTak;
+ let IClass = 0b1001;
+
+ let Inst{27-16} = 0b011000011110;
+
+ let Inst{13} = isNot;
+ let Inst{12} = isTak;
+ let Inst{11} = isPredNew;
+ let Inst{10} = 0b0;
+ let Inst{9-8} = src;
+ let Inst{4-0} = 0b11110;
+ }
+
+// Produce all predicated forms, p, !p, p.new, !p.new, :t, :nt
+multiclass L4_RETURN_PRED<string mnemonic, bit PredNot> {
+ let isPredicated = 1 in {
+ def _#NAME# : L4_RETURN <mnemonic, PredNot, 0, 1>;
+ def _#NAME#new_pnt : L4_RETURN <mnemonic, PredNot, 1, 0>;
+ def _#NAME#new_pt : L4_RETURN <mnemonic, PredNot, 1, 1>;
+ }
+}
+
+multiclass LD_MISC_L4_RETURN<string mnemonic> {
+ let isBarrier = 1, isPredicable = 1 in
+ def NAME : LD0Inst <(outs), (ins), mnemonic, [], "",
+ LD_tc_3or4stall_SLOT0> {
+ let BaseOpcode = "L4_RETURN";
+ let IClass = 0b1001;
+ let Inst{27-16} = 0b011000011110;
+ let Inst{13-10} = 0b0000;
+ let Inst{4-0} = 0b11110;
+ }
+ defm t : L4_RETURN_PRED<mnemonic, 0 >;
+ defm f : L4_RETURN_PRED<mnemonic, 1 >;
+}
+
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R30], hasSideEffects = 0 in
+defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel;
+
+// Restore registers and dealloc return function call.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in {
+ def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">;
+ let isExtended = 1, opExtendable = 0 in
+ def RESTORE_DEALLOC_RET_JMP_V4_EXT : T_JMP<"">;
+}
+
+// Restore registers and dealloc frame before a tail call.
+let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in {
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel;
+ let isExtended = 1, opExtendable = 0 in
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT : T_Call<"">, PredRel;
+}
+
+// Save registers function call.
+let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in {
+ def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel;
+ let isExtended = 1, opExtendable = 0 in
+ def SAVE_REGISTERS_CALL_V4_EXT : T_Call<"">, PredRel;
+}
+
+//===----------------------------------------------------------------------===//
+// Template class for non predicated store instructions with
+// GP-Relative or absolute addressing.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, isPredicable = 1 in
+class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<2>MajOp, bit isAbs, bit isHalf>
+ : STInst<(outs), (ins ImmOp:$addr, RC:$src),
+ mnemonic # "(#$addr) = $src"#!if(isHalf, ".h",""),
+ [], "", V2LDST_tc_st_SLOT01> {
+ bits<19> addr;
+ bits<5> src;
+ bits<16> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3},
+ !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
+ !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
+ /* u16_0Imm */ addr{15-0})));
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
+
+ let IClass = 0b0100;
+ let Inst{27} = 1;
+ let Inst{26-25} = offsetBits{15-14};
+ let Inst{24} = 0b0;
+ let Inst{23-22} = MajOp;
+ let Inst{21} = isHalf;
+ let Inst{20-16} = offsetBits{13-9};
+ let Inst{13} = offsetBits{8};
+ let Inst{12-8} = src;
+ let Inst{7-0} = offsetBits{7-0};
+ }
+
+//===----------------------------------------------------------------------===//
+// Template class for predicated store instructions with
+// GP-Relative or absolute addressing.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, isPredicated = 1, opExtentBits = 6, opExtendable = 1 in
+class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
+ bit isHalf, bit isNot, bit isNew>
+ : STInst<(outs), (ins PredRegs:$src1, u32MustExt:$absaddr, RC: $src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ",
+ ") ")#mnemonic#"(#$absaddr) = $src2"#!if(isHalf, ".h",""),
+ [], "", ST_tc_st_SLOT01>, AddrModeRel {
+ bits<2> src1;
+ bits<6> absaddr;
+ bits<5> src2;
+
+ let isPredicatedNew = isNew;
+ let isPredicatedFalse = isNot;
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
+
+ let IClass = 0b1010;
+
+ let Inst{27-24} = 0b1111;
+ let Inst{23-22} = MajOp;
+ let Inst{21} = isHalf;
+ let Inst{17-16} = absaddr{5-4};
+ let Inst{13} = isNew;
+ let Inst{12-8} = src2;
+ let Inst{7} = 0b1;
+ let Inst{6-3} = absaddr{3-0};
+ let Inst{2} = isNot;
+ let Inst{1-0} = src1;
+ }
+
+//===----------------------------------------------------------------------===//
+// Template class for predicated store instructions with absolute addressing.
+//===----------------------------------------------------------------------===//
+class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<2> MajOp, bit isHalf>
+ : T_StoreAbsGP <mnemonic, RC, u32MustExt, MajOp, 1, isHalf>,
+ AddrModeRel {
+ string ImmOpStr = !cast<string>(ImmOp);
+ let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
+ !if (!eq(ImmOpStr, "u16_2Imm"), 18,
+ !if (!eq(ImmOpStr, "u16_1Imm"), 17,
+ /* u16_0Imm */ 16)));
+
+ let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3,
+ !if (!eq(ImmOpStr, "u16_2Imm"), 2,
+ !if (!eq(ImmOpStr, "u16_1Imm"), 1,
+ /* u16_0Imm */ 0)));
+}
+
+//===----------------------------------------------------------------------===//
+// Multiclass for store instructions with absolute addressing.
+//===----------------------------------------------------------------------===//
+let addrMode = Absolute, isExtended = 1 in
+multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, bits<2> MajOp, bit isHalf = 0> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
+ let opExtendable = 0, isPredicable = 1 in
+ def S2_#NAME#abs : T_StoreAbs <mnemonic, RC, ImmOp, MajOp, isHalf>;
+
+ // Predicated
+ def S4_p#NAME#t_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 0, 0>;
+ def S4_p#NAME#f_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 1, 0>;
+
+ // .new Predicated
+ def S4_p#NAME#tnew_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 0, 1>;
+ def S4_p#NAME#fnew_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 1, 1>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Template class for non predicated new-value store instructions with
+// GP-Relative or absolute addressing.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, isPredicable = 1, mayStore = 1, isNVStore = 1,
+ isNewValue = 1, opNewValue = 1 in
+class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp, bit isAbs>
+ : NVInst_V4<(outs), (ins u32Imm:$addr, IntRegs:$src),
+ mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src.new",
+ [], "", V2LDST_tc_st_SLOT0> {
+ bits<19> addr;
+ bits<3> src;
+ bits<16> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3},
+ !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
+ !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
+ /* u16_0Imm */ addr{15-0})));
+ let IClass = 0b0100;
+
+ let Inst{27} = 1;
+ let Inst{26-25} = offsetBits{15-14};
+ let Inst{24-21} = 0b0101;
+ let Inst{20-16} = offsetBits{13-9};
+ let Inst{13} = offsetBits{8};
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src;
+ let Inst{7-0} = offsetBits{7-0};
+ }
+
+//===----------------------------------------------------------------------===//
+// Template class for predicated new-value store instructions with
+// absolute addressing.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, isPredicated = 1, mayStore = 1, isNVStore = 1,
+ isNewValue = 1, opNewValue = 2, opExtentBits = 6, opExtendable = 1 in
+class T_StoreAbs_NV_Pred <string mnemonic, bits<2> MajOp, bit isNot, bit isNew>
+ : NVInst_V4<(outs), (ins PredRegs:$src1, u6Ext:$absaddr, IntRegs:$src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ",
+ ") ")#mnemonic#"(#$absaddr) = $src2.new",
+ [], "", ST_tc_st_SLOT0>, AddrModeRel {
+ bits<2> src1;
+ bits<6> absaddr;
+ bits<3> src2;
+
+ let isPredicatedNew = isNew;
+ let isPredicatedFalse = isNot;
+
+ let IClass = 0b1010;
+
+ let Inst{27-24} = 0b1111;
+ let Inst{23-21} = 0b101;
+ let Inst{17-16} = absaddr{5-4};
+ let Inst{13} = isNew;
+ let Inst{12-11} = MajOp;
+ let Inst{10-8} = src2;
+ let Inst{7} = 0b1;
+ let Inst{6-3} = absaddr{3-0};
+ let Inst{2} = isNot;
+ let Inst{1-0} = src1;
+}
+
+//===----------------------------------------------------------------------===//
+// Template class for non-predicated new-value store instructions with
+// absolute addressing.
+//===----------------------------------------------------------------------===//
+class T_StoreAbs_NV <string mnemonic, Operand ImmOp, bits<2> MajOp>
+ : T_StoreAbsGP_NV <mnemonic, ImmOp, MajOp, 1>, AddrModeRel {
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
+ !if (!eq(ImmOpStr, "u16_2Imm"), 18,
+ !if (!eq(ImmOpStr, "u16_1Imm"), 17,
+ /* u16_0Imm */ 16)));
+
+ let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3,
+ !if (!eq(ImmOpStr, "u16_2Imm"), 2,
+ !if (!eq(ImmOpStr, "u16_1Imm"), 1,
+ /* u16_0Imm */ 0)));
+}
+
+//===----------------------------------------------------------------------===//
+// Multiclass for new-value store instructions with absolute addressing.
+//===----------------------------------------------------------------------===//
+let addrMode = Absolute, isExtended = 1 in
+multiclass ST_Abs_NV <string mnemonic, string CextOp, Operand ImmOp,
+ bits<2> MajOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
+ let opExtendable = 0, isPredicable = 1 in
+ def S2_#NAME#newabs : T_StoreAbs_NV <mnemonic, ImmOp, MajOp>;
+
+ // Predicated
+ def S4_p#NAME#newt_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 0, 0>;
+ def S4_p#NAME#newf_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 1, 0>;
+
+ // .new Predicated
+ def S4_p#NAME#newtnew_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 0, 1>;
+ def S4_p#NAME#newfnew_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 1, 1>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Stores with absolute addressing
+//===----------------------------------------------------------------------===//
+let accessSize = ByteAccess in
+defm storerb : ST_Abs <"memb", "STrib", IntRegs, u16_0Imm, 0b00>,
+ ST_Abs_NV <"memb", "STrib", u16_0Imm, 0b00>;
+
+let accessSize = HalfWordAccess in
+defm storerh : ST_Abs <"memh", "STrih", IntRegs, u16_1Imm, 0b01>,
+ ST_Abs_NV <"memh", "STrih", u16_1Imm, 0b01>;
+
+let accessSize = WordAccess in
+defm storeri : ST_Abs <"memw", "STriw", IntRegs, u16_2Imm, 0b10>,
+ ST_Abs_NV <"memw", "STriw", u16_2Imm, 0b10>;
+
+let isNVStorable = 0, accessSize = DoubleWordAccess in
+defm storerd : ST_Abs <"memd", "STrid", DoubleRegs, u16_3Imm, 0b11>;
+
+let isNVStorable = 0, accessSize = HalfWordAccess in
+defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>;
+
+//===----------------------------------------------------------------------===//
+// GP-relative stores.
+// mem[bhwd](#global)=Rt
+// Once predicated, these instructions map to absolute addressing mode.
+// if ([!]Pv[.new]) mem[bhwd](##global)=Rt
+//===----------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1 in
+class T_StoreGP <string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp, bits<2> MajOp, bit isHalf = 0>
+ : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, 0, isHalf> {
+ // Set BaseOpcode the same as for the absolute-addressing instructions so
+ // that non-predicated GP-relative instructions can be related to the
+ // predicated absolute instructions.
+ let BaseOpcode = BaseOp#_abs;
+ }
+
+let isAsmParserOnly = 1 in
+multiclass ST_GP <string mnemonic, string BaseOp, Operand ImmOp,
+ bits<2> MajOp, bit isHalf = 0> {
+ // Set BaseOpcode the same as for the absolute-addressing instructions so
+ // that non-predicated GP-relative instructions can be related to the
+ // predicated absolute instructions.
+ let BaseOpcode = BaseOp#_abs in {
+ def NAME#gp : T_StoreAbsGP <mnemonic, IntRegs, ImmOp, MajOp,
+ 0, isHalf>;
+ // New-value store
+ def NAME#newgp : T_StoreAbsGP_NV <mnemonic, ImmOp, MajOp, 0> ;
+ }
+}
+
+let accessSize = ByteAccess in
+defm S2_storerb : ST_GP<"memb", "STrib", u16_0Imm, 0b00>, NewValueRel;
+
+let accessSize = HalfWordAccess in
+defm S2_storerh : ST_GP<"memh", "STrih", u16_1Imm, 0b01>, NewValueRel;
+
+let accessSize = WordAccess in
+defm S2_storeri : ST_GP<"memw", "STriw", u16_2Imm, 0b10>, NewValueRel;
+
+let isNVStorable = 0, accessSize = DoubleWordAccess in
+def S2_storerdgp : T_StoreGP <"memd", "STrid", DoubleRegs,
+ u16_3Imm, 0b11>, PredNewRel;
+
+let isNVStorable = 0, accessSize = HalfWordAccess in
+def S2_storerfgp : T_StoreGP <"memh", "STrif", IntRegs,
+ u16_1Imm, 0b01, 1>, PredNewRel;
+
+class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI>
+ : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>;
+
+class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>;
+
+class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI>
+ : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>;
+
+class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(Store Value:$val, Addr:$addr),
+ (MI Addr:$addr, (ValueMod Value:$val))>;
+
+def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
+def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
+def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
+def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
+
+let AddedComplexity = 100 in {
+ def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
+ def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
+ def: Storea_pat<store, I32, addrgp, S2_storerigp>;
+ def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
+
+ // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
+ // to "r0 = 1; memw(#foo) = r0"
+ let AddedComplexity = 100 in
+ def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>;
+}
+
+//===----------------------------------------------------------------------===//
+// Template class for non predicated load instructions with
+// absolute addressing mode.
+//===----------------------------------------------------------------------===//
+let isPredicable = 1, hasSideEffects = 0 in
+class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<3> MajOp>
+ : LDInst <(outs RC:$dst), (ins ImmOp:$addr),
+ "$dst = "#mnemonic# "(#$addr)",
+ [], "", V2LDST_tc_ld_SLOT01> {
+ bits<5> dst;
+ bits<19> addr;
+ bits<16> offsetBits;
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3},
+ !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
+ !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
+ /* u16_0Imm */ addr{15-0})));
+
+ let IClass = 0b0100;
+
+ let Inst{27} = 0b1;
+ let Inst{26-25} = offsetBits{15-14};
+ let Inst{24} = 0b1;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = offsetBits{13-9};
+ let Inst{13-5} = offsetBits{8-0};
+ let Inst{4-0} = dst;
+ }
+
+class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
+ bits<3> MajOp>
+ : T_LoadAbsGP <mnemonic, RC, u32MustExt, MajOp>, AddrModeRel {
+
+ string ImmOpStr = !cast<string>(ImmOp);
+ let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
+ !if (!eq(ImmOpStr, "u16_2Imm"), 18,
+ !if (!eq(ImmOpStr, "u16_1Imm"), 17,
+ /* u16_0Imm */ 16)));
+
+ let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3,
+ !if (!eq(ImmOpStr, "u16_2Imm"), 2,
+ !if (!eq(ImmOpStr, "u16_1Imm"), 1,
+ /* u16_0Imm */ 0)));
+ }
+
+//===----------------------------------------------------------------------===//
+// Template class for predicated load instructions with
+// absolute addressing mode.
+//===----------------------------------------------------------------------===//
+let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opExtentBits = 6,
+ opExtendable = 2 in
+class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp,
+ bit isPredNot, bit isPredNew>
+ : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u32MustExt:$absaddr),
+ !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"(#$absaddr)">, AddrModeRel {
+ bits<5> dst;
+ bits<2> src1;
+ bits<6> absaddr;
+
+ let isPredicatedNew = isPredNew;
+ let isPredicatedFalse = isPredNot;
+ let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
+
+ let IClass = 0b1001;
+
+ let Inst{27-24} = 0b1111;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = absaddr{5-1};
+ let Inst{13} = 0b1;
+ let Inst{12} = isPredNew;
+ let Inst{11} = isPredNot;
+ let Inst{10-9} = src1;
+ let Inst{8} = absaddr{0};
+ let Inst{7} = 0b1;
+ let Inst{4-0} = dst;
+ }
+
+//===----------------------------------------------------------------------===//
+// Multiclass for the load instructions with absolute addressing mode.
+//===----------------------------------------------------------------------===//
+multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bits<3> MajOp,
+ bit PredNot> {
+ def _abs : T_LoadAbs_Pred <mnemonic, RC, MajOp, PredNot, 0>;
+ // Predicate new
+ def new_abs : T_LoadAbs_Pred <mnemonic, RC, MajOp, PredNot, 1>;
+}
+
+let addrMode = Absolute, isExtended = 1 in
+multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, bits<3> MajOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
+ let opExtendable = 1, isPredicable = 1 in
+ def L4_#NAME#_abs: T_LoadAbs <mnemonic, RC, ImmOp, MajOp>;
+
+ // Predicated
+ defm L4_p#NAME#t : LD_Abs_Pred<mnemonic, RC, MajOp, 0>;
+ defm L4_p#NAME#f : LD_Abs_Pred<mnemonic, RC, MajOp, 1>;
+ }
+}
+
+let accessSize = ByteAccess, hasNewValue = 1 in {
+ defm loadrb : LD_Abs<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>;
+ defm loadrub : LD_Abs<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>;
+}
+
+let accessSize = HalfWordAccess, hasNewValue = 1 in {
+ defm loadrh : LD_Abs<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>;
+ defm loadruh : LD_Abs<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>;
+}
+
+let accessSize = WordAccess, hasNewValue = 1 in
+defm loadri : LD_Abs<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>;
+
+let accessSize = DoubleWordAccess in
+defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass for load instructions with GP-relative addressing mode.
+// Rx=mem[bhwd](##global)
+// Once predicated, these instructions map to absolute addressing mode.
+// if ([!]Pv[.new]) Rx=mem[bhwd](##global)
+//===----------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1 in
+class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp,
+ bits<3> MajOp>
+ : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp>, PredNewRel {
+ let BaseOpcode = BaseOp#_abs;
+ }
+
+let accessSize = ByteAccess, hasNewValue = 1 in {
+ def L2_loadrbgp : T_LoadGP<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>;
+ def L2_loadrubgp : T_LoadGP<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>;
+}
+
+let accessSize = HalfWordAccess, hasNewValue = 1 in {
+ def L2_loadrhgp : T_LoadGP<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>;
+ def L2_loadruhgp : T_LoadGP<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>;
+}
+
+let accessSize = WordAccess, hasNewValue = 1 in
+def L2_loadrigp : T_LoadGP<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>;
+
+let accessSize = DoubleWordAccess in
+def L2_loadrdgp : T_LoadGP<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
+
+def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>;
+def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>;
+def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>;
+def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>;
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
+def: Loadam_pat<load, i1, addrga, I32toI1, L4_loadrub_abs>;
+def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>;
+
+def: Stoream_pat<store, I1, addrga, I1toI32, S2_storerbabs>;
+def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>;
+
+// Map from load(globaladdress) -> mem[u][bhwd](#foo)
+class LoadGP_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
+ : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))),
+ (VT (MI tglobaladdr:$global))>;
+
+let AddedComplexity = 100 in {
+ def: LoadGP_pats <extloadi8, L2_loadrbgp>;
+ def: LoadGP_pats <sextloadi8, L2_loadrbgp>;
+ def: LoadGP_pats <zextloadi8, L2_loadrubgp>;
+ def: LoadGP_pats <extloadi16, L2_loadrhgp>;
+ def: LoadGP_pats <sextloadi16, L2_loadrhgp>;
+ def: LoadGP_pats <zextloadi16, L2_loadruhgp>;
+ def: LoadGP_pats <load, L2_loadrigp>;
+ def: LoadGP_pats <load, L2_loadrdgp, i64>;
+}
+
+// When the Interprocedural Global Variable optimizer realizes that a certain
+// global variable takes only two constant values, it shrinks the global to
+// a boolean. Catch those loads here with the following two patterns.
+let AddedComplexity = 100 in {
+ def: LoadGP_pats <extloadi1, L2_loadrubgp>;
+ def: LoadGP_pats <zextloadi1, L2_loadrubgp>;
+}
+
+// Transfer global address into a register
+def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>;
+def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi s16Ext:$Rs)>;
+def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>;
+
+let AddedComplexity = 30 in {
+ def: Storea_pat<truncstorei8, I32, u32ImmPred, S2_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, u32ImmPred, S2_storerhabs>;
+ def: Storea_pat<store, I32, u32ImmPred, S2_storeriabs>;
+}
+
+let AddedComplexity = 30 in {
+ def: Loada_pat<load, i32, u32ImmPred, L4_loadri_abs>;
+ def: Loada_pat<sextloadi8, i32, u32ImmPred, L4_loadrb_abs>;
+ def: Loada_pat<zextloadi8, i32, u32ImmPred, L4_loadrub_abs>;
+ def: Loada_pat<sextloadi16, i32, u32ImmPred, L4_loadrh_abs>;
+ def: Loada_pat<zextloadi16, i32, u32ImmPred, L4_loadruh_abs>;
+}
+
+// Indexed store word - global address.
+// memw(Rs+#u6:2)=#S8
+let AddedComplexity = 100 in
+def: Storex_add_pat<store, addrga, u6_2ImmPred, S4_storeiri_io>;
+
+// Load from a global address that has only one use in the current basic block.
+let AddedComplexity = 100 in {
+ def: Loada_pat<extloadi8, i32, addrga, L4_loadrub_abs>;
+ def: Loada_pat<sextloadi8, i32, addrga, L4_loadrb_abs>;
+ def: Loada_pat<zextloadi8, i32, addrga, L4_loadrub_abs>;
+
+ def: Loada_pat<extloadi16, i32, addrga, L4_loadruh_abs>;
+ def: Loada_pat<sextloadi16, i32, addrga, L4_loadrh_abs>;
+ def: Loada_pat<zextloadi16, i32, addrga, L4_loadruh_abs>;
+
+ def: Loada_pat<load, i32, addrga, L4_loadri_abs>;
+ def: Loada_pat<load, i64, addrga, L4_loadrd_abs>;
+}
+
+// Store to a global address that has only one use in the current basic block.
+let AddedComplexity = 100 in {
+ def: Storea_pat<truncstorei8, I32, addrga, S2_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, addrga, S2_storerhabs>;
+ def: Storea_pat<store, I32, addrga, S2_storeriabs>;
+ def: Storea_pat<store, I64, addrga, S2_storerdabs>;
+
+ def: Stoream_pat<truncstorei32, I64, addrga, LoReg, S2_storeriabs>;
+}
+
+// i8/i16/i32 -> i64 loads
+// We need a complexity of 120 here to override preceding handling of
+// zextload.
+let AddedComplexity = 120 in {
+ def: Loadam_pat<extloadi8, i64, addrga, Zext64, L4_loadrub_abs>;
+ def: Loadam_pat<sextloadi8, i64, addrga, Sext64, L4_loadrb_abs>;
+ def: Loadam_pat<zextloadi8, i64, addrga, Zext64, L4_loadrub_abs>;
+
+ def: Loadam_pat<extloadi16, i64, addrga, Zext64, L4_loadruh_abs>;
+ def: Loadam_pat<sextloadi16, i64, addrga, Sext64, L4_loadrh_abs>;
+ def: Loadam_pat<zextloadi16, i64, addrga, Zext64, L4_loadruh_abs>;
+
+ def: Loadam_pat<extloadi32, i64, addrga, Zext64, L4_loadri_abs>;
+ def: Loadam_pat<sextloadi32, i64, addrga, Sext64, L4_loadri_abs>;
+ def: Loadam_pat<zextloadi32, i64, addrga, Zext64, L4_loadri_abs>;
+}
+
+let AddedComplexity = 100 in {
+ def: Loada_pat<extloadi8, i32, addrgp, L4_loadrub_abs>;
+ def: Loada_pat<sextloadi8, i32, addrgp, L4_loadrb_abs>;
+ def: Loada_pat<zextloadi8, i32, addrgp, L4_loadrub_abs>;
+
+ def: Loada_pat<extloadi16, i32, addrgp, L4_loadruh_abs>;
+ def: Loada_pat<sextloadi16, i32, addrgp, L4_loadrh_abs>;
+ def: Loada_pat<zextloadi16, i32, addrgp, L4_loadruh_abs>;
+
+ def: Loada_pat<load, i32, addrgp, L4_loadri_abs>;
+ def: Loada_pat<load, i64, addrgp, L4_loadrd_abs>;
+}
+
+let AddedComplexity = 100 in {
+ def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhabs>;
+ def: Storea_pat<store, I32, addrgp, S2_storeriabs>;
+ def: Storea_pat<store, I64, addrgp, S2_storerdabs>;
+}
+
+def: Loada_pat<atomic_load_8, i32, addrgp, L4_loadrub_abs>;
+def: Loada_pat<atomic_load_16, i32, addrgp, L4_loadruh_abs>;
+def: Loada_pat<atomic_load_32, i32, addrgp, L4_loadri_abs>;
+def: Loada_pat<atomic_load_64, i64, addrgp, L4_loadrd_abs>;
+
+def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbabs>;
+def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhabs>;
+def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storeriabs>;
+def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdabs>;
+
+let Constraints = "@earlyclobber $dst" in
+def Insert4 : PseudoM<(outs DoubleRegs:$dst), (ins IntRegs:$a, IntRegs:$b,
+ IntRegs:$c, IntRegs:$d),
+ ".error \"Should never try to emit Insert4\"",
+ [(set (i64 DoubleRegs:$dst),
+ (or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$b), (i32 65535))))),
+ (i32 16)),
+ (i64 (zext (i32 (and (i32 IntRegs:$a), (i32 65535)))))),
+ (shl (i64 (anyext (i32 (and (i32 IntRegs:$c), (i32 65535))))),
+ (i32 32))),
+ (shl (i64 (anyext (i32 IntRegs:$d))), (i32 48))))]>;
+
+//===----------------------------------------------------------------------===//
+// :raw form of boundscheck:hi:lo insns
+//===----------------------------------------------------------------------===//
+
+// A4_boundscheck_lo: Detect if a register is within bounds.
+let hasSideEffects = 0 in
+def A4_boundscheck_lo: ALU64Inst <
+ (outs PredRegs:$Pd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Pd = boundscheck($Rss, $Rtt):raw:lo"> {
+ bits<2> Pd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b00100;
+ let Inst{13} = 0b1;
+ let Inst{7-5} = 0b100;
+ let Inst{1-0} = Pd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+// A4_boundscheck_hi: Detect if a register is within bounds.
+let hasSideEffects = 0 in
+def A4_boundscheck_hi: ALU64Inst <
+ (outs PredRegs:$Pd),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+ "$Pd = boundscheck($Rss, $Rtt):raw:hi"> {
+ bits<2> Pd;
+ bits<5> Rss;
+ bits<5> Rtt;
+
+ let IClass = 0b1101;
+
+ let Inst{27-23} = 0b00100;
+ let Inst{13} = 0b1;
+ let Inst{7-5} = 0b101;
+ let Inst{1-0} = Pd;
+ let Inst{20-16} = Rss;
+ let Inst{12-8} = Rtt;
+ }
+
+let hasSideEffects = 0, isAsmParserOnly = 1 in
+def A4_boundscheck : MInst <
+ (outs PredRegs:$Pd), (ins IntRegs:$Rs, DoubleRegs:$Rtt),
+ "$Pd=boundscheck($Rs,$Rtt)">;
+
+// A4_tlbmatch: Detect if a VA/ASID matches a TLB entry.
+let isPredicateLate = 1, hasSideEffects = 0 in
+def A4_tlbmatch : ALU64Inst<(outs PredRegs:$Pd),
+ (ins DoubleRegs:$Rs, IntRegs:$Rt),
+ "$Pd = tlbmatch($Rs, $Rt)",
+ [], "", ALU64_tc_2early_SLOT23> {
+ bits<2> Pd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1101;
+ let Inst{27-23} = 0b00100;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b1;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = 0b011;
+ let Inst{1-0} = Pd;
+ }
+
+// We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH
+// because the SDNode ISD::PREFETCH has properties MayLoad and MayStore.
+// We don't really want either one here.
+def SDTHexagonDCFETCH : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>;
+def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH,
+ [SDNPHasChain]>;
+
+// Use LD0Inst for dcfetch, but set "mayLoad" to 0 because this doesn't
+// really do a load.
+let hasSideEffects = 1, mayLoad = 0 in
+def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3),
+ "dcfetch($Rs + #$u11_3)",
+ [(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3)],
+ "", LD_tc_ld_SLOT0> {
+ bits<5> Rs;
+ bits<14> u11_3;
+
+ let IClass = 0b1001;
+ let Inst{27-21} = 0b0100000;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{10-0} = u11_3{13-3};
+}
+
+//===----------------------------------------------------------------------===//
+// Compound instructions
+//===----------------------------------------------------------------------===//
+
+let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
+ isPredicated = 1, isPredicatedNew = 1, isExtendable = 1,
+ opExtentBits = 11, opExtentAlign = 2, opExtendable = 1,
+ isTerminator = 1 in
+class CJInst_tstbit_R0<string px, bit np, string tnt>
+ : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2),
+ ""#px#" = tstbit($Rs, #0); if ("
+ #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
+ bits<4> Rs;
+ bits<11> r9_2;
+
+ // np: !p[01]
+ let isPredicatedFalse = np;
+ // tnt: Taken/Not Taken
+ let isBrTaken = !if (!eq(tnt, "t"), "true", "false");
+ let isTaken = !if (!eq(tnt, "t"), 1, 0);
+
+ let IClass = 0b0001;
+ let Inst{27-26} = 0b00;
+ let Inst{25} = !if (!eq(px, "!p1"), 1,
+ !if (!eq(px, "p1"), 1, 0));
+ let Inst{24-23} = 0b11;
+ let Inst{22} = np;
+ let Inst{21-20} = r9_2{10-9};
+ let Inst{19-16} = Rs;
+ let Inst{13} = !if (!eq(tnt, "t"), 1, 0);
+ let Inst{9-8} = 0b11;
+ let Inst{7-1} = r9_2{8-2};
+}
+
+let Defs = [PC, P0], Uses = [P0] in {
+ def J4_tstbit0_tp0_jump_nt : CJInst_tstbit_R0<"p0", 0, "nt">;
+ def J4_tstbit0_tp0_jump_t : CJInst_tstbit_R0<"p0", 0, "t">;
+ def J4_tstbit0_fp0_jump_nt : CJInst_tstbit_R0<"p0", 1, "nt">;
+ def J4_tstbit0_fp0_jump_t : CJInst_tstbit_R0<"p0", 1, "t">;
+}
+
+let Defs = [PC, P1], Uses = [P1] in {
+ def J4_tstbit0_tp1_jump_nt : CJInst_tstbit_R0<"p1", 0, "nt">;
+ def J4_tstbit0_tp1_jump_t : CJInst_tstbit_R0<"p1", 0, "t">;
+ def J4_tstbit0_fp1_jump_nt : CJInst_tstbit_R0<"p1", 1, "nt">;
+ def J4_tstbit0_fp1_jump_t : CJInst_tstbit_R0<"p1", 1, "t">;
+}
+
+
+let isBranch = 1, hasSideEffects = 0,
+ isExtentSigned = 1, isPredicated = 1, isPredicatedNew = 1,
+ isExtendable = 1, opExtentBits = 11, opExtentAlign = 2,
+ opExtendable = 2, isTerminator = 1 in
+class CJInst_RR<string px, string op, bit np, string tnt>
+ : InstHexagon<(outs), (ins IntRegs:$Rs, IntRegs:$Rt, brtarget:$r9_2),
+ ""#px#" = cmp."#op#"($Rs, $Rt); if ("
+ #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
+ bits<4> Rs;
+ bits<4> Rt;
+ bits<11> r9_2;
+
+ // np: !p[01]
+ let isPredicatedFalse = np;
+ // tnt: Taken/Not Taken
+ let isBrTaken = !if (!eq(tnt, "t"), "true", "false");
+ let isTaken = !if (!eq(tnt, "t"), 1, 0);
+
+ let IClass = 0b0001;
+ let Inst{27-23} = !if (!eq(op, "eq"), 0b01000,
+ !if (!eq(op, "gt"), 0b01001,
+ !if (!eq(op, "gtu"), 0b01010, 0)));
+ let Inst{22} = np;
+ let Inst{21-20} = r9_2{10-9};
+ let Inst{19-16} = Rs;
+ let Inst{13} = !if (!eq(tnt, "t"), 1, 0);
+ // px: Predicate reg 0/1
+ let Inst{12} = !if (!eq(px, "!p1"), 1,
+ !if (!eq(px, "p1"), 1, 0));
+ let Inst{11-8} = Rt;
+ let Inst{7-1} = r9_2{8-2};
+}
+
+// P[10] taken/not taken.
+multiclass T_tnt_CJInst_RR<string op, bit np> {
+ let Defs = [PC, P0], Uses = [P0] in {
+ def NAME#p0_jump_nt : CJInst_RR<"p0", op, np, "nt">;
+ def NAME#p0_jump_t : CJInst_RR<"p0", op, np, "t">;
+ }
+ let Defs = [PC, P1], Uses = [P1] in {
+ def NAME#p1_jump_nt : CJInst_RR<"p1", op, np, "nt">;
+ def NAME#p1_jump_t : CJInst_RR<"p1", op, np, "t">;
+ }
+}
+// Predicate / !Predicate
+multiclass T_pnp_CJInst_RR<string op>{
+ defm J4_cmp#NAME#_t : T_tnt_CJInst_RR<op, 0>;
+ defm J4_cmp#NAME#_f : T_tnt_CJInst_RR<op, 1>;
+}
+// TypeCJ Instructions compare RR and jump
+defm eq : T_pnp_CJInst_RR<"eq">;
+defm gt : T_pnp_CJInst_RR<"gt">;
+defm gtu : T_pnp_CJInst_RR<"gtu">;
+
+let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
+ isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, opExtentBits = 11,
+ opExtentAlign = 2, opExtendable = 2, isTerminator = 1 in
+class CJInst_RU5<string px, string op, bit np, string tnt>
+ : InstHexagon<(outs), (ins IntRegs:$Rs, u5Imm:$U5, brtarget:$r9_2),
+ ""#px#" = cmp."#op#"($Rs, #$U5); if ("
+ #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
+ bits<4> Rs;
+ bits<5> U5;
+ bits<11> r9_2;
+
+ // np: !p[01]
+ let isPredicatedFalse = np;
+ // tnt: Taken/Not Taken
+ let isBrTaken = !if (!eq(tnt, "t"), "true", "false");
+ let isTaken = !if (!eq(tnt, "t"), 1, 0);
+
+ let IClass = 0b0001;
+ let Inst{27-26} = 0b00;
+ // px: Predicate reg 0/1
+ let Inst{25} = !if (!eq(px, "!p1"), 1,
+ !if (!eq(px, "p1"), 1, 0));
+ let Inst{24-23} = !if (!eq(op, "eq"), 0b00,
+ !if (!eq(op, "gt"), 0b01,
+ !if (!eq(op, "gtu"), 0b10, 0)));
+ let Inst{22} = np;
+ let Inst{21-20} = r9_2{10-9};
+ let Inst{19-16} = Rs;
+ let Inst{13} = !if (!eq(tnt, "t"), 1, 0);
+ let Inst{12-8} = U5;
+ let Inst{7-1} = r9_2{8-2};
+}
+// P[10] taken/not taken.
+multiclass T_tnt_CJInst_RU5<string op, bit np> {
+ let Defs = [PC, P0], Uses = [P0] in {
+ def NAME#p0_jump_nt : CJInst_RU5<"p0", op, np, "nt">;
+ def NAME#p0_jump_t : CJInst_RU5<"p0", op, np, "t">;
+ }
+ let Defs = [PC, P1], Uses = [P1] in {
+ def NAME#p1_jump_nt : CJInst_RU5<"p1", op, np, "nt">;
+ def NAME#p1_jump_t : CJInst_RU5<"p1", op, np, "t">;
+ }
+}
+// Predicate / !Predicate
+multiclass T_pnp_CJInst_RU5<string op>{
+ defm J4_cmp#NAME#i_t : T_tnt_CJInst_RU5<op, 0>;
+ defm J4_cmp#NAME#i_f : T_tnt_CJInst_RU5<op, 1>;
+}
+// TypeCJ Instructions compare RI and jump
+defm eq : T_pnp_CJInst_RU5<"eq">;
+defm gt : T_pnp_CJInst_RU5<"gt">;
+defm gtu : T_pnp_CJInst_RU5<"gtu">;
+
+let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
+ isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1,
+ isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 1,
+ isTerminator = 1 in
+class CJInst_Rn1<string px, string op, bit np, string tnt>
+ : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2),
+ ""#px#" = cmp."#op#"($Rs,#-1); if ("
+ #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
+ bits<4> Rs;
+ bits<11> r9_2;
+
+ // np: !p[01]
+ let isPredicatedFalse = np;
+ // tnt: Taken/Not Taken
+ let isBrTaken = !if (!eq(tnt, "t"), "true", "false");
+ let isTaken = !if (!eq(tnt, "t"), 1, 0);
+
+ let IClass = 0b0001;
+ let Inst{27-26} = 0b00;
+ let Inst{25} = !if (!eq(px, "!p1"), 1,
+ !if (!eq(px, "p1"), 1, 0));
+
+ let Inst{24-23} = 0b11;
+ let Inst{22} = np;
+ let Inst{21-20} = r9_2{10-9};
+ let Inst{19-16} = Rs;
+ let Inst{13} = !if (!eq(tnt, "t"), 1, 0);
+ let Inst{9-8} = !if (!eq(op, "eq"), 0b00,
+ !if (!eq(op, "gt"), 0b01, 0));
+ let Inst{7-1} = r9_2{8-2};
+}
+
+// P[10] taken/not taken.
+multiclass T_tnt_CJInst_Rn1<string op, bit np> {
+ let Defs = [PC, P0], Uses = [P0] in {
+ def NAME#p0_jump_nt : CJInst_Rn1<"p0", op, np, "nt">;
+ def NAME#p0_jump_t : CJInst_Rn1<"p0", op, np, "t">;
+ }
+ let Defs = [PC, P1], Uses = [P1] in {
+ def NAME#p1_jump_nt : CJInst_Rn1<"p1", op, np, "nt">;
+ def NAME#p1_jump_t : CJInst_Rn1<"p1", op, np, "t">;
+ }
+}
+// Predicate / !Predicate
+multiclass T_pnp_CJInst_Rn1<string op>{
+ defm J4_cmp#NAME#n1_t : T_tnt_CJInst_Rn1<op, 0>;
+ defm J4_cmp#NAME#n1_f : T_tnt_CJInst_Rn1<op, 1>;
+}
+// TypeCJ Instructions compare -1 and jump
+defm eq : T_pnp_CJInst_Rn1<"eq">;
+defm gt : T_pnp_CJInst_Rn1<"gt">;
+
+// J4_jumpseti: Direct unconditional jump and set register to immediate.
+let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1,
+ isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11,
+ opExtentAlign = 2, opExtendable = 2 in
+def J4_jumpseti: CJInst <
+ (outs IntRegs:$Rd),
+ (ins u6Imm:$U6, brtarget:$r9_2),
+ "$Rd = #$U6 ; jump $r9_2"> {
+ bits<4> Rd;
+ bits<6> U6;
+ bits<11> r9_2;
+
+ let IClass = 0b0001;
+ let Inst{27-24} = 0b0110;
+ let Inst{21-20} = r9_2{10-9};
+ let Inst{19-16} = Rd;
+ let Inst{13-8} = U6;
+ let Inst{7-1} = r9_2{8-2};
+ }
+
+// J4_jumpsetr: Direct unconditional jump and transfer register.
+let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1,
+ isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11,
+ opExtentAlign = 2, opExtendable = 2 in
+def J4_jumpsetr: CJInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, brtarget:$r9_2),
+ "$Rd = $Rs ; jump $r9_2"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ bits<11> r9_2;
+
+ let IClass = 0b0001;
+ let Inst{27-24} = 0b0111;
+ let Inst{21-20} = r9_2{10-9};
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rs;
+ let Inst{7-1} = r9_2{8-2};
+ }
+
+//===----------------------------------------------------------------------===//
+// Duplex instructions
+//===----------------------------------------------------------------------===//
+include "HexagonIsetDx.td"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
new file mode 100644
index 0000000..823961f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
@@ -0,0 +1,937 @@
+//=- HexagonInstrInfoV5.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V5 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY
+//===----------------------------------------------------------------------===//
+
+ //Rdd[+]=vrmpybsu(Rss,Rtt)
+let Predicates = [HasV5T] in {
+ def M5_vrmpybsu: T_XTYPE_Vect<"vrmpybsu", 0b110, 0b001, 0>;
+ def M5_vrmacbsu: T_XTYPE_Vect_acc<"vrmpybsu", 0b110, 0b001, 0>;
+
+ //Rdd[+]=vrmpybu(Rss,Rtt)
+ def M5_vrmpybuu: T_XTYPE_Vect<"vrmpybu", 0b100, 0b001, 0>;
+ def M5_vrmacbuu: T_XTYPE_Vect_acc<"vrmpybu", 0b100, 0b001, 0>;
+
+ def M5_vdmpybsu: T_M2_vmpy<"vdmpybsu", 0b101, 0b001, 0, 0, 1>;
+ def M5_vdmacbsu: T_M2_vmpy_acc_sat <"vdmpybsu", 0b001, 0b001, 0, 0>;
+}
+
+// Vector multiply bytes
+// Rdd=vmpyb[s]u(Rs,Rt)
+let Predicates = [HasV5T] in {
+ def M5_vmpybsu: T_XTYPE_mpy64 <"vmpybsu", 0b010, 0b001, 0, 0, 0>;
+ def M5_vmpybuu: T_XTYPE_mpy64 <"vmpybu", 0b100, 0b001, 0, 0, 0>;
+
+ // Rxx+=vmpyb[s]u(Rs,Rt)
+ def M5_vmacbsu: T_XTYPE_mpy64_acc <"vmpybsu", "+", 0b110, 0b001, 0, 0, 0>;
+ def M5_vmacbuu: T_XTYPE_mpy64_acc <"vmpybu", "+", 0b100, 0b001, 0, 0, 0>;
+
+ // Rd=vaddhub(Rss,Rtt):sat
+ let hasNewValue = 1, opNewValue = 0 in
+ def A5_vaddhubs: T_S3op_1 <"vaddhub", IntRegs, 0b01, 0b001, 0, 1>;
+}
+
+def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6Imm,
+ [(set I64:$dst,
+ (sra (i64 (add (i64 (sra I64:$src1, u6ImmPred:$src2)), 1)),
+ (i32 1)))], 1>,
+ Requires<[HasV5T]> {
+ bits<6> src2;
+ let Inst{13-8} = src2;
+}
+
+let isAsmParserOnly = 1 in
+def S2_asr_i_p_rnd_goodsyntax
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = asrrnd($src1, #$src2)">;
+
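
(Editorial illustration, not part of this change.) The selection pattern on S2_asr_i_p_rnd above matches the IR shape ((x >> k) + 1) >> 1, i.e. an arithmetic shift right by k+1 rounded to nearest with ties rounded up. A standalone check of that arithmetic; the helper name is made up:

    #include <cstdint>
    #include <cstdio>

    // ((x >> k) + 1) >> 1 == x / 2^(k+1) rounded to nearest, ties rounded up.
    static int64_t asr_rnd(int64_t x, unsigned k) {
      return ((x >> k) + 1) >> 1;
    }

    int main() {
      std::printf("%lld\n", (long long)asr_rnd(11, 1)); // 11/4 = 2.75 -> 3
      std::printf("%lld\n", (long long)asr_rnd(9, 1));  //  9/4 = 2.25 -> 2
      std::printf("%lld\n", (long long)asr_rnd(10, 1)); // 10/4 = 2.5  -> 3
      return 0;
    }
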
+def C4_fastcorner9 : T_LOGICAL_2OP<"fastcorner9", 0b000, 0, 0>,
+ Requires<[HasV5T]> {
+ let Inst{13,7,4} = 0b111;
+}
+
+def C4_fastcorner9_not : T_LOGICAL_2OP<"!fastcorner9", 0b000, 0, 0>,
+ Requires<[HasV5T]> {
+ let Inst{20,13,7,4} = 0b1111;
+}
+
+def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, f32>,
+ SDTCisPtrTy<1>]>;
+def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>;
+
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
+def FCONST32_nsdata : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set F32:$dst,
+ (HexagonFCONST32 tglobaladdr:$global))]>,
+ Requires<[HasV5T]>;
+
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
+def CONST64_Float_Real : LDInst<(outs DoubleRegs:$dst), (ins f64imm:$src1),
+ "$dst = CONST64(#$src1)",
+ [(set F64:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
+
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
+def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1),
+ "$dst = CONST32(#$src1)",
+ [(set F32:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
+
+// Transfer immediate float.
+// Only works with single-precision fp values.
+// For double precision, use CONST64_Float_Real, as a 64-bit transfer can
+// only hold 40 bits: 32 from the constant extender plus an 8-bit immediate.
+// Make sure the complexity is higher than that of the CONST32 patterns in
+// HexagonInstrInfo.td.
+let isExtended = 1, opExtendable = 1, isMoveImm = 1, isReMaterializable = 1,
+ isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT,
+ isCodeGenOnly = 1, isPseudo = 1 in
+def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32Ext:$src1),
+ "$dst = #$src1",
+ [(set F32:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
+
+let isExtended = 1, opExtendable = 2, isPredicated = 1, hasSideEffects = 0,
+ validSubTargets = HasV5SubT, isCodeGenOnly = 1, isPseudo = 1 in
+def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, f32Ext:$src2),
+ "if ($src1) $dst = #$src2", []>,
+ Requires<[HasV5T]>;
+
+let isExtended = 1, opExtendable = 2, isPredicated = 1, isPredicatedFalse = 1,
+ hasSideEffects = 0, validSubTargets = HasV5SubT, isPseudo = 1 in
+def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, f32Ext:$src2),
+ "if (!$src1) $dst = #$src2", []>,
+ Requires<[HasV5T]>;
+
+def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i64>]>;
+
+def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>;
+
+let hasNewValue = 1, validSubTargets = HasV5SubT in
+def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
+ "$Rd = popcount($Rss)",
+ [(set I32:$Rd, (HexagonPOPCOUNT I64:$Rss))], "", S_2op_tc_2_SLOT23>,
+ Requires<[HasV5T]> {
+ bits<5> Rd;
+ bits<5> Rss;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b1000011;
+ let Inst{7-5} = 0b011;
+ let Inst{4-0} = Rd;
+ let Inst{20-16} = Rss;
+ }
+
+defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>;
+defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>;
+
+defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
+defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
+def: Storex_simple_pat<store, F32, S2_storeri_io>;
+def: Storex_simple_pat<store, F64, S2_storerd_io>;
+
+let isFP = 1, hasNewValue = 1, opNewValue = 0 in
+class T_MInstFloat <string mnemonic, bits<3> MajOp, bits<3> MinOp>
+ : MInst<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = "#mnemonic#"($Rs, $Rt)", [],
+ "" , M_tc_3or4x_SLOT23 > ,
+ Requires<[HasV5T]> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-24} = 0b1011;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = Rt;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rd;
+ }
+
+let isCommutable = 1 in {
+ def F2_sfadd : T_MInstFloat < "sfadd", 0b000, 0b000>;
+ def F2_sfmpy : T_MInstFloat < "sfmpy", 0b010, 0b000>;
+}
+
+def F2_sfsub : T_MInstFloat < "sfsub", 0b000, 0b001>;
+
+def: Pat<(f32 (fadd F32:$src1, F32:$src2)),
+ (F2_sfadd F32:$src1, F32:$src2)>;
+
+def: Pat<(f32 (fsub F32:$src1, F32:$src2)),
+ (F2_sfsub F32:$src1, F32:$src2)>;
+
+def: Pat<(f32 (fmul F32:$src1, F32:$src2)),
+ (F2_sfmpy F32:$src1, F32:$src2)>;
+
+let Itinerary = M_tc_3x_SLOT23 in {
+ def F2_sfmax : T_MInstFloat < "sfmax", 0b100, 0b000>;
+ def F2_sfmin : T_MInstFloat < "sfmin", 0b100, 0b001>;
+}
+
+let AddedComplexity = 100, Predicates = [HasV5T] in {
+ def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)),
+ F32:$src1, F32:$src2)),
+ (F2_sfmin F32:$src1, F32:$src2)>;
+
+ def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)),
+ F32:$src2, F32:$src1)),
+ (F2_sfmin F32:$src1, F32:$src2)>;
+
+ def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)),
+ F32:$src1, F32:$src2)),
+ (F2_sfmax F32:$src1, F32:$src2)>;
+
+ def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)),
+ F32:$src2, F32:$src1)),
+ (F2_sfmax F32:$src1, F32:$src2)>;
+}
+
+def F2_sffixupn : T_MInstFloat < "sffixupn", 0b110, 0b000>;
+def F2_sffixupd : T_MInstFloat < "sffixupd", 0b110, 0b001>;
+
+// F2_sfrecipa: Reciprocal approximation for division.
+let isPredicateLate = 1, isFP = 1,
+hasSideEffects = 0, hasNewValue = 1 in
+def F2_sfrecipa: MInst <
+ (outs IntRegs:$Rd, PredRegs:$Pe),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd, $Pe = sfrecipa($Rs, $Rt)">,
+ Requires<[HasV5T]> {
+ bits<5> Rd;
+ bits<2> Pe;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+ let Inst{27-21} = 0b1011111;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = Rt;
+ let Inst{7} = 0b1;
+ let Inst{6-5} = Pe;
+ let Inst{4-0} = Rd;
+ }
+
+// F2_dfcmpeq: Floating point compare for equal.
+let isCompare = 1, isFP = 1 in
+class T_fcmp <string mnemonic, RegisterClass RC, bits<3> MinOp,
+ list<dag> pattern = [] >
+ : ALU64Inst <(outs PredRegs:$dst), (ins RC:$src1, RC:$src2),
+ "$dst = "#mnemonic#"($src1, $src2)", pattern,
+ "" , ALU64_tc_2early_SLOT23 > ,
+ Requires<[HasV5T]> {
+ bits<2> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let IClass = 0b1101;
+
+ let Inst{27-21} = 0b0010111;
+ let Inst{20-16} = src1;
+ let Inst{12-8} = src2;
+ let Inst{7-5} = MinOp;
+ let Inst{1-0} = dst;
+ }
+
+class T_fcmp64 <string mnemonic, PatFrag OpNode, bits<3> MinOp>
+ : T_fcmp <mnemonic, DoubleRegs, MinOp,
+ [(set I1:$dst, (OpNode F64:$src1, F64:$src2))]> {
+ let IClass = 0b1101;
+ let Inst{27-21} = 0b0010111;
+}
+
+class T_fcmp32 <string mnemonic, PatFrag OpNode, bits<3> MinOp>
+ : T_fcmp <mnemonic, IntRegs, MinOp,
+ [(set I1:$dst, (OpNode F32:$src1, F32:$src2))]> {
+ let IClass = 0b1100;
+ let Inst{27-21} = 0b0111111;
+}
+
+def F2_dfcmpeq : T_fcmp64<"dfcmp.eq", setoeq, 0b000>;
+def F2_dfcmpgt : T_fcmp64<"dfcmp.gt", setogt, 0b001>;
+def F2_dfcmpge : T_fcmp64<"dfcmp.ge", setoge, 0b010>;
+def F2_dfcmpuo : T_fcmp64<"dfcmp.uo", setuo, 0b011>;
+
+def F2_sfcmpge : T_fcmp32<"sfcmp.ge", setoge, 0b000>;
+def F2_sfcmpuo : T_fcmp32<"sfcmp.uo", setuo, 0b001>;
+def F2_sfcmpeq : T_fcmp32<"sfcmp.eq", setoeq, 0b011>;
+def F2_sfcmpgt : T_fcmp32<"sfcmp.gt", setogt, 0b100>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for ordered gt, ge, eq operations.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasV5T] in
+multiclass T_fcmp_pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
+ // IntRegs
+ def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)),
+ (IntMI F32:$src1, F32:$src2)>;
+ // DoubleRegs
+ def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)),
+ (DoubleMI F64:$src1, F64:$src2)>;
+}
+
+defm : T_fcmp_pats <seteq, F2_sfcmpeq, F2_dfcmpeq>;
+defm : T_fcmp_pats <setgt, F2_sfcmpgt, F2_dfcmpgt>;
+defm : T_fcmp_pats <setge, F2_sfcmpge, F2_dfcmpge>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations.
+//===----------------------------------------------------------------------===//
+let Predicates = [HasV5T] in
+multiclass unord_Pats <PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
+ // IntRegs
+ def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+ (IntMI F32:$src1, F32:$src2))>;
+
+ // DoubleRegs
+ def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+ (DoubleMI F64:$src1, F64:$src2))>;
+}
+
+defm : unord_Pats <setuge, F2_sfcmpge, F2_dfcmpge>;
+defm : unord_Pats <setugt, F2_sfcmpgt, F2_dfcmpgt>;
+defm : unord_Pats <setueq, F2_sfcmpeq, F2_dfcmpeq>;
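
(Editorial illustration, not part of this change.) The defm lines above synthesize the unordered comparisons as "unordered OR ordered compare", e.g. setugt becomes C2_or(F2_sfcmpuo, F2_sfcmpgt). A scalar C++ analogue of that decomposition; the helper name is made up:

    #include <cassert>
    #include <cmath>

    // setugt(a, b): true if either operand is NaN (unordered) OR a > b (ordered).
    static bool ugt(float a, float b) {
      bool uo  = std::isnan(a) || std::isnan(b);  // F2_sfcmpuo analogue
      bool ogt = a > b;                           // F2_sfcmpgt analogue (ordered)
      return uo || ogt;                           // C2_or analogue
    }

    int main() {
      assert(ugt(2.0f, 1.0f));   // ordered, greater
      assert(!ugt(1.0f, 2.0f));  // ordered, not greater
      assert(ugt(NAN, 1.0f));    // unordered operand makes any "u" compare true
      return 0;
    }
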
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for the following dags:
+// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2))
+// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2)
+// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2)
+// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2))
+//===----------------------------------------------------------------------===//
+let Predicates = [HasV5T] in
+multiclass eq_ordgePats <PatFrag cmpOp, InstHexagon IntMI,
+ InstHexagon DoubleMI> {
+ // IntRegs
+ def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+ (C2_not (IntMI F32:$src1, F32:$src2))>;
+ def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+ (IntMI F32:$src1, F32:$src2)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+ (IntMI F32:$src1, F32:$src2)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+ (C2_not (IntMI F32:$src1, F32:$src2))>;
+
+ // DoubleRegs
+ def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+ (C2_not (DoubleMI F64:$src1, F64:$src2))>;
+ def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+ (DoubleMI F64:$src1, F64:$src2)>;
+ def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+ (DoubleMI F64:$src1, F64:$src2)>;
+ def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+ (C2_not (DoubleMI F64:$src1, F64:$src2))>;
+}
+
+defm : eq_ordgePats<setoeq, F2_sfcmpeq, F2_dfcmpeq>;
+defm : eq_ordgePats<setoge, F2_sfcmpge, F2_dfcmpge>;
+defm : eq_ordgePats<setogt, F2_sfcmpgt, F2_dfcmpgt>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for the following dags:
+// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1))
+// seteq(setolt(op1, op2), 1) -> setogt(op2, op1)
+// setne(setolt(op1, op2), 0) -> setogt(op2, op1)
+// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1))
+//===----------------------------------------------------------------------===//
+let Predicates = [HasV5T] in
+multiclass eq_ordltPats <PatFrag cmpOp, InstHexagon IntMI,
+ InstHexagon DoubleMI> {
+ // IntRegs
+ def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+ (C2_not (IntMI F32:$src2, F32:$src1))>;
+ def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+ (IntMI F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+ (IntMI F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+ (C2_not (IntMI F32:$src2, F32:$src1))>;
+
+ // DoubleRegs
+ def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+ (C2_not (DoubleMI F64:$src2, F64:$src1))>;
+ def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+ (DoubleMI F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+ (DoubleMI F64:$src2, F64:$src1)>;
+  def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+           (C2_not (DoubleMI F64:$src2, F64:$src1))>;
+}
+
+defm : eq_ordltPats<setole, F2_sfcmpge, F2_dfcmpge>;
+defm : eq_ordltPats<setolt, F2_sfcmpgt, F2_dfcmpgt>;
+
+
+// Ordered: seto is the inverse of setuo; see http://llvm.org/docs/LangRef.html#i_fcmp
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (seto F32:$src1, F32:$src2)),
+ (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>;
+ def: Pat<(i1 (seto F32:$src1, fpimm:$src2)),
+ (C2_not (F2_sfcmpuo (TFRI_f fpimm:$src2), F32:$src1))>;
+ def: Pat<(i1 (seto F64:$src1, F64:$src2)),
+ (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>;
+ def: Pat<(i1 (seto F64:$src1, fpimm:$src2)),
+ (C2_not (F2_dfcmpuo (CONST64_Float_Real fpimm:$src2), F64:$src1))>;
+}
+
+// Ordered lt.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (setolt F32:$src1, F32:$src2)),
+ (F2_sfcmpgt F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setolt F32:$src1, fpimm:$src2)),
+ (F2_sfcmpgt (f32 (TFRI_f fpimm:$src2)), F32:$src1)>;
+ def: Pat<(i1 (setolt F64:$src1, F64:$src2)),
+ (F2_dfcmpgt F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setolt F64:$src1, fpimm:$src2)),
+ (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
+}
+
+// Unordered lt.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (setult F32:$src1, F32:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+ (F2_sfcmpgt F32:$src2, F32:$src1))>;
+ def: Pat<(i1 (setult F32:$src1, fpimm:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)),
+ (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1))>;
+ def: Pat<(i1 (setult F64:$src1, F64:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+ (F2_dfcmpgt F64:$src2, F64:$src1))>;
+ def: Pat<(i1 (setult F64:$src1, fpimm:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)),
+ (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1))>;
+}
+
+// Ordered le.
+let Predicates = [HasV5T] in {
+ // rs <= rt -> rt >= rs.
+ def: Pat<(i1 (setole F32:$src1, F32:$src2)),
+ (F2_sfcmpge F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setole F32:$src1, fpimm:$src2)),
+ (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>;
+
+ // Rss <= Rtt -> Rtt >= Rss.
+ def: Pat<(i1 (setole F64:$src1, F64:$src2)),
+ (F2_dfcmpge F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setole F64:$src1, fpimm:$src2)),
+ (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
+}
+
+// Unordered le.
+let Predicates = [HasV5T] in {
+// rs <= rt -> rt >= rs.
+ def: Pat<(i1 (setule F32:$src1, F32:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+ (F2_sfcmpge F32:$src2, F32:$src1))>;
+ def: Pat<(i1 (setule F32:$src1, fpimm:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)),
+ (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1))>;
+ def: Pat<(i1 (setule F64:$src1, F64:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+ (F2_dfcmpge F64:$src2, F64:$src1))>;
+ def: Pat<(i1 (setule F64:$src1, fpimm:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)),
+ (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1))>;
+}
+
+// Ordered ne.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (setone F32:$src1, F32:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
+ def: Pat<(i1 (setone F64:$src1, F64:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
+ def: Pat<(i1 (setone F32:$src1, fpimm:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>;
+ def: Pat<(i1 (setone F64:$src1, fpimm:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>;
+}
+
+// Unordered ne.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (setune F32:$src1, F32:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+ (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>;
+ def: Pat<(i1 (setune F64:$src1, F64:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+ (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>;
+ def: Pat<(i1 (setune F32:$src1, fpimm:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2))))>;
+ def: Pat<(i1 (setune F64:$src1, fpimm:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1,
+ (CONST64_Float_Real fpimm:$src2))))>;
+}
+
+// Besides the set[o|u][comparisons] above, we also need the plain set[comparisons].
+let Predicates = [HasV5T] in {
+ // lt.
+ def: Pat<(i1 (setlt F32:$src1, F32:$src2)),
+ (F2_sfcmpgt F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setlt F32:$src1, fpimm:$src2)),
+ (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1)>;
+ def: Pat<(i1 (setlt F64:$src1, F64:$src2)),
+ (F2_dfcmpgt F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setlt F64:$src1, fpimm:$src2)),
+ (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
+
+ // le.
+ // rs <= rt -> rt >= rs.
+ def: Pat<(i1 (setle F32:$src1, F32:$src2)),
+ (F2_sfcmpge F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setle F32:$src1, fpimm:$src2)),
+ (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>;
+
+ // Rss <= Rtt -> Rtt >= Rss.
+ def: Pat<(i1 (setle F64:$src1, F64:$src2)),
+ (F2_dfcmpge F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setle F64:$src1, fpimm:$src2)),
+ (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
+
+ // ne.
+ def: Pat<(i1 (setne F32:$src1, F32:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
+ def: Pat<(i1 (setne F64:$src1, F64:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
+ def: Pat<(i1 (setne F32:$src1, fpimm:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>;
+ def: Pat<(i1 (setne F64:$src1, fpimm:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>;
+}
+
+// F2 convert template classes:
+let isFP = 1 in
+class F2_RDD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
+ SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
+ string chop ="">
+ : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss),
+ "$Rdd = "#mnemonic#"($Rss)"#chop,
+ [(set RCOut:$Rdd, (Op RCIn:$Rss))], "",
+ S_2op_tc_3or4x_SLOT23> {
+ bits<5> Rdd;
+ bits<5> Rss;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b0000111;
+ let Inst{20-16} = Rss;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rdd;
+ }
+
+let isFP = 1 in
+class F2_RDD_RS_CONVERT<string mnemonic, bits<3> MinOp,
+ SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
+ string chop ="">
+ : SInst <(outs DoubleRegs:$Rdd), (ins IntRegs:$Rs),
+ "$Rdd = "#mnemonic#"($Rs)"#chop,
+ [(set RCOut:$Rdd, (Op RCIn:$Rs))], "",
+ S_2op_tc_3or4x_SLOT23> {
+ bits<5> Rdd;
+ bits<5> Rs;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b0100100;
+ let Inst{20-16} = Rs;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rdd;
+ }
+
+let isFP = 1, hasNewValue = 1 in
+class F2_RD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
+ SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
+ string chop ="">
+ : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
+ "$Rd = "#mnemonic#"($Rss)"#chop,
+ [(set RCOut:$Rd, (Op RCIn:$Rss))], "",
+ S_2op_tc_3or4x_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rss;
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = 0b1000;
+ let Inst{23-21} = MinOp;
+ let Inst{20-16} = Rss;
+ let Inst{7-5} = 0b001;
+ let Inst{4-0} = Rd;
+ }
+
+let isFP = 1, hasNewValue = 1 in
+class F2_RD_RS_CONVERT<string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
+ string chop ="">
+ : SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs),
+ "$Rd = "#mnemonic#"($Rs)"#chop,
+ [(set RCOut:$Rd, (Op RCIn:$Rs))], "",
+ S_2op_tc_3or4x_SLOT23> {
+ bits<5> Rd;
+ bits<5> Rs;
+
+ let IClass = 0b1000;
+
+ let Inst{27-24} = 0b1011;
+ let Inst{23-21} = MajOp;
+ let Inst{20-16} = Rs;
+ let Inst{7-5} = MinOp;
+ let Inst{4-0} = Rd;
+ }
+
+// Convert single precision to double precision and vice-versa.
+def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000,
+ fextend, F64, F32>;
+
+def F2_conv_df2sf : F2_RD_RSS_CONVERT <"convert_df2sf", 0b000,
+ fround, F32, F64>;
+
+// Convert Integer to Floating Point.
+def F2_conv_d2sf : F2_RD_RSS_CONVERT <"convert_d2sf", 0b010,
+ sint_to_fp, F32, I64>;
+def F2_conv_ud2sf : F2_RD_RSS_CONVERT <"convert_ud2sf", 0b001,
+ uint_to_fp, F32, I64>;
+def F2_conv_uw2sf : F2_RD_RS_CONVERT <"convert_uw2sf", 0b001, 0b000,
+ uint_to_fp, F32, I32>;
+def F2_conv_w2sf : F2_RD_RS_CONVERT <"convert_w2sf", 0b010, 0b000,
+ sint_to_fp, F32, I32>;
+def F2_conv_d2df : F2_RDD_RSS_CONVERT <"convert_d2df", 0b011,
+ sint_to_fp, F64, I64>;
+def F2_conv_ud2df : F2_RDD_RSS_CONVERT <"convert_ud2df", 0b010,
+ uint_to_fp, F64, I64>;
+def F2_conv_uw2df : F2_RDD_RS_CONVERT <"convert_uw2df", 0b001,
+ uint_to_fp, F64, I32>;
+def F2_conv_w2df : F2_RDD_RS_CONVERT <"convert_w2df", 0b010,
+ sint_to_fp, F64, I32>;
+
+// Convert Floating Point to Integer: chopped (truncating) - the default forms.
+def F2_conv_df2uw_chop : F2_RD_RSS_CONVERT <"convert_df2uw", 0b101,
+ fp_to_uint, I32, F64, ":chop">;
+def F2_conv_df2w_chop : F2_RD_RSS_CONVERT <"convert_df2w", 0b111,
+ fp_to_sint, I32, F64, ":chop">;
+def F2_conv_sf2uw_chop : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b001,
+ fp_to_uint, I32, F32, ":chop">;
+def F2_conv_sf2w_chop : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b001,
+ fp_to_sint, I32, F32, ":chop">;
+def F2_conv_df2d_chop : F2_RDD_RSS_CONVERT <"convert_df2d", 0b110,
+ fp_to_sint, I64, F64, ":chop">;
+def F2_conv_df2ud_chop : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b111,
+ fp_to_uint, I64, F64, ":chop">;
+def F2_conv_sf2d_chop : F2_RDD_RS_CONVERT <"convert_sf2d", 0b110,
+ fp_to_sint, I64, F32, ":chop">;
+def F2_conv_sf2ud_chop : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b101,
+ fp_to_uint, I64, F32, ":chop">;
+
+// Convert Floating Point to Integer: non-chopped.
+let AddedComplexity = 20, Predicates = [HasV5T, IEEERndNearV5T] in {
+ def F2_conv_df2d : F2_RDD_RSS_CONVERT <"convert_df2d", 0b000,
+ fp_to_sint, I64, F64>;
+ def F2_conv_df2ud : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b001,
+ fp_to_uint, I64, F64>;
+ def F2_conv_sf2ud : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b011,
+ fp_to_uint, I64, F32>;
+ def F2_conv_sf2d : F2_RDD_RS_CONVERT <"convert_sf2d", 0b100,
+ fp_to_sint, I64, F32>;
+ def F2_conv_df2uw : F2_RD_RSS_CONVERT <"convert_df2uw", 0b011,
+ fp_to_uint, I32, F64>;
+ def F2_conv_df2w : F2_RD_RSS_CONVERT <"convert_df2w", 0b100,
+ fp_to_sint, I32, F64>;
+ def F2_conv_sf2uw : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b000,
+ fp_to_uint, I32, F32>;
+ def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000,
+ fp_to_sint, I32, F32>;
+}
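
(Editorial note, not part of this change.) The two groups above distinguish the ":chop" conversions, which truncate toward zero, from the non-chopped forms, which are only selected when the IEEERndNearV5T predicate guarantees round-to-nearest; that reading of the suffixes is an assumption here, not stated in the patch. A host-side C++ illustration of the two rounding behaviours:

    #include <cfenv>
    #include <cmath>
    #include <cstdio>

    int main() {
      std::fesetround(FE_TONEAREST);       // round-to-nearest-even
      long chopped = (long)2.7;            // ":chop": truncate toward zero -> 2
      long nearest = std::lrint(2.7);      // round to nearest -> 3
      long tie     = std::lrint(2.5);      // nearest-even on a tie -> 2
      std::printf("%ld %ld %ld\n", chopped, nearest, tie);
      return 0;
    }
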
+
+// Fix up radicand.
+let isFP = 1, hasNewValue = 1 in
+def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs),
+ "$Rd = sffixupr($Rs)",
+ [], "" , S_2op_tc_3or4x_SLOT23>, Requires<[HasV5T]> {
+ bits<5> Rd;
+ bits<5> Rs;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b1011101;
+ let Inst{20-16} = Rs;
+ let Inst{7-5} = 0b000;
+ let Inst{4-0} = Rd;
+ }
+
+// Bitcast is different from [fp|sint|uint]_to_[sint|uint|fp].
+let Predicates = [HasV5T] in {
+ def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>;
+ def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>;
+ def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>;
+ def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>;
+}
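
(Editorial illustration, not part of this change.) The patterns above treat bitconvert as a plain register reuse because it reinterprets bits rather than converting the value, unlike the fp_to_sint/fp_to_uint conversions earlier. A small host-side C++ comparison of the two operations:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      float f = 1.0f;
      int32_t value = (int32_t)f;              // fp_to_sint: numeric conversion -> 1
      int32_t bits;
      std::memcpy(&bits, &f, sizeof bits);     // bitconvert: raw IEEE-754 bits -> 0x3F800000
      std::printf("%d 0x%08X\n", value, (unsigned)bits);
      return 0;
    }
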
+
+// F2_sffma: Floating-point fused multiply add.
+let isFP = 1, hasNewValue = 1 in
+class T_sfmpy_acc <bit isSub, bit isLib>
+ : MInst<(outs IntRegs:$Rx),
+ (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
+ "$Rx "#!if(isSub, "-=","+=")#" sfmpy($Rs, $Rt)"#!if(isLib, ":lib",""),
+ [], "$dst2 = $Rx" , M_tc_3_SLOT23 > ,
+ Requires<[HasV5T]> {
+ bits<5> Rx;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ let IClass = 0b1110;
+
+ let Inst{27-21} = 0b1111000;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = Rt;
+ let Inst{7} = 0b1;
+ let Inst{6} = isLib;
+ let Inst{5} = isSub;
+ let Inst{4-0} = Rx;
+ }
+
+def F2_sffma: T_sfmpy_acc <0, 0>;
+def F2_sffms: T_sfmpy_acc <1, 0>;
+def F2_sffma_lib: T_sfmpy_acc <0, 1>;
+def F2_sffms_lib: T_sfmpy_acc <1, 1>;
+
+def : Pat <(f32 (fma F32:$src2, F32:$src3, F32:$src1)),
+ (F2_sffma F32:$src1, F32:$src2, F32:$src3)>;
+
+// Floating-point fused multiply add w/ additional scaling (2**pu).
+let isFP = 1, hasNewValue = 1 in
+def F2_sffma_sc: MInst <
+ (outs IntRegs:$Rx),
+ (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt, PredRegs:$Pu),
+ "$Rx += sfmpy($Rs, $Rt, $Pu):scale" ,
+ [], "$dst2 = $Rx" , M_tc_3_SLOT23 > ,
+ Requires<[HasV5T]> {
+ bits<5> Rx;
+ bits<5> Rs;
+ bits<5> Rt;
+ bits<2> Pu;
+
+ let IClass = 0b1110;
+
+ let Inst{27-21} = 0b1111011;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = Rt;
+ let Inst{7} = 0b1;
+ let Inst{6-5} = Pu;
+ let Inst{4-0} = Rx;
+ }
+
+let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 3,
+ isPseudo = 1, InputType = "imm" in
+def MUX_ir_f : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, f32Ext:$src3),
+ "$dst = mux($src1, $src2, #$src3)",
+ [(set F32:$dst, (f32 (select I1:$src1, F32:$src2, fpimm:$src3)))]>,
+ Requires<[HasV5T]>;
+
+let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 2,
+ isPseudo = 1, InputType = "imm" in
+def MUX_ri_f : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, f32Ext:$src2, IntRegs:$src3),
+ "$dst = mux($src1, #$src2, $src3)",
+ [(set F32:$dst, (f32 (select I1:$src1, fpimm:$src2, F32:$src3)))]>,
+ Requires<[HasV5T]>;
+
+def: Pat<(select I1:$src1, F32:$src2, F32:$src3),
+ (C2_mux I1:$src1, F32:$src2, F32:$src3)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4),
+ (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src4, F32:$src3)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(select I1:$src1, F64:$src2, F64:$src3),
+ (C2_vmux I1:$src1, F64:$src2, F64:$src3)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4),
+ (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>,
+ Requires<[HasV5T]>;
+
+// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
+//   => r0 = MUX_ir_f(p0, r1, #i)
+def: Pat<(select (not I1:$src1), fpimm:$src2, F32:$src3),
+ (MUX_ir_f I1:$src1, F32:$src3, fpimm:$src2)>,
+ Requires<[HasV5T]>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
+//   => r0 = MUX_ri_f(p0, #i, r1)
+def: Pat<(select (not I1:$src1), F32:$src2, fpimm:$src3),
+ (MUX_ri_f I1:$src1, fpimm:$src3, F32:$src2)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(i32 (fp_to_sint F64:$src1)),
+ (LoReg (F2_conv_df2d_chop F64:$src1))>,
+ Requires<[HasV5T]>;
+
+//===----------------------------------------------------------------------===//
+// :natural forms of vasrh and vasrhub insns
+//===----------------------------------------------------------------------===//
+// S5_asrhub_rnd_sat: Vector arithmetic shift right by immediate with round,
+// saturate, and pack.
+let Defs = [USR_OVF], hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+class T_ASRHUB<bit isSat>
+ : SInst <(outs IntRegs:$Rd),
+ (ins DoubleRegs:$Rss, u4Imm:$u4),
+ "$Rd = vasrhub($Rss, #$u4):"#!if(isSat, "sat", "raw"),
+ [], "", S_2op_tc_2_SLOT23>,
+ Requires<[HasV5T]> {
+ bits<5> Rd;
+ bits<5> Rss;
+ bits<4> u4;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b1000011;
+ let Inst{20-16} = Rss;
+ let Inst{13-12} = 0b00;
+ let Inst{11-8} = u4;
+ let Inst{7-6} = 0b10;
+ let Inst{5} = isSat;
+ let Inst{4-0} = Rd;
+ }
+
+def S5_asrhub_rnd_sat : T_ASRHUB <0>;
+def S5_asrhub_sat : T_ASRHUB <1>;
+
+let isAsmParserOnly = 1 in
+def S5_asrhub_rnd_sat_goodsyntax
+ : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, u4Imm:$u4),
+ "$Rd = vasrhub($Rss, #$u4):rnd:sat">, Requires<[HasV5T]>;
+
+// S5_vasrhrnd: Vector arithmetic shift right by immediate with round.
+let hasSideEffects = 0 in
+def S5_vasrhrnd : SInst <(outs DoubleRegs:$Rdd),
+ (ins DoubleRegs:$Rss, u4Imm:$u4),
+ "$Rdd = vasrh($Rss, #$u4):raw">,
+ Requires<[HasV5T]> {
+ bits<5> Rdd;
+ bits<5> Rss;
+ bits<4> u4;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b0000001;
+ let Inst{20-16} = Rss;
+ let Inst{13-12} = 0b00;
+ let Inst{11-8} = u4;
+ let Inst{7-5} = 0b000;
+ let Inst{4-0} = Rdd;
+ }
+
+let isAsmParserOnly = 1 in
+def S5_vasrhrnd_goodsyntax
+ : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, u4Imm:$u4),
+ "$Rdd = vasrh($Rss,#$u4):rnd">, Requires<[HasV5T]>;
+
+// Floating point reciprocal square root approximation
+let Uses = [USR], isPredicateLate = 1, isFP = 1,
+ hasSideEffects = 0, hasNewValue = 1, opNewValue = 0,
+ validSubTargets = HasV5SubT in
+def F2_sfinvsqrta: SInst <
+ (outs IntRegs:$Rd, PredRegs:$Pe),
+ (ins IntRegs:$Rs),
+ "$Rd, $Pe = sfinvsqrta($Rs)" > ,
+ Requires<[HasV5T]> {
+ bits<5> Rd;
+ bits<2> Pe;
+ bits<5> Rs;
+
+ let IClass = 0b1000;
+
+ let Inst{27-21} = 0b1011111;
+ let Inst{20-16} = Rs;
+ let Inst{7} = 0b0;
+ let Inst{6-5} = Pe;
+ let Inst{4-0} = Rd;
+ }
+
+// Complex multiply 32x16
+let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in {
+ def M4_cmpyi_whc : T_S3op_8<"cmpyiwh", 0b101, 1, 1, 1, 1>;
+ def M4_cmpyr_whc : T_S3op_8<"cmpyrwh", 0b111, 1, 1, 1, 1>;
+}
+
+// Classify floating-point value
+let isFP = 1 in
+ def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>;
+
+let isFP = 1 in
+def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5),
+ "$Pd = dfclass($Rss, #$u5)",
+ [], "" , ALU64_tc_2early_SLOT23 > , Requires<[HasV5T]> {
+ bits<2> Pd;
+ bits<5> Rss;
+ bits<5> u5;
+
+ let IClass = 0b1101;
+ let Inst{27-21} = 0b1100100;
+ let Inst{20-16} = Rss;
+ let Inst{12-10} = 0b000;
+ let Inst{9-5} = u5;
+ let Inst{4-3} = 0b10;
+ let Inst{1-0} = Pd;
+ }
+
+// Instructions to create floating point constant
+class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg>
+ : ALU64Inst<(outs RC:$dst), (ins u10Imm:$src),
+ "$dst = "#mnemonic#"(#$src)"#!if(isNeg, ":neg", ":pos"),
+ [], "", ALU64_tc_3x_SLOT23>, Requires<[HasV5T]> {
+ bits<5> dst;
+ bits<10> src;
+
+ let IClass = 0b1101;
+ let Inst{27-24} = RegType;
+ let Inst{23} = 0b0;
+ let Inst{22} = isNeg;
+ let Inst{21} = src{9};
+ let Inst{13-5} = src{8-0};
+ let Inst{4-0} = dst;
+ }
+
+let hasNewValue = 1, opNewValue = 0 in {
+def F2_sfimm_p : T_fimm <"sfmake", IntRegs, 0b0110, 0>;
+def F2_sfimm_n : T_fimm <"sfmake", IntRegs, 0b0110, 1>;
+}
+
+def F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>;
+def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>;
+
+def : Pat <(fabs (f32 IntRegs:$src1)),
+ (S2_clrbit_i (f32 IntRegs:$src1), 31)>,
+ Requires<[HasV5T]>;
+
+def : Pat <(fneg (f32 IntRegs:$src1)),
+ (S2_togglebit_i (f32 IntRegs:$src1), 31)>,
+ Requires<[HasV5T]>;
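
(Editorial illustration, not part of this change.) The final two patterns implement fabs and fneg on f32 by clearing or toggling the IEEE-754 sign bit (bit 31) with S2_clrbit_i / S2_togglebit_i. A host-side C++ version of the same bit trick; the helper name is made up:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Clearing bit 31 of an IEEE-754 single gives fabs; toggling it gives fneg.
    static float with_bit31(float f, bool toggle) {
      uint32_t u;
      std::memcpy(&u, &f, sizeof u);
      u = toggle ? (u ^ 0x80000000u) : (u & ~0x80000000u);
      std::memcpy(&f, &u, sizeof f);
      return f;
    }

    int main() {
      std::printf("%g %g\n", with_bit31(-3.5f, false),  // clrbit(31)    -> 3.5
                             with_bit31(3.5f, true));   // togglebit(31) -> -3.5
      return 0;
    }
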
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td
new file mode 100644
index 0000000..897ada0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td
@@ -0,0 +1,2241 @@
+//=- HexagonInstrInfoV60.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V60 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Vector store
+let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+{
+ class VSTInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CVI_VM_ST,
+ IType type = TypeCVI_VM_ST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>, OpcodeHexagon;
+
+}
+
+// Vector load
+let Predicates = [HasV60T, UseHVX] in
+let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+ class V6_LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CVI_VM_LD,
+ IType type = TypeCVI_VM_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+let Predicates = [HasV60T, UseHVX] in
+let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+class V6_STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CVI_VM_ST,
+ IType type = TypeCVI_VM_ST>
+: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+//===----------------------------------------------------------------------===//
+// Vector loads with base + immediate offset
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset, accessSize = Vector64Access in
+class T_vload_ai<string asmStr>
+ : V6_LDInst <(outs VectorRegs:$dst), (ins IntRegs:$src1, s4_6Imm:$src2),
+ asmStr>;
+
+let isCodeGenOnly = 1, addrMode = BaseImmOffset, accessSize = Vector128Access in
+class T_vload_ai_128B<string asmStr>
+ : V6_LDInst <(outs VectorRegs128B:$dst), (ins IntRegs:$src1, s4_7Imm:$src2),
+ asmStr>;
+
+let isCVLoadable = 1, hasNewValue = 1 in {
+ def V6_vL32b_ai : T_vload_ai <"$dst = vmem($src1+#$src2)">,
+ V6_vL32b_ai_enc;
+ def V6_vL32b_nt_ai : T_vload_ai <"$dst = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_ai_enc;
+ // 128B
+ def V6_vL32b_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2)">,
+ V6_vL32b_ai_128B_enc;
+ def V6_vL32b_nt_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_ai_128B_enc;
+}
+
+let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU, hasNewValue = 1 in {
+ def V6_vL32Ub_ai : T_vload_ai <"$dst = vmemu($src1+#$src2)">,
+ V6_vL32Ub_ai_enc;
+ def V6_vL32Ub_ai_128B : T_vload_ai_128B <"$dst = vmemu($src1+#$src2)">,
+ V6_vL32Ub_ai_128B_enc;
+}
+
+let Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD, isCVLoad = 1,
+ hasNewValue = 1 in {
+ def V6_vL32b_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2)">,
+ V6_vL32b_cur_ai_enc;
+ def V6_vL32b_nt_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_cur_ai_enc;
+ // 128B
+ def V6_vL32b_cur_ai_128B : T_vload_ai_128B
+ <"$dst.cur = vmem($src1+#$src2)">,
+ V6_vL32b_cur_ai_128B_enc;
+ def V6_vL32b_nt_cur_ai_128B : T_vload_ai_128B
+ <"$dst.cur = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_cur_ai_128B_enc;
+}
+
+
+let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD, hasNewValue = 1 in {
+ def V6_vL32b_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2)">,
+ V6_vL32b_tmp_ai_enc;
+ def V6_vL32b_nt_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_tmp_ai_enc;
+ // 128B
+ def V6_vL32b_tmp_ai_128B : T_vload_ai_128B
+ <"$dst.tmp = vmem($src1+#$src2)">,
+ V6_vL32b_tmp_ai_128B_enc;
+  def V6_vL32b_nt_tmp_ai_128B : T_vload_ai_128B
+                                     <"$dst.tmp = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_tmp_ai_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector stores with base + immediate offset - unconditional
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset, accessSize = Vector64Access in
+class T_vstore_ai <string mnemonic, string baseOp, Operand ImmOp,
+ RegisterClass RC, bit isNT>
+ : V6_STInst <(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ mnemonic#"($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3">, NewValueRel {
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_ai_64B <string mnemonic, string baseOp, bit isNT = 0>
+ : T_vstore_ai <mnemonic, baseOp, s4_6Imm, VectorRegs, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_ai_128B <string mnemonic, string baseOp, bit isNT = 0>
+ : T_vstore_ai <mnemonic, baseOp#"128B", s4_7Imm, VectorRegs128B, isNT>;
+
+let isNVStorable = 1 in {
+ def V6_vS32b_ai : T_vstore_ai_64B <"vmem", "vS32b_ai">,
+ V6_vS32b_ai_enc;
+ def V6_vS32b_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai">,
+ V6_vS32b_ai_128B_enc;
+}
+
+let isNVStorable = 1, isNonTemporal = 1 in {
+ def V6_vS32b_nt_ai : T_vstore_ai_64B <"vmem", "vS32b_ai", 1>,
+ V6_vS32b_nt_ai_enc;
+ def V6_vS32b_nt_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai", 1>,
+ V6_vS32b_nt_ai_128B_enc;
+}
+
+let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
+ def V6_vS32Ub_ai : T_vstore_ai_64B <"vmemu", "vs32Ub_ai">,
+ V6_vS32Ub_ai_enc;
+ def V6_vS32Ub_ai_128B : T_vstore_ai_128B <"vmemu", "vs32Ub_ai">,
+ V6_vS32Ub_ai_128B_enc;
+}
+//===----------------------------------------------------------------------===//
+// Vector stores with base + immediate offset - unconditional new
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset, isNewValue = 1, opNewValue = 2, isNVStore = 1,
+ Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST in
+class T_vstore_new_ai <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT>
+ : V6_STInst <(outs ), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ "vmem($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3.new">, NewValueRel {
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_new_ai_64B <string baseOp, bit isNT = 0>
+ : T_vstore_new_ai <baseOp, s4_6Imm, VectorRegs, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_new_ai_128B <string baseOp, bit isNT = 0>
+ : T_vstore_new_ai <baseOp#"128B", s4_7Imm, VectorRegs128B, isNT>;
+
+def V6_vS32b_new_ai : T_vstore_new_ai_64B <"vS32b_ai">, V6_vS32b_new_ai_enc;
+def V6_vS32b_new_ai_128B : T_vstore_new_ai_128B <"vS32b_ai">,
+ V6_vS32b_new_ai_128B_enc;
+
+let isNonTemporal = 1 in {
+ def V6_vS32b_nt_new_ai : T_vstore_new_ai_64B<"vS32b_ai", 1>,
+ V6_vS32b_nt_new_ai_enc;
+ def V6_vS32b_nt_new_ai_128B : T_vstore_new_ai_128B<"vS32b_ai", 1>,
+ V6_vS32b_nt_new_ai_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector stores with base + immediate offset - conditional
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset, isPredicated = 1 in
+class T_vstore_pred_ai <string mnemonic, string baseOp, Operand ImmOp,
+ RegisterClass RC, bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst <(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) "
+ #mnemonic#"($src2+#$src3)"#!if(isNT, ":nt", "")#" = $src4">, NewValueRel {
+ let isPredicatedFalse = isPredNot;
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_pred_ai_64B <string mnemonic, string baseOp,
+ bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_pred_ai <mnemonic, baseOp, s4_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_pred_ai_128B <string mnemonic, string baseOp,
+ bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_pred_ai <mnemonic, baseOp#"128B", s4_7Imm, VectorRegs128B,
+ isPredNot, isNT>;
+
+let isNVStorable = 1 in {
+ def V6_vS32b_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai">,
+ V6_vS32b_pred_ai_enc;
+ def V6_vS32b_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1>,
+ V6_vS32b_npred_ai_enc;
+ // 128B
+ def V6_vS32b_pred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai">,
+ V6_vS32b_pred_ai_128B_enc;
+ def V6_vS32b_npred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai", 1>,
+ V6_vS32b_npred_ai_128B_enc;
+}
+let isNVStorable = 1, isNonTemporal = 1 in {
+ def V6_vS32b_nt_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 0, 1>,
+ V6_vS32b_nt_pred_ai_enc;
+ def V6_vS32b_nt_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1, 1>,
+ V6_vS32b_nt_npred_ai_enc;
+ // 128B
+ def V6_vS32b_nt_pred_ai_128B : T_vstore_pred_ai_128B
+ <"vmem", "vS32b_ai", 0, 1>,
+ V6_vS32b_nt_pred_ai_128B_enc;
+ def V6_vS32b_nt_npred_ai_128B : T_vstore_pred_ai_128B
+ <"vmem", "vS32b_ai", 1, 1>,
+ V6_vS32b_nt_npred_ai_128B_enc;
+}
+
+let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
+ def V6_vS32Ub_pred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai">,
+ V6_vS32Ub_pred_ai_enc;
+ def V6_vS32Ub_npred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai", 1>,
+ V6_vS32Ub_npred_ai_enc;
+ // 128B
+ def V6_vS32Ub_pred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai">,
+ V6_vS32Ub_pred_ai_128B_enc;
+ def V6_vS32Ub_npred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai", 1>,
+ V6_vS32Ub_npred_ai_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector stores with base + immediate offset - byte-enabled aligned
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset in
+class T_vstore_qpred_ai <Operand ImmOp, RegisterClass RC,
+ bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst <(outs),
+ (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)"
+ #!if(isNT, ":nt", "")#" = $src4"> {
+ let isPredicatedFalse = isPredNot;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_qpred_ai_64B <bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_qpred_ai <s4_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_qpred_ai_128B <bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_qpred_ai <s4_7Imm, VectorRegs128B, isPredNot, isNT>;
+
+def V6_vS32b_qpred_ai : T_vstore_qpred_ai_64B, V6_vS32b_qpred_ai_enc;
+def V6_vS32b_nqpred_ai : T_vstore_qpred_ai_64B <1>,
+ V6_vS32b_nqpred_ai_enc;
+def V6_vS32b_nt_qpred_ai : T_vstore_qpred_ai_64B <0, 1>,
+ V6_vS32b_nt_qpred_ai_enc;
+def V6_vS32b_nt_nqpred_ai : T_vstore_qpred_ai_64B <1, 1>,
+ V6_vS32b_nt_nqpred_ai_enc;
+// 128B
+def V6_vS32b_qpred_ai_128B : T_vstore_qpred_ai_128B, V6_vS32b_qpred_ai_128B_enc;
+def V6_vS32b_nqpred_ai_128B : T_vstore_qpred_ai_128B<1>,
+ V6_vS32b_nqpred_ai_128B_enc;
+def V6_vS32b_nt_qpred_ai_128B : T_vstore_qpred_ai_128B<0, 1>,
+ V6_vS32b_nt_qpred_ai_128B_enc;
+def V6_vS32b_nt_nqpred_ai_128B : T_vstore_qpred_ai_128B<1, 1>,
+ V6_vS32b_nt_nqpred_ai_128B_enc;
+
+
+//===----------------------------------------------------------------------===//
+// Vector stores with base + immediate offset - conditional new
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset, isPredicated = 1, isNewValue = 1, opNewValue = 3,
+ isNVStore = 1, Type = TypeCVI_VM_NEW_ST, Itinerary = CVI_VM_NEW_ST in
+class T_vstore_new_pred_ai <string baseOp, Operand ImmOp, RegisterClass RC,
+ bit isPredNot, bit isNT>
+ : V6_STInst <(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)"
+ #!if(isNT, ":nt", "")#" = $src4.new">, NewValueRel {
+ let isPredicatedFalse = isPredNot;
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_new_pred_ai_64B <string baseOp, bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_new_pred_ai <baseOp, s4_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_new_pred_ai_128B <string baseOp, bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_new_pred_ai <baseOp#"128B", s4_7Imm, VectorRegs128B,
+ isPredNot, isNT>;
+
+
+def V6_vS32b_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai">,
+ V6_vS32b_new_pred_ai_enc;
+def V6_vS32b_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1>,
+ V6_vS32b_new_npred_ai_enc;
+// 128B
+def V6_vS32b_new_pred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai">,
+ V6_vS32b_new_pred_ai_128B_enc;
+def V6_vS32b_new_npred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai", 1>,
+ V6_vS32b_new_npred_ai_128B_enc;
+let isNonTemporal = 1 in {
+ def V6_vS32b_nt_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 0, 1>,
+ V6_vS32b_nt_new_pred_ai_enc;
+ def V6_vS32b_nt_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1, 1>,
+ V6_vS32b_nt_new_npred_ai_enc;
+ // 128B
+ def V6_vS32b_nt_new_pred_ai_128B : T_vstore_new_pred_ai_128B
+ <"vS32b_ai", 0, 1>,
+ V6_vS32b_nt_new_pred_ai_128B_enc;
+ def V6_vS32b_nt_new_npred_ai_128B : T_vstore_new_pred_ai_128B
+ <"vS32b_ai", 1, 1>,
+ V6_vS32b_nt_new_npred_ai_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector loads with immediate offset.
+//===----------------------------------------------------------------------===//
+let addrMode = PostInc, hasNewValue = 1 in
+class T_vload_pi<string asmStr, Operand ImmOp, RegisterClass RC>
+ : V6_LDInst <(outs RC:$dst, IntRegs:$_dst_),
+ (ins IntRegs:$src1, ImmOp:$src2), asmStr, [],
+ "$src1 = $_dst_">;
+
+let accessSize = Vector64Access in
+class T_vload_pi_64B <string asmStr>
+ : T_vload_pi <asmStr, s3_6Imm, VectorRegs>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vload_pi_128B <string asmStr>
+ : T_vload_pi <asmStr, s3_7Imm, VectorRegs128B>;
+
+let isCVLoadable = 1 in {
+ def V6_vL32b_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2)">,
+ V6_vL32b_pi_enc;
+ def V6_vL32b_nt_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_pi_enc;
+ // 128B
+ def V6_vL32b_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2)">,
+ V6_vL32b_pi_128B_enc;
+ def V6_vL32b_nt_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_pi_128B_enc;
+}
+
+let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in {
+ def V6_vL32Ub_pi : T_vload_pi_64B <"$dst = vmemu($src1++#$src2)">,
+ V6_vL32Ub_pi_enc;
+ // 128B
+ def V6_vL32Ub_pi_128B : T_vload_pi_128B <"$dst = vmemu($src1++#$src2)">,
+ V6_vL32Ub_pi_128B_enc;
+}
+
+let isCVLoad = 1, Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD in {
+ def V6_vL32b_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2)">,
+ V6_vL32b_cur_pi_enc;
+ def V6_vL32b_nt_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_cur_pi_enc;
+ // 128B
+ def V6_vL32b_cur_pi_128B : T_vload_pi_128B
+ <"$dst.cur = vmem($src1++#$src2)">,
+ V6_vL32b_cur_pi_128B_enc;
+ def V6_vL32b_nt_cur_pi_128B : T_vload_pi_128B
+ <"$dst.cur = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_cur_pi_128B_enc;
+}
+
+let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in {
+ def V6_vL32b_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2)">,
+ V6_vL32b_tmp_pi_enc;
+ def V6_vL32b_nt_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_tmp_pi_enc;
+ //128B
+ def V6_vL32b_tmp_pi_128B : T_vload_pi_128B
+ <"$dst.tmp = vmem($src1++#$src2)">,
+ V6_vL32b_tmp_pi_128B_enc;
+ def V6_vL32b_nt_tmp_pi_128B : T_vload_pi_128B
+ <"$dst.tmp = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_tmp_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with immediate offset.
+//===----------------------------------------------------------------------===//
+let addrMode = PostInc in
+class T_vstore_pi <string mnemonic, string baseOp, Operand ImmOp,
+ RegisterClass RC, bit isNT>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ mnemonic#"($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3", [],
+ "$src1 = $_dst_">, NewValueRel;
+
+let accessSize = Vector64Access in
+class T_vstore_pi_64B <string mnemonic, string baseOp, bit isNT = 0>
+ : T_vstore_pi <mnemonic, baseOp, s3_6Imm, VectorRegs, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_pi_128B <string mnemonic, string baseOp, bit isNT = 0>
+ : T_vstore_pi <mnemonic, baseOp, s3_7Imm, VectorRegs128B, isNT>;
+
+let isNVStorable = 1 in {
+ def V6_vS32b_pi : T_vstore_pi_64B <"vmem", "vS32b_pi">, V6_vS32b_pi_enc;
+ def V6_vS32b_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi">,
+ V6_vS32b_pi_128B_enc;
+}
+
+let isNVStorable = 1 , isNonTemporal = 1 in {
+ def V6_vS32b_nt_pi : T_vstore_pi_64B <"vmem", "vS32b_pi", 1>,
+ V6_vS32b_nt_pi_enc;
+ def V6_vS32b_nt_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi", 1>,
+ V6_vS32b_nt_pi_128B_enc;
+}
+
+
+let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
+ def V6_vS32Ub_pi : T_vstore_pi_64B <"vmemu", "vS32Ub_pi">,
+ V6_vS32Ub_pi_enc;
+ def V6_vS32Ub_pi_128B : T_vstore_pi_128B <"vmemu", "vS32Ub_pi">,
+ V6_vS32Ub_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment unconditional .new vector stores with immediate offset.
+//===----------------------------------------------------------------------===//
+let addrMode = PostInc, isNVStore = 1 in
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1,
+ opNewValue = 3, isNVStore = 1 in
+class T_vstore_new_pi <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ "vmem($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [],
+ "$src1 = $_dst_">, NewValueRel {
+ let BaseOpcode = baseOp;
+}
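+// opNewValue = 3 marks $src3 (the stored vector) as the new-value operand,
+// i.e. a value produced earlier in the same packet.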
+
+let accessSize = Vector64Access in
+class T_vstore_new_pi_64B <string baseOp, bit isNT = 0>
+ : T_vstore_new_pi <baseOp, s3_6Imm, VectorRegs, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_new_pi_128B <string baseOp, bit isNT = 0>
+ : T_vstore_new_pi <baseOp#"128B", s3_7Imm, VectorRegs128B, isNT>;
+
+
+def V6_vS32b_new_pi : T_vstore_new_pi_64B <"vS32b_pi">,
+ V6_vS32b_new_pi_enc;
+def V6_vS32b_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi">,
+ V6_vS32b_new_pi_128B_enc;
+
+let isNonTemporal = 1 in {
+ def V6_vS32b_nt_new_pi : T_vstore_new_pi_64B <"vS32b_pi", 1>,
+ V6_vS32b_nt_new_pi_enc;
+ def V6_vS32b_nt_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi", 1>,
+ V6_vS32b_nt_new_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional vector stores with immediate offset
+//===----------------------------------------------------------------------===//
+let isPredicated = 1, addrMode = PostInc in
+class T_vstore_pred_pi <string mnemonic, string baseOp, Operand ImmOp,
+ RegisterClass RC, bit isPredNot, bit isNT>
+ : V6_STInst<(outs IntRegs:$_dst_),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++#$src3)"
+ #!if(isNT, ":nt", "")#" = $src4", [],
+ "$src2 = $_dst_">, NewValueRel {
+ let isPredicatedFalse = isPredNot;
+ let BaseOpcode = baseOp;
+}
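+// isPredicatedFalse selects the "if (!Pv)" form; the shared BaseOpcode lets
+// the NewValueRel mapping tables relate the true/false and .new variants.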
+
+let accessSize = Vector64Access in
+class T_vstore_pred_pi_64B <string mnemonic, string baseOp,
+ bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_pred_pi <mnemonic, baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_pred_pi_128B <string mnemonic, string baseOp,
+ bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_pred_pi <mnemonic, baseOp#"128B", s3_7Imm, VectorRegs128B,
+ isPredNot, isNT>;
+
+let isNVStorable = 1 in {
+ def V6_vS32b_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi">,
+ V6_vS32b_pred_pi_enc;
+ def V6_vS32b_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1>,
+ V6_vS32b_npred_pi_enc;
+ // 128B
+ def V6_vS32b_pred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi">,
+ V6_vS32b_pred_pi_128B_enc;
+ def V6_vS32b_npred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi", 1>,
+ V6_vS32b_npred_pi_128B_enc;
+}
+let isNVStorable = 1, isNonTemporal = 1 in {
+ def V6_vS32b_nt_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 0, 1>,
+ V6_vS32b_nt_pred_pi_enc;
+ def V6_vS32b_nt_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1, 1>,
+ V6_vS32b_nt_npred_pi_enc;
+ // 128B
+ def V6_vS32b_nt_pred_pi_128B : T_vstore_pred_pi_128B
+ <"vmem", "vS32b_pi", 0, 1>,
+ V6_vS32b_nt_pred_pi_128B_enc;
+ def V6_vS32b_nt_npred_pi_128B : T_vstore_pred_pi_128B
+ <"vmem", "vS32b_pi", 1, 1>,
+ V6_vS32b_nt_npred_pi_128B_enc;
+}
+
+let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
+ def V6_vS32Ub_pred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi">,
+ V6_vS32Ub_pred_pi_enc;
+ def V6_vS32Ub_npred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi", 1>,
+ V6_vS32Ub_npred_pi_enc;
+ // 128B
+ def V6_vS32Ub_pred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi">,
+ V6_vS32Ub_pred_pi_128B_enc;
+ def V6_vS32Ub_npred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi", 1>,
+ V6_vS32Ub_npred_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with immediate offset - byte-enabled aligned
+//===----------------------------------------------------------------------===//
+let addrMode = PostInc in
+class T_vstore_qpred_pi <Operand ImmOp, RegisterClass RC, bit isPredNot = 0,
+ bit isNT = 0>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)"
+ #!if(isNT, ":nt", "")#" = $src4", [],
+ "$src2 = $_dst_">;
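+// Here $src1 is a vector predicate (Q register): each predicate bit
+// byte-enables the corresponding byte of the store.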
+
+let accessSize = Vector64Access in
+class T_vstore_qpred_pi_64B <bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_qpred_pi <s3_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_qpred_pi_128B <bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_qpred_pi <s3_7Imm, VectorRegs128B, isPredNot, isNT>;
+
+def V6_vS32b_qpred_pi : T_vstore_qpred_pi_64B, V6_vS32b_qpred_pi_enc;
+def V6_vS32b_nqpred_pi : T_vstore_qpred_pi_64B <1>, V6_vS32b_nqpred_pi_enc;
+// 128B
+def V6_vS32b_qpred_pi_128B : T_vstore_qpred_pi_128B,
+ V6_vS32b_qpred_pi_128B_enc;
+def V6_vS32b_nqpred_pi_128B : T_vstore_qpred_pi_128B<1>,
+ V6_vS32b_nqpred_pi_128B_enc;
+
+let isNonTemporal = 1 in {
+ def V6_vS32b_nt_qpred_pi : T_vstore_qpred_pi_64B <0, 1>,
+ V6_vS32b_nt_qpred_pi_enc;
+ def V6_vS32b_nt_nqpred_pi : T_vstore_qpred_pi_64B <1, 1>,
+ V6_vS32b_nt_nqpred_pi_enc;
+ // 128B
+ def V6_vS32b_nt_qpred_pi_128B : T_vstore_qpred_pi_128B<0, 1>,
+ V6_vS32b_nt_qpred_pi_128B_enc;
+ def V6_vS32b_nt_nqpred_pi_128B : T_vstore_qpred_pi_128B<1, 1>,
+ V6_vS32b_nt_nqpred_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional .new vector stores with immediate offset
+//===----------------------------------------------------------------------===//
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1,
+ isNewValue = 1, opNewValue = 4, addrMode = PostInc, isNVStore = 1 in
+class T_vstore_new_pred_pi <string baseOp, Operand ImmOp, RegisterClass RC,
+ bit isPredNot, bit isNT>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)"
+ #!if(isNT, ":nt", "")#" = $src4.new", [],
+ "$src2 = $_dst_"> , NewValueRel {
+ let isPredicatedFalse = isPredNot;
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_new_pred_pi_64B <string baseOp, bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_new_pred_pi <baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_new_pred_pi_128B <string baseOp, bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_new_pred_pi <baseOp#"128B", s3_7Imm, VectorRegs128B,
+ isPredNot, isNT>;
+
+def V6_vS32b_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi">,
+ V6_vS32b_new_pred_pi_enc;
+def V6_vS32b_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1>,
+ V6_vS32b_new_npred_pi_enc;
+// 128B
+def V6_vS32b_new_pred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi">,
+ V6_vS32b_new_pred_pi_128B_enc;
+def V6_vS32b_new_npred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi", 1>,
+ V6_vS32b_new_npred_pi_128B_enc;
+let isNonTemporal = 1 in {
+ def V6_vS32b_nt_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 0, 1>,
+ V6_vS32b_nt_new_pred_pi_enc;
+ def V6_vS32b_nt_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1, 1>,
+ V6_vS32b_nt_new_npred_pi_enc;
+ // 128B
+ def V6_vS32b_nt_new_pred_pi_128B : T_vstore_new_pred_pi_128B
+ <"vS32b_pi", 0, 1>,
+ V6_vS32b_nt_new_pred_pi_128B_enc;
+ def V6_vS32b_nt_new_npred_pi_128B : T_vstore_new_pred_pi_128B
+ <"vS32b_pi", 1, 1>,
+ V6_vS32b_nt_new_npred_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector loads with register offset
+//===----------------------------------------------------------------------===//
+let hasNewValue = 1 in
+class T_vload_ppu<string asmStr>
+ : V6_LDInst <(outs VectorRegs:$dst, IntRegs:$_dst_),
+ (ins IntRegs:$src1, ModRegs:$src2), asmStr, [],
+ "$src1 = $_dst_">, NewValueRel;
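+// Register-offset post-increment: the base is advanced by a modifier
+// register (ModRegs, i.e. M0/M1) instead of an immediate.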
+
+let isCVLoadable = 1 in {
+ def V6_vL32b_ppu : T_vload_ppu <"$dst = vmem($src1++$src2)">,
+ V6_vL32b_ppu_enc;
+ def V6_vL32b_nt_ppu : T_vload_ppu <"$dst = vmem($src1++$src2):nt">,
+ V6_vL32b_nt_ppu_enc;
+}
+
+let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in
+def V6_vL32Ub_ppu : T_vload_ppu <"$dst = vmemu($src1++$src2)">,
+ V6_vL32Ub_ppu_enc;
+
+let isCVLoad = 1, Itinerary = CVI_VM_CUR_LD, Type = TypeCVI_VM_CUR_LD in {
+ def V6_vL32b_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2)">,
+ V6_vL32b_cur_ppu_enc;
+ def V6_vL32b_nt_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2):nt">,
+ V6_vL32b_nt_cur_ppu_enc;
+}
+
+let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in {
+ def V6_vL32b_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2)">,
+ V6_vL32b_tmp_ppu_enc;
+ def V6_vL32b_nt_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2):nt">,
+ V6_vL32b_nt_tmp_ppu_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with register offset
+//===----------------------------------------------------------------------===//
+class T_vstore_ppu <string mnemonic, bit isNT = 0>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
+ mnemonic#"($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3", [],
+ "$src1 = $_dst_">, NewValueRel;
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
+ def V6_vS32b_ppu : T_vstore_ppu <"vmem">,
+ V6_vS32b_ppu_enc;
+ let isNonTemporal = 1, BaseOpcode = "vS32b_ppu" in
+ def V6_vS32b_nt_ppu : T_vstore_ppu <"vmem", 1>,
+ V6_vS32b_nt_ppu_enc;
+}
+
+let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in
+def V6_vS32Ub_ppu : T_vstore_ppu <"vmemu">, V6_vS32Ub_ppu_enc;
+
+//===----------------------------------------------------------------------===//
+// Post increment .new vector stores with register offset
+//===----------------------------------------------------------------------===//
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1,
+ opNewValue = 3, isNVStore = 1 in
+class T_vstore_new_ppu <bit isNT = 0>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
+ "vmem($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [],
+ "$src1 = $_dst_">, NewValueRel;
+
+let BaseOpcode = "vS32b_ppu" in
+def V6_vS32b_new_ppu : T_vstore_new_ppu, V6_vS32b_new_ppu_enc;
+
+let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in
+def V6_vS32b_nt_new_ppu : T_vstore_new_ppu<1>, V6_vS32b_nt_new_ppu_enc;
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional vector stores with register offset
+//===----------------------------------------------------------------------===//
+let isPredicated = 1 in
+class T_vstore_pred_ppu <string mnemonic, bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst<(outs IntRegs:$_dst_),
+ (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++$src3)"
+ #!if(isNT, ":nt", "")#" = $src4", [],
+ "$src2 = $_dst_">, NewValueRel {
+ let isPredicatedFalse = isPredNot;
+}
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
+ def V6_vS32b_pred_ppu : T_vstore_pred_ppu<"vmem">, V6_vS32b_pred_ppu_enc;
+ def V6_vS32b_npred_ppu: T_vstore_pred_ppu<"vmem", 1>, V6_vS32b_npred_ppu_enc;
+}
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in {
+ def V6_vS32b_nt_pred_ppu : T_vstore_pred_ppu <"vmem", 0, 1>,
+ V6_vS32b_nt_pred_ppu_enc;
+ def V6_vS32b_nt_npred_ppu : T_vstore_pred_ppu <"vmem", 1, 1>,
+ V6_vS32b_nt_npred_ppu_enc;
+}
+
+let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU,
+ Type = TypeCVI_VM_STU in {
+ def V6_vS32Ub_pred_ppu : T_vstore_pred_ppu <"vmemu">,
+ V6_vS32Ub_pred_ppu_enc;
+ def V6_vS32Ub_npred_ppu : T_vstore_pred_ppu <"vmemu", 1>,
+ V6_vS32Ub_npred_ppu_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with register offset - byte-enabled aligned
+//===----------------------------------------------------------------------===//
+class T_vstore_qpred_ppu <bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins VecPredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)"
+ #!if(isNT, ":nt", "")#" = $src4", [],
+ "$src2 = $_dst_">, NewValueRel;
+
+def V6_vS32b_qpred_ppu : T_vstore_qpred_ppu, V6_vS32b_qpred_ppu_enc;
+def V6_vS32b_nqpred_ppu : T_vstore_qpred_ppu<1>, V6_vS32b_nqpred_ppu_enc;
+def V6_vS32b_nt_qpred_ppu : T_vstore_qpred_ppu<0, 1>,
+ V6_vS32b_nt_qpred_ppu_enc;
+def V6_vS32b_nt_nqpred_ppu : T_vstore_qpred_ppu<1, 1>,
+ V6_vS32b_nt_nqpred_ppu_enc;
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional .new vector stores with register offset
+//===----------------------------------------------------------------------===//
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1,
+ isNewValue = 1, opNewValue = 4, isNVStore = 1 in
+class T_vstore_new_pred_ppu <bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
+ "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)"
+ #!if(isNT, ":nt", "")#" = $src4.new", [],
+ "$src2 = $_dst_">, NewValueRel {
+ let isPredicatedFalse = isPredNot;
+}
+
+let BaseOpcode = "vS32b_ppu" in {
+ def V6_vS32b_new_pred_ppu : T_vstore_new_pred_ppu,
+ V6_vS32b_new_pred_ppu_enc;
+ def V6_vS32b_new_npred_ppu : T_vstore_new_pred_ppu<1>,
+ V6_vS32b_new_npred_ppu_enc;
+}
+
+let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in {
+def V6_vS32b_nt_new_pred_ppu : T_vstore_new_pred_ppu<0, 1>,
+ V6_vS32b_nt_new_pred_ppu_enc;
+def V6_vS32b_nt_new_npred_ppu : T_vstore_new_pred_ppu<1, 1>,
+ V6_vS32b_nt_new_npred_ppu_enc;
+}
+
+let isPseudo = 1, validSubTargets = HasV60SubT in
+class STrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC>:
+ VSTInst<(outs), (ins IntRegs:$addr, ImmOp:$off, RC:$src),
+  mnemonic#"($addr+#$off) = $src", []>;
+
+def STrivv_indexed: STrivv_template<"vvmem", s4_6Imm, VecDblRegs>,
+ Requires<[HasV60T, UseHVXSgl]>;
+def STrivv_indexed_128B: STrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>,
+ Requires<[HasV60T, UseHVXDbl]>;
+
+multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+ def : Pat<(store (VTSgl VecDblRegs:$src1), IntRegs:$addr),
+ (STrivv_indexed IntRegs:$addr, #0, (VTSgl VecDblRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+
+ def : Pat<(store (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
+ (STrivv_indexed_128B IntRegs:$addr, #0,
+ (VTDbl VecDblRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+}
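+// Instantiate the vector-pair store patterns for each legal element type
+// (single-mode type, 128B-mode type).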
+
+defm : STrivv_pats <v128i8, v256i8>;
+defm : STrivv_pats <v64i16, v128i16>;
+defm : STrivv_pats <v32i32, v64i32>;
+defm : STrivv_pats <v16i64, v32i64>;
+
+
+multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
+ // Aligned stores
+ def : Pat<(store (VTSgl VectorRegs:$src1), IntRegs:$addr),
+ (V6_vS32b_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+
+ // 128B Aligned stores
+ def : Pat<(store (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
+ (V6_vS32b_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+
+  // Fold Add R+imm into vector store.
+ let AddedComplexity = 10 in
+ def : Pat<(store (VTSgl VectorRegs:$src1),
+ (add IntRegs:$src2, s4_6ImmPred:$offset)),
+ (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset,
+ (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+
+  // Fold Add R+imm into vector store 128B.
+ let AddedComplexity = 10 in
+ def : Pat<(store (VTDbl VectorRegs128B:$src1),
+ (add IntRegs:$src2, s4_7ImmPred:$offset)),
+ (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
+ (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+}
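+// Same scheme for single-vector stores; the AddedComplexity = 10 patterns
+// additionally fold a reg+imm address into the store's offset field.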
+
+defm : vS32b_ai_pats <v64i8, v128i8>;
+defm : vS32b_ai_pats <v32i16, v64i16>;
+defm : vS32b_ai_pats <v16i32, v32i32>;
+defm : vS32b_ai_pats <v8i64, v16i64>;
+
+let isPseudo = 1, validSubTargets = HasV60SubT in
+class LDrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC>
+ : V6_LDInst <(outs RC:$dst), (ins IntRegs:$addr, ImmOp:$off),
+ "$dst="#mnemonic#"($addr+#$off)",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+
+def LDrivv_indexed: LDrivv_template<"vvmem", s4_6Imm, VecDblRegs>;
+def LDrivv_indexed_128B: LDrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>;
+
+multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+ def : Pat < (VTSgl (load IntRegs:$addr)),
+ (LDrivv_indexed IntRegs:$addr, #0) >,
+ Requires<[UseHVXSgl]>;
+
+ def : Pat < (VTDbl (load IntRegs:$addr)),
+ (LDrivv_indexed_128B IntRegs:$addr, #0) >,
+ Requires<[UseHVXDbl]>;
+}
+
+defm : LDrivv_pats <v128i8, v256i8>;
+defm : LDrivv_pats <v64i16, v128i16>;
+defm : LDrivv_pats <v32i32, v64i32>;
+defm : LDrivv_pats <v16i64, v32i64>;
+
+multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
+ // Aligned loads
+ def : Pat < (VTSgl (load IntRegs:$addr)),
+ (V6_vL32b_ai IntRegs:$addr, #0) >,
+ Requires<[UseHVXSgl]>;
+
+ // 128B Load
+ def : Pat < (VTDbl (load IntRegs:$addr)),
+ (V6_vL32b_ai_128B IntRegs:$addr, #0) >,
+ Requires<[UseHVXDbl]>;
+
+  // Fold Add R+imm into vector load.
+ let AddedComplexity = 10 in
+ def : Pat<(VTDbl (load (add IntRegs:$src2, s4_7ImmPred:$offset))),
+ (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
+ Requires<[UseHVXDbl]>;
+
+ let AddedComplexity = 10 in
+ def : Pat<(VTSgl (load (add IntRegs:$src2, s4_6ImmPred:$offset))),
+ (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
+ Requires<[UseHVXSgl]>;
+}
+
+defm : vL32b_ai_pats <v64i8, v128i8>;
+defm : vL32b_ai_pats <v32i16, v64i16>;
+defm : vL32b_ai_pats <v16i32, v32i32>;
+defm : vL32b_ai_pats <v8i64, v16i64>;
+
+// Store vector predicate pseudo.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def STriq_pred_V6 : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VecPredRegs:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+
+def STriq_pred_vec_V6 : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+
+def STriq_pred_V6_128B : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VecPredRegs128B:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
+def STriq_pred_vec_V6_128B : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Load vector predicate pseudo.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+ opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
+def LDriq_pred_V6 : LDInst<(outs VecPredRegs:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def LDriq_pred_vec_V6 : LDInst<(outs VectorRegs:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def LDriq_pred_V6_128B : LDInst<(outs VecPredRegs128B:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+def LDriq_pred_vec_V6_128B : LDInst<(outs VectorRegs128B:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Store vector pseudo.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def STriv_pseudo_V6 : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def STriv_pseudo_V6_128B : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def STrivv_pseudo_V6 : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VecDblRegs:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def STrivv_pseudo_V6_128B : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VecDblRegs128B:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Load vector pseudo.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+ opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
+def LDriv_pseudo_V6 : LDInst<(outs VectorRegs:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def LDriv_pseudo_V6_128B : LDInst<(outs VectorRegs128B:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+ opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
+def LDrivv_pseudo_V6 : LDInst<(outs VecDblRegs:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def LDrivv_pseudo_V6_128B : LDInst<(outs VecDblRegs128B:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+class VSELInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CVI_VA_DV,
+ IType type = TypeCVI_VA_DV>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
+def VSelectPseudo_V6 : VSELInst<(outs VectorRegs:$dst),
+ (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def VSelectDblPseudo_V6 : VSELInst<(outs VecDblRegs:$dst),
+ (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+}
+
+def : Pat <(v16i32 (selectcc (i32 IntRegs:$lhs), (i32 IntRegs:$rhs),
+ (v16i32 VectorRegs:$tval),
+ (v16i32 VectorRegs:$fval), SETEQ)),
+ (v16i32 (VSelectPseudo_V6 (i32 (C2_cmpeq (i32 IntRegs:$lhs),
+ (i32 IntRegs:$rhs))),
+ (v16i32 VectorRegs:$tval),
+ (v16i32 VectorRegs:$fval)))>;
+
+
+let hasNewValue = 1 in
+class T_vmpy <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2),
+ asmString >;
+
+multiclass T_vmpy <string asmString, RegisterClass RCout,
+ RegisterClass RCin> {
+ def NAME : T_vmpy <asmString, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_vmpy <asmString, !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
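+// Each defm produces the 64B instruction (NAME) plus an isCodeGenOnly 128B
+// twin (NAME_128B) over the widened register classes.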
+
+multiclass T_vmpy_VV <string asmString>:
+ T_vmpy <asmString, VectorRegs, VectorRegs>;
+
+multiclass T_vmpy_WW <string asmString>:
+ T_vmpy <asmString, VecDblRegs, VecDblRegs>;
+
+multiclass T_vmpy_VW <string asmString>:
+ T_vmpy <asmString, VectorRegs, VecDblRegs>;
+
+multiclass T_vmpy_WV <string asmString>:
+ T_vmpy <asmString, VecDblRegs, VectorRegs>;
+
+defm V6_vtmpyb :T_vmpy_WW<"$dst.h = vtmpy($src1.b,$src2.b)">, V6_vtmpyb_enc;
+defm V6_vtmpybus :T_vmpy_WW<"$dst.h = vtmpy($src1.ub,$src2.b)">, V6_vtmpybus_enc;
+defm V6_vdsaduh :T_vmpy_WW<"$dst.uw = vdsad($src1.uh,$src2.uh)">, V6_vdsaduh_enc;
+defm V6_vmpybus :T_vmpy_WV<"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybus_enc;
+defm V6_vmpabus :T_vmpy_WW<"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabus_enc;
+defm V6_vmpahb :T_vmpy_WW<"$dst.w = vmpa($src1.h,$src2.b)">, V6_vmpahb_enc;
+defm V6_vmpyh :T_vmpy_WV<"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyh_enc;
+defm V6_vmpyuh :T_vmpy_WV<"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuh_enc;
+defm V6_vmpyiwh :T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_enc;
+defm V6_vtmpyhb :T_vmpy_WW<"$dst.w = vtmpy($src1.h,$src2.b)">, V6_vtmpyhb_enc;
+defm V6_vmpyub :T_vmpy_WV<"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyub_enc;
+
+let Itinerary = CVI_VX_LONG, Type = TypeCVI_VX in
+defm V6_vmpyihb :T_vmpy_VV<"$dst.h = vmpyi($src1.h,$src2.b)">, V6_vmpyihb_enc;
+
+defm V6_vdmpybus_dv :
+ T_vmpy_WW <"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_dv_enc;
+defm V6_vdmpyhsusat :
+ T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.uh):sat">, V6_vdmpyhsusat_enc;
+defm V6_vdmpyhsuisat :
+ T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.uh,#1):sat">, V6_vdmpyhsuisat_enc;
+defm V6_vdmpyhsat :
+ T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhsat_enc;
+defm V6_vdmpyhisat :
+ T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhisat_enc;
+defm V6_vdmpyhb_dv :
+ T_vmpy_WW <"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_dv_enc;
+defm V6_vmpyhss :
+ T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:sat">, V6_vmpyhss_enc;
+defm V6_vmpyhsrs :
+ T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhsrs_enc;
+
+let Itinerary = CVI_VP, Type = TypeCVI_VP in
+defm V6_vror : T_vmpy_VV <"$dst = vror($src1,$src2)">, V6_vror_enc;
+
+let Itinerary = CVI_VX, Type = TypeCVI_VX in {
+defm V6_vdmpyhb : T_vmpy_VV<"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_enc;
+defm V6_vrmpybus : T_vmpy_VV<"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybus_enc;
+defm V6_vdmpybus : T_vmpy_VV<"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_enc;
+defm V6_vmpyiwb : T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.b)">, V6_vmpyiwb_enc;
+defm V6_vrmpyub : T_vmpy_VV<"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyub_enc;
+}
+
+let Itinerary = CVI_VS, Type = TypeCVI_VS in {
+defm V6_vasrw : T_vmpy_VV <"$dst.w = vasr($src1.w,$src2)">, V6_vasrw_enc;
+defm V6_vasrh : T_vmpy_VV <"$dst.h = vasr($src1.h,$src2)">, V6_vasrh_enc;
+defm V6_vaslw : T_vmpy_VV <"$dst.w = vasl($src1.w,$src2)">, V6_vaslw_enc;
+defm V6_vaslh : T_vmpy_VV <"$dst.h = vasl($src1.h,$src2)">, V6_vaslh_enc;
+defm V6_vlsrw : T_vmpy_VV <"$dst.uw = vlsr($src1.uw,$src2)">, V6_vlsrw_enc;
+defm V6_vlsrh : T_vmpy_VV <"$dst.uh = vlsr($src1.uh,$src2)">, V6_vlsrh_enc;
+}
+
+let hasNewValue = 1 in
+class T_HVX_alu <string asmString, InstrItinClass itin,
+ RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2),
+ asmString >{
+ let Itinerary = itin;
+ let Type = !cast<IType>("Type"#itin);
+}
+
+multiclass T_HVX_alu <string asmString, RegisterClass RCout,
+ RegisterClass RCin, InstrItinClass itin> {
+ def NAME : T_HVX_alu <asmString, itin, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_alu <asmString, itin,
+ !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
+
+multiclass T_HVX_alu_VV <string asmString>:
+ T_HVX_alu <asmString, VectorRegs, VectorRegs, CVI_VA>;
+
+multiclass T_HVX_alu_WW <string asmString>:
+ T_HVX_alu <asmString, VecDblRegs, VecDblRegs, CVI_VA_DV>;
+
+multiclass T_HVX_alu_WV <string asmString>:
+ T_HVX_alu <asmString, VecDblRegs, VectorRegs, CVI_VX_DV>;
+
+
+let Itinerary = CVI_VX, Type = TypeCVI_VX in {
+defm V6_vrmpyubv :
+ T_HVX_alu_VV <"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyubv_enc;
+defm V6_vrmpybv :
+ T_HVX_alu_VV <"$dst.w = vrmpy($src1.b,$src2.b)">, V6_vrmpybv_enc;
+defm V6_vrmpybusv :
+ T_HVX_alu_VV <"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybusv_enc;
+defm V6_vabsdiffub :
+ T_HVX_alu_VV <"$dst.ub = vabsdiff($src1.ub,$src2.ub)">, V6_vabsdiffub_enc;
+defm V6_vabsdiffh :
+ T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.h,$src2.h)">, V6_vabsdiffh_enc;
+defm V6_vabsdiffuh :
+ T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.uh,$src2.uh)">, V6_vabsdiffuh_enc;
+defm V6_vabsdiffw :
+ T_HVX_alu_VV <"$dst.uw = vabsdiff($src1.w,$src2.w)">, V6_vabsdiffw_enc;
+}
+
+let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in {
+defm V6_vdmpyhvsat :
+ T_HVX_alu_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhvsat_enc;
+defm V6_vmpyhvsrs :
+ T_HVX_alu_VV<"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhvsrs_enc;
+defm V6_vmpyih :
+ T_HVX_alu_VV <"$dst.h = vmpyi($src1.h,$src2.h)">, V6_vmpyih_enc;
+}
+
+defm V6_vand :
+ T_HVX_alu_VV <"$dst = vand($src1,$src2)">, V6_vand_enc;
+defm V6_vor :
+ T_HVX_alu_VV <"$dst = vor($src1,$src2)">, V6_vor_enc;
+defm V6_vxor :
+ T_HVX_alu_VV <"$dst = vxor($src1,$src2)">, V6_vxor_enc;
+defm V6_vaddw :
+ T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_enc;
+defm V6_vaddubsat :
+ T_HVX_alu_VV <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_enc;
+defm V6_vadduhsat :
+ T_HVX_alu_VV <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_enc;
+defm V6_vaddhsat :
+ T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_enc;
+defm V6_vaddwsat :
+ T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_enc;
+defm V6_vsubb :
+ T_HVX_alu_VV <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_enc;
+defm V6_vsubh :
+ T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_enc;
+defm V6_vsubw :
+ T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_enc;
+defm V6_vsububsat :
+ T_HVX_alu_VV <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_enc;
+defm V6_vsubuhsat :
+ T_HVX_alu_VV <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_enc;
+defm V6_vsubhsat :
+ T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_enc;
+defm V6_vsubwsat :
+ T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_enc;
+defm V6_vavgub :
+ T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub)">, V6_vavgub_enc;
+defm V6_vavguh :
+ T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh)">, V6_vavguh_enc;
+defm V6_vavgh :
+ T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h)">, V6_vavgh_enc;
+defm V6_vavgw :
+ T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w)">, V6_vavgw_enc;
+defm V6_vnavgub :
+ T_HVX_alu_VV <"$dst.b = vnavg($src1.ub,$src2.ub)">, V6_vnavgub_enc;
+defm V6_vnavgh :
+ T_HVX_alu_VV <"$dst.h = vnavg($src1.h,$src2.h)">, V6_vnavgh_enc;
+defm V6_vnavgw :
+ T_HVX_alu_VV <"$dst.w = vnavg($src1.w,$src2.w)">, V6_vnavgw_enc;
+defm V6_vavgubrnd :
+ T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub):rnd">, V6_vavgubrnd_enc;
+defm V6_vavguhrnd :
+ T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh):rnd">, V6_vavguhrnd_enc;
+defm V6_vavghrnd :
+ T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h):rnd">, V6_vavghrnd_enc;
+defm V6_vavgwrnd :
+ T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w):rnd">, V6_vavgwrnd_enc;
+
+defm V6_vmpybv :
+ T_HVX_alu_WV <"$dst.h = vmpy($src1.b,$src2.b)">, V6_vmpybv_enc;
+defm V6_vmpyubv :
+ T_HVX_alu_WV <"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyubv_enc;
+defm V6_vmpybusv :
+ T_HVX_alu_WV <"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybusv_enc;
+defm V6_vmpyhv :
+ T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyhv_enc;
+defm V6_vmpyuhv :
+ T_HVX_alu_WV <"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuhv_enc;
+defm V6_vmpyhus :
+ T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.uh)">, V6_vmpyhus_enc;
+defm V6_vaddubh :
+ T_HVX_alu_WV <"$dst.h = vadd($src1.ub,$src2.ub)">, V6_vaddubh_enc;
+defm V6_vadduhw :
+ T_HVX_alu_WV <"$dst.w = vadd($src1.uh,$src2.uh)">, V6_vadduhw_enc;
+defm V6_vaddhw :
+ T_HVX_alu_WV <"$dst.w = vadd($src1.h,$src2.h)">, V6_vaddhw_enc;
+defm V6_vsububh :
+ T_HVX_alu_WV <"$dst.h = vsub($src1.ub,$src2.ub)">, V6_vsububh_enc;
+defm V6_vsubuhw :
+ T_HVX_alu_WV <"$dst.w = vsub($src1.uh,$src2.uh)">, V6_vsubuhw_enc;
+defm V6_vsubhw :
+ T_HVX_alu_WV <"$dst.w = vsub($src1.h,$src2.h)">, V6_vsubhw_enc;
+
+defm V6_vaddb_dv :
+ T_HVX_alu_WW <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_dv_enc;
+defm V6_vaddh_dv :
+ T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_dv_enc;
+defm V6_vaddw_dv :
+ T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_dv_enc;
+defm V6_vaddubsat_dv :
+ T_HVX_alu_WW <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_dv_enc;
+defm V6_vadduhsat_dv :
+ T_HVX_alu_WW <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_dv_enc;
+defm V6_vaddhsat_dv :
+ T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_dv_enc;
+defm V6_vaddwsat_dv :
+ T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_dv_enc;
+defm V6_vsubb_dv :
+ T_HVX_alu_WW <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_dv_enc;
+defm V6_vsubh_dv :
+ T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_dv_enc;
+defm V6_vsubw_dv :
+ T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_dv_enc;
+defm V6_vsububsat_dv :
+ T_HVX_alu_WW <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_dv_enc;
+defm V6_vsubuhsat_dv :
+ T_HVX_alu_WW <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_dv_enc;
+defm V6_vsubhsat_dv :
+ T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_dv_enc;
+defm V6_vsubwsat_dv :
+ T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_dv_enc;
+
+let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV in {
+defm V6_vmpabusv :
+ T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabusv_enc;
+defm V6_vmpabuuv :
+ T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.ub)">, V6_vmpabuuv_enc;
+}
+
+let isAccumulator = 1, hasNewValue = 1 in
+class T_HVX_vmpyacc <string asmString, InstrItinClass itin, RegisterClass RCout,
+ RegisterClass RCin1, RegisterClass RCin2>
+ : CVI_VA_Resource1 <(outs RCout:$dst),
+ (ins RCout:$_src_, RCin1:$src1, RCin2:$src2), asmString,
+ [], "$dst = $_src_" > {
+ let Itinerary = itin;
+ let Type = !cast<IType>("Type"#itin);
+}
+
+multiclass T_HVX_vmpyacc_both <string asmString, RegisterClass RCout,
+ RegisterClass RCin1, RegisterClass RCin2, InstrItinClass itin > {
+ def NAME : T_HVX_vmpyacc <asmString, itin, RCout, RCin1, RCin2>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vmpyacc <asmString, itin,
+ !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin1#"128B"),
+ !cast<RegisterClass>(RCin2#
+ !if(!eq (!cast<string>(RCin2), "IntRegs"), "", "128B"))>;
+}
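+// The !if keeps a scalar IntRegs operand unchanged when forming the _128B
+// variant; only vector register classes get the "128B" suffix.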
+
+multiclass T_HVX_vmpyacc_VVR <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VectorRegs, VectorRegs, IntRegs, CVI_VX>;
+
+multiclass T_HVX_vmpyacc_VWR <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VectorRegs, VecDblRegs, IntRegs, CVI_VX_DV>;
+
+multiclass T_HVX_vmpyacc_WVR <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VecDblRegs, VectorRegs, IntRegs, CVI_VX_DV>;
+
+multiclass T_HVX_vmpyacc_WWR <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VecDblRegs, VecDblRegs, IntRegs, CVI_VX_DV>;
+
+multiclass T_HVX_vmpyacc_VVV <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VectorRegs, VectorRegs, VectorRegs, CVI_VX_DV>;
+
+multiclass T_HVX_vmpyacc_WVV <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VecDblRegs, VectorRegs, VectorRegs, CVI_VX_DV>;
+
+
+defm V6_vtmpyb_acc :
+ T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.b,$src2.b)">,
+ V6_vtmpyb_acc_enc;
+defm V6_vtmpybus_acc :
+ T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.ub,$src2.b)">,
+ V6_vtmpybus_acc_enc;
+defm V6_vtmpyhb_acc :
+ T_HVX_vmpyacc_WWR <"$dst.w += vtmpy($src1.h,$src2.b)">,
+ V6_vtmpyhb_acc_enc;
+defm V6_vdmpyhb_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.b)">,
+ V6_vdmpyhb_acc_enc;
+defm V6_vrmpyub_acc :
+ T_HVX_vmpyacc_VVR <"$dst.uw += vrmpy($src1.ub,$src2.ub)">,
+ V6_vrmpyub_acc_enc;
+defm V6_vrmpybus_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vrmpy($src1.ub,$src2.b)">,
+ V6_vrmpybus_acc_enc;
+defm V6_vdmpybus_acc :
+ T_HVX_vmpyacc_VVR <"$dst.h += vdmpy($src1.ub,$src2.b)">,
+ V6_vdmpybus_acc_enc;
+defm V6_vdmpybus_dv_acc :
+ T_HVX_vmpyacc_WWR <"$dst.h += vdmpy($src1.ub,$src2.b)">,
+ V6_vdmpybus_dv_acc_enc;
+defm V6_vdmpyhsuisat_acc :
+ T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.uh,#1):sat">,
+ V6_vdmpyhsuisat_acc_enc;
+defm V6_vdmpyhisat_acc :
+ T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.h):sat">,
+ V6_vdmpyhisat_acc_enc;
+defm V6_vdmpyhb_dv_acc :
+ T_HVX_vmpyacc_WWR <"$dst.w += vdmpy($src1.h,$src2.b)">,
+ V6_vdmpyhb_dv_acc_enc;
+defm V6_vmpybus_acc :
+ T_HVX_vmpyacc_WVR <"$dst.h += vmpy($src1.ub,$src2.b)">,
+ V6_vmpybus_acc_enc;
+defm V6_vmpabus_acc :
+ T_HVX_vmpyacc_WWR <"$dst.h += vmpa($src1.ub,$src2.b)">,
+ V6_vmpabus_acc_enc;
+defm V6_vmpahb_acc :
+ T_HVX_vmpyacc_WWR <"$dst.w += vmpa($src1.h,$src2.b)">,
+ V6_vmpahb_acc_enc;
+defm V6_vmpyhsat_acc :
+ T_HVX_vmpyacc_WVR <"$dst.w += vmpy($src1.h,$src2.h):sat">,
+ V6_vmpyhsat_acc_enc;
+defm V6_vmpyuh_acc :
+ T_HVX_vmpyacc_WVR <"$dst.uw += vmpy($src1.uh,$src2.uh)">,
+ V6_vmpyuh_acc_enc;
+defm V6_vmpyiwb_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vmpyi($src1.w,$src2.b)">,
+ V6_vmpyiwb_acc_enc;
+defm V6_vdsaduh_acc :
+ T_HVX_vmpyacc_WWR <"$dst.uw += vdsad($src1.uh,$src2.uh)">,
+ V6_vdsaduh_acc_enc;
+defm V6_vmpyihb_acc :
+ T_HVX_vmpyacc_VVR <"$dst.h += vmpyi($src1.h,$src2.b)">,
+ V6_vmpyihb_acc_enc;
+defm V6_vmpyub_acc :
+ T_HVX_vmpyacc_WVR <"$dst.uh += vmpy($src1.ub,$src2.ub)">,
+ V6_vmpyub_acc_enc;
+
+let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in {
+defm V6_vdmpyhsusat_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.uh):sat">,
+ V6_vdmpyhsusat_acc_enc;
+defm V6_vdmpyhsat_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.h):sat">,
+ V6_vdmpyhsat_acc_enc;
+defm V6_vmpyiwh_acc : T_HVX_vmpyacc_VVR
+ <"$dst.w += vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_acc_enc;
+}
+
+let Itinerary = CVI_VS, Type = TypeCVI_VS in {
+defm V6_vaslw_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vasl($src1.w,$src2)">, V6_vaslw_acc_enc;
+defm V6_vasrw_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vasr($src1.w,$src2)">, V6_vasrw_acc_enc;
+}
+
+defm V6_vdmpyhvsat_acc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vdmpy($src1.h,$src2.h):sat">,
+ V6_vdmpyhvsat_acc_enc;
+defm V6_vmpybusv_acc :
+ T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.ub,$src2.b)">,
+ V6_vmpybusv_acc_enc;
+defm V6_vmpybv_acc :
+ T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.b,$src2.b)">, V6_vmpybv_acc_enc;
+defm V6_vmpyhus_acc :
+ T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.uh)">, V6_vmpyhus_acc_enc;
+defm V6_vmpyhv_acc :
+ T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.h)">, V6_vmpyhv_acc_enc;
+defm V6_vmpyiewh_acc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.h)">,
+ V6_vmpyiewh_acc_enc;
+defm V6_vmpyiewuh_acc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.uh)">,
+ V6_vmpyiewuh_acc_enc;
+defm V6_vmpyih_acc :
+ T_HVX_vmpyacc_VVV <"$dst.h += vmpyi($src1.h,$src2.h)">, V6_vmpyih_acc_enc;
+defm V6_vmpyowh_rnd_sacc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:rnd:sat:shift">,
+ V6_vmpyowh_rnd_sacc_enc;
+defm V6_vmpyowh_sacc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:sat:shift">,
+ V6_vmpyowh_sacc_enc;
+defm V6_vmpyubv_acc :
+ T_HVX_vmpyacc_WVV <"$dst.uh += vmpy($src1.ub,$src2.ub)">,
+ V6_vmpyubv_acc_enc;
+defm V6_vmpyuhv_acc :
+ T_HVX_vmpyacc_WVV <"$dst.uw += vmpy($src1.uh,$src2.uh)">,
+ V6_vmpyuhv_acc_enc;
+defm V6_vrmpybusv_acc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.ub,$src2.b)">,
+ V6_vrmpybusv_acc_enc;
+defm V6_vrmpybv_acc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.b,$src2.b)">, V6_vrmpybv_acc_enc;
+defm V6_vrmpyubv_acc :
+ T_HVX_vmpyacc_VVV <"$dst.uw += vrmpy($src1.ub,$src2.ub)">,
+ V6_vrmpyubv_acc_enc;
+
+
+class T_HVX_vcmp <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1 <(outs RCout:$dst),
+ (ins RCout:$_src_, RCin:$src1, RCin:$src2), asmString,
+ [], "$dst = $_src_" > {
+ let Itinerary = CVI_VA;
+ let Type = TypeCVI_VA;
+}
+
+multiclass T_HVX_vcmp <string asmString> {
+ def NAME : T_HVX_vcmp <asmString, VecPredRegs, VectorRegs>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vcmp <asmString, VecPredRegs128B, VectorRegs128B>;
+}
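+// Accumulating compares: the comparison result is combined (&=, |=, ^=) into
+// the tied destination predicate ($dst = $_src_).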
+
+defm V6_veqb_and :
+ T_HVX_vcmp <"$dst &= vcmp.eq($src1.b,$src2.b)">, V6_veqb_and_enc;
+defm V6_veqh_and :
+ T_HVX_vcmp <"$dst &= vcmp.eq($src1.h,$src2.h)">, V6_veqh_and_enc;
+defm V6_veqw_and :
+ T_HVX_vcmp <"$dst &= vcmp.eq($src1.w,$src2.w)">, V6_veqw_and_enc;
+defm V6_vgtb_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_and_enc;
+defm V6_vgth_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.h,$src2.h)">, V6_vgth_and_enc;
+defm V6_vgtw_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_and_enc;
+defm V6_vgtub_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_and_enc;
+defm V6_vgtuh_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_and_enc;
+defm V6_vgtuw_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_and_enc;
+defm V6_veqb_or :
+ T_HVX_vcmp <"$dst |= vcmp.eq($src1.b,$src2.b)">, V6_veqb_or_enc;
+defm V6_veqh_or :
+ T_HVX_vcmp <"$dst |= vcmp.eq($src1.h,$src2.h)">, V6_veqh_or_enc;
+defm V6_veqw_or :
+ T_HVX_vcmp <"$dst |= vcmp.eq($src1.w,$src2.w)">, V6_veqw_or_enc;
+defm V6_vgtb_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_or_enc;
+defm V6_vgth_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.h,$src2.h)">, V6_vgth_or_enc;
+defm V6_vgtw_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_or_enc;
+defm V6_vgtub_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_or_enc;
+defm V6_vgtuh_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_or_enc;
+defm V6_vgtuw_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_or_enc;
+defm V6_veqb_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.eq($src1.b,$src2.b)">, V6_veqb_xor_enc;
+defm V6_veqh_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.eq($src1.h,$src2.h)">, V6_veqh_xor_enc;
+defm V6_veqw_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.eq($src1.w,$src2.w)">, V6_veqw_xor_enc;
+defm V6_vgtb_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_xor_enc;
+defm V6_vgth_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.h,$src2.h)">, V6_vgth_xor_enc;
+defm V6_vgtw_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_xor_enc;
+defm V6_vgtub_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_xor_enc;
+defm V6_vgtuh_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_xor_enc;
+defm V6_vgtuw_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_xor_enc;
+
+defm V6_vminub :
+ T_HVX_alu_VV <"$dst.ub = vmin($src1.ub,$src2.ub)">, V6_vminub_enc;
+defm V6_vminuh :
+ T_HVX_alu_VV <"$dst.uh = vmin($src1.uh,$src2.uh)">, V6_vminuh_enc;
+defm V6_vminh :
+ T_HVX_alu_VV <"$dst.h = vmin($src1.h,$src2.h)">, V6_vminh_enc;
+defm V6_vminw :
+ T_HVX_alu_VV <"$dst.w = vmin($src1.w,$src2.w)">, V6_vminw_enc;
+defm V6_vmaxub :
+ T_HVX_alu_VV <"$dst.ub = vmax($src1.ub,$src2.ub)">, V6_vmaxub_enc;
+defm V6_vmaxuh :
+ T_HVX_alu_VV <"$dst.uh = vmax($src1.uh,$src2.uh)">, V6_vmaxuh_enc;
+defm V6_vmaxh :
+ T_HVX_alu_VV <"$dst.h = vmax($src1.h,$src2.h)">, V6_vmaxh_enc;
+defm V6_vmaxw :
+ T_HVX_alu_VV <"$dst.w = vmax($src1.w,$src2.w)">, V6_vmaxw_enc;
+defm V6_vshuffeb :
+ T_HVX_alu_VV <"$dst.b = vshuffe($src1.b,$src2.b)">, V6_vshuffeb_enc;
+defm V6_vshuffob :
+ T_HVX_alu_VV <"$dst.b = vshuffo($src1.b,$src2.b)">, V6_vshuffob_enc;
+defm V6_vshufeh :
+ T_HVX_alu_VV <"$dst.h = vshuffe($src1.h,$src2.h)">, V6_vshufeh_enc;
+defm V6_vshufoh :
+ T_HVX_alu_VV <"$dst.h = vshuffo($src1.h,$src2.h)">, V6_vshufoh_enc;
+
+let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in {
+defm V6_vmpyowh_rnd :
+ T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:rnd:sat">,
+ V6_vmpyowh_rnd_enc;
+defm V6_vmpyiewuh :
+ T_HVX_alu_VV <"$dst.w = vmpyie($src1.w,$src2.uh)">, V6_vmpyiewuh_enc;
+defm V6_vmpyewuh :
+ T_HVX_alu_VV <"$dst.w = vmpye($src1.w,$src2.uh)">, V6_vmpyewuh_enc;
+defm V6_vmpyowh :
+ T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:sat">, V6_vmpyowh_enc;
+defm V6_vmpyiowh :
+ T_HVX_alu_VV <"$dst.w = vmpyio($src1.w,$src2.h)">, V6_vmpyiowh_enc;
+}
+let Itinerary = CVI_VX, Type = TypeCVI_VX in
+defm V6_vmpyieoh :
+ T_HVX_alu_VV <"$dst.w = vmpyieo($src1.h,$src2.h)">, V6_vmpyieoh_enc;
+
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in {
+defm V6_vshufoeh :
+ T_HVX_alu_WV <"$dst.h = vshuffoe($src1.h,$src2.h)">, V6_vshufoeh_enc;
+defm V6_vshufoeb :
+ T_HVX_alu_WV <"$dst.b = vshuffoe($src1.b,$src2.b)">, V6_vshufoeb_enc;
+}
+
+let isRegSequence = 1, Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in
+defm V6_vcombine :
+ T_HVX_alu_WV <"$dst = vcombine($src1,$src2)">, V6_vcombine_enc;
+
+def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>,
+ SDTCisSubVecOfVec<1, 0>]>;
+
+def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>;
+
+def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs),
+ (v16i32 VectorRegs:$Vt))),
+ (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>,
+ Requires<[UseHVXSgl]>;
+def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs),
+ (v32i32 VecDblRegs:$Vt))),
+ (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
+ Requires<[UseHVXDbl]>;
+
+let Itinerary = CVI_VINLANESAT, Type = TypeCVI_VINLANESAT in {
+defm V6_vsathub :
+ T_HVX_alu_VV <"$dst.ub = vsat($src1.h,$src2.h)">, V6_vsathub_enc;
+defm V6_vsatwh :
+ T_HVX_alu_VV <"$dst.h = vsat($src1.w,$src2.w)">, V6_vsatwh_enc;
+}
+
+let Itinerary = CVI_VS, Type = TypeCVI_VS in {
+defm V6_vroundwh :
+ T_HVX_alu_VV <"$dst.h = vround($src1.w,$src2.w):sat">, V6_vroundwh_enc;
+defm V6_vroundwuh :
+ T_HVX_alu_VV <"$dst.uh = vround($src1.w,$src2.w):sat">, V6_vroundwuh_enc;
+defm V6_vroundhb :
+ T_HVX_alu_VV <"$dst.b = vround($src1.h,$src2.h):sat">, V6_vroundhb_enc;
+defm V6_vroundhub :
+ T_HVX_alu_VV <"$dst.ub = vround($src1.h,$src2.h):sat">, V6_vroundhub_enc;
+defm V6_vasrwv :
+ T_HVX_alu_VV <"$dst.w = vasr($src1.w,$src2.w)">, V6_vasrwv_enc;
+defm V6_vlsrwv :
+ T_HVX_alu_VV <"$dst.w = vlsr($src1.w,$src2.w)">, V6_vlsrwv_enc;
+defm V6_vlsrhv :
+ T_HVX_alu_VV <"$dst.h = vlsr($src1.h,$src2.h)">, V6_vlsrhv_enc;
+defm V6_vasrhv :
+ T_HVX_alu_VV <"$dst.h = vasr($src1.h,$src2.h)">, V6_vasrhv_enc;
+defm V6_vaslwv :
+ T_HVX_alu_VV <"$dst.w = vasl($src1.w,$src2.w)">, V6_vaslwv_enc;
+defm V6_vaslhv :
+ T_HVX_alu_VV <"$dst.h = vasl($src1.h,$src2.h)">, V6_vaslhv_enc;
+}
+
+defm V6_vaddb :
+ T_HVX_alu_VV <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_enc;
+defm V6_vaddh :
+ T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_enc;
+
+let Itinerary = CVI_VP, Type = TypeCVI_VP in {
+defm V6_vdelta :
+ T_HVX_alu_VV <"$dst = vdelta($src1,$src2)">, V6_vdelta_enc;
+defm V6_vrdelta :
+ T_HVX_alu_VV <"$dst = vrdelta($src1,$src2)">, V6_vrdelta_enc;
+defm V6_vdealb4w :
+ T_HVX_alu_VV <"$dst.b = vdeale($src1.b,$src2.b)">, V6_vdealb4w_enc;
+defm V6_vpackeb :
+ T_HVX_alu_VV <"$dst.b = vpacke($src1.h,$src2.h)">, V6_vpackeb_enc;
+defm V6_vpackeh :
+ T_HVX_alu_VV <"$dst.h = vpacke($src1.w,$src2.w)">, V6_vpackeh_enc;
+defm V6_vpackhub_sat :
+ T_HVX_alu_VV <"$dst.ub = vpack($src1.h,$src2.h):sat">, V6_vpackhub_sat_enc;
+defm V6_vpackhb_sat :
+ T_HVX_alu_VV <"$dst.b = vpack($src1.h,$src2.h):sat">, V6_vpackhb_sat_enc;
+defm V6_vpackwuh_sat :
+ T_HVX_alu_VV <"$dst.uh = vpack($src1.w,$src2.w):sat">, V6_vpackwuh_sat_enc;
+defm V6_vpackwh_sat :
+ T_HVX_alu_VV <"$dst.h = vpack($src1.w,$src2.w):sat">, V6_vpackwh_sat_enc;
+defm V6_vpackob :
+ T_HVX_alu_VV <"$dst.b = vpacko($src1.h,$src2.h)">, V6_vpackob_enc;
+defm V6_vpackoh :
+ T_HVX_alu_VV <"$dst.h = vpacko($src1.w,$src2.w)">, V6_vpackoh_enc;
+}
+
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_HVX_condALU <string asmString, RegisterClass RC1, RegisterClass RC2>
+ : CVI_VA_Resource1 <(outs RC2:$dst),
+ (ins RC1:$src1, RC2:$_src_, RC2:$src2), asmString,
+ [], "$dst = $_src_" > {
+ let Itinerary = CVI_VA;
+ let Type = TypeCVI_VA;
+}
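+// Per-lane conditional accumulate under a vector predicate; lanes where the
+// condition is false keep the old value via the tied $_src_ operand.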
+
+multiclass T_HVX_condALU <string asmString> {
+ def NAME : T_HVX_condALU <asmString, VecPredRegs, VectorRegs>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_condALU <asmString, VecPredRegs128B, VectorRegs128B>;
+}
+
+defm V6_vaddbq : T_HVX_condALU <"if ($src1) $dst.b += $src2.b">,
+ V6_vaddbq_enc;
+defm V6_vaddhq : T_HVX_condALU <"if ($src1) $dst.h += $src2.h">,
+ V6_vaddhq_enc;
+defm V6_vaddwq : T_HVX_condALU <"if ($src1) $dst.w += $src2.w">,
+ V6_vaddwq_enc;
+defm V6_vsubbq : T_HVX_condALU <"if ($src1) $dst.b -= $src2.b">,
+ V6_vsubbq_enc;
+defm V6_vsubhq : T_HVX_condALU <"if ($src1) $dst.h -= $src2.h">,
+ V6_vsubhq_enc;
+defm V6_vsubwq : T_HVX_condALU <"if ($src1) $dst.w -= $src2.w">,
+ V6_vsubwq_enc;
+defm V6_vaddbnq : T_HVX_condALU <"if (!$src1) $dst.b += $src2.b">,
+ V6_vaddbnq_enc;
+defm V6_vaddhnq : T_HVX_condALU <"if (!$src1) $dst.h += $src2.h">,
+ V6_vaddhnq_enc;
+defm V6_vaddwnq : T_HVX_condALU <"if (!$src1) $dst.w += $src2.w">,
+ V6_vaddwnq_enc;
+defm V6_vsubbnq : T_HVX_condALU <"if (!$src1) $dst.b -= $src2.b">,
+ V6_vsubbnq_enc;
+defm V6_vsubhnq : T_HVX_condALU <"if (!$src1) $dst.h -= $src2.h">,
+ V6_vsubhnq_enc;
+defm V6_vsubwnq : T_HVX_condALU <"if (!$src1) $dst.w -= $src2.w">,
+ V6_vsubwnq_enc;
+
+let hasNewValue = 1 in
+class T_HVX_alu_2op <string asmString, InstrItinClass itin,
+ RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1),
+ asmString >{
+ let Itinerary = itin;
+ let Type = !cast<IType>("Type"#itin);
+}
+
+multiclass T_HVX_alu_2op <string asmString, RegisterClass RCout,
+ RegisterClass RCin, InstrItinClass itin> {
+ def NAME : T_HVX_alu_2op <asmString, itin, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_alu_2op <asmString, itin,
+ !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
+
+let hasNewValue = 1 in
+multiclass T_HVX_alu_2op_VV <string asmString>:
+ T_HVX_alu_2op <asmString, VectorRegs, VectorRegs, CVI_VA>;
+
+multiclass T_HVX_alu_2op_WV <string asmString>:
+ T_HVX_alu_2op <asmString, VecDblRegs, VectorRegs, CVI_VA_DV>;
+
+
+defm V6_vabsh : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h)">,
+ V6_vabsh_enc;
+defm V6_vabsw : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w)">,
+ V6_vabsw_enc;
+defm V6_vabsh_sat : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h):sat">,
+ V6_vabsh_sat_enc;
+defm V6_vabsw_sat : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w):sat">,
+ V6_vabsw_sat_enc;
+defm V6_vnot : T_HVX_alu_2op_VV <"$dst = vnot($src1)">,
+ V6_vnot_enc;
+defm V6_vassign : T_HVX_alu_2op_VV <"$dst = $src1">,
+ V6_vassign_enc;
+
+defm V6_vzb : T_HVX_alu_2op_WV <"$dst.uh = vzxt($src1.ub)">,
+ V6_vzb_enc;
+defm V6_vzh : T_HVX_alu_2op_WV <"$dst.uw = vzxt($src1.uh)">,
+ V6_vzh_enc;
+defm V6_vsb : T_HVX_alu_2op_WV <"$dst.h = vsxt($src1.b)">,
+ V6_vsb_enc;
+defm V6_vsh : T_HVX_alu_2op_WV <"$dst.w = vsxt($src1.h)">,
+ V6_vsh_enc;
+
+let Itinerary = CVI_VP, Type = TypeCVI_VP in {
+defm V6_vdealh : T_HVX_alu_2op_VV <"$dst.h = vdeal($src1.h)">,
+ V6_vdealh_enc;
+defm V6_vdealb : T_HVX_alu_2op_VV <"$dst.b = vdeal($src1.b)">,
+ V6_vdealb_enc;
+defm V6_vshuffh : T_HVX_alu_2op_VV <"$dst.h = vshuff($src1.h)">,
+ V6_vshuffh_enc;
+defm V6_vshuffb : T_HVX_alu_2op_VV <"$dst.b = vshuff($src1.b)">,
+ V6_vshuffb_enc;
+}
+
+let Itinerary = CVI_VP_VS, Type = TypeCVI_VP_VS in {
+defm V6_vunpackub : T_HVX_alu_2op_WV <"$dst.uh = vunpack($src1.ub)">,
+ V6_vunpackub_enc;
+defm V6_vunpackuh : T_HVX_alu_2op_WV <"$dst.uw = vunpack($src1.uh)">,
+ V6_vunpackuh_enc;
+defm V6_vunpackb : T_HVX_alu_2op_WV <"$dst.h = vunpack($src1.b)">,
+ V6_vunpackb_enc;
+defm V6_vunpackh : T_HVX_alu_2op_WV <"$dst.w = vunpack($src1.h)">,
+ V6_vunpackh_enc;
+}
+
+let Itinerary = CVI_VS, Type = TypeCVI_VS in {
+defm V6_vcl0w : T_HVX_alu_2op_VV <"$dst.uw = vcl0($src1.uw)">,
+ V6_vcl0w_enc;
+defm V6_vcl0h : T_HVX_alu_2op_VV <"$dst.uh = vcl0($src1.uh)">,
+ V6_vcl0h_enc;
+defm V6_vnormamtw : T_HVX_alu_2op_VV <"$dst.w = vnormamt($src1.w)">,
+ V6_vnormamtw_enc;
+defm V6_vnormamth : T_HVX_alu_2op_VV <"$dst.h = vnormamt($src1.h)">,
+ V6_vnormamth_enc;
+defm V6_vpopcounth : T_HVX_alu_2op_VV <"$dst.h = vpopcount($src1.h)">,
+ V6_vpopcounth_enc;
+}
+
+let isAccumulator = 1, hasNewValue = 1, Itinerary = CVI_VX_DV_LONG,
+ Type = TypeCVI_VX_DV in
+class T_HVX_vmpyacc2 <string asmString, RegisterClass RC>
+ : CVI_VA_Resource1 <(outs RC:$dst),
+ (ins RC:$_src_, RC:$src1, IntRegs:$src2, u1Imm:$src3),
+ asmString, [], "$dst = $_src_" > ;
+
+
+multiclass T_HVX_vmpyacc2 <string asmString> {
+ def NAME : T_HVX_vmpyacc2 <asmString, VecDblRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vmpyacc2 <asmString, VecDblRegs128B>;
+}
+
+defm V6_vrmpybusi_acc :
+ T_HVX_vmpyacc2<"$dst.w += vrmpy($src1.ub,$src2.b,#$src3)">,
+ V6_vrmpybusi_acc_enc;
+defm V6_vrsadubi_acc :
+ T_HVX_vmpyacc2<"$dst.uw += vrsad($src1.ub,$src2.ub,#$src3)">,
+ V6_vrsadubi_acc_enc;
+defm V6_vrmpyubi_acc :
+ T_HVX_vmpyacc2<"$dst.uw += vrmpy($src1.ub,$src2.ub,#$src3)">,
+ V6_vrmpyubi_acc_enc;
+
+
+let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV, hasNewValue = 1 in
+class T_HVX_vmpy2 <string asmString, RegisterClass RC>
+ : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, IntRegs:$src2, u1Imm:$src3),
+ asmString>;
+
+
+multiclass T_HVX_vmpy2 <string asmString> {
+ def NAME : T_HVX_vmpy2 <asmString, VecDblRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vmpy2 <asmString, VecDblRegs128B>;
+}
+
+defm V6_vrmpybusi :
+ T_HVX_vmpy2 <"$dst.w = vrmpy($src1.ub,$src2.b,#$src3)">, V6_vrmpybusi_enc;
+defm V6_vrsadubi :
+ T_HVX_vmpy2 <"$dst.uw = vrsad($src1.ub,$src2.ub,#$src3)">, V6_vrsadubi_enc;
+defm V6_vrmpyubi :
+ T_HVX_vmpy2 <"$dst.uw = vrmpy($src1.ub,$src2.ub,#$src3)">, V6_vrmpyubi_enc;
+
+
+let Itinerary = CVI_VP_VS_LONG_EARLY, Type = TypeCVI_VP_VS,
+ hasSideEffects = 0, hasNewValue2 = 1, opNewValue2 = 1 in
+class T_HVX_perm <string asmString, RegisterClass RC>
+ : CVI_VA_Resource1 <(outs RC:$_dst1_, RC:$_dst2_),
+ (ins RC:$src1, RC:$src2, IntRegs:$src3),
+ asmString, [], "$_dst1_ = $src1, $_dst2_ = $src2" >;
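+// vshuff/vdeal permute a vector pair in place: both vector operands are also
+// outputs (tied as $_dst1_/$_dst2_) and the scalar $src3 selects the pattern.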
+
+multiclass T_HVX_perm <string asmString> {
+ def NAME : T_HVX_perm <asmString, VectorRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_perm <asmString, VectorRegs128B>;
+}
+
+let hasNewValue = 1, opNewValue = 0, hasNewValue2 = 1, opNewValue2 = 1 in {
+ defm V6_vshuff : T_HVX_perm <"vshuff($src1,$src2,$src3)">, V6_vshuff_enc;
+ defm V6_vdeal : T_HVX_perm <"vdeal($src1,$src2,$src3)">, V6_vdeal_enc;
+}
+
+// Conditional vector move.
+let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+class T_HVX_cmov <bit isPredNot, RegisterClass RC>
+ : CVI_VA_Resource1 <(outs RC:$dst), (ins PredRegs:$src1, RC:$src2),
+ "if ("#!if(isPredNot, "!", "")#"$src1) $dst = $src2"> {
+ let isPredicatedFalse = isPredNot;
+}
+
+multiclass T_HVX_cmov <bit isPredNot = 0> {
+ def NAME : T_HVX_cmov <isPredNot, VectorRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_cmov <isPredNot, VectorRegs128B>;
+}
+
+defm V6_vcmov : T_HVX_cmov, V6_vcmov_enc;
+defm V6_vncmov : T_HVX_cmov<1>, V6_vncmov_enc;
+
+// Conditional vector combine.
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, isPredicated = 1,
+ hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+class T_HVX_ccombine <bit isPredNot, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1 < (outs RCout:$dst),
+ (ins PredRegs:$src1, RCin:$src2, RCin:$src3),
+ "if ("#!if(isPredNot, "!", "")#"$src1) $dst = vcombine($src2,$src3)"> {
+ let isPredicatedFalse = isPredNot;
+}
+
+multiclass T_HVX_ccombine <bit isPredNot = 0> {
+ def NAME : T_HVX_ccombine <isPredNot, VecDblRegs, VectorRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_ccombine <isPredNot, VecDblRegs128B, VectorRegs128B>;
+}
+
+defm V6_vccombine : T_HVX_ccombine, V6_vccombine_enc;
+defm V6_vnccombine : T_HVX_ccombine<1>, V6_vnccombine_enc;
+
+let hasNewValue = 1 in
+class T_HVX_shift <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_DV_Resource1<(outs RCout:$dst),
+ (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3),
+ asmString >;
+
+multiclass T_HVX_shift <string asmString, RegisterClass RCout,
+ RegisterClass RCin> {
+ def NAME : T_HVX_shift <asmString, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_shift <asmString, !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
+
+multiclass T_HVX_shift_VV <string asmString>:
+ T_HVX_shift <asmString, VectorRegs, VectorRegs>;
+
+multiclass T_HVX_shift_WV <string asmString>:
+ T_HVX_shift <asmString, VecDblRegs, VectorRegs>;
+
+let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in {
+defm V6_valignb :
+ T_HVX_shift_VV <"$dst = valign($src1,$src2,$src3)">, V6_valignb_enc;
+defm V6_vlalignb :
+ T_HVX_shift_VV <"$dst = vlalign($src1,$src2,$src3)">, V6_vlalignb_enc;
+}
+
+let Itinerary = CVI_VS, Type = TypeCVI_VS in {
+defm V6_vasrwh :
+ T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3)">, V6_vasrwh_enc;
+defm V6_vasrwhsat :
+ T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):sat">,
+ V6_vasrwhsat_enc;
+defm V6_vasrwhrndsat :
+ T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):rnd:sat">,
+ V6_vasrwhrndsat_enc;
+defm V6_vasrwuhsat :
+ T_HVX_shift_VV <"$dst.uh = vasr($src1.w,$src2.w,$src3):sat">,
+ V6_vasrwuhsat_enc;
+defm V6_vasrhubsat :
+ T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):sat">,
+ V6_vasrhubsat_enc;
+defm V6_vasrhubrndsat :
+ T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):rnd:sat">,
+ V6_vasrhubrndsat_enc;
+defm V6_vasrhbrndsat :
+ T_HVX_shift_VV <"$dst.b = vasr($src1.h,$src2.h,$src3):rnd:sat">,
+ V6_vasrhbrndsat_enc;
+}
+
+// Assembler mapped -- alias?
+//defm V6_vtran2x2vdd : T_HVX_shift_VV <"">, V6_vtran2x2vdd_enc;
+let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in {
+defm V6_vshuffvdd :
+ T_HVX_shift_WV <"$dst = vshuff($src1,$src2,$src3)">, V6_vshuffvdd_enc;
+defm V6_vdealvdd :
+ T_HVX_shift_WV <"$dst = vdeal($src1,$src2,$src3)">, V6_vdealvdd_enc;
+}
+
+let hasNewValue = 1, Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in
+class T_HVX_unpack <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCout:$_src_, RCin:$src1),
+ asmString, [], "$dst = $_src_">;
+
+multiclass T_HVX_unpack <string asmString> {
+ def NAME : T_HVX_unpack <asmString, VecDblRegs, VectorRegs>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_unpack <asmString, VecDblRegs128B, VectorRegs128B>;
+}
+
+defm V6_vunpackob : T_HVX_unpack <"$dst.h |= vunpacko($src1.b)">, V6_vunpackob_enc;
+defm V6_vunpackoh : T_HVX_unpack <"$dst.w |= vunpacko($src1.h)">, V6_vunpackoh_enc;
+
+let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1,
+ hasSideEffects = 0 in
+class T_HVX_valign <string asmString, RegisterClass RC>
+ : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2, u3Imm:$src3),
+ asmString>;
+
+multiclass T_HVX_valign <string asmString> {
+ def NAME : T_HVX_valign <asmString, VectorRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_valign <asmString, VectorRegs128B>;
+}
+
+defm V6_valignbi :
+ T_HVX_valign <"$dst = valign($src1,$src2,#$src3)">, V6_valignbi_enc;
+defm V6_vlalignbi :
+ T_HVX_valign <"$dst = vlalign($src1,$src2,#$src3)">, V6_vlalignbi_enc;
+
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in
+class T_HVX_predAlu <string asmString, RegisterClass RC>
+ : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2),
+ asmString>;
+
+multiclass T_HVX_predAlu <string asmString> {
+ def NAME : T_HVX_predAlu <asmString, VecPredRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_predAlu <asmString, VecPredRegs128B>;
+}
+
+defm V6_pred_and : T_HVX_predAlu <"$dst = and($src1,$src2)">, V6_pred_and_enc;
+defm V6_pred_or : T_HVX_predAlu <"$dst = or($src1,$src2)">, V6_pred_or_enc;
+defm V6_pred_xor : T_HVX_predAlu <"$dst = xor($src1,$src2)">, V6_pred_xor_enc;
+defm V6_pred_or_n : T_HVX_predAlu <"$dst = or($src1,!$src2)">, V6_pred_or_n_enc;
+defm V6_pred_and_n :
+ T_HVX_predAlu <"$dst = and($src1,!$src2)">, V6_pred_and_n_enc;
+
+let Itinerary = CVI_VA, Type = TypeCVI_VA in
+class T_HVX_prednot <RegisterClass RC>
+ : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1),
+ "$dst = not($src1)">, V6_pred_not_enc;
+
+def V6_pred_not : T_HVX_prednot <VecPredRegs>;
+let isCodeGenOnly = 1 in
+def V6_pred_not_128B : T_HVX_prednot <VecPredRegs128B>;
+
+let Itinerary = CVI_VA, Type = TypeCVI_VA in
+class T_HVX_vcmp2 <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2),
+ asmString >;
+
+multiclass T_HVX_vcmp2 <string asmString> {
+ def NAME : T_HVX_vcmp2 <asmString, VecPredRegs, VectorRegs>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vcmp2 <asmString, VecPredRegs128B, VectorRegs128B>;
+}
+
+defm V6_veqb : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.b,$src2.b)">, V6_veqb_enc;
+defm V6_veqh : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.h,$src2.h)">, V6_veqh_enc;
+defm V6_veqw : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.w,$src2.w)">, V6_veqw_enc;
+defm V6_vgtb : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.b,$src2.b)">, V6_vgtb_enc;
+defm V6_vgth : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.h,$src2.h)">, V6_vgth_enc;
+defm V6_vgtw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.w,$src2.w)">, V6_vgtw_enc;
+defm V6_vgtub : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_enc;
+defm V6_vgtuh : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_enc;
+defm V6_vgtuw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_enc;
+
+let isAccumulator = 1, hasNewValue = 1, hasSideEffects = 0 in
+class T_V6_vandqrt_acc <RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_Resource_late<(outs RCout:$dst),
+ (ins RCout:$_src_, RCin:$src1, IntRegs:$src2),
+ "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandqrt_acc_enc;
+
+def V6_vandqrt_acc : T_V6_vandqrt_acc <VectorRegs, VecPredRegs>;
+let isCodeGenOnly = 1 in
+def V6_vandqrt_acc_128B : T_V6_vandqrt_acc <VectorRegs128B, VecPredRegs128B>;
+
+let isAccumulator = 1 in
+class T_V6_vandvrt_acc <RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_Resource_late<(outs RCout:$dst),
+ (ins RCout:$_src_, RCin:$src1, IntRegs:$src2),
+ "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandvrt_acc_enc;
+
+def V6_vandvrt_acc : T_V6_vandvrt_acc <VecPredRegs, VectorRegs>;
+let isCodeGenOnly = 1 in
+def V6_vandvrt_acc_128B : T_V6_vandvrt_acc <VecPredRegs128B, VectorRegs128B>;
+
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_V6_vandqrt <RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_Resource_late<(outs RCout:$dst),
+ (ins RCin:$src1, IntRegs:$src2),
+ "$dst = vand($src1,$src2)" >, V6_vandqrt_enc;
+
+def V6_vandqrt : T_V6_vandqrt <VectorRegs, VecPredRegs>;
+let isCodeGenOnly = 1 in
+def V6_vandqrt_128B : T_V6_vandqrt <VectorRegs128B, VecPredRegs128B>;
+
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_V6_lvsplatw <RegisterClass RC>
+ : CVI_VX_Resource_late<(outs RC:$dst), (ins IntRegs:$src1),
+ "$dst = vsplat($src1)" >, V6_lvsplatw_enc;
+
+def V6_lvsplatw : T_V6_lvsplatw <VectorRegs>;
+let isCodeGenOnly = 1 in
+def V6_lvsplatw_128B : T_V6_lvsplatw <VectorRegs128B>;
+
+
+let hasNewValue = 1 in
+class T_V6_vinsertwr <RegisterClass RC>
+ : CVI_VX_Resource_late<(outs RC:$dst), (ins RC:$_src_, IntRegs:$src1),
+ "$dst.w = vinsert($src1)", [], "$dst = $_src_">,
+ V6_vinsertwr_enc;
+
+def V6_vinsertwr : T_V6_vinsertwr <VectorRegs>;
+let isCodeGenOnly = 1 in
+def V6_vinsertwr_128B : T_V6_vinsertwr <VectorRegs128B>;
+
+
+let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in
+class T_V6_pred_scalar2 <RegisterClass RC>
+ : CVI_VA_Resource1<(outs RC:$dst), (ins IntRegs:$src1),
+ "$dst = vsetq($src1)">, V6_pred_scalar2_enc;
+
+def V6_pred_scalar2 : T_V6_pred_scalar2 <VecPredRegs>;
+let isCodeGenOnly = 1 in
+def V6_pred_scalar2_128B : T_V6_pred_scalar2 <VecPredRegs128B>;
+
+class T_V6_vandvrt <RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_Resource_late<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2),
+ "$dst = vand($src1,$src2)">, V6_vandvrt_enc;
+
+def V6_vandvrt : T_V6_vandvrt <VecPredRegs, VectorRegs>;
+let isCodeGenOnly = 1 in
+def V6_vandvrt_128B : T_V6_vandvrt <VecPredRegs128B, VectorRegs128B>;
+
+let validSubTargets = HasV60SubT in
+class T_HVX_rol <string asmString, RegisterClass RC, Operand ImmOp >
+ : SInst2 <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2), asmString>;
+
+class T_HVX_rol_R <string asmString>
+ : T_HVX_rol <asmString, IntRegs, u5Imm>;
+class T_HVX_rol_P <string asmString>
+ : T_HVX_rol <asmString, DoubleRegs, u6Imm>;
+
+def S6_rol_i_p : T_HVX_rol_P <"$dst = rol($src1,#$src2)">, S6_rol_i_p_enc;
+let hasNewValue = 1, opNewValue = 0 in
+def S6_rol_i_r : T_HVX_rol_R <"$dst = rol($src1,#$src2)">, S6_rol_i_r_enc;
+
+let validSubTargets = HasV60SubT in
+class T_HVX_rol_acc <string asmString, RegisterClass RC, Operand ImmOp>
+ : SInst2 <(outs RC:$dst), (ins RC:$_src_, RC:$src1, ImmOp:$src2),
+ asmString, [], "$dst = $_src_" >;
+
+class T_HVX_rol_acc_P <string asmString>
+ : T_HVX_rol_acc <asmString, DoubleRegs, u6Imm>;
+
+class T_HVX_rol_acc_R <string asmString>
+ : T_HVX_rol_acc <asmString, IntRegs, u5Imm>;
+
+def S6_rol_i_p_nac :
+ T_HVX_rol_acc_P <"$dst -= rol($src1,#$src2)">, S6_rol_i_p_nac_enc;
+def S6_rol_i_p_acc :
+ T_HVX_rol_acc_P <"$dst += rol($src1,#$src2)">, S6_rol_i_p_acc_enc;
+def S6_rol_i_p_and :
+ T_HVX_rol_acc_P <"$dst &= rol($src1,#$src2)">, S6_rol_i_p_and_enc;
+def S6_rol_i_p_or :
+ T_HVX_rol_acc_P <"$dst |= rol($src1,#$src2)">, S6_rol_i_p_or_enc;
+def S6_rol_i_p_xacc :
+ T_HVX_rol_acc_P<"$dst ^= rol($src1,#$src2)">, S6_rol_i_p_xacc_enc;
+
+let hasNewValue = 1, opNewValue = 0 in {
+def S6_rol_i_r_nac :
+ T_HVX_rol_acc_R <"$dst -= rol($src1,#$src2)">, S6_rol_i_r_nac_enc;
+def S6_rol_i_r_acc :
+ T_HVX_rol_acc_R <"$dst += rol($src1,#$src2)">, S6_rol_i_r_acc_enc;
+def S6_rol_i_r_and :
+ T_HVX_rol_acc_R <"$dst &= rol($src1,#$src2)">, S6_rol_i_r_and_enc;
+def S6_rol_i_r_or :
+ T_HVX_rol_acc_R <"$dst |= rol($src1,#$src2)">, S6_rol_i_r_or_enc;
+def S6_rol_i_r_xacc :
+ T_HVX_rol_acc_R <"$dst ^= rol($src1,#$src2)">, S6_rol_i_r_xacc_enc;
+}
+
+let isSolo = 1, Itinerary = LD_tc_ld_SLOT0, Type = TypeLD in
+class T_V6_extractw <RegisterClass RC>
+ : LD1Inst <(outs IntRegs:$dst), (ins RC:$src1, IntRegs:$src2),
+ "$dst = vextract($src1,$src2)">, V6_extractw_enc;
+
+def V6_extractw : T_V6_extractw <VectorRegs>;
+let isCodeGenOnly = 1 in
+def V6_extractw_128B : T_V6_extractw <VectorRegs128B>;
+
+let Itinerary = ST_tc_st_SLOT0, validSubTargets = HasV55SubT in
+class T_sys0op <string asmString>
+ : ST1Inst <(outs), (ins), asmString>;
+
+let isSolo = 1, validSubTargets = HasV55SubT in {
+def Y5_l2gunlock : T_sys0op <"l2gunlock">, Y5_l2gunlock_enc;
+def Y5_l2gclean : T_sys0op <"l2gclean">, Y5_l2gclean_enc;
+def Y5_l2gcleaninv : T_sys0op <"l2gcleaninv">, Y5_l2gcleaninv_enc;
+}
+
+class T_sys1op <string asmString, RegisterClass RC>
+ : ST1Inst <(outs), (ins RC:$src1), asmString>;
+
+class T_sys1op_R <string asmString> : T_sys1op <asmString, IntRegs>;
+class T_sys1op_P <string asmString> : T_sys1op <asmString, DoubleRegs>;
+
+let isSoloAX = 1, validSubTargets = HasV55SubT in
+def Y5_l2unlocka : T_sys1op_R <"l2unlocka($src1)">, Y5_l2unlocka_enc;
+
+let isSolo = 1, validSubTargets = HasV60SubT in {
+def Y6_l2gcleanpa : T_sys1op_P <"l2gclean($src1)">, Y6_l2gcleanpa_enc;
+def Y6_l2gcleaninvpa : T_sys1op_P <"l2gcleaninv($src1)">, Y6_l2gcleaninvpa_enc;
+}
+
+let Itinerary = ST_tc_3stall_SLOT0, isPredicateLate = 1, isSoloAX = 1,
+ validSubTargets = HasV55SubT in
+def Y5_l2locka : ST1Inst <(outs PredRegs:$dst), (ins IntRegs:$src1),
+ "$dst = l2locka($src1)">, Y5_l2locka_enc;
+
+// Not defined on the encoding side. Why?
+// defm S2_cabacencbin : _VV <"Rdd=encbin(Rss,$src2,Pu)">, S2_cabacencbin_enc;
+
+let Defs = [USR_OVF], Itinerary = M_tc_3stall_SLOT23, isPredicateLate = 1,
+ hasSideEffects = 0,
+validSubTargets = HasV55SubT in
+def A5_ACS : MInst2 <(outs DoubleRegs:$dst1, PredRegs:$dst2),
+ (ins DoubleRegs:$_src_, DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst1,$dst2 = vacsh($src1,$src2)", [],
+ "$dst1 = $_src_" >, Requires<[HasV55T]>, A5_ACS_enc;
+
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, hasNewValue = 1,
+ hasSideEffects = 0 in
+class T_HVX_alu2 <string asmString, RegisterClass RCout, RegisterClass RCin1,
+ RegisterClass RCin2>
+ : CVI_VA_Resource1<(outs RCout:$dst),
+ (ins RCin1:$src1, RCin2:$src2, RCin2:$src3), asmString>;
+
+multiclass T_HVX_alu2 <string asmString, RegisterClass RC > {
+ def NAME : T_HVX_alu2 <asmString, RC, VecPredRegs, VectorRegs>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_alu2 <asmString, !cast<RegisterClass>(RC#"128B"),
+ VecPredRegs128B, VectorRegs128B>;
+}
+
+multiclass T_HVX_alu2_V <string asmString> :
+ T_HVX_alu2 <asmString, VectorRegs>;
+
+multiclass T_HVX_alu2_W <string asmString> :
+ T_HVX_alu2 <asmString, VecDblRegs>;
+
+defm V6_vswap : T_HVX_alu2_W <"$dst = vswap($src1,$src2,$src3)">, V6_vswap_enc;
+
+let Itinerary = CVI_VA, Type = TypeCVI_VA, hasNewValue = 1,
+ hasSideEffects = 0 in
+defm V6_vmux : T_HVX_alu2_V <"$dst = vmux($src1,$src2,$src3)">, V6_vmux_enc;
+
+class T_HVX_vlutb <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1<(outs RCout:$dst),
+ (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3), asmString>;
+
+multiclass T_HVX_vlutb <string asmString, RegisterClass RCout,
+ RegisterClass RCin> {
+ def NAME : T_HVX_vlutb <asmString, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vlutb <asmString, !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
+
+multiclass T_HVX_vlutb_V <string asmString> :
+ T_HVX_vlutb <asmString, VectorRegs, VectorRegs>;
+
+multiclass T_HVX_vlutb_W <string asmString> :
+ T_HVX_vlutb <asmString, VecDblRegs, VectorRegs>;
+
+let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, isAccumulator = 1 in
+class T_HVX_vlutb_acc <string asmString, RegisterClass RCout,
+ RegisterClass RCin>
+ : CVI_VA_Resource1<(outs RCout:$dst),
+ (ins RCout:$_src_, RCin:$src1, RCin:$src2, IntRegsLow8:$src3),
+ asmString, [], "$dst = $_src_">;
+
+multiclass T_HVX_vlutb_acc <string asmString, RegisterClass RCout,
+ RegisterClass RCin> {
+ def NAME : T_HVX_vlutb_acc <asmString, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vlutb_acc<asmString,
+ !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
+
+multiclass T_HVX_vlutb_acc_V <string asmString> :
+ T_HVX_vlutb_acc <asmString, VectorRegs, VectorRegs>;
+
+multiclass T_HVX_vlutb_acc_W <string asmString> :
+ T_HVX_vlutb_acc <asmString, VecDblRegs, VectorRegs>;
+
+
+let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1 in
+defm V6_vlutvvb:
+ T_HVX_vlutb_V <"$dst.b = vlut32($src1.b,$src2.b,$src3)">, V6_vlutvvb_enc;
+
+let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, hasNewValue = 1 in
+defm V6_vlutvwh:
+ T_HVX_vlutb_W <"$dst.h = vlut16($src1.b,$src2.h,$src3)">, V6_vlutvwh_enc;
+
+let hasNewValue = 1 in {
+ defm V6_vlutvvb_oracc:
+ T_HVX_vlutb_acc_V <"$dst.b |= vlut32($src1.b,$src2.b,$src3)">,
+ V6_vlutvvb_oracc_enc;
+ defm V6_vlutvwh_oracc:
+ T_HVX_vlutb_acc_W <"$dst.h |= vlut16($src1.b,$src2.h,$src3)">,
+ V6_vlutvwh_oracc_enc;
+}
+
+// It's a fake instruction and should not be defined?
+def S2_cabacencbin
+ : SInst2<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ "$dst = encbin($src1,$src2,$src3)">, S2_cabacencbin_enc;
+
+// Vhist instructions
+def V6_vhistq
+ : CVI_HIST_Resource1 <(outs), (ins VecPredRegs:$src1),
+ "vhist($src1)">, V6_vhistq_enc;
+
+def V6_vhist
+ : CVI_HIST_Resource1 <(outs), (ins),
+ "vhist" >, V6_vhist_enc;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td
new file mode 100644
index 0000000..96dd531
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td
@@ -0,0 +1,526 @@
+//===- HexagonInstrInfoVector.td - Hexagon Vector Patterns -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon Vector instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+def V2I1: PatLeaf<(v2i1 PredRegs:$R)>;
+def V4I1: PatLeaf<(v4i1 PredRegs:$R)>;
+def V8I1: PatLeaf<(v8i1 PredRegs:$R)>;
+def V4I8: PatLeaf<(v4i8 IntRegs:$R)>;
+def V2I16: PatLeaf<(v2i16 IntRegs:$R)>;
+def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>;
+def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
+def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;
+
+
+multiclass bitconvert_32<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a IntRegs:$src))),
+ (b IntRegs:$src)>;
+ def : Pat <(a (bitconvert (b IntRegs:$src))),
+ (a IntRegs:$src)>;
+}
+
+multiclass bitconvert_64<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a DoubleRegs:$src))),
+ (b DoubleRegs:$src)>;
+ def : Pat <(a (bitconvert (b DoubleRegs:$src))),
+ (a DoubleRegs:$src)>;
+}
+
+multiclass bitconvert_vec<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a VectorRegs:$src))),
+ (b VectorRegs:$src)>;
+ def : Pat <(a (bitconvert (b VectorRegs:$src))),
+ (a VectorRegs:$src)>;
+}
+
+multiclass bitconvert_dblvec<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a VecDblRegs:$src))),
+ (b VecDblRegs:$src)>;
+ def : Pat <(a (bitconvert (b VecDblRegs:$src))),
+ (a VecDblRegs:$src)>;
+}
+
+multiclass bitconvert_predvec<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a VecPredRegs:$src))),
+ (b VectorRegs:$src)>;
+ def : Pat <(a (bitconvert (b VectorRegs:$src))),
+ (a VecPredRegs:$src)>;
+}
+
+multiclass bitconvert_dblvec128B<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a VecDblRegs128B:$src))),
+ (b VecDblRegs128B:$src)>;
+ def : Pat <(a (bitconvert (b VecDblRegs128B:$src))),
+ (a VecDblRegs128B:$src)>;
+}
+
+// Bit convert vector types.
+defm : bitconvert_32<v4i8, i32>;
+defm : bitconvert_32<v2i16, i32>;
+defm : bitconvert_32<v2i16, v4i8>;
+
+defm : bitconvert_64<v8i8, i64>;
+defm : bitconvert_64<v4i16, i64>;
+defm : bitconvert_64<v2i32, i64>;
+defm : bitconvert_64<v8i8, v4i16>;
+defm : bitconvert_64<v8i8, v2i32>;
+defm : bitconvert_64<v4i16, v2i32>;
+
+defm : bitconvert_vec<v64i8, v16i32>;
+defm : bitconvert_vec<v8i64 , v16i32>;
+defm : bitconvert_vec<v32i16, v16i32>;
+
+defm : bitconvert_dblvec<v16i64, v128i8>;
+defm : bitconvert_dblvec<v32i32, v128i8>;
+defm : bitconvert_dblvec<v64i16, v128i8>;
+
+defm : bitconvert_dblvec128B<v64i32, v128i16>;
+defm : bitconvert_dblvec128B<v256i8, v128i16>;
+defm : bitconvert_dblvec128B<v32i64, v128i16>;
+
+defm : bitconvert_dblvec128B<v64i32, v256i8>;
+defm : bitconvert_dblvec128B<v32i64, v256i8>;
+defm : bitconvert_dblvec128B<v128i16, v256i8>;
+
+// Vector shift support. Vector shifting in Hexagon is rather different from
+// LLVM's internal representation.
+// LLVM assumes all vector shifts have the form
+//   <VT> = SHL/SRA/SRL <VT> by <VT>
+// while Hexagon uses the form
+//   <VT> = SHL/SRA/SRL <VT> by <IT/i32>
+// i.e. every lane is shifted by the same scalar amount. As a result, special
+// care is needed to guarantee correctness and performance.
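+//
+// A minimal C sketch (illustrative only, not part of the instruction
+// definitions) of the per-lane behavior the patterns below match, assuming a
+// uniform shift amount c:
+//   void asl_v2i32(int32_t v[2], unsigned c) {     // c in 0..31
+//     for (int i = 0; i < 2; ++i)
+//       v[i] = (int32_t)((uint32_t)v[i] << c);     // every lane shifted by the same c
+//   }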
+class vshift_v4i16<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
+ : S_2OpInstImm<Str, MajOp, MinOp, u4Imm,
+ [(set (v4i16 DoubleRegs:$dst),
+ (Op (v4i16 DoubleRegs:$src1), u4ImmPred:$src2))]> {
+ bits<4> src2;
+ let Inst{11-8} = src2;
+}
+
+class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
+ : S_2OpInstImm<Str, MajOp, MinOp, u5Imm,
+ [(set (v2i32 DoubleRegs:$dst),
+ (Op (v2i32 DoubleRegs:$src1), u5ImmPred:$src2))]> {
+ bits<5> src2;
+ let Inst{12-8} = src2;
+}
+
+def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
+ (A2_svaddh IntRegs:$src1, IntRegs:$src2)>;
+
+def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
+ (A2_svsubh IntRegs:$src1, IntRegs:$src2)>;
+
+def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>;
+def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>;
+def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>;
+
+def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>;
+def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>;
+def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>;
+
+
+def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
+def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;
+
+// Replicate the low 8 bits of the 32-bit input register into each of the
+// four bytes of the 32-bit destination register.
+def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
+
+// Replicate the low 16 bits of the 32-bit input register into each of the
+// four halfwords of the 64-bit destination register.
+def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
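+
+// A minimal C sketch of the two splat operations above (helper names are
+// illustrative only):
+//   uint32_t splat_b(uint32_t rs) {                // S2_vsplatrb
+//     uint32_t b = rs & 0xff;
+//     return b | (b << 8) | (b << 16) | (b << 24);
+//   }
+//   uint64_t splat_h(uint32_t rs) {                // S2_vsplatrh
+//     uint64_t h = rs & 0xffff;
+//     return h | (h << 16) | (h << 32) | (h << 48);
+//   }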
+
+
+class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
+ : Pat <(Op Type:$Rss, Type:$Rtt),
+ (MI Type:$Rss, Type:$Rtt)>;
+
+def: VArith_pat <A2_vaddub, add, V8I8>;
+def: VArith_pat <A2_vaddh, add, V4I16>;
+def: VArith_pat <A2_vaddw, add, V2I32>;
+def: VArith_pat <A2_vsubub, sub, V8I8>;
+def: VArith_pat <A2_vsubh, sub, V4I16>;
+def: VArith_pat <A2_vsubw, sub, V2I32>;
+
+def: VArith_pat <A2_and, and, V2I16>;
+def: VArith_pat <A2_xor, xor, V2I16>;
+def: VArith_pat <A2_or, or, V2I16>;
+
+def: VArith_pat <A2_andp, and, V8I8>;
+def: VArith_pat <A2_andp, and, V4I16>;
+def: VArith_pat <A2_andp, and, V2I32>;
+def: VArith_pat <A2_orp, or, V8I8>;
+def: VArith_pat <A2_orp, or, V4I16>;
+def: VArith_pat <A2_orp, or, V2I32>;
+def: VArith_pat <A2_xorp, xor, V8I8>;
+def: VArith_pat <A2_xorp, xor, V4I16>;
+def: VArith_pat <A2_xorp, xor, V2I32>;
+
+def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+ (i32 u5ImmPred:$c))))),
+ (S2_asr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+ (i32 u5ImmPred:$c))))),
+ (S2_lsr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+ (i32 u5ImmPred:$c))))),
+ (S2_asl_i_vw V2I32:$b, imm:$c)>;
+
+def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+ (S2_asr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+ (S2_lsr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+ (S2_asl_i_vh V4I16:$b, imm:$c)>;
+
+
+def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
+def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;
+
+def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;
+
+def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5ImmPred:$u5)),
+ (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4ImmPred:$u4)),
+ (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5ImmPred:$u5)),
+ (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4ImmPred:$u4)),
+ (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5ImmPred:$u5)),
+ (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4ImmPred:$u4)),
+ (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;
+
+// Vector shift words by register
+def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>;
+def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>;
+def S2_asl_r_vw : T_S3op_shiftVect < "vaslw", 0b00, 0b10>;
+def S2_lsl_r_vw : T_S3op_shiftVect < "vlslw", 0b00, 0b11>;
+
+// Vector shift halfwords by register
+def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>;
+def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>;
+def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>;
+def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>;
+
+class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value>
+ : Pat <(Op Value:$Rs, I32:$Rt),
+ (MI Value:$Rs, I32:$Rt)>;
+
+def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>;
+def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>;
+def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>;
+def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>;
+def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>;
+def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>;
+
+
+def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
+def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
+def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;
+
+def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
+
+
+class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value>
+ : Pat <(i1 (Op Value:$Rs, Value:$Rt)),
+ (MI Value:$Rs, Value:$Rt)>;
+
+def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>;
+def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>;
+def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;
+
+def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>;
+def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>;
+def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;
+
+def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>;
+def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>;
+def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;
+
+
+class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
+ : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
+ (MI InVal:$Rs, InVal:$Rt)>;
+
+def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>;
+def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>;
+def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;
+
+def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>;
+def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>;
+def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;
+
+
+// Hexagon doesn't have a vector multiply with C semantics.
+// Instead, generate a pseudo instruction that is later expanded into two
+// scalar MPYI instructions by ExpandPostRAPseudos.
+let isPseudo = 1 in
+def VMULW : PseudoM<(outs DoubleRegs:$Rd),
+ (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"Should never try to emit VMULW\"",
+ [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>;
+
+let isPseudo = 1 in
+def VMULW_ACC : PseudoM<(outs DoubleRegs:$Rd),
+ (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"Should never try to emit VMULW_ACC\"",
+ [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))],
+ "$Rd = $Rx">;
+
+// Add two v4i8 vectors: Hexagon has no instruction for this, so use the
+// 64-bit v8i8 add and keep only the low half of the result.
+def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+ (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>;
+
+// Subtract two v4i8 vectors: Hexagon has no instruction for this, so use the
+// 64-bit v8i8 subtract and keep only the low half of the result.
+def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+ (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>;
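+
+// A minimal C sketch (illustrative only) of the per-lane result of the two
+// patterns above; the upper half of the 64-bit temporary is simply dropped:
+//   void add_v4i8(uint8_t d[4], const uint8_t a[4], const uint8_t b[4]) {
+//     for (int i = 0; i < 4; ++i)
+//       d[i] = (uint8_t)(a[i] + b[i]);
+//   }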
+
+//
+// No 32-bit vector mux.
+//
+def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
+ (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
+def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
+ (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
+
+//
+// 64-bit vector mux.
+//
+def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
+ (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
+def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
+ (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
+def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
+ (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
+
+//
+// No 32-bit vector compare.
+//
+def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
+ (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
+ (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
+ (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>;
+
+def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>;
+
+
+class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
+ ValueType CmpTy>
+ : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
+ (InvMI Value:$Rt, Value:$Rs)>;
+
+// Map a compare operation to the corresponding instruction with the order of
+// operands reversed, e.g. x < y --> cmp.gt(y,x).
+def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>;
+def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>;
+def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>;
+def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>;
+def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>;
+def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>;
+
+def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>;
+def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>;
+def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
+def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
+def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
+def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;
+
+// Map from vcmpne(Rss) -> !vcmpew(Rss).
+// rs != rt -> !(rs == rt).
+def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
+ (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>;
+
+
+// Truncate: from vector B copy all 'E'ven 'B'yte elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
+def: Pat<(v4i8 (trunc V4I16:$Rs)),
+ (S2_vtrunehb V4I16:$Rs)>;
+
+// Truncate: from vector B copy all 'O'dd 'B'yte elements:
+// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
+// S2_vtrunohb
+
+// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
+// S2_vtrunewh
+
+def: Pat<(v2i16 (trunc V2I32:$Rs)),
+ (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;
+
+
+def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
+def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;
+
+def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
+def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;
+
+def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
+
+// Sign extends a v2i8 into a v2i32.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
+ (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;
+
+// Sign extends a v2i16 into a v2i32.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
+ (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;
+
+
+// Multiplies two v2i16 and returns a v2i32. The saturating multiply is used
+// here because Hexagon does not provide a non-saturating vector multiply, and
+// saturation cannot affect a result that has twice the precision of the
+// operands.
+
+// Multiplies two v2i16 vectors: since Hexagon has no multiply with C
+// semantics for this type, this pattern uses the halfword multiply vmpyh,
+// which takes two v2i16 and returns a v2i32. The result is then truncated
+// back to v2i16, which reproduces C's wrap-around semantics.
+def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
+ (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;
+
+def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
+ (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
+ (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
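+
+// A minimal C sketch (illustrative only) of the wrap-around result the
+// pattern above produces:
+//   void mul_v2i16(int16_t d[2], const int16_t a[2], const int16_t b[2]) {
+//     for (int i = 0; i < 2; ++i)
+//       d[i] = (int16_t)(a[i] * b[i]);     // vmpyh result truncated to 16 bits
+//   }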
+
+// Multiplies two v4i16 vectors.
+def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
+ (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)),
+ (vmpyh (LoReg $Rs), (LoReg $Rt)))>;
+
+def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt),
+ (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
+ (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;
+
+// Multiplies two v4i8 vectors.
+def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
+ (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
+ Requires<[HasV5T]>;
+
+def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
+ (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;
+
+// Multiplies two v8i8 vectors.
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
+ (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))),
+ (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>,
+ Requires<[HasV5T]>;
+
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
+ (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
+ (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;
+
+
+class shuffler<SDNode Op, string Str>
+ : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c),
+ "$a = " # Str # "($b, $c)",
+ [(set (i64 DoubleRegs:$a),
+ (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))],
+ "", S_3op_tc_1_SLOT23>;
+
+def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;
+
+def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
+def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
+def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
+def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;
+
+class ShufflePat<InstHexagon MI, SDNode Op>
+ : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
+def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;
+
+// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
+def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;
+
+// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
+def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;
+
+// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
+def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;
+
+
+// Truncated store from v4i16 to v4i8.
+def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr),
+ [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;
+
+// Truncated store from v2i32 to v2i16.
+def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr),
+ [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;
+
+def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt),
+ (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs),
+ (LoReg $Rs))))>;
+
+def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt),
+ (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>;
+
+
+// Zero and sign extended load from v2i8 into v2i16.
+def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr),
+ [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
+
+def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr),
+ [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
+
+def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)),
+ (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>;
+
+def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)),
+ (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>;
+
+def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
+ (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>;
+
+def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
+ (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
new file mode 100644
index 0000000..b207aaf
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -0,0 +1,1293 @@
+//===-- HexagonIntrinsics.td - Instruction intrinsics ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is populated based on the following specs:
+// Hexagon V2 Architecture
+// Application-Level Specification
+// 80-V9418-8 Rev. B
+// March 4, 2008
+//===----------------------------------------------------------------------===//
+
+class T_I_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID imm:$Is),
+ (MI imm:$Is)>;
+
+class T_R_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs),
+ (MI I32:$Rs)>;
+
+class T_P_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs),
+ (MI DoubleRegs:$Rs)>;
+
+class T_II_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2>
+ : Pat<(IntID Imm1:$Is, Imm2:$It),
+ (MI Imm1:$Is, Imm2:$It)>;
+
+class T_RI_pat <InstHexagon MI, Intrinsic IntID, PatLeaf ImmPred = PatLeaf<(i32 imm)>>
+ : Pat<(IntID I32:$Rs, ImmPred:$It),
+ (MI I32:$Rs, ImmPred:$It)>;
+
+class T_IR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred = PatLeaf<(i32 imm)>>
+ : Pat<(IntID ImmPred:$Is, I32:$Rt),
+ (MI ImmPred:$Is, I32:$Rt)>;
+
+class T_PI_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID I64:$Rs, imm:$It),
+ (MI DoubleRegs:$Rs, imm:$It)>;
+
+class T_RP_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID I32:$Rs, I64:$Rt),
+ (MI I32:$Rs, DoubleRegs:$Rt)>;
+
+class T_RR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs, I32:$Rt),
+ (MI I32:$Rs, I32:$Rt)>;
+
+class T_PP_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I64:$Rt),
+ (MI DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+class T_QII_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2>
+ : Pat <(IntID (i32 PredRegs:$Ps), Imm1:$Is, Imm2:$It),
+ (MI PredRegs:$Ps, Imm1:$Is, Imm2:$It)>;
+
+class T_QRI_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred>
+ : Pat <(IntID (i32 PredRegs:$Ps), I32:$Rs, ImmPred:$Is),
+ (MI PredRegs:$Ps, I32:$Rs, ImmPred:$Is)>;
+
+class T_QIR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred>
+ : Pat <(IntID (i32 PredRegs:$Ps), ImmPred:$Is, I32:$Rs),
+ (MI PredRegs:$Ps, ImmPred:$Is, I32:$Rs)>;
+
+class T_RRI_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs, I32:$Rt, imm:$Iu),
+ (MI I32:$Rs, I32:$Rt, imm:$Iu)>;
+
+class T_RII_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs, imm:$It, imm:$Iu),
+ (MI I32:$Rs, imm:$It, imm:$Iu)>;
+
+class T_IRI_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID imm:$It, I32:$Rs, imm:$Iu),
+ (MI imm:$It, I32:$Rs, imm:$Iu)>;
+
+class T_IRR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID imm:$Is, I32:$Rs, I32:$Rt),
+ (MI imm:$Is, I32:$Rs, I32:$Rt)>;
+
+class T_RIR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs, imm:$Is, I32:$Rt),
+ (MI I32:$Rs, imm:$Is, I32:$Rt)>;
+
+class T_RRR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I32:$Rs, I32:$Rt, I32:$Ru),
+ (MI I32:$Rs, I32:$Rt, I32:$Ru)>;
+
+class T_PPI_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I64:$Rt, imm:$Iu),
+ (MI DoubleRegs:$Rs, DoubleRegs:$Rt, imm:$Iu)>;
+
+class T_PII_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, imm:$It, imm:$Iu),
+ (MI DoubleRegs:$Rs, imm:$It, imm:$Iu)>;
+
+class T_PPP_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I64:$Rt, I64:$Ru),
+ (MI DoubleRegs:$Rs, DoubleRegs:$Rt, DoubleRegs:$Ru)>;
+
+class T_PPR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I64:$Rt, I32:$Ru),
+ (MI DoubleRegs:$Rs, DoubleRegs:$Rt, I32:$Ru)>;
+
+class T_PRR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I32:$Rt, I32:$Ru),
+ (MI DoubleRegs:$Rs, I32:$Rt, I32:$Ru)>;
+
+class T_PPQ_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I64:$Rt, (i32 PredRegs:$Ru)),
+ (MI DoubleRegs:$Rs, DoubleRegs:$Rt, PredRegs:$Ru)>;
+
+class T_PR_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID I64:$Rs, I32:$Rt),
+ (MI DoubleRegs:$Rs, I32:$Rt)>;
+
+class T_D_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID (F64:$Rs)),
+ (MI (F64:$Rs))>;
+
+class T_DI_pat <InstHexagon MI, Intrinsic IntID,
+ PatLeaf ImmPred = PatLeaf<(i32 imm)>>
+ : Pat<(IntID F64:$Rs, ImmPred:$It),
+ (MI F64:$Rs, ImmPred:$It)>;
+
+class T_F_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID F32:$Rs),
+ (MI F32:$Rs)>;
+
+class T_FI_pat <InstHexagon MI, Intrinsic IntID,
+ PatLeaf ImmPred = PatLeaf<(i32 imm)>>
+ : Pat<(IntID F32:$Rs, ImmPred:$It),
+ (MI F32:$Rs, ImmPred:$It)>;
+
+class T_FF_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID F32:$Rs, F32:$Rt),
+ (MI F32:$Rs, F32:$Rt)>;
+
+class T_DD_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID F64:$Rs, F64:$Rt),
+ (MI F64:$Rs, F64:$Rt)>;
+
+class T_FFF_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID F32:$Rs, F32:$Rt, F32:$Ru),
+ (MI F32:$Rs, F32:$Rt, F32:$Ru)>;
+
+class T_FFFQ_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat <(IntID F32:$Rs, F32:$Rt, F32:$Ru, (i32 PredRegs:$Rx)),
+ (MI F32:$Rs, F32:$Rt, F32:$Ru, PredRegs:$Rx)>;
+
+//===----------------------------------------------------------------------===//
+// MPYS / Multiply signed/unsigned halfwords
+//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
+//===----------------------------------------------------------------------===//
+
+def : T_RR_pat <M2_mpy_ll_s1, int_hexagon_M2_mpy_ll_s1>;
+def : T_RR_pat <M2_mpy_ll_s0, int_hexagon_M2_mpy_ll_s0>;
+def : T_RR_pat <M2_mpy_lh_s1, int_hexagon_M2_mpy_lh_s1>;
+def : T_RR_pat <M2_mpy_lh_s0, int_hexagon_M2_mpy_lh_s0>;
+def : T_RR_pat <M2_mpy_hl_s1, int_hexagon_M2_mpy_hl_s1>;
+def : T_RR_pat <M2_mpy_hl_s0, int_hexagon_M2_mpy_hl_s0>;
+def : T_RR_pat <M2_mpy_hh_s1, int_hexagon_M2_mpy_hh_s1>;
+def : T_RR_pat <M2_mpy_hh_s0, int_hexagon_M2_mpy_hh_s0>;
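+
+// At the source level these intrinsics are reached through the Hexagon
+// builtins; a minimal sketch, assuming the usual __builtin_HEXAGON_* naming:
+//   int mpy_ll(int rs, int rt) {
+//     return __builtin_HEXAGON_M2_mpy_ll_s0(rs, rt);   // Rd = mpy(Rs.l,Rt.l)
+//   }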
+
+def : T_RR_pat <M2_mpyu_ll_s1, int_hexagon_M2_mpyu_ll_s1>;
+def : T_RR_pat <M2_mpyu_ll_s0, int_hexagon_M2_mpyu_ll_s0>;
+def : T_RR_pat <M2_mpyu_lh_s1, int_hexagon_M2_mpyu_lh_s1>;
+def : T_RR_pat <M2_mpyu_lh_s0, int_hexagon_M2_mpyu_lh_s0>;
+def : T_RR_pat <M2_mpyu_hl_s1, int_hexagon_M2_mpyu_hl_s1>;
+def : T_RR_pat <M2_mpyu_hl_s0, int_hexagon_M2_mpyu_hl_s0>;
+def : T_RR_pat <M2_mpyu_hh_s1, int_hexagon_M2_mpyu_hh_s1>;
+def : T_RR_pat <M2_mpyu_hh_s0, int_hexagon_M2_mpyu_hh_s0>;
+
+def : T_RR_pat <M2_mpy_sat_ll_s1, int_hexagon_M2_mpy_sat_ll_s1>;
+def : T_RR_pat <M2_mpy_sat_ll_s0, int_hexagon_M2_mpy_sat_ll_s0>;
+def : T_RR_pat <M2_mpy_sat_lh_s1, int_hexagon_M2_mpy_sat_lh_s1>;
+def : T_RR_pat <M2_mpy_sat_lh_s0, int_hexagon_M2_mpy_sat_lh_s0>;
+def : T_RR_pat <M2_mpy_sat_hl_s1, int_hexagon_M2_mpy_sat_hl_s1>;
+def : T_RR_pat <M2_mpy_sat_hl_s0, int_hexagon_M2_mpy_sat_hl_s0>;
+def : T_RR_pat <M2_mpy_sat_hh_s1, int_hexagon_M2_mpy_sat_hh_s1>;
+def : T_RR_pat <M2_mpy_sat_hh_s0, int_hexagon_M2_mpy_sat_hh_s0>;
+
+def : T_RR_pat <M2_mpy_rnd_ll_s1, int_hexagon_M2_mpy_rnd_ll_s1>;
+def : T_RR_pat <M2_mpy_rnd_ll_s0, int_hexagon_M2_mpy_rnd_ll_s0>;
+def : T_RR_pat <M2_mpy_rnd_lh_s1, int_hexagon_M2_mpy_rnd_lh_s1>;
+def : T_RR_pat <M2_mpy_rnd_lh_s0, int_hexagon_M2_mpy_rnd_lh_s0>;
+def : T_RR_pat <M2_mpy_rnd_hl_s1, int_hexagon_M2_mpy_rnd_hl_s1>;
+def : T_RR_pat <M2_mpy_rnd_hl_s0, int_hexagon_M2_mpy_rnd_hl_s0>;
+def : T_RR_pat <M2_mpy_rnd_hh_s1, int_hexagon_M2_mpy_rnd_hh_s1>;
+def : T_RR_pat <M2_mpy_rnd_hh_s0, int_hexagon_M2_mpy_rnd_hh_s0>;
+
+def : T_RR_pat <M2_mpy_sat_rnd_ll_s1, int_hexagon_M2_mpy_sat_rnd_ll_s1>;
+def : T_RR_pat <M2_mpy_sat_rnd_ll_s0, int_hexagon_M2_mpy_sat_rnd_ll_s0>;
+def : T_RR_pat <M2_mpy_sat_rnd_lh_s1, int_hexagon_M2_mpy_sat_rnd_lh_s1>;
+def : T_RR_pat <M2_mpy_sat_rnd_lh_s0, int_hexagon_M2_mpy_sat_rnd_lh_s0>;
+def : T_RR_pat <M2_mpy_sat_rnd_hl_s1, int_hexagon_M2_mpy_sat_rnd_hl_s1>;
+def : T_RR_pat <M2_mpy_sat_rnd_hl_s0, int_hexagon_M2_mpy_sat_rnd_hl_s0>;
+def : T_RR_pat <M2_mpy_sat_rnd_hh_s1, int_hexagon_M2_mpy_sat_rnd_hh_s1>;
+def : T_RR_pat <M2_mpy_sat_rnd_hh_s0, int_hexagon_M2_mpy_sat_rnd_hh_s0>;
+
+
+//===----------------------------------------------------------------------===//
+// MPYS / Multiply signed/unsigned halfwords and add/subtract the
+// result to/from the accumulator.
+//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
+//===----------------------------------------------------------------------===//
+
+def : T_RRR_pat <M2_mpy_acc_ll_s1, int_hexagon_M2_mpy_acc_ll_s1>;
+def : T_RRR_pat <M2_mpy_acc_ll_s0, int_hexagon_M2_mpy_acc_ll_s0>;
+def : T_RRR_pat <M2_mpy_acc_lh_s1, int_hexagon_M2_mpy_acc_lh_s1>;
+def : T_RRR_pat <M2_mpy_acc_lh_s0, int_hexagon_M2_mpy_acc_lh_s0>;
+def : T_RRR_pat <M2_mpy_acc_hl_s1, int_hexagon_M2_mpy_acc_hl_s1>;
+def : T_RRR_pat <M2_mpy_acc_hl_s0, int_hexagon_M2_mpy_acc_hl_s0>;
+def : T_RRR_pat <M2_mpy_acc_hh_s1, int_hexagon_M2_mpy_acc_hh_s1>;
+def : T_RRR_pat <M2_mpy_acc_hh_s0, int_hexagon_M2_mpy_acc_hh_s0>;
+
+def : T_RRR_pat <M2_mpyu_acc_ll_s1, int_hexagon_M2_mpyu_acc_ll_s1>;
+def : T_RRR_pat <M2_mpyu_acc_ll_s0, int_hexagon_M2_mpyu_acc_ll_s0>;
+def : T_RRR_pat <M2_mpyu_acc_lh_s1, int_hexagon_M2_mpyu_acc_lh_s1>;
+def : T_RRR_pat <M2_mpyu_acc_lh_s0, int_hexagon_M2_mpyu_acc_lh_s0>;
+def : T_RRR_pat <M2_mpyu_acc_hl_s1, int_hexagon_M2_mpyu_acc_hl_s1>;
+def : T_RRR_pat <M2_mpyu_acc_hl_s0, int_hexagon_M2_mpyu_acc_hl_s0>;
+def : T_RRR_pat <M2_mpyu_acc_hh_s1, int_hexagon_M2_mpyu_acc_hh_s1>;
+def : T_RRR_pat <M2_mpyu_acc_hh_s0, int_hexagon_M2_mpyu_acc_hh_s0>;
+
+def : T_RRR_pat <M2_mpy_nac_ll_s1, int_hexagon_M2_mpy_nac_ll_s1>;
+def : T_RRR_pat <M2_mpy_nac_ll_s0, int_hexagon_M2_mpy_nac_ll_s0>;
+def : T_RRR_pat <M2_mpy_nac_lh_s1, int_hexagon_M2_mpy_nac_lh_s1>;
+def : T_RRR_pat <M2_mpy_nac_lh_s0, int_hexagon_M2_mpy_nac_lh_s0>;
+def : T_RRR_pat <M2_mpy_nac_hl_s1, int_hexagon_M2_mpy_nac_hl_s1>;
+def : T_RRR_pat <M2_mpy_nac_hl_s0, int_hexagon_M2_mpy_nac_hl_s0>;
+def : T_RRR_pat <M2_mpy_nac_hh_s1, int_hexagon_M2_mpy_nac_hh_s1>;
+def : T_RRR_pat <M2_mpy_nac_hh_s0, int_hexagon_M2_mpy_nac_hh_s0>;
+
+def : T_RRR_pat <M2_mpyu_nac_ll_s1, int_hexagon_M2_mpyu_nac_ll_s1>;
+def : T_RRR_pat <M2_mpyu_nac_ll_s0, int_hexagon_M2_mpyu_nac_ll_s0>;
+def : T_RRR_pat <M2_mpyu_nac_lh_s1, int_hexagon_M2_mpyu_nac_lh_s1>;
+def : T_RRR_pat <M2_mpyu_nac_lh_s0, int_hexagon_M2_mpyu_nac_lh_s0>;
+def : T_RRR_pat <M2_mpyu_nac_hl_s1, int_hexagon_M2_mpyu_nac_hl_s1>;
+def : T_RRR_pat <M2_mpyu_nac_hl_s0, int_hexagon_M2_mpyu_nac_hl_s0>;
+def : T_RRR_pat <M2_mpyu_nac_hh_s1, int_hexagon_M2_mpyu_nac_hh_s1>;
+def : T_RRR_pat <M2_mpyu_nac_hh_s0, int_hexagon_M2_mpyu_nac_hh_s0>;
+
+def : T_RRR_pat <M2_mpy_acc_sat_ll_s1, int_hexagon_M2_mpy_acc_sat_ll_s1>;
+def : T_RRR_pat <M2_mpy_acc_sat_ll_s0, int_hexagon_M2_mpy_acc_sat_ll_s0>;
+def : T_RRR_pat <M2_mpy_acc_sat_lh_s1, int_hexagon_M2_mpy_acc_sat_lh_s1>;
+def : T_RRR_pat <M2_mpy_acc_sat_lh_s0, int_hexagon_M2_mpy_acc_sat_lh_s0>;
+def : T_RRR_pat <M2_mpy_acc_sat_hl_s1, int_hexagon_M2_mpy_acc_sat_hl_s1>;
+def : T_RRR_pat <M2_mpy_acc_sat_hl_s0, int_hexagon_M2_mpy_acc_sat_hl_s0>;
+def : T_RRR_pat <M2_mpy_acc_sat_hh_s1, int_hexagon_M2_mpy_acc_sat_hh_s1>;
+def : T_RRR_pat <M2_mpy_acc_sat_hh_s0, int_hexagon_M2_mpy_acc_sat_hh_s0>;
+
+def : T_RRR_pat <M2_mpy_nac_sat_ll_s1, int_hexagon_M2_mpy_nac_sat_ll_s1>;
+def : T_RRR_pat <M2_mpy_nac_sat_ll_s0, int_hexagon_M2_mpy_nac_sat_ll_s0>;
+def : T_RRR_pat <M2_mpy_nac_sat_lh_s1, int_hexagon_M2_mpy_nac_sat_lh_s1>;
+def : T_RRR_pat <M2_mpy_nac_sat_lh_s0, int_hexagon_M2_mpy_nac_sat_lh_s0>;
+def : T_RRR_pat <M2_mpy_nac_sat_hl_s1, int_hexagon_M2_mpy_nac_sat_hl_s1>;
+def : T_RRR_pat <M2_mpy_nac_sat_hl_s0, int_hexagon_M2_mpy_nac_sat_hl_s0>;
+def : T_RRR_pat <M2_mpy_nac_sat_hh_s1, int_hexagon_M2_mpy_nac_sat_hh_s1>;
+def : T_RRR_pat <M2_mpy_nac_sat_hh_s0, int_hexagon_M2_mpy_nac_sat_hh_s0>;
+
+
+//===----------------------------------------------------------------------===//
+// Multiply signed/unsigned halfwords with and without saturation and rounding
+// into a 64-bit destination register.
+//===----------------------------------------------------------------------===//
+
+def : T_RR_pat <M2_mpyd_hh_s0, int_hexagon_M2_mpyd_hh_s0>;
+def : T_RR_pat <M2_mpyd_hl_s0, int_hexagon_M2_mpyd_hl_s0>;
+def : T_RR_pat <M2_mpyd_lh_s0, int_hexagon_M2_mpyd_lh_s0>;
+def : T_RR_pat <M2_mpyd_ll_s0, int_hexagon_M2_mpyd_ll_s0>;
+def : T_RR_pat <M2_mpyd_hh_s1, int_hexagon_M2_mpyd_hh_s1>;
+def : T_RR_pat <M2_mpyd_hl_s1, int_hexagon_M2_mpyd_hl_s1>;
+def : T_RR_pat <M2_mpyd_lh_s1, int_hexagon_M2_mpyd_lh_s1>;
+def : T_RR_pat <M2_mpyd_ll_s1, int_hexagon_M2_mpyd_ll_s1>;
+
+def : T_RR_pat <M2_mpyd_rnd_hh_s0, int_hexagon_M2_mpyd_rnd_hh_s0>;
+def : T_RR_pat <M2_mpyd_rnd_hl_s0, int_hexagon_M2_mpyd_rnd_hl_s0>;
+def : T_RR_pat <M2_mpyd_rnd_lh_s0, int_hexagon_M2_mpyd_rnd_lh_s0>;
+def : T_RR_pat <M2_mpyd_rnd_ll_s0, int_hexagon_M2_mpyd_rnd_ll_s0>;
+def : T_RR_pat <M2_mpyd_rnd_hh_s1, int_hexagon_M2_mpyd_rnd_hh_s1>;
+def : T_RR_pat <M2_mpyd_rnd_hl_s1, int_hexagon_M2_mpyd_rnd_hl_s1>;
+def : T_RR_pat <M2_mpyd_rnd_lh_s1, int_hexagon_M2_mpyd_rnd_lh_s1>;
+def : T_RR_pat <M2_mpyd_rnd_ll_s1, int_hexagon_M2_mpyd_rnd_ll_s1>;
+
+def : T_RR_pat <M2_mpyud_hh_s0, int_hexagon_M2_mpyud_hh_s0>;
+def : T_RR_pat <M2_mpyud_hl_s0, int_hexagon_M2_mpyud_hl_s0>;
+def : T_RR_pat <M2_mpyud_lh_s0, int_hexagon_M2_mpyud_lh_s0>;
+def : T_RR_pat <M2_mpyud_ll_s0, int_hexagon_M2_mpyud_ll_s0>;
+def : T_RR_pat <M2_mpyud_hh_s1, int_hexagon_M2_mpyud_hh_s1>;
+def : T_RR_pat <M2_mpyud_hl_s1, int_hexagon_M2_mpyud_hl_s1>;
+def : T_RR_pat <M2_mpyud_lh_s1, int_hexagon_M2_mpyud_lh_s1>;
+def : T_RR_pat <M2_mpyud_ll_s1, int_hexagon_M2_mpyud_ll_s1>;
+
+//===----------------------------------------------------------------------===//
+// MPYS / Multiply signed/unsigned halfwords and add/subtract the
+// result to/from the 64-bit destination register.
+//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
+//===----------------------------------------------------------------------===//
+
+def : T_PRR_pat <M2_mpyd_acc_hh_s0, int_hexagon_M2_mpyd_acc_hh_s0>;
+def : T_PRR_pat <M2_mpyd_acc_hl_s0, int_hexagon_M2_mpyd_acc_hl_s0>;
+def : T_PRR_pat <M2_mpyd_acc_lh_s0, int_hexagon_M2_mpyd_acc_lh_s0>;
+def : T_PRR_pat <M2_mpyd_acc_ll_s0, int_hexagon_M2_mpyd_acc_ll_s0>;
+
+def : T_PRR_pat <M2_mpyd_acc_hh_s1, int_hexagon_M2_mpyd_acc_hh_s1>;
+def : T_PRR_pat <M2_mpyd_acc_hl_s1, int_hexagon_M2_mpyd_acc_hl_s1>;
+def : T_PRR_pat <M2_mpyd_acc_lh_s1, int_hexagon_M2_mpyd_acc_lh_s1>;
+def : T_PRR_pat <M2_mpyd_acc_ll_s1, int_hexagon_M2_mpyd_acc_ll_s1>;
+
+def : T_PRR_pat <M2_mpyd_nac_hh_s0, int_hexagon_M2_mpyd_nac_hh_s0>;
+def : T_PRR_pat <M2_mpyd_nac_hl_s0, int_hexagon_M2_mpyd_nac_hl_s0>;
+def : T_PRR_pat <M2_mpyd_nac_lh_s0, int_hexagon_M2_mpyd_nac_lh_s0>;
+def : T_PRR_pat <M2_mpyd_nac_ll_s0, int_hexagon_M2_mpyd_nac_ll_s0>;
+
+def : T_PRR_pat <M2_mpyd_nac_hh_s1, int_hexagon_M2_mpyd_nac_hh_s1>;
+def : T_PRR_pat <M2_mpyd_nac_hl_s1, int_hexagon_M2_mpyd_nac_hl_s1>;
+def : T_PRR_pat <M2_mpyd_nac_lh_s1, int_hexagon_M2_mpyd_nac_lh_s1>;
+def : T_PRR_pat <M2_mpyd_nac_ll_s1, int_hexagon_M2_mpyd_nac_ll_s1>;
+
+def : T_PRR_pat <M2_mpyud_acc_hh_s0, int_hexagon_M2_mpyud_acc_hh_s0>;
+def : T_PRR_pat <M2_mpyud_acc_hl_s0, int_hexagon_M2_mpyud_acc_hl_s0>;
+def : T_PRR_pat <M2_mpyud_acc_lh_s0, int_hexagon_M2_mpyud_acc_lh_s0>;
+def : T_PRR_pat <M2_mpyud_acc_ll_s0, int_hexagon_M2_mpyud_acc_ll_s0>;
+
+def : T_PRR_pat <M2_mpyud_acc_hh_s1, int_hexagon_M2_mpyud_acc_hh_s1>;
+def : T_PRR_pat <M2_mpyud_acc_hl_s1, int_hexagon_M2_mpyud_acc_hl_s1>;
+def : T_PRR_pat <M2_mpyud_acc_lh_s1, int_hexagon_M2_mpyud_acc_lh_s1>;
+def : T_PRR_pat <M2_mpyud_acc_ll_s1, int_hexagon_M2_mpyud_acc_ll_s1>;
+
+def : T_PRR_pat <M2_mpyud_nac_hh_s0, int_hexagon_M2_mpyud_nac_hh_s0>;
+def : T_PRR_pat <M2_mpyud_nac_hl_s0, int_hexagon_M2_mpyud_nac_hl_s0>;
+def : T_PRR_pat <M2_mpyud_nac_lh_s0, int_hexagon_M2_mpyud_nac_lh_s0>;
+def : T_PRR_pat <M2_mpyud_nac_ll_s0, int_hexagon_M2_mpyud_nac_ll_s0>;
+
+def : T_PRR_pat <M2_mpyud_nac_hh_s1, int_hexagon_M2_mpyud_nac_hh_s1>;
+def : T_PRR_pat <M2_mpyud_nac_hl_s1, int_hexagon_M2_mpyud_nac_hl_s1>;
+def : T_PRR_pat <M2_mpyud_nac_lh_s1, int_hexagon_M2_mpyud_nac_lh_s1>;
+def : T_PRR_pat <M2_mpyud_nac_ll_s1, int_hexagon_M2_mpyud_nac_ll_s1>;
+
+// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat
+def : T_PP_pat <M2_vcmpy_s1_sat_i, int_hexagon_M2_vcmpy_s1_sat_i>;
+def : T_PP_pat <M2_vcmpy_s0_sat_i, int_hexagon_M2_vcmpy_s0_sat_i>;
+
+// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat
+def : T_PP_pat <M2_vcmpy_s1_sat_r, int_hexagon_M2_vcmpy_s1_sat_r>;
+def : T_PP_pat <M2_vcmpy_s0_sat_r, int_hexagon_M2_vcmpy_s0_sat_r>;
+
+// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat
+def : T_PP_pat <M2_vdmpys_s1, int_hexagon_M2_vdmpys_s1>;
+def : T_PP_pat <M2_vdmpys_s0, int_hexagon_M2_vdmpys_s0>;
+
+// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat
+def : T_PP_pat <M2_vmpy2es_s1, int_hexagon_M2_vmpy2es_s1>;
+def : T_PP_pat <M2_vmpy2es_s0, int_hexagon_M2_vmpy2es_s0>;
+
+//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PP_pat <M2_mmpyh_s0, int_hexagon_M2_mmpyh_s0>;
+def : T_PP_pat <M2_mmpyh_s1, int_hexagon_M2_mmpyh_s1>;
+def : T_PP_pat <M2_mmpyh_rs0, int_hexagon_M2_mmpyh_rs0>;
+def : T_PP_pat <M2_mmpyh_rs1, int_hexagon_M2_mmpyh_rs1>;
+
+//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PP_pat <M2_mmpyl_s0, int_hexagon_M2_mmpyl_s0>;
+def : T_PP_pat <M2_mmpyl_s1, int_hexagon_M2_mmpyl_s1>;
+def : T_PP_pat <M2_mmpyl_rs0, int_hexagon_M2_mmpyl_rs0>;
+def : T_PP_pat <M2_mmpyl_rs1, int_hexagon_M2_mmpyl_rs1>;
+
+//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PP_pat <M2_mmpyuh_s0, int_hexagon_M2_mmpyuh_s0>;
+def : T_PP_pat <M2_mmpyuh_s1, int_hexagon_M2_mmpyuh_s1>;
+def : T_PP_pat <M2_mmpyuh_rs0, int_hexagon_M2_mmpyuh_rs0>;
+def : T_PP_pat <M2_mmpyuh_rs1, int_hexagon_M2_mmpyuh_rs1>;
+
+//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PP_pat <M2_mmpyul_s0, int_hexagon_M2_mmpyul_s0>;
+def : T_PP_pat <M2_mmpyul_s1, int_hexagon_M2_mmpyul_s1>;
+def : T_PP_pat <M2_mmpyul_rs0, int_hexagon_M2_mmpyul_rs0>;
+def : T_PP_pat <M2_mmpyul_rs1, int_hexagon_M2_mmpyul_rs1>;
+
+// Vector reduce add unsigned bytes: Rdd32[+]=vrmpybu(Rss32,Rtt32)
+def : T_PP_pat <A2_vraddub, int_hexagon_A2_vraddub>;
+def : T_PPP_pat <A2_vraddub_acc, int_hexagon_A2_vraddub_acc>;
+
+// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt)
+def : T_PP_pat <A2_vrsadub, int_hexagon_A2_vrsadub>;
+def : T_PPP_pat <A2_vrsadub_acc, int_hexagon_A2_vrsadub_acc>;
+
+// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss)
+def : T_PP_pat <M2_vabsdiffh, int_hexagon_M2_vabsdiffh>;
+
+// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss)
+def : T_PP_pat <M2_vabsdiffw, int_hexagon_M2_vabsdiffw>;
+
+// Vector reduce complex multiply real or imaginary:
+// Rdd[+]=vrcmpy[ir](Rss,Rtt[*])
+def : T_PP_pat <M2_vrcmpyi_s0, int_hexagon_M2_vrcmpyi_s0>;
+def : T_PP_pat <M2_vrcmpyi_s0c, int_hexagon_M2_vrcmpyi_s0c>;
+def : T_PPP_pat <M2_vrcmaci_s0, int_hexagon_M2_vrcmaci_s0>;
+def : T_PPP_pat <M2_vrcmaci_s0c, int_hexagon_M2_vrcmaci_s0c>;
+
+def : T_PP_pat <M2_vrcmpyr_s0, int_hexagon_M2_vrcmpyr_s0>;
+def : T_PP_pat <M2_vrcmpyr_s0c, int_hexagon_M2_vrcmpyr_s0c>;
+def : T_PPP_pat <M2_vrcmacr_s0, int_hexagon_M2_vrcmacr_s0>;
+def : T_PPP_pat <M2_vrcmacr_s0c, int_hexagon_M2_vrcmacr_s0c>;
+
+// Vector reduce halfwords
+// Rdd[+]=vrmpyh(Rss,Rtt)
+def : T_PP_pat <M2_vrmpy_s0, int_hexagon_M2_vrmpy_s0>;
+def : T_PPP_pat <M2_vrmac_s0, int_hexagon_M2_vrmac_s0>;
+
+//===----------------------------------------------------------------------===//
+// Vector Multiply with accumulation
+//===----------------------------------------------------------------------===//
+
+// Vector multiply word by signed half with accumulation
+// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PPP_pat <M2_mmacls_s1, int_hexagon_M2_mmacls_s1>;
+def : T_PPP_pat <M2_mmacls_s0, int_hexagon_M2_mmacls_s0>;
+def : T_PPP_pat <M2_mmacls_rs1, int_hexagon_M2_mmacls_rs1>;
+def : T_PPP_pat <M2_mmacls_rs0, int_hexagon_M2_mmacls_rs0>;
+def : T_PPP_pat <M2_mmachs_s1, int_hexagon_M2_mmachs_s1>;
+def : T_PPP_pat <M2_mmachs_s0, int_hexagon_M2_mmachs_s0>;
+def : T_PPP_pat <M2_mmachs_rs1, int_hexagon_M2_mmachs_rs1>;
+def : T_PPP_pat <M2_mmachs_rs0, int_hexagon_M2_mmachs_rs0>;
+
+// Vector multiply word by unsigned half with accumulation
+// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat
+def : T_PPP_pat <M2_mmaculs_s1, int_hexagon_M2_mmaculs_s1>;
+def : T_PPP_pat <M2_mmaculs_s0, int_hexagon_M2_mmaculs_s0>;
+def : T_PPP_pat <M2_mmaculs_rs1, int_hexagon_M2_mmaculs_rs1>;
+def : T_PPP_pat <M2_mmaculs_rs0, int_hexagon_M2_mmaculs_rs0>;
+def : T_PPP_pat <M2_mmacuhs_s1, int_hexagon_M2_mmacuhs_s1>;
+def : T_PPP_pat <M2_mmacuhs_s0, int_hexagon_M2_mmacuhs_s0>;
+def : T_PPP_pat <M2_mmacuhs_rs1, int_hexagon_M2_mmacuhs_rs1>;
+def : T_PPP_pat <M2_mmacuhs_rs0, int_hexagon_M2_mmacuhs_rs0>;
+
+// Vector multiply even halfwords with accumulation
+// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat]
+def : T_PPP_pat <M2_vmac2es, int_hexagon_M2_vmac2es>;
+def : T_PPP_pat <M2_vmac2es_s1, int_hexagon_M2_vmac2es_s1>;
+def : T_PPP_pat <M2_vmac2es_s0, int_hexagon_M2_vmac2es_s0>;
+
+// Vector dual multiply with accumulation
+// Rxx+=vdmpy(Rss,Rtt)[:sat]
+def : T_PPP_pat <M2_vdmacs_s1, int_hexagon_M2_vdmacs_s1>;
+def : T_PPP_pat <M2_vdmacs_s0, int_hexagon_M2_vdmacs_s0>;
+
+// Vector complex multiply real or imaginary with accumulation
+// Rxx+=vcmpy[ir](Rss,Rtt):sat
+def : T_PPP_pat <M2_vcmac_s0_sat_r, int_hexagon_M2_vcmac_s0_sat_r>;
+def : T_PPP_pat <M2_vcmac_s0_sat_i, int_hexagon_M2_vcmac_s0_sat_i>;
+
+//===----------------------------------------------------------------------===//
+// Add/Subtract halfword
+// Rd=add(Rt.L,Rs.[HL])[:sat]
+// Rd=sub(Rt.L,Rs.[HL])[:sat]
+// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<16]
+// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<16]
+//===----------------------------------------------------------------------===//
+
+//Rd=add(Rt.L,Rs.[LH])
+def : T_RR_pat <A2_addh_l16_ll, int_hexagon_A2_addh_l16_ll>;
+def : T_RR_pat <A2_addh_l16_hl, int_hexagon_A2_addh_l16_hl>;
+
+//Rd=add(Rt.L,Rs.[LH]):sat
+def : T_RR_pat <A2_addh_l16_sat_ll, int_hexagon_A2_addh_l16_sat_ll>;
+def : T_RR_pat <A2_addh_l16_sat_hl, int_hexagon_A2_addh_l16_sat_hl>;
+
+//Rd=sub(Rt.L,Rs.[LH])
+def : T_RR_pat <A2_subh_l16_ll, int_hexagon_A2_subh_l16_ll>;
+def : T_RR_pat <A2_subh_l16_hl, int_hexagon_A2_subh_l16_hl>;
+
+//Rd=sub(Rt.L,Rs.[LH]):sat
+def : T_RR_pat <A2_subh_l16_sat_ll, int_hexagon_A2_subh_l16_sat_ll>;
+def : T_RR_pat <A2_subh_l16_sat_hl, int_hexagon_A2_subh_l16_sat_hl>;
+
+//Rd=add(Rt.[LH],Rs.[LH]):<<16
+def : T_RR_pat <A2_addh_h16_ll, int_hexagon_A2_addh_h16_ll>;
+def : T_RR_pat <A2_addh_h16_lh, int_hexagon_A2_addh_h16_lh>;
+def : T_RR_pat <A2_addh_h16_hl, int_hexagon_A2_addh_h16_hl>;
+def : T_RR_pat <A2_addh_h16_hh, int_hexagon_A2_addh_h16_hh>;
+
+//Rd=sub(Rt.[LH],Rs.[LH]):<<16
+def : T_RR_pat <A2_subh_h16_ll, int_hexagon_A2_subh_h16_ll>;
+def : T_RR_pat <A2_subh_h16_lh, int_hexagon_A2_subh_h16_lh>;
+def : T_RR_pat <A2_subh_h16_hl, int_hexagon_A2_subh_h16_hl>;
+def : T_RR_pat <A2_subh_h16_hh, int_hexagon_A2_subh_h16_hh>;
+
+//Rd=add(Rt.[LH],Rs.[LH]):sat:<<16
+def : T_RR_pat <A2_addh_h16_sat_ll, int_hexagon_A2_addh_h16_sat_ll>;
+def : T_RR_pat <A2_addh_h16_sat_lh, int_hexagon_A2_addh_h16_sat_lh>;
+def : T_RR_pat <A2_addh_h16_sat_hl, int_hexagon_A2_addh_h16_sat_hl>;
+def : T_RR_pat <A2_addh_h16_sat_hh, int_hexagon_A2_addh_h16_sat_hh>;
+
+//Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16
+def : T_RR_pat <A2_subh_h16_sat_ll, int_hexagon_A2_subh_h16_sat_ll>;
+def : T_RR_pat <A2_subh_h16_sat_lh, int_hexagon_A2_subh_h16_sat_lh>;
+def : T_RR_pat <A2_subh_h16_sat_hl, int_hexagon_A2_subh_h16_sat_hl>;
+def : T_RR_pat <A2_subh_h16_sat_hh, int_hexagon_A2_subh_h16_sat_hh>;
+
+// ALU64 / ALU / min max
+def : T_RR_pat<A2_max, int_hexagon_A2_max>;
+def : T_RR_pat<A2_min, int_hexagon_A2_min>;
+def : T_RR_pat<A2_maxu, int_hexagon_A2_maxu>;
+def : T_RR_pat<A2_minu, int_hexagon_A2_minu>;
+
+// Shift and accumulate
+def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>;
+def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>;
+def : T_RRI_pat <S2_asl_i_r_nac, int_hexagon_S2_asl_i_r_nac>;
+def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>;
+def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>;
+def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>;
+
+def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>;
+def : T_RRI_pat <S2_lsr_i_r_and, int_hexagon_S2_lsr_i_r_and>;
+def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>;
+def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>;
+def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>;
+def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>;
+def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>;
+def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>;
+
+def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>;
+def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>;
+def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>;
+def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>;
+def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>;
+def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>;
+
+def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>;
+def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>;
+def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>;
+def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>;
+def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>;
+def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>;
+def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>;
+def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>;
+
+def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>;
+def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>;
+def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>;
+def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>;
+def : T_RRR_pat <S2_asr_r_r_acc, int_hexagon_S2_asr_r_r_acc>;
+def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>;
+def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>;
+def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>;
+
+def : T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>;
+def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>;
+def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>;
+def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>;
+def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>;
+def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>;
+def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>;
+def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>;
+
+def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>;
+def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>;
+def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>;
+def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>;
+def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>;
+def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>;
+def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>;
+def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>;
+
+def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>;
+def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>;
+def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>;
+def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>;
+def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>;
+def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>;
+def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>;
+def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>;
+
+def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>;
+def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>;
+def : T_RRI_pat <S2_asl_i_r_nac, int_hexagon_S2_asl_i_r_nac>;
+def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>;
+def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>;
+def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>;
+
+def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>;
+def : T_RRI_pat <S2_lsr_i_r_and, int_hexagon_S2_lsr_i_r_and>;
+def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>;
+def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>;
+def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>;
+def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>;
+def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>;
+def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>;
+
+def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>;
+def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>;
+def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>;
+def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>;
+def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>;
+def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>;
+
+def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>;
+def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>;
+def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>;
+def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>;
+def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>;
+def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>;
+def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>;
+def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>;
+
+def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>;
+def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>;
+def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>;
+def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>;
+def : T_RRR_pat <S2_asr_r_r_acc, int_hexagon_S2_asr_r_r_acc>;
+def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>;
+def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>;
+def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>;
+
+def : T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>;
+def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>;
+def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>;
+def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>;
+def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>;
+def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>;
+def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>;
+def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>;
+
+def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>;
+def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>;
+def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>;
+def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>;
+def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>;
+def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>;
+def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>;
+def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>;
+
+def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>;
+def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>;
+def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>;
+def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>;
+def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>;
+def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>;
+def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>;
+def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>;
+
+/********************************************************************
+* ALU32/ALU *
+*********************************************************************/
+def : T_RR_pat<A2_add, int_hexagon_A2_add>;
+def : T_RI_pat<A2_addi, int_hexagon_A2_addi>;
+def : T_RR_pat<A2_sub, int_hexagon_A2_sub>;
+def : T_IR_pat<A2_subri, int_hexagon_A2_subri>;
+def : T_RR_pat<A2_and, int_hexagon_A2_and>;
+def : T_RI_pat<A2_andir, int_hexagon_A2_andir>;
+def : T_RR_pat<A2_or, int_hexagon_A2_or>;
+def : T_RI_pat<A2_orir, int_hexagon_A2_orir>;
+def : T_RR_pat<A2_xor, int_hexagon_A2_xor>;
+def : T_RR_pat<A2_combinew, int_hexagon_A2_combinew>;
+
+// Assembler mapped from Rd32=not(Rs32) to Rd32=sub(#-1,Rs32)
+def : Pat <(int_hexagon_A2_not (I32:$Rs)),
+ (A2_subri -1, IntRegs:$Rs)>;
+
+// Assembler mapped from Rd32=neg(Rs32) to Rd32=sub(#0,Rs32)
+def : Pat <(int_hexagon_A2_neg IntRegs:$Rs),
+ (A2_subri 0, IntRegs:$Rs)>;
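+// Both mappings follow from two's complement identities: not(Rs) == -1 - Rs
+// and neg(Rs) == 0 - Rs (for example, not(5) == -6 == sub(#-1, 5)), so no
+// dedicated instructions are needed.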
+
+// Transfer immediate
+def : Pat <(int_hexagon_A2_tfril (I32:$Rs), u16_0ImmPred:$Is),
+ (A2_tfril IntRegs:$Rs, u16_0ImmPred:$Is)>;
+def : Pat <(int_hexagon_A2_tfrih (I32:$Rs), u16_0ImmPred:$Is),
+ (A2_tfrih IntRegs:$Rs, u16_0ImmPred:$Is)>;
+
+// Transfer Register/immediate.
+def : T_R_pat <A2_tfr, int_hexagon_A2_tfr>;
+def : T_I_pat <A2_tfrsi, int_hexagon_A2_tfrsi>;
+def : T_I_pat <A2_tfrpi, int_hexagon_A2_tfrpi>;
+
+// Assembler mapped from Rdd32=Rss32 to Rdd32=combine(Rss.H32,Rss.L32)
+def : Pat<(int_hexagon_A2_tfrp DoubleRegs:$src),
+ (A2_combinew (HiReg DoubleRegs:$src), (LoReg DoubleRegs:$src))>;
+
+/********************************************************************
+* ALU32/PERM *
+*********************************************************************/
+// Combine
+def: T_RR_pat<A2_combine_hh, int_hexagon_A2_combine_hh>;
+def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>;
+def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>;
+def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>;
+
+def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32ImmPred, s8ImmPred>;
+
+def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))),
+ (i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>;
+
+// Mux
+def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32ImmPred>;
+def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32ImmPred>;
+def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32ImmPred, s8ImmPred>;
+
+// Shift halfword
+def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>;
+def : T_R_pat<A2_asrh, int_hexagon_A2_asrh>;
+def : T_R_pat<A2_asrh, int_hexagon_SI_to_SXTHI_asrh>;
+
+// Sign/zero extend
+def : T_R_pat<A2_sxth, int_hexagon_A2_sxth>;
+def : T_R_pat<A2_sxtb, int_hexagon_A2_sxtb>;
+def : T_R_pat<A2_zxth, int_hexagon_A2_zxth>;
+def : T_R_pat<A2_zxtb, int_hexagon_A2_zxtb>;
+
+/********************************************************************
+* ALU32/PRED *
+*********************************************************************/
+// Compare
+def : T_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>;
+def : T_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>;
+def : T_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>;
+
+def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32ImmPred>;
+def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32ImmPred>;
+def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32ImmPred>;
+
+def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)),
+ (i32 (C2_cmpgti (I32:$src1),
+ (DEC_CONST_SIGNED s32ImmPred:$src2)))>;
+
+def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)),
+ (i32 (C2_cmpgtui (I32:$src1),
+ (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>;
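+// Rs >= #imm is computed as Rs > #imm-1; DEC_CONST_SIGNED/DEC_CONST_UNSIGNED
+// perform the decrement on the immediate. The unsigned #imm == 0 case, where
+// the decrement would underflow, is handled separately below.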
+
+// When #u8 == 0, Pd=cmp.geu(Rs,#u8) is always true, so it maps to
+// Pd=cmp.eq(Rs,Rs).
+def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)),
+ (i32 (C2_cmpeq (I32:$src1), (I32:$src1)))>;
+
+def : Pat <(i32 (int_hexagon_C2_cmplt (I32:$src1),
+ (I32:$src2))),
+ (i32 (C2_cmpgt (I32:$src2), (I32:$src1)))>;
+
+def : Pat <(i32 (int_hexagon_C2_cmpltu (I32:$src1),
+ (I32:$src2))),
+ (i32 (C2_cmpgtu (I32:$src2), (I32:$src1)))>;
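+// cmp.lt and cmp.ltu have no instructions of their own: Rs < Rt is computed
+// as Rt > Rs by swapping the operands of cmp.gt[u].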
+
+/********************************************************************
+* ALU32/VH *
+*********************************************************************/
+// Vector add, subtract, average halfwords
+def: T_RR_pat<A2_svaddh, int_hexagon_A2_svaddh>;
+def: T_RR_pat<A2_svaddhs, int_hexagon_A2_svaddhs>;
+def: T_RR_pat<A2_svadduhs, int_hexagon_A2_svadduhs>;
+
+def: T_RR_pat<A2_svsubh, int_hexagon_A2_svsubh>;
+def: T_RR_pat<A2_svsubhs, int_hexagon_A2_svsubhs>;
+def: T_RR_pat<A2_svsubuhs, int_hexagon_A2_svsubuhs>;
+
+def: T_RR_pat<A2_svavgh, int_hexagon_A2_svavgh>;
+def: T_RR_pat<A2_svavghs, int_hexagon_A2_svavghs>;
+def: T_RR_pat<A2_svnavgh, int_hexagon_A2_svnavgh>;
+
+/********************************************************************
+* ALU64/ALU *
+*********************************************************************/
+def: T_RR_pat<A2_addsat, int_hexagon_A2_addsat>;
+def: T_RR_pat<A2_subsat, int_hexagon_A2_subsat>;
+def: T_PP_pat<A2_addp, int_hexagon_A2_addp>;
+def: T_PP_pat<A2_subp, int_hexagon_A2_subp>;
+
+def: T_PP_pat<A2_andp, int_hexagon_A2_andp>;
+def: T_PP_pat<A2_orp, int_hexagon_A2_orp>;
+def: T_PP_pat<A2_xorp, int_hexagon_A2_xorp>;
+
+def: T_PP_pat<C2_cmpeqp, int_hexagon_C2_cmpeqp>;
+def: T_PP_pat<C2_cmpgtp, int_hexagon_C2_cmpgtp>;
+def: T_PP_pat<C2_cmpgtup, int_hexagon_C2_cmpgtup>;
+
+def: T_PP_pat<S2_parityp, int_hexagon_S2_parityp>;
+def: T_RR_pat<S2_packhl, int_hexagon_S2_packhl>;
+
+/********************************************************************
+* ALU64/VB *
+*********************************************************************/
+// ALU64 - Vector add
+def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddub>;
+def : T_PP_pat <A2_vaddubs, int_hexagon_A2_vaddubs>;
+def : T_PP_pat <A2_vaddh, int_hexagon_A2_vaddh>;
+def : T_PP_pat <A2_vaddhs, int_hexagon_A2_vaddhs>;
+def : T_PP_pat <A2_vadduhs, int_hexagon_A2_vadduhs>;
+def : T_PP_pat <A2_vaddw, int_hexagon_A2_vaddw>;
+def : T_PP_pat <A2_vaddws, int_hexagon_A2_vaddws>;
+
+// ALU64 - Vector average
+def : T_PP_pat <A2_vavgub, int_hexagon_A2_vavgub>;
+def : T_PP_pat <A2_vavgubr, int_hexagon_A2_vavgubr>;
+def : T_PP_pat <A2_vavgh, int_hexagon_A2_vavgh>;
+def : T_PP_pat <A2_vavghr, int_hexagon_A2_vavghr>;
+def : T_PP_pat <A2_vavghcr, int_hexagon_A2_vavghcr>;
+def : T_PP_pat <A2_vavguh, int_hexagon_A2_vavguh>;
+def : T_PP_pat <A2_vavguhr, int_hexagon_A2_vavguhr>;
+
+def : T_PP_pat <A2_vavgw, int_hexagon_A2_vavgw>;
+def : T_PP_pat <A2_vavgwr, int_hexagon_A2_vavgwr>;
+def : T_PP_pat <A2_vavgwcr, int_hexagon_A2_vavgwcr>;
+def : T_PP_pat <A2_vavguw, int_hexagon_A2_vavguw>;
+def : T_PP_pat <A2_vavguwr, int_hexagon_A2_vavguwr>;
+
+// ALU64 - Vector negative average
+def : T_PP_pat <A2_vnavgh, int_hexagon_A2_vnavgh>;
+def : T_PP_pat <A2_vnavghr, int_hexagon_A2_vnavghr>;
+def : T_PP_pat <A2_vnavghcr, int_hexagon_A2_vnavghcr>;
+def : T_PP_pat <A2_vnavgw, int_hexagon_A2_vnavgw>;
+def : T_PP_pat <A2_vnavgwr, int_hexagon_A2_vnavgwr>;
+def : T_PP_pat <A2_vnavgwcr, int_hexagon_A2_vnavgwcr>;
+
+// ALU64 - Vector max
+def : T_PP_pat <A2_vmaxh, int_hexagon_A2_vmaxh>;
+def : T_PP_pat <A2_vmaxw, int_hexagon_A2_vmaxw>;
+def : T_PP_pat <A2_vmaxub, int_hexagon_A2_vmaxub>;
+def : T_PP_pat <A2_vmaxuh, int_hexagon_A2_vmaxuh>;
+def : T_PP_pat <A2_vmaxuw, int_hexagon_A2_vmaxuw>;
+
+// ALU64 - Vector min
+def : T_PP_pat <A2_vminh, int_hexagon_A2_vminh>;
+def : T_PP_pat <A2_vminw, int_hexagon_A2_vminw>;
+def : T_PP_pat <A2_vminub, int_hexagon_A2_vminub>;
+def : T_PP_pat <A2_vminuh, int_hexagon_A2_vminuh>;
+def : T_PP_pat <A2_vminuw, int_hexagon_A2_vminuw>;
+
+// ALU64 - Vector sub
+def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubub>;
+def : T_PP_pat <A2_vsububs, int_hexagon_A2_vsububs>;
+def : T_PP_pat <A2_vsubh, int_hexagon_A2_vsubh>;
+def : T_PP_pat <A2_vsubhs, int_hexagon_A2_vsubhs>;
+def : T_PP_pat <A2_vsubuhs, int_hexagon_A2_vsubuhs>;
+def : T_PP_pat <A2_vsubw, int_hexagon_A2_vsubw>;
+def : T_PP_pat <A2_vsubws, int_hexagon_A2_vsubws>;
+
+// ALU64 - Vector compare bytes
+def : T_PP_pat <A2_vcmpbeq, int_hexagon_A2_vcmpbeq>;
+def : T_PP_pat <A4_vcmpbgt, int_hexagon_A4_vcmpbgt>;
+def : T_PP_pat <A2_vcmpbgtu, int_hexagon_A2_vcmpbgtu>;
+
+// ALU64 - Vector compare halfwords
+def : T_PP_pat <A2_vcmpheq, int_hexagon_A2_vcmpheq>;
+def : T_PP_pat <A2_vcmphgt, int_hexagon_A2_vcmphgt>;
+def : T_PP_pat <A2_vcmphgtu, int_hexagon_A2_vcmphgtu>;
+
+// ALU64 - Vector compare words
+def : T_PP_pat <A2_vcmpweq, int_hexagon_A2_vcmpweq>;
+def : T_PP_pat <A2_vcmpwgt, int_hexagon_A2_vcmpwgt>;
+def : T_PP_pat <A2_vcmpwgtu, int_hexagon_A2_vcmpwgtu>;
+
+// ALU64 / VB / Vector mux.
+def : Pat<(int_hexagon_C2_vmux PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ (C2_vmux PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+// MPY - Multiply and use full result
+// Rdd = mpy[u](Rs, Rt)
+def : T_RR_pat <M2_dpmpyss_s0, int_hexagon_M2_dpmpyss_s0>;
+def : T_RR_pat <M2_dpmpyuu_s0, int_hexagon_M2_dpmpyuu_s0>;
+
+// Complex multiply real or imaginary
+def : T_RR_pat <M2_cmpyi_s0, int_hexagon_M2_cmpyi_s0>;
+def : T_RR_pat <M2_cmpyr_s0, int_hexagon_M2_cmpyr_s0>;
+
+// Complex multiply
+def : T_RR_pat <M2_cmpys_s0, int_hexagon_M2_cmpys_s0>;
+def : T_RR_pat <M2_cmpysc_s0, int_hexagon_M2_cmpysc_s0>;
+def : T_RR_pat <M2_cmpys_s1, int_hexagon_M2_cmpys_s1>;
+def : T_RR_pat <M2_cmpysc_s1, int_hexagon_M2_cmpysc_s1>;
+
+// Vector multiply halfwords
+// Rdd=vmpyh(Rs,Rt)[:<<1]:sat
+def : T_RR_pat <M2_vmpy2s_s0, int_hexagon_M2_vmpy2s_s0>;
+def : T_RR_pat <M2_vmpy2s_s1, int_hexagon_M2_vmpy2s_s1>;
+
+// Rxx[+-]= mpy[u](Rs,Rt)
+def : T_PRR_pat <M2_dpmpyss_acc_s0, int_hexagon_M2_dpmpyss_acc_s0>;
+def : T_PRR_pat <M2_dpmpyss_nac_s0, int_hexagon_M2_dpmpyss_nac_s0>;
+def : T_PRR_pat <M2_dpmpyuu_acc_s0, int_hexagon_M2_dpmpyuu_acc_s0>;
+def : T_PRR_pat <M2_dpmpyuu_nac_s0, int_hexagon_M2_dpmpyuu_nac_s0>;
+
+// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat
+def : T_PRR_pat <M2_cmacs_s0, int_hexagon_M2_cmacs_s0>;
+def : T_PRR_pat <M2_cnacs_s0, int_hexagon_M2_cnacs_s0>;
+def : T_PRR_pat <M2_cmacs_s1, int_hexagon_M2_cmacs_s1>;
+def : T_PRR_pat <M2_cnacs_s1, int_hexagon_M2_cnacs_s1>;
+
+// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat
+def : T_PRR_pat <M2_cmacsc_s0, int_hexagon_M2_cmacsc_s0>;
+def : T_PRR_pat <M2_cnacsc_s0, int_hexagon_M2_cnacsc_s0>;
+def : T_PRR_pat <M2_cmacsc_s1, int_hexagon_M2_cmacsc_s1>;
+def : T_PRR_pat <M2_cnacsc_s1, int_hexagon_M2_cnacsc_s1>;
+
+// Rxx+=cmpy[ir](Rs,Rt)
+def : T_PRR_pat <M2_cmaci_s0, int_hexagon_M2_cmaci_s0>;
+def : T_PRR_pat <M2_cmacr_s0, int_hexagon_M2_cmacr_s0>;
+
+// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat]
+def : T_PRR_pat <M2_vmac2, int_hexagon_M2_vmac2>;
+def : T_PRR_pat <M2_vmac2s_s0, int_hexagon_M2_vmac2s_s0>;
+def : T_PRR_pat <M2_vmac2s_s1, int_hexagon_M2_vmac2s_s1>;
+
+/********************************************************************
+* CR *
+*********************************************************************/
+class qi_CRInst_qi_pat<InstHexagon Inst, Intrinsic IntID> :
+ Pat<(i32 (IntID IntRegs:$Rs)),
+ (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs))))>;
+
+class qi_CRInst_qiqi_pat<InstHexagon Inst, Intrinsic IntID> :
+ Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt)),
+ (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs), (C2_tfrrp IntRegs:$Rt))))>;
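+// At the intrinsic level predicate values are carried in general registers,
+// so these classes transfer the i32 inputs into predicate registers with
+// C2_tfrrp, apply the predicate operation, and move the result back with
+// C2_tfrpr.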
+
+def: qi_CRInst_qi_pat<C2_not, int_hexagon_C2_not>;
+def: qi_CRInst_qi_pat<C2_all8, int_hexagon_C2_all8>;
+def: qi_CRInst_qi_pat<C2_any8, int_hexagon_C2_any8>;
+
+def: qi_CRInst_qiqi_pat<C2_and, int_hexagon_C2_and>;
+def: qi_CRInst_qiqi_pat<C2_andn, int_hexagon_C2_andn>;
+def: qi_CRInst_qiqi_pat<C2_or, int_hexagon_C2_or>;
+def: qi_CRInst_qiqi_pat<C2_orn, int_hexagon_C2_orn>;
+def: qi_CRInst_qiqi_pat<C2_xor, int_hexagon_C2_xor>;
+
+// Assembler mapped from Pd4=Ps4 to Pd4=or(Ps4,Ps4)
+def : Pat<(int_hexagon_C2_pxfer_map PredRegs:$src),
+ (C2_pxfer_map PredRegs:$src)>;
+
+// Multiply 32x32 and use lower result
+def : T_RRI_pat <M2_macsip, int_hexagon_M2_macsip>;
+def : T_RRI_pat <M2_macsin, int_hexagon_M2_macsin>;
+def : T_RRR_pat <M2_maci, int_hexagon_M2_maci>;
+
+// Subtract and accumulate
+def : T_RRR_pat <M2_subacc, int_hexagon_M2_subacc>;
+
+// Add and accumulate
+def : T_RRR_pat <M2_acci, int_hexagon_M2_acci>;
+def : T_RRR_pat <M2_nacci, int_hexagon_M2_nacci>;
+def : T_RRI_pat <M2_accii, int_hexagon_M2_accii>;
+def : T_RRI_pat <M2_naccii, int_hexagon_M2_naccii>;
+
+// XOR and XOR with destination
+def : T_RRR_pat <M2_xor_xacc, int_hexagon_M2_xor_xacc>;
+
+class MType_R32_pat <Intrinsic IntID, InstHexagon OutputInst> :
+ Pat <(IntID IntRegs:$src1, IntRegs:$src2),
+ (OutputInst IntRegs:$src1, IntRegs:$src2)>;
+
+// Vector dual multiply with round and pack
+
+def : Pat <(int_hexagon_M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2)>;
+
+def : Pat <(int_hexagon_M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2)>;
+
+// Vector multiply halfwords with round and pack
+
+def : MType_R32_pat <int_hexagon_M2_vmpy2s_s0pack, M2_vmpy2s_s0pack>;
+def : MType_R32_pat <int_hexagon_M2_vmpy2s_s1pack, M2_vmpy2s_s1pack>;
+
+// Multiply and use lower result
+def : MType_R32_pat <int_hexagon_M2_mpyi, M2_mpyi>;
+def : T_RI_pat<M2_mpysmi, int_hexagon_M2_mpysmi>;
+
+// Assembler mapped from Rd32=mpyui(Rs32,Rt32) to Rd32=mpyi(Rs32,Rt32)
+def : MType_R32_pat <int_hexagon_M2_mpyui, M2_mpyi>;
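+// Signed and unsigned 32x32 multiplication agree on the low 32 bits of the
+// product, so mpyui can safely reuse the mpyi instruction.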
+
+// Multiply and use upper result
+def : MType_R32_pat <int_hexagon_M2_mpy_up, M2_mpy_up>;
+def : MType_R32_pat <int_hexagon_M2_mpyu_up, M2_mpyu_up>;
+def : MType_R32_pat <int_hexagon_M2_hmmpyh_rs1, M2_hmmpyh_rs1>;
+def : MType_R32_pat <int_hexagon_M2_hmmpyl_rs1, M2_hmmpyl_rs1>;
+def : MType_R32_pat <int_hexagon_M2_dpmpyss_rnd_s0, M2_dpmpyss_rnd_s0>;
+
+// Complex multiply with round and pack
+// Rd32=cmpy(Rs32,Rt32[*])[:<<1]:rnd:sat
+def : MType_R32_pat <int_hexagon_M2_cmpyrs_s0, M2_cmpyrs_s0>;
+def : MType_R32_pat <int_hexagon_M2_cmpyrs_s1, M2_cmpyrs_s1>;
+def : MType_R32_pat <int_hexagon_M2_cmpyrsc_s0, M2_cmpyrsc_s0>;
+def : MType_R32_pat <int_hexagon_M2_cmpyrsc_s1, M2_cmpyrsc_s1>;
+
+/********************************************************************
+* STYPE/ALU *
+*********************************************************************/
+def : T_P_pat <A2_absp, int_hexagon_A2_absp>;
+def : T_P_pat <A2_negp, int_hexagon_A2_negp>;
+def : T_P_pat <A2_notp, int_hexagon_A2_notp>;
+
+/********************************************************************
+* STYPE/BIT *
+*********************************************************************/
+
+// Count leading/trailing
+def: T_R_pat<S2_cl0, int_hexagon_S2_cl0>;
+def: T_P_pat<S2_cl0p, int_hexagon_S2_cl0p>;
+def: T_R_pat<S2_cl1, int_hexagon_S2_cl1>;
+def: T_P_pat<S2_cl1p, int_hexagon_S2_cl1p>;
+def: T_R_pat<S2_clb, int_hexagon_S2_clb>;
+def: T_P_pat<S2_clbp, int_hexagon_S2_clbp>;
+def: T_R_pat<S2_clbnorm, int_hexagon_S2_clbnorm>;
+def: T_R_pat<S2_ct0, int_hexagon_S2_ct0>;
+def: T_R_pat<S2_ct1, int_hexagon_S2_ct1>;
+
+// Compare bit mask
+def: T_RR_pat<C2_bitsclr, int_hexagon_C2_bitsclr>;
+def: T_RI_pat<C2_bitsclri, int_hexagon_C2_bitsclri>;
+def: T_RR_pat<C2_bitsset, int_hexagon_C2_bitsset>;
+
+// Vector shuffle
+def : T_PP_pat <S2_shuffeb, int_hexagon_S2_shuffeb>;
+def : T_PP_pat <S2_shuffob, int_hexagon_S2_shuffob>;
+def : T_PP_pat <S2_shuffeh, int_hexagon_S2_shuffeh>;
+def : T_PP_pat <S2_shuffoh, int_hexagon_S2_shuffoh>;
+
+// Vector truncate
+def : T_PP_pat <S2_vtrunewh, int_hexagon_S2_vtrunewh>;
+def : T_PP_pat <S2_vtrunowh, int_hexagon_S2_vtrunowh>;
+
+// Linear feedback-shift iteration.
+def : T_PP_pat <S2_lfsp, int_hexagon_S2_lfsp>;
+
+// Vector splice
+def : T_PPQ_pat <S2_vsplicerb, int_hexagon_S2_vsplicerb>;
+def : T_PPI_pat <S2_vspliceib, int_hexagon_S2_vspliceib>;
+
+// Shift by immediate and add
+def : T_RRI_pat<S2_addasl_rrri, int_hexagon_S2_addasl_rrri>;
+
+// Extract bitfield
+def : T_PII_pat<S2_extractup, int_hexagon_S2_extractup>;
+def : T_RII_pat<S2_extractu, int_hexagon_S2_extractu>;
+def : T_RP_pat <S2_extractu_rp, int_hexagon_S2_extractu_rp>;
+def : T_PP_pat <S2_extractup_rp, int_hexagon_S2_extractup_rp>;
+
+// Insert bitfield
+def : Pat <(int_hexagon_S2_insert_rp IntRegs:$src1, IntRegs:$src2,
+ DoubleRegs:$src3),
+ (S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3)>;
+
+def : Pat<(i64 (int_hexagon_S2_insertp_rp (I64:$src1),
+ (I64:$src2), (I64:$src3))),
+ (i64 (S2_insertp_rp (I64:$src1), (I64:$src2),
+ (I64:$src3)))>;
+
+def : Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2,
+ u5ImmPred:$src3, u5ImmPred:$src4),
+ (S2_insert IntRegs:$src1, IntRegs:$src2,
+ u5ImmPred:$src3, u5ImmPred:$src4)>;
+
+def : Pat<(i64 (int_hexagon_S2_insertp (I64:$src1),
+ (I64:$src2), u6ImmPred:$src3, u6ImmPred:$src4)),
+ (i64 (S2_insertp (I64:$src1), (I64:$src2),
+ u6ImmPred:$src3, u6ImmPred:$src4))>;
+
+
+// Interleave/deinterleave
+def : T_P_pat <S2_interleave, int_hexagon_S2_interleave>;
+def : T_P_pat <S2_deinterleave, int_hexagon_S2_deinterleave>;
+
+// Set/Clear/Toggle Bit
+def: T_RI_pat<S2_setbit_i, int_hexagon_S2_setbit_i>;
+def: T_RI_pat<S2_clrbit_i, int_hexagon_S2_clrbit_i>;
+def: T_RI_pat<S2_togglebit_i, int_hexagon_S2_togglebit_i>;
+
+def: T_RR_pat<S2_setbit_r, int_hexagon_S2_setbit_r>;
+def: T_RR_pat<S2_clrbit_r, int_hexagon_S2_clrbit_r>;
+def: T_RR_pat<S2_togglebit_r, int_hexagon_S2_togglebit_r>;
+
+// Test Bit
+def: T_RI_pat<S2_tstbit_i, int_hexagon_S2_tstbit_i>;
+def: T_RR_pat<S2_tstbit_r, int_hexagon_S2_tstbit_r>;
+
+/********************************************************************
+* STYPE/COMPLEX *
+*********************************************************************/
+// Vector Complex conjugate
+def : T_P_pat <A2_vconj, int_hexagon_A2_vconj>;
+
+// Vector Complex rotate
+def : T_PR_pat <S2_vcrotate, int_hexagon_S2_vcrotate>;
+
+/********************************************************************
+* STYPE/PERM *
+*********************************************************************/
+
+// Vector saturate without pack
+def : T_P_pat <S2_vsathb_nopack, int_hexagon_S2_vsathb_nopack>;
+def : T_P_pat <S2_vsathub_nopack, int_hexagon_S2_vsathub_nopack>;
+def : T_P_pat <S2_vsatwh_nopack, int_hexagon_S2_vsatwh_nopack>;
+def : T_P_pat <S2_vsatwuh_nopack, int_hexagon_S2_vsatwuh_nopack>;
+
+/********************************************************************
+* STYPE/PRED *
+*********************************************************************/
+
+// Predicate transfer
+def: Pat<(i32 (int_hexagon_C2_tfrpr (I32:$Rs))),
+ (i32 (C2_tfrpr (C2_tfrrp (I32:$Rs))))>;
+def: Pat<(i32 (int_hexagon_C2_tfrrp (I32:$Rs))),
+ (i32 (C2_tfrpr (C2_tfrrp (I32:$Rs))))>;
+
+// Mask generate from predicate
+def: Pat<(i64 (int_hexagon_C2_mask (I32:$Rs))),
+ (i64 (C2_mask (C2_tfrrp (I32:$Rs))))>;
+
+// Viterbi pack even and odd predicate bits
+def: Pat<(i32 (int_hexagon_C2_vitpack (I32:$Rs), (I32:$Rt))),
+ (i32 (C2_vitpack (C2_tfrrp (I32:$Rs)),
+ (C2_tfrrp (I32:$Rt))))>;
+
+/********************************************************************
+* STYPE/SHIFT *
+*********************************************************************/
+
+def : T_PI_pat <S2_asr_i_p, int_hexagon_S2_asr_i_p>;
+def : T_PI_pat <S2_lsr_i_p, int_hexagon_S2_lsr_i_p>;
+def : T_PI_pat <S2_asl_i_p, int_hexagon_S2_asl_i_p>;
+
+def : T_PR_pat <S2_asr_r_p, int_hexagon_S2_asr_r_p>;
+def : T_PR_pat <S2_lsr_r_p, int_hexagon_S2_lsr_r_p>;
+def : T_PR_pat <S2_asl_r_p, int_hexagon_S2_asl_r_p>;
+def : T_PR_pat <S2_lsl_r_p, int_hexagon_S2_lsl_r_p>;
+
+def : T_RR_pat <S2_asr_r_r, int_hexagon_S2_asr_r_r>;
+def : T_RR_pat <S2_lsr_r_r, int_hexagon_S2_lsr_r_r>;
+def : T_RR_pat <S2_asl_r_r, int_hexagon_S2_asl_r_r>;
+def : T_RR_pat <S2_lsl_r_r, int_hexagon_S2_lsl_r_r>;
+
+def : T_RR_pat <S2_asr_r_r_sat, int_hexagon_S2_asr_r_r_sat>;
+def : T_RR_pat <S2_asl_r_r_sat, int_hexagon_S2_asl_r_r_sat>;
+
+def : T_R_pat <S2_vsxtbh, int_hexagon_S2_vsxtbh>;
+def : T_R_pat <S2_vzxtbh, int_hexagon_S2_vzxtbh>;
+def : T_R_pat <S2_vsxthw, int_hexagon_S2_vsxthw>;
+def : T_R_pat <S2_vzxthw, int_hexagon_S2_vzxthw>;
+def : T_R_pat <S2_vsplatrh, int_hexagon_S2_vsplatrh>;
+def : T_R_pat <A2_sxtw, int_hexagon_A2_sxtw>;
+
+// Vector saturate and pack
+def : T_R_pat <S2_svsathb, int_hexagon_S2_svsathb>;
+def : T_R_pat <S2_svsathub, int_hexagon_S2_svsathub>;
+def : T_P_pat <S2_vsathub, int_hexagon_S2_vsathub>;
+def : T_P_pat <S2_vsatwh, int_hexagon_S2_vsatwh>;
+def : T_P_pat <S2_vsatwuh, int_hexagon_S2_vsatwuh>;
+def : T_P_pat <S2_vsathb, int_hexagon_S2_vsathb>;
+
+def : T_P_pat <S2_vtrunohb, int_hexagon_S2_vtrunohb>;
+def : T_P_pat <S2_vtrunehb, int_hexagon_S2_vtrunehb>;
+def : T_P_pat <S2_vrndpackwh, int_hexagon_S2_vrndpackwh>;
+def : T_P_pat <S2_vrndpackwhs, int_hexagon_S2_vrndpackwhs>;
+def : T_R_pat <S2_brev, int_hexagon_S2_brev>;
+def : T_R_pat <S2_vsplatrb, int_hexagon_S2_vsplatrb>;
+
+def : T_R_pat <A2_abs, int_hexagon_A2_abs>;
+def : T_R_pat <A2_abssat, int_hexagon_A2_abssat>;
+def : T_R_pat <A2_negsat, int_hexagon_A2_negsat>;
+
+def : T_R_pat <A2_swiz, int_hexagon_A2_swiz>;
+
+def : T_P_pat <A2_sat, int_hexagon_A2_sat>;
+def : T_R_pat <A2_sath, int_hexagon_A2_sath>;
+def : T_R_pat <A2_satuh, int_hexagon_A2_satuh>;
+def : T_R_pat <A2_satub, int_hexagon_A2_satub>;
+def : T_R_pat <A2_satb, int_hexagon_A2_satb>;
+
+// Vector arithmetic shift right by immediate with truncate and pack.
+def : T_PI_pat<S2_asr_i_svw_trun, int_hexagon_S2_asr_i_svw_trun>;
+
+def : T_RI_pat <S2_asr_i_r, int_hexagon_S2_asr_i_r>;
+def : T_RI_pat <S2_lsr_i_r, int_hexagon_S2_lsr_i_r>;
+def : T_RI_pat <S2_asl_i_r, int_hexagon_S2_asl_i_r>;
+def : T_RI_pat <S2_asr_i_r_rnd, int_hexagon_S2_asr_i_r_rnd>;
+def : T_RI_pat <S2_asr_i_r_rnd_goodsyntax,
+ int_hexagon_S2_asr_i_r_rnd_goodsyntax>;
+
+// Shift left by immediate with saturation.
+def : T_RI_pat <S2_asl_i_r_sat, int_hexagon_S2_asl_i_r_sat>;
+
+//===----------------------------------------------------------------------===//
+// Template 'def pat' to map tableidx[bhwd] intrinsics to :raw instructions.
+//===----------------------------------------------------------------------===//
+class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst,
+ SDNodeXForm XformImm>
+ : Pat <(IntID IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3, u5ImmPred:$src4),
+ (OutputInst IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3,
+ (XformImm u5ImmPred:$src4))>;
+
+
+// Table Index : Extract and insert bits.
+// Map to the real hardware instructions after subtracting appropriate
+// values from the 4th input operand. Please note that subtraction is not
+// needed for int_hexagon_S2_tableidxb_goodsyntax.
+
+def : Pat <(int_hexagon_S2_tableidxb_goodsyntax IntRegs:$src1, IntRegs:$src2,
+ u4ImmPred:$src3, u5ImmPred:$src4),
+ (S2_tableidxb IntRegs:$src1, IntRegs:$src2,
+ u4ImmPred:$src3, u5ImmPred:$src4)>;
+
+def : S2op_tableidx_pat <int_hexagon_S2_tableidxh_goodsyntax, S2_tableidxh,
+ DEC_CONST_SIGNED>;
+def : S2op_tableidx_pat <int_hexagon_S2_tableidxw_goodsyntax, S2_tableidxw,
+ DEC2_CONST_SIGNED>;
+def : S2op_tableidx_pat <int_hexagon_S2_tableidxd_goodsyntax, S2_tableidxd,
+ DEC3_CONST_SIGNED>;
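+// The b form passes the 4th operand through unchanged, while the h, w and d
+// forms decrement it by 1, 2 and 3 respectively (log2 of the element size),
+// as the DEC/DEC2/DEC3_CONST_SIGNED transforms above indicate.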
+
+/********************************************************************
+* STYPE/VH *
+*********************************************************************/
+
+// Vector absolute value halfwords with and without saturation
+// Rdd64=vabsh(Rss64)[:sat]
+def : T_P_pat <A2_vabsh, int_hexagon_A2_vabsh>;
+def : T_P_pat <A2_vabshsat, int_hexagon_A2_vabshsat>;
+
+// Vector shift halfwords by immediate
+// Rdd64=[vaslh/vasrh/vlsrh](Rss64,u4)
+def : T_PI_pat <S2_asr_i_vh, int_hexagon_S2_asr_i_vh>;
+def : T_PI_pat <S2_lsr_i_vh, int_hexagon_S2_lsr_i_vh>;
+def : T_PI_pat <S2_asl_i_vh, int_hexagon_S2_asl_i_vh>;
+
+// Vector shift halfwords by register
+// Rdd64=[vaslh/vasrh/vlslh/vlsrh](Rss64,Rt32)
+def : T_PR_pat <S2_asr_r_vh, int_hexagon_S2_asr_r_vh>;
+def : T_PR_pat <S2_lsr_r_vh, int_hexagon_S2_lsr_r_vh>;
+def : T_PR_pat <S2_asl_r_vh, int_hexagon_S2_asl_r_vh>;
+def : T_PR_pat <S2_lsl_r_vh, int_hexagon_S2_lsl_r_vh>;
+
+/********************************************************************
+* STYPE/VW *
+*********************************************************************/
+
+// Vector absolute value words with and without saturation
+def : T_P_pat <A2_vabsw, int_hexagon_A2_vabsw>;
+def : T_P_pat <A2_vabswsat, int_hexagon_A2_vabswsat>;
+
+// Vector shift words by immediate.
+// Rdd64=[vasrw|vlsrw|vaslw](Rss64,u5)
+def : T_PI_pat <S2_asr_i_vw, int_hexagon_S2_asr_i_vw>;
+def : T_PI_pat <S2_lsr_i_vw, int_hexagon_S2_lsr_i_vw>;
+def : T_PI_pat <S2_asl_i_vw, int_hexagon_S2_asl_i_vw>;
+
+// Vector shift words by register.
+// Rdd64=[vasrw|vlsrw|vaslw|vlslw](Rss64,Rt32)
+def : T_PR_pat <S2_asr_r_vw, int_hexagon_S2_asr_r_vw>;
+def : T_PR_pat <S2_lsr_r_vw, int_hexagon_S2_lsr_r_vw>;
+def : T_PR_pat <S2_asl_r_vw, int_hexagon_S2_asl_r_vw>;
+def : T_PR_pat <S2_lsl_r_vw, int_hexagon_S2_lsl_r_vw>;
+
+// Vector shift words with truncate and pack
+
+def : T_PR_pat <S2_asr_r_svw_trun, int_hexagon_S2_asr_r_svw_trun>;
+
+def : T_R_pat<L2_loadw_locked, int_hexagon_L2_loadw_locked>;
+def : T_R_pat<L4_loadd_locked, int_hexagon_L4_loadd_locked>;
+
+def: Pat<(i32 (int_hexagon_S2_storew_locked (I32:$Rs), (I32:$Rt))),
+ (i32 (C2_tfrpr (S2_storew_locked (I32:$Rs), (I32:$Rt))))>;
+def: Pat<(i32 (int_hexagon_S4_stored_locked (I32:$Rs), (I64:$Rt))),
+ (i32 (C2_tfrpr (S4_stored_locked (I32:$Rs), (I64:$Rt))))>;
+
+/********************************************************************
+* ST
+*********************************************************************/
+
+class T_stb_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Val>
+ : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru),
+ (MI I32:$Rs, Val:$Rt, I32:$Ru)>;
+
+def : T_stb_pat <S2_storerh_pbr_pseudo, int_hexagon_brev_sth, I32>;
+def : T_stb_pat <S2_storerb_pbr_pseudo, int_hexagon_brev_stb, I32>;
+def : T_stb_pat <S2_storeri_pbr_pseudo, int_hexagon_brev_stw, I32>;
+def : T_stb_pat <S2_storerf_pbr_pseudo, int_hexagon_brev_sthhi, I32>;
+def : T_stb_pat <S2_storerd_pbr_pseudo, int_hexagon_brev_std, I64>;
+
+class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val>
+ : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s),
+ (MI I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s)>;
+
+def: T_stc_pat<S2_storerb_pci_pseudo, int_hexagon_circ_stb, s4_0ImmPred, I32>;
+def: T_stc_pat<S2_storerh_pci_pseudo, int_hexagon_circ_sth, s4_1ImmPred, I32>;
+def: T_stc_pat<S2_storeri_pci_pseudo, int_hexagon_circ_stw, s4_2ImmPred, I32>;
+def: T_stc_pat<S2_storerd_pci_pseudo, int_hexagon_circ_std, s4_3ImmPred, I64>;
+def: T_stc_pat<S2_storerf_pci_pseudo, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
+
+include "HexagonIntrinsicsV3.td"
+include "HexagonIntrinsicsV4.td"
+include "HexagonIntrinsicsV5.td"
+include "HexagonIntrinsicsV60.td"
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
new file mode 100644
index 0000000..4c28b28
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
@@ -0,0 +1,40 @@
+//===-- HexagonIntrinsicsDerived.td - Derived intrinsics ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Multiply 64-bit and use lower result
+//
+// Optimized using intrinsic accumulates (M2_maci)
+//
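+// With Rss = {a1:a0} and Rtt = {b1:b0} split into 32-bit halves,
+//   lo64(Rss*Rtt) = combine(hi32(a0*b0) + a0*b1 + a1*b0, lo32(a0*b0)),
+// which is what the nested dpmpyuu/maci expansion below computes.
+//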
+def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2),
+ (i64
+ (A2_combinew
+ (M2_maci
+ (M2_maci
+ (i32
+ (EXTRACT_SUBREG
+ (i64
+ (M2_dpmpyuu_s0 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
+ subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
+ subreg_loreg)))),
+ subreg_hireg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg))),
+ (i32
+ (EXTRACT_SUBREG
+ (i64
+ (M2_dpmpyuu_s0
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
+ subreg_loreg)))), subreg_loreg))))>;
+
+
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td
new file mode 100644
index 0000000..6152cb0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td
@@ -0,0 +1,27 @@
+//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+// Vector reduce complex multiply real or imaginary
+def : T_PR_pat <M2_vrcmpys_s1, int_hexagon_M2_vrcmpys_s1>;
+def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>;
+def : T_PR_pat <M2_vrcmpys_s1rp, int_hexagon_M2_vrcmpys_s1rp>;
+
+// Vector reduce add unsigned halfwords
+def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>;
+
+def: T_RP_pat<A2_addsp, int_hexagon_A2_addsp>;
+def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>;
+def: T_PP_pat<A2_minp, int_hexagon_A2_minp>;
+def: T_PP_pat<A2_minup, int_hexagon_A2_minup>;
+def: T_PP_pat<A2_maxp, int_hexagon_A2_maxp>;
+def: T_PP_pat<A2_maxup, int_hexagon_A2_maxup>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
new file mode 100644
index 0000000..c80a188
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@@ -0,0 +1,318 @@
+//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is populated based on the following specs:
+// Hexagon V4 Architecture Extensions
+// Application-Level Specification
+// 80-V9418-12 Rev. A
+// June 15, 2010
+
+// Vector reduce multiply word by signed half (32x16)
+//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>;
+def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>;
+
+//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>;
+def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>;
+
+//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>;
+
+//Rdd+=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>;
+
+// Vector multiply halfwords, signed by unsigned
+// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>;
+def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>;
+
+// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>;
+def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>;
+
+// Vector polynomial multiply halfwords
+// Rdd=vpmpyh(Rs,Rt)
+def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>;
+// Rxx[^]=vpmpyh(Rs,Rt)
+def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>;
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>;
+// Rxx^=pmpyw(Rs,Rt)
+def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>;
+
+//Rxx^=asr(Rss,Rt)
+def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>;
+//Rxx^=asl(Rss,Rt)
+def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>;
+//Rxx^=lsr(Rss,Rt)
+def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>;
+//Rxx^=lsl(Rss,Rt)
+def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>;
+
+// Multiply and use upper result
+def : MType_R32_pat <int_hexagon_M2_mpysu_up, M2_mpysu_up>;
+def : MType_R32_pat <int_hexagon_M2_mpy_up_s1, M2_mpy_up_s1>;
+def : MType_R32_pat <int_hexagon_M2_hmmpyh_s1, M2_hmmpyh_s1>;
+def : MType_R32_pat <int_hexagon_M2_hmmpyl_s1, M2_hmmpyl_s1>;
+def : MType_R32_pat <int_hexagon_M2_mpy_up_s1_sat, M2_mpy_up_s1_sat>;
+
+// Vector reduce add unsigned halfwords
+def : Pat <(int_hexagon_M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2)>;
+
+def : T_P_pat <S2_brevp, int_hexagon_S2_brevp>;
+
+def: T_P_pat <S2_ct0p, int_hexagon_S2_ct0p>;
+def: T_P_pat <S2_ct1p, int_hexagon_S2_ct1p>;
+def: T_RR_pat<C4_nbitsset, int_hexagon_C4_nbitsset>;
+def: T_RR_pat<C4_nbitsclr, int_hexagon_C4_nbitsclr>;
+def: T_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>;
+
+
+class vcmpImm_pat <InstHexagon MI, Intrinsic IntID, PatLeaf immPred> :
+ Pat <(IntID (i64 DoubleRegs:$src1), immPred:$src2),
+ (MI (i64 DoubleRegs:$src1), immPred:$src2)>;
+
+def : vcmpImm_pat <A4_vcmpbeqi, int_hexagon_A4_vcmpbeqi, u8ImmPred>;
+def : vcmpImm_pat <A4_vcmpbgti, int_hexagon_A4_vcmpbgti, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmpbgtui, int_hexagon_A4_vcmpbgtui, u7ImmPred>;
+
+def : vcmpImm_pat <A4_vcmpheqi, int_hexagon_A4_vcmpheqi, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmphgti, int_hexagon_A4_vcmphgti, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmphgtui, int_hexagon_A4_vcmphgtui, u7ImmPred>;
+
+def : vcmpImm_pat <A4_vcmpweqi, int_hexagon_A4_vcmpweqi, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmpwgti, int_hexagon_A4_vcmpwgti, s8ImmPred>;
+def : vcmpImm_pat <A4_vcmpwgtui, int_hexagon_A4_vcmpwgtui, u7ImmPred>;
+
+def : T_PP_pat<A4_vcmpbeq_any, int_hexagon_A4_vcmpbeq_any>;
+
+def : T_RR_pat<A4_cmpbeq, int_hexagon_A4_cmpbeq>;
+def : T_RR_pat<A4_cmpbgt, int_hexagon_A4_cmpbgt>;
+def : T_RR_pat<A4_cmpbgtu, int_hexagon_A4_cmpbgtu>;
+def : T_RR_pat<A4_cmpheq, int_hexagon_A4_cmpheq>;
+def : T_RR_pat<A4_cmphgt, int_hexagon_A4_cmphgt>;
+def : T_RR_pat<A4_cmphgtu, int_hexagon_A4_cmphgtu>;
+
+def : T_RI_pat<A4_cmpbeqi, int_hexagon_A4_cmpbeqi>;
+def : T_RI_pat<A4_cmpbgti, int_hexagon_A4_cmpbgti>;
+def : T_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>;
+
+def : T_RI_pat<A4_cmpheqi, int_hexagon_A4_cmpheqi>;
+def : T_RI_pat<A4_cmphgti, int_hexagon_A4_cmphgti>;
+def : T_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>;
+
+def : T_RP_pat <A4_boundscheck, int_hexagon_A4_boundscheck>;
+
+def : T_PR_pat<A4_tlbmatch, int_hexagon_A4_tlbmatch>;
+
+def : Pat <(int_hexagon_M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ (M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def : T_IRR_pat <M4_mpyrr_addi, int_hexagon_M4_mpyrr_addi>;
+def : T_IRI_pat <M4_mpyri_addi, int_hexagon_M4_mpyri_addi>;
+def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>;
+def : T_RRI_pat <M4_mpyri_addr, int_hexagon_M4_mpyri_addr>;
+// Multiply 32x32 and use upper result
+def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>;
+def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>;
+
+// Complex multiply 32x16
+def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>;
+def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>;
+
+def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>;
+def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>;
+
+def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>;
+def : T_PP_pat<A4_ornp, int_hexagon_A4_ornp>;
+
+// Complex add/sub halfwords/words
+def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>;
+def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>;
+def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>;
+def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>;
+
+def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>;
+def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>;
+
+// Extract bitfield
+def : T_PP_pat <S4_extractp_rp, int_hexagon_S4_extractp_rp>;
+def : T_RP_pat <S4_extract_rp, int_hexagon_S4_extract_rp>;
+def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>;
+def : T_RII_pat <S4_extract, int_hexagon_S4_extract>;
+
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>;
+
+// Shift an immediate left by register amount
+def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>;
+
+// Vector reduce maximum halfwords
+def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>;
+def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>;
+
+// Vector reduce maximum words
+def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>;
+def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>;
+
+// Vector reduce minimum halfwords
+def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>;
+def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>;
+
+// Vector reduce minimum words
+def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>;
+def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>;
+
+// Rotate and reduce bytes
+def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2,
+ u2ImmPred:$src3),
+ (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2ImmPred:$src3)>;
+
+// Rotate and reduce bytes with accumulation
+// Rxx+=vrcrotate(Rss,Rt,#u2)
+def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3, u2ImmPred:$src4),
+ (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3, u2ImmPred:$src4)>;
+
+// Vector conditional negate
+def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>;
+
+// Logical xor with xor accumulation
+def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>;
+
+// ALU64 - Vector min/max byte
+def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>;
+def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>;
+
+// Shift and add/sub/and/or
+def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>;
+def : T_IRI_pat <S4_ori_asl_ri, int_hexagon_S4_ori_asl_ri>;
+def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>;
+def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>;
+def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>;
+def : T_IRI_pat <S4_ori_lsr_ri, int_hexagon_S4_ori_lsr_ri>;
+def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>;
+def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>;
+
+// Split bitfield
+def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>;
+def : T_RR_pat <A4_bitsplit, int_hexagon_A4_bitsplit>;
+
+def: T_RR_pat<S4_parity, int_hexagon_S4_parity>;
+
+def: T_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>;
+def: T_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>;
+
+def: T_RI_pat<S4_clbaddi, int_hexagon_S4_clbaddi>;
+def: T_PI_pat<S4_clbpaddi, int_hexagon_S4_clbpaddi>;
+def: T_P_pat <S4_clbpnorm, int_hexagon_S4_clbpnorm>;
+
+/********************************************************************
+* ALU32/ALU *
+*********************************************************************/
+
+// ALU32 / ALU / Logical Operations.
+def: T_RR_pat<A4_andn, int_hexagon_A4_andn>;
+def: T_RR_pat<A4_orn, int_hexagon_A4_orn>;
+
+/********************************************************************
+* ALU32/PERM *
+*********************************************************************/
+
+// Combine Words Into Doublewords.
+def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32ImmPred>;
+def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32ImmPred>;
+
+/********************************************************************
+* ALU32/PRED *
+*********************************************************************/
+
+// Compare
+def : T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32ImmPred>;
+def : T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32ImmPred>;
+def : T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32ImmPred>;
+
+def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>;
+def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
+
+def: T_RI_pat<A4_rcmpeqi, int_hexagon_A4_rcmpeqi>;
+def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>;
+
+/********************************************************************
+* CR *
+*********************************************************************/
+
+// CR / Logical Operations On Predicates.
+
+class qi_CRInst_qiqiqi_pat<Intrinsic IntID, InstHexagon Inst> :
+ Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt, IntRegs:$Ru)),
+ (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs),
+ (C2_tfrrp IntRegs:$Rt),
+ (C2_tfrrp IntRegs:$Ru))))>;
+
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_and, C4_and_and>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_andn, C4_and_andn>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_or, C4_and_or>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_orn, C4_and_orn>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_and, C4_or_and>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_andn, C4_or_andn>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_or, C4_or_or>;
+def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_orn, C4_or_orn>;
+
+/********************************************************************
+* XTYPE/ALU *
+*********************************************************************/
+
+// Add And Accumulate.
+
+def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>;
+def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>;
+
+
+// XTYPE / ALU / Logical-logical Words.
+def : T_RRR_pat <M4_or_xor, int_hexagon_M4_or_xor>;
+def : T_RRR_pat <M4_and_xor, int_hexagon_M4_and_xor>;
+def : T_RRR_pat <M4_or_and, int_hexagon_M4_or_and>;
+def : T_RRR_pat <M4_and_and, int_hexagon_M4_and_and>;
+def : T_RRR_pat <M4_xor_and, int_hexagon_M4_xor_and>;
+def : T_RRR_pat <M4_or_or, int_hexagon_M4_or_or>;
+def : T_RRR_pat <M4_and_or, int_hexagon_M4_and_or>;
+def : T_RRR_pat <M4_xor_or, int_hexagon_M4_xor_or>;
+def : T_RRR_pat <M4_or_andn, int_hexagon_M4_or_andn>;
+def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>;
+def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>;
+
+def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>;
+def : T_RRI_pat <S4_or_andix, int_hexagon_S4_or_andix>;
+def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>;
+
+// Modulo wrap.
+def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>;
+
+// Arithmetic/Convergent round
+// Rd=[cround|round](Rs,Rt)[:sat]
+// Rd=[cround|round](Rs,#u5)[:sat]
+def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>;
+def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>;
+
+def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>;
+def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>;
+
+def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>;
+def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>;
+
+def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
new file mode 100644
index 0000000..60e6b1e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
@@ -0,0 +1,111 @@
+//===- HexagonIntrinsicsV5.td - V5 Instruction intrinsics --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//Rdd[+]=vrmpybsu(Rss,Rtt)
+//Rdd[+]=vrmpybuu(Rss,Rtt)
+let Predicates = [HasV5T] in {
+def : T_PP_pat <M5_vrmpybsu, int_hexagon_M5_vrmpybsu>;
+def : T_PP_pat <M5_vrmpybuu, int_hexagon_M5_vrmpybuu>;
+
+def : T_PP_pat <M5_vdmpybsu, int_hexagon_M5_vdmpybsu>;
+
+def : T_PPP_pat <M5_vrmacbsu, int_hexagon_M5_vrmacbsu>;
+def : T_PPP_pat <M5_vrmacbuu, int_hexagon_M5_vrmacbuu>;
+//Rxx+=vdmpybsu(Rss,Rtt):sat
+def : T_PPP_pat <M5_vdmacbsu, int_hexagon_M5_vdmacbsu>;
+
+// Vector multiply bytes
+// Rdd=vmpyb[s]u(Rs,Rt)
+def : T_RR_pat <M5_vmpybsu, int_hexagon_M5_vmpybsu>;
+def : T_RR_pat <M5_vmpybuu, int_hexagon_M5_vmpybuu>;
+
+// Rxx+=vmpyb[s]u(Rs,Rt)
+def : T_PRR_pat <M5_vmacbsu, int_hexagon_M5_vmacbsu>;
+def : T_PRR_pat <M5_vmacbuu, int_hexagon_M5_vmacbuu>;
+
+// Rd=vaddhub(Rss,Rtt):sat
+def : T_PP_pat <A5_vaddhubs, int_hexagon_A5_vaddhubs>;
+}
+
+def : T_FF_pat<F2_sfadd, int_hexagon_F2_sfadd>;
+def : T_FF_pat<F2_sfsub, int_hexagon_F2_sfsub>;
+def : T_FF_pat<F2_sfmpy, int_hexagon_F2_sfmpy>;
+def : T_FF_pat<F2_sfmax, int_hexagon_F2_sfmax>;
+def : T_FF_pat<F2_sfmin, int_hexagon_F2_sfmin>;
+
+def : T_FF_pat<F2_sffixupn, int_hexagon_F2_sffixupn>;
+def : T_FF_pat<F2_sffixupd, int_hexagon_F2_sffixupd>;
+def : T_F_pat <F2_sffixupr, int_hexagon_F2_sffixupr>;
+
+def: qi_CRInst_qiqi_pat<C4_fastcorner9, int_hexagon_C4_fastcorner9>;
+def: qi_CRInst_qiqi_pat<C4_fastcorner9_not, int_hexagon_C4_fastcorner9_not>;
+
+def : T_P_pat <S5_popcountp, int_hexagon_S5_popcountp>;
+def : T_PI_pat <S5_asrhub_sat, int_hexagon_S5_asrhub_sat>;
+
+def : T_PI_pat <S2_asr_i_p_rnd, int_hexagon_S2_asr_i_p_rnd>;
+def : T_PI_pat <S2_asr_i_p_rnd_goodsyntax,
+ int_hexagon_S2_asr_i_p_rnd_goodsyntax>;
+
+def : T_PI_pat <S5_asrhub_rnd_sat_goodsyntax,
+ int_hexagon_S5_asrhub_rnd_sat_goodsyntax>;
+
+def : T_PI_pat <S5_vasrhrnd_goodsyntax, int_hexagon_S5_vasrhrnd_goodsyntax>;
+
+def : T_FFF_pat <F2_sffma, int_hexagon_F2_sffma>;
+def : T_FFF_pat <F2_sffms, int_hexagon_F2_sffms>;
+def : T_FFF_pat <F2_sffma_lib, int_hexagon_F2_sffma_lib>;
+def : T_FFF_pat <F2_sffms_lib, int_hexagon_F2_sffms_lib>;
+def : T_FFFQ_pat <F2_sffma_sc, int_hexagon_F2_sffma_sc>;
+
+// Compare floating-point value
+def : T_FF_pat <F2_sfcmpge, int_hexagon_F2_sfcmpge>;
+def : T_FF_pat <F2_sfcmpuo, int_hexagon_F2_sfcmpuo>;
+def : T_FF_pat <F2_sfcmpeq, int_hexagon_F2_sfcmpeq>;
+def : T_FF_pat <F2_sfcmpgt, int_hexagon_F2_sfcmpgt>;
+
+def : T_DD_pat <F2_dfcmpeq, int_hexagon_F2_dfcmpeq>;
+def : T_DD_pat <F2_dfcmpgt, int_hexagon_F2_dfcmpgt>;
+def : T_DD_pat <F2_dfcmpge, int_hexagon_F2_dfcmpge>;
+def : T_DD_pat <F2_dfcmpuo, int_hexagon_F2_dfcmpuo>;
+
+// Create floating-point value
+def : T_I_pat <F2_sfimm_p, int_hexagon_F2_sfimm_p>;
+def : T_I_pat <F2_sfimm_n, int_hexagon_F2_sfimm_n>;
+def : T_I_pat <F2_dfimm_p, int_hexagon_F2_dfimm_p>;
+def : T_I_pat <F2_dfimm_n, int_hexagon_F2_dfimm_n>;
+
+def : T_DI_pat <F2_dfclass, int_hexagon_F2_dfclass>;
+def : T_FI_pat <F2_sfclass, int_hexagon_F2_sfclass>;
+def : T_F_pat <F2_conv_sf2df, int_hexagon_F2_conv_sf2df>;
+def : T_D_pat <F2_conv_df2sf, int_hexagon_F2_conv_df2sf>;
+def : T_R_pat <F2_conv_uw2sf, int_hexagon_F2_conv_uw2sf>;
+def : T_R_pat <F2_conv_uw2df, int_hexagon_F2_conv_uw2df>;
+def : T_R_pat <F2_conv_w2sf, int_hexagon_F2_conv_w2sf>;
+def : T_R_pat <F2_conv_w2df, int_hexagon_F2_conv_w2df>;
+def : T_P_pat <F2_conv_ud2sf, int_hexagon_F2_conv_ud2sf>;
+def : T_P_pat <F2_conv_ud2df, int_hexagon_F2_conv_ud2df>;
+def : T_P_pat <F2_conv_d2sf, int_hexagon_F2_conv_d2sf>;
+def : T_P_pat <F2_conv_d2df, int_hexagon_F2_conv_d2df>;
+def : T_F_pat <F2_conv_sf2uw, int_hexagon_F2_conv_sf2uw>;
+def : T_F_pat <F2_conv_sf2w, int_hexagon_F2_conv_sf2w>;
+def : T_F_pat <F2_conv_sf2ud, int_hexagon_F2_conv_sf2ud>;
+def : T_F_pat <F2_conv_sf2d, int_hexagon_F2_conv_sf2d>;
+def : T_D_pat <F2_conv_df2uw, int_hexagon_F2_conv_df2uw>;
+def : T_D_pat <F2_conv_df2w, int_hexagon_F2_conv_df2w>;
+def : T_D_pat <F2_conv_df2ud, int_hexagon_F2_conv_df2ud>;
+def : T_D_pat <F2_conv_df2d, int_hexagon_F2_conv_df2d>;
+def : T_F_pat <F2_conv_sf2uw_chop, int_hexagon_F2_conv_sf2uw_chop>;
+def : T_F_pat <F2_conv_sf2w_chop, int_hexagon_F2_conv_sf2w_chop>;
+def : T_F_pat <F2_conv_sf2ud_chop, int_hexagon_F2_conv_sf2ud_chop>;
+def : T_F_pat <F2_conv_sf2d_chop, int_hexagon_F2_conv_sf2d_chop>;
+def : T_D_pat <F2_conv_df2uw_chop, int_hexagon_F2_conv_df2uw_chop>;
+def : T_D_pat <F2_conv_df2w_chop, int_hexagon_F2_conv_df2w_chop>;
+def : T_D_pat <F2_conv_df2ud_chop, int_hexagon_F2_conv_df2ud_chop>;
+def : T_D_pat <F2_conv_df2d_chop, int_hexagon_F2_conv_df2d_chop>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
new file mode 100644
index 0000000..24a3e4d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
@@ -0,0 +1,836 @@
+//=- HexagonIntrinsicsV60.td - Target Description for Hexagon -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V60 Compiler Intrinsics in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1 in {
+def HEXAGON_V6_vd0_pseudo : CVI_VA_Resource<(outs VectorRegs:$dst),
+ (ins ),
+ "$dst=#0",
+ [(set VectorRegs:$dst, (int_hexagon_V6_vd0 ))]>;
+
+def HEXAGON_V6_vd0_pseudo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst),
+ (ins ),
+ "$dst=#0",
+ [(set VectorRegs128B:$dst, (int_hexagon_V6_vd0_128B ))]>;
+}
+let isPseudo = 1 in
+def HEXAGON_V6_vassignp : CVI_VA_Resource<(outs VecDblRegs:$dst),
+ (ins VecDblRegs:$src1),
+ "$dst=vassignp_W($src1)",
+ [(set VecDblRegs:$dst, (int_hexagon_V6_vassignp VecDblRegs:$src1))]>;
+
+let isPseudo = 1 in
+def HEXAGON_V6_vassignp_128B : CVI_VA_Resource<(outs VecDblRegs128B:$dst),
+ (ins VecDblRegs128B:$src1),
+ "$dst=vassignp_W_128B($src1)",
+ [(set VecDblRegs128B:$dst, (int_hexagon_V6_vassignp_128B
+ VecDblRegs128B:$src1))]>;
+
+let isPseudo = 1 in
+def HEXAGON_V6_lo : CVI_VA_Resource<(outs VectorRegs:$dst),
+ (ins VecDblRegs:$src1),
+ "$dst=lo_W($src1)",
+ [(set VectorRegs:$dst, (int_hexagon_V6_lo VecDblRegs:$src1))]>;
+
+let isPseudo = 1 in
+def HEXAGON_V6_hi : CVI_VA_Resource<(outs VectorRegs:$dst),
+ (ins VecDblRegs:$src1),
+ "$dst=hi_W($src1)",
+ [(set VectorRegs:$dst, (int_hexagon_V6_hi VecDblRegs:$src1))]>;
+
+let isPseudo = 1 in
+def HEXAGON_V6_lo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst),
+ (ins VecDblRegs128B:$src1),
+ "$dst=lo_W($src1)",
+ [(set VectorRegs128B:$dst, (int_hexagon_V6_lo_128B VecDblRegs128B:$src1))]>;
+
+let isPseudo = 1 in
+def HEXAGON_V6_hi_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst),
+ (ins VecDblRegs128B:$src1),
+ "$dst=hi_W($src1)",
+ [(set VectorRegs128B:$dst, (int_hexagon_V6_hi_128B VecDblRegs128B:$src1))]>;
+
+let AddedComplexity = 100 in {
+def : Pat < (v16i32 (int_hexagon_V6_lo (v32i32 VecDblRegs:$src1))),
+ (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_loreg)) >,
+ Requires<[UseHVXSgl]>;
+
+def : Pat < (v16i32 (int_hexagon_V6_hi (v32i32 VecDblRegs:$src1))),
+ (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_hireg)) >,
+ Requires<[UseHVXSgl]>;
+
+def : Pat < (v32i32 (int_hexagon_V6_lo_128B (v64i32 VecDblRegs128B:$src1))),
+ (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1),
+ subreg_loreg)) >,
+ Requires<[UseHVXDbl]>;
+
+def : Pat < (v32i32 (int_hexagon_V6_hi_128B (v64i32 VecDblRegs128B:$src1))),
+ (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1),
+ subreg_hireg)) >,
+ Requires<[UseHVXDbl]>;
+}
+
+def : Pat <(v512i1 (bitconvert (v16i32 VectorRegs:$src1))),
+ (v512i1 (V6_vandvrt(v16i32 VectorRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v512i1 (bitconvert (v32i16 VectorRegs:$src1))),
+ (v512i1 (V6_vandvrt(v32i16 VectorRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v512i1 (bitconvert (v64i8 VectorRegs:$src1))),
+ (v512i1 (V6_vandvrt(v64i8 VectorRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v512i1 (bitconvert (v8i64 VectorRegs:$src1))),
+ (v512i1 (V6_vandvrt(v8i64 VectorRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v16i32 (bitconvert (v512i1 VecPredRegs:$src1))),
+ (v16i32 (V6_vandqrt(v512i1 VecPredRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v32i16 (bitconvert (v512i1 VecPredRegs:$src1))),
+ (v32i16 (V6_vandqrt(v512i1 VecPredRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v64i8 (bitconvert (v512i1 VecPredRegs:$src1))),
+ (v64i8 (V6_vandqrt(v512i1 VecPredRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v8i64 (bitconvert (v512i1 VecPredRegs:$src1))),
+ (v8i64 (V6_vandqrt(v512i1 VecPredRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v1024i1 (bitconvert (v32i32 VectorRegs128B:$src1))),
+ (v1024i1 (V6_vandvrt_128B(v32i32 VectorRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v1024i1 (bitconvert (v64i16 VectorRegs128B:$src1))),
+ (v1024i1 (V6_vandvrt_128B(v64i16 VectorRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v1024i1 (bitconvert (v128i8 VectorRegs128B:$src1))),
+ (v1024i1 (V6_vandvrt_128B(v128i8 VectorRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v1024i1 (bitconvert (v16i64 VectorRegs128B:$src1))),
+ (v1024i1 (V6_vandvrt_128B(v16i64 VectorRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v32i32 (bitconvert (v1024i1 VecPredRegs128B:$src1))),
+ (v32i32 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v64i16 (bitconvert (v1024i1 VecPredRegs128B:$src1))),
+ (v64i16 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v128i8 (bitconvert (v1024i1 VecPredRegs128B:$src1))),
+ (v128i8 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v16i64 (bitconvert (v1024i1 VecPredRegs128B:$src1))),
+ (v16i64 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+let AddedComplexity = 140 in {
+def : Pat <(store (v512i1 VecPredRegs:$src1), (i32 IntRegs:$addr)),
+ (V6_vS32b_ai IntRegs:$addr, 0,
+ (v16i32 (V6_vandqrt (v512i1 VecPredRegs:$src1),
+ (A2_tfrsi 0x01010101))))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v512i1 (load (i32 IntRegs:$addr))),
+ (v512i1 (V6_vandvrt
+ (v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(store (v1024i1 VecPredRegs128B:$src1), (i32 IntRegs:$addr)),
+ (V6_vS32b_ai_128B IntRegs:$addr, 0,
+ (v32i32 (V6_vandqrt_128B (v1024i1 VecPredRegs128B:$src1),
+ (A2_tfrsi 0x01010101))))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v1024i1 (load (i32 IntRegs:$addr))),
+ (v1024i1 (V6_vandvrt_128B
+ (v32i32 (V6_vL32b_ai_128B IntRegs:$addr, 0)),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_R_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID IntRegs:$src1), (MI IntRegs:$src1)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") IntRegs:$src1),
+ (!cast<InstHexagon>(MI#"_128B") IntRegs:$src1)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_V_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1),
+ (MI VectorRegs:$src1)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_Q_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1),
+ (MI VecPredRegs:$src1)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2),
+ (MI VecDblRegs:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B")VecDblRegs128B:$src1, IntRegs:$src2),
+ (!cast<InstHexagon>(MI#"_128B")VecDblRegs128B:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, IntRegs:$src2),
+ (MI VectorRegs:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B")VectorRegs128B:$src1, IntRegs:$src2),
+ (!cast<InstHexagon>(MI#"_128B")VectorRegs128B:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WV_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2),
+ (MI VecDblRegs:$src1, VectorRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WW_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2),
+ (MI VecDblRegs:$src1, VecDblRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VV_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2),
+ (MI VectorRegs:$src1, VectorRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_QR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, IntRegs:$src2),
+ (MI VecPredRegs:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+ IntRegs:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+ IntRegs:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_QQ_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, VecPredRegs:$src2),
+ (MI VecPredRegs:$src1, VecPredRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+ VecPredRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+ VecPredRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WWR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3),
+ (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3),
+ (MI VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, IntRegs:$src3),
+ (MI VecDblRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VWR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VecDblRegs:$src2, IntRegs:$src3),
+ (MI VectorRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVV_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+ (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WVV_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+ (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_QVV_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+ (MI VecPredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VQR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3),
+ (MI VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VecPredRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VecPredRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+
+multiclass T_QVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2, IntRegs:$src3),
+ (MI VecPredRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVI_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, imm:$src3),
+ (MI VectorRegs:$src1, VectorRegs:$src2, imm:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2, imm:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2, imm:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WRI_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2, imm:$src3),
+ (MI VecDblRegs:$src1, IntRegs:$src2, imm:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ IntRegs:$src2, imm:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ IntRegs:$src2, imm:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WWRI_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3, imm:$src4),
+ (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3, imm:$src4)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3, imm:$src4),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3, imm:$src4)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3,
+ IntRegs:$src4),
+ (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3,
+ IntRegs:$src4)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3,
+ IntRegs:$src4),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3,
+ IntRegs:$src4)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WVVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3,
+ IntRegs:$src4),
+ (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3,
+ IntRegs:$src4)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3,
+ IntRegs:$src4),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3,
+ IntRegs:$src4)>,
+ Requires<[UseHVXDbl]>;
+}
+
+defm : T_WR_pat<V6_vtmpyb, int_hexagon_V6_vtmpyb>;
+defm : T_WR_pat <V6_vtmpybus, int_hexagon_V6_vtmpybus>;
+defm : T_VR_pat <V6_vdmpyhb, int_hexagon_V6_vdmpyhb>;
+defm : T_VR_pat <V6_vrmpyub, int_hexagon_V6_vrmpyub>;
+defm : T_VR_pat <V6_vrmpybus, int_hexagon_V6_vrmpybus>;
+defm : T_WR_pat <V6_vdsaduh, int_hexagon_V6_vdsaduh>;
+defm : T_VR_pat <V6_vdmpybus, int_hexagon_V6_vdmpybus>;
+defm : T_WR_pat <V6_vdmpybus_dv, int_hexagon_V6_vdmpybus_dv>;
+defm : T_VR_pat <V6_vdmpyhsusat, int_hexagon_V6_vdmpyhsusat>;
+defm : T_WR_pat <V6_vdmpyhsuisat, int_hexagon_V6_vdmpyhsuisat>;
+defm : T_VR_pat <V6_vdmpyhsat, int_hexagon_V6_vdmpyhsat>;
+defm : T_WR_pat <V6_vdmpyhisat, int_hexagon_V6_vdmpyhisat>;
+defm : T_WR_pat <V6_vdmpyhb_dv, int_hexagon_V6_vdmpyhb_dv>;
+defm : T_VR_pat <V6_vmpybus, int_hexagon_V6_vmpybus>;
+defm : T_WR_pat <V6_vmpabus, int_hexagon_V6_vmpabus>;
+defm : T_WR_pat <V6_vmpahb, int_hexagon_V6_vmpahb>;
+defm : T_VR_pat <V6_vmpyh, int_hexagon_V6_vmpyh>;
+defm : T_VR_pat <V6_vmpyhss, int_hexagon_V6_vmpyhss>;
+defm : T_VR_pat <V6_vmpyhsrs, int_hexagon_V6_vmpyhsrs>;
+defm : T_VR_pat <V6_vmpyuh, int_hexagon_V6_vmpyuh>;
+defm : T_VR_pat <V6_vmpyihb, int_hexagon_V6_vmpyihb>;
+defm : T_VR_pat <V6_vror, int_hexagon_V6_vror>;
+defm : T_VR_pat <V6_vasrw, int_hexagon_V6_vasrw>;
+defm : T_VR_pat <V6_vasrh, int_hexagon_V6_vasrh>;
+defm : T_VR_pat <V6_vaslw, int_hexagon_V6_vaslw>;
+defm : T_VR_pat <V6_vaslh, int_hexagon_V6_vaslh>;
+defm : T_VR_pat <V6_vlsrw, int_hexagon_V6_vlsrw>;
+defm : T_VR_pat <V6_vlsrh, int_hexagon_V6_vlsrh>;
+defm : T_VR_pat <V6_vmpyiwh, int_hexagon_V6_vmpyiwh>;
+defm : T_VR_pat <V6_vmpyiwb, int_hexagon_V6_vmpyiwb>;
+defm : T_WR_pat <V6_vtmpyhb, int_hexagon_V6_vtmpyhb>;
+defm : T_VR_pat <V6_vmpyub, int_hexagon_V6_vmpyub>;
+
+defm : T_VV_pat <V6_vrmpyubv, int_hexagon_V6_vrmpyubv>;
+defm : T_VV_pat <V6_vrmpybv, int_hexagon_V6_vrmpybv>;
+defm : T_VV_pat <V6_vrmpybusv, int_hexagon_V6_vrmpybusv>;
+defm : T_VV_pat <V6_vdmpyhvsat, int_hexagon_V6_vdmpyhvsat>;
+defm : T_VV_pat <V6_vmpybv, int_hexagon_V6_vmpybv>;
+defm : T_VV_pat <V6_vmpyubv, int_hexagon_V6_vmpyubv>;
+defm : T_VV_pat <V6_vmpybusv, int_hexagon_V6_vmpybusv>;
+defm : T_VV_pat <V6_vmpyhv, int_hexagon_V6_vmpyhv>;
+defm : T_VV_pat <V6_vmpyuhv, int_hexagon_V6_vmpyuhv>;
+defm : T_VV_pat <V6_vmpyhvsrs, int_hexagon_V6_vmpyhvsrs>;
+defm : T_VV_pat <V6_vmpyhus, int_hexagon_V6_vmpyhus>;
+defm : T_WW_pat <V6_vmpabusv, int_hexagon_V6_vmpabusv>;
+defm : T_VV_pat <V6_vmpyih, int_hexagon_V6_vmpyih>;
+defm : T_VV_pat <V6_vand, int_hexagon_V6_vand>;
+defm : T_VV_pat <V6_vor, int_hexagon_V6_vor>;
+defm : T_VV_pat <V6_vxor, int_hexagon_V6_vxor>;
+defm : T_VV_pat <V6_vaddw, int_hexagon_V6_vaddw>;
+defm : T_VV_pat <V6_vaddubsat, int_hexagon_V6_vaddubsat>;
+defm : T_VV_pat <V6_vadduhsat, int_hexagon_V6_vadduhsat>;
+defm : T_VV_pat <V6_vaddhsat, int_hexagon_V6_vaddhsat>;
+defm : T_VV_pat <V6_vaddwsat, int_hexagon_V6_vaddwsat>;
+defm : T_VV_pat <V6_vsubb, int_hexagon_V6_vsubb>;
+defm : T_VV_pat <V6_vsubh, int_hexagon_V6_vsubh>;
+defm : T_VV_pat <V6_vsubw, int_hexagon_V6_vsubw>;
+defm : T_VV_pat <V6_vsububsat, int_hexagon_V6_vsububsat>;
+defm : T_VV_pat <V6_vsubuhsat, int_hexagon_V6_vsubuhsat>;
+defm : T_VV_pat <V6_vsubhsat, int_hexagon_V6_vsubhsat>;
+defm : T_VV_pat <V6_vsubwsat, int_hexagon_V6_vsubwsat>;
+defm : T_WW_pat <V6_vaddb_dv, int_hexagon_V6_vaddb_dv>;
+defm : T_WW_pat <V6_vaddh_dv, int_hexagon_V6_vaddh_dv>;
+defm : T_WW_pat <V6_vaddw_dv, int_hexagon_V6_vaddw_dv>;
+defm : T_WW_pat <V6_vaddubsat_dv, int_hexagon_V6_vaddubsat_dv>;
+defm : T_WW_pat <V6_vadduhsat_dv, int_hexagon_V6_vadduhsat_dv>;
+defm : T_WW_pat <V6_vaddhsat_dv, int_hexagon_V6_vaddhsat_dv>;
+defm : T_WW_pat <V6_vaddwsat_dv, int_hexagon_V6_vaddwsat_dv>;
+defm : T_WW_pat <V6_vsubb_dv, int_hexagon_V6_vsubb_dv>;
+defm : T_WW_pat <V6_vsubh_dv, int_hexagon_V6_vsubh_dv>;
+defm : T_WW_pat <V6_vsubw_dv, int_hexagon_V6_vsubw_dv>;
+defm : T_WW_pat <V6_vsububsat_dv, int_hexagon_V6_vsububsat_dv>;
+defm : T_WW_pat <V6_vsubuhsat_dv, int_hexagon_V6_vsubuhsat_dv>;
+defm : T_WW_pat <V6_vsubhsat_dv, int_hexagon_V6_vsubhsat_dv>;
+defm : T_WW_pat <V6_vsubwsat_dv, int_hexagon_V6_vsubwsat_dv>;
+defm : T_VV_pat <V6_vaddubh, int_hexagon_V6_vaddubh>;
+defm : T_VV_pat <V6_vadduhw, int_hexagon_V6_vadduhw>;
+defm : T_VV_pat <V6_vaddhw, int_hexagon_V6_vaddhw>;
+defm : T_VV_pat <V6_vsububh, int_hexagon_V6_vsububh>;
+defm : T_VV_pat <V6_vsubuhw, int_hexagon_V6_vsubuhw>;
+defm : T_VV_pat <V6_vsubhw, int_hexagon_V6_vsubhw>;
+defm : T_VV_pat <V6_vabsdiffub, int_hexagon_V6_vabsdiffub>;
+defm : T_VV_pat <V6_vabsdiffh, int_hexagon_V6_vabsdiffh>;
+defm : T_VV_pat <V6_vabsdiffuh, int_hexagon_V6_vabsdiffuh>;
+defm : T_VV_pat <V6_vabsdiffw, int_hexagon_V6_vabsdiffw>;
+defm : T_VV_pat <V6_vavgub, int_hexagon_V6_vavgub>;
+defm : T_VV_pat <V6_vavguh, int_hexagon_V6_vavguh>;
+defm : T_VV_pat <V6_vavgh, int_hexagon_V6_vavgh>;
+defm : T_VV_pat <V6_vavgw, int_hexagon_V6_vavgw>;
+defm : T_VV_pat <V6_vnavgub, int_hexagon_V6_vnavgub>;
+defm : T_VV_pat <V6_vnavgh, int_hexagon_V6_vnavgh>;
+defm : T_VV_pat <V6_vnavgw, int_hexagon_V6_vnavgw>;
+defm : T_VV_pat <V6_vavgubrnd, int_hexagon_V6_vavgubrnd>;
+defm : T_VV_pat <V6_vavguhrnd, int_hexagon_V6_vavguhrnd>;
+defm : T_VV_pat <V6_vavghrnd, int_hexagon_V6_vavghrnd>;
+defm : T_VV_pat <V6_vavgwrnd, int_hexagon_V6_vavgwrnd>;
+defm : T_WW_pat <V6_vmpabuuv, int_hexagon_V6_vmpabuuv>;
+
+defm : T_VVR_pat <V6_vdmpyhb_acc, int_hexagon_V6_vdmpyhb_acc>;
+defm : T_VVR_pat <V6_vrmpyub_acc, int_hexagon_V6_vrmpyub_acc>;
+defm : T_VVR_pat <V6_vrmpybus_acc, int_hexagon_V6_vrmpybus_acc>;
+defm : T_VVR_pat <V6_vdmpybus_acc, int_hexagon_V6_vdmpybus_acc>;
+defm : T_VVR_pat <V6_vdmpyhsusat_acc, int_hexagon_V6_vdmpyhsusat_acc>;
+defm : T_VVR_pat <V6_vdmpyhsat_acc, int_hexagon_V6_vdmpyhsat_acc>;
+defm : T_VVR_pat <V6_vmpyiwb_acc, int_hexagon_V6_vmpyiwb_acc>;
+defm : T_VVR_pat <V6_vmpyiwh_acc, int_hexagon_V6_vmpyiwh_acc>;
+defm : T_VVR_pat <V6_vmpyihb_acc, int_hexagon_V6_vmpyihb_acc>;
+defm : T_VVR_pat <V6_vaslw_acc, int_hexagon_V6_vaslw_acc>;
+defm : T_VVR_pat <V6_vasrw_acc, int_hexagon_V6_vasrw_acc>;
+
+defm : T_VWR_pat <V6_vdmpyhsuisat_acc, int_hexagon_V6_vdmpyhsuisat_acc>;
+defm : T_VWR_pat <V6_vdmpyhisat_acc, int_hexagon_V6_vdmpyhisat_acc>;
+
+defm : T_WVR_pat <V6_vmpybus_acc, int_hexagon_V6_vmpybus_acc>;
+defm : T_WVR_pat <V6_vmpyhsat_acc, int_hexagon_V6_vmpyhsat_acc>;
+defm : T_WVR_pat <V6_vmpyuh_acc, int_hexagon_V6_vmpyuh_acc>;
+defm : T_WVR_pat <V6_vmpyub_acc, int_hexagon_V6_vmpyub_acc>;
+
+defm : T_WWR_pat <V6_vtmpyb_acc, int_hexagon_V6_vtmpyb_acc>;
+defm : T_WWR_pat <V6_vtmpybus_acc, int_hexagon_V6_vtmpybus_acc>;
+defm : T_WWR_pat <V6_vtmpyhb_acc, int_hexagon_V6_vtmpyhb_acc>;
+defm : T_WWR_pat <V6_vdmpybus_dv_acc, int_hexagon_V6_vdmpybus_dv_acc>;
+defm : T_WWR_pat <V6_vdmpyhb_dv_acc, int_hexagon_V6_vdmpyhb_dv_acc>;
+defm : T_WWR_pat <V6_vmpabus_acc, int_hexagon_V6_vmpabus_acc>;
+defm : T_WWR_pat <V6_vmpahb_acc, int_hexagon_V6_vmpahb_acc>;
+defm : T_WWR_pat <V6_vdsaduh_acc, int_hexagon_V6_vdsaduh_acc>;
+
+defm : T_VVV_pat <V6_vdmpyhvsat_acc, int_hexagon_V6_vdmpyhvsat_acc>;
+defm : T_WVV_pat <V6_vmpybusv_acc, int_hexagon_V6_vmpybusv_acc>;
+defm : T_WVV_pat <V6_vmpybv_acc, int_hexagon_V6_vmpybv_acc>;
+defm : T_WVV_pat <V6_vmpyhus_acc, int_hexagon_V6_vmpyhus_acc>;
+defm : T_WVV_pat <V6_vmpyhv_acc, int_hexagon_V6_vmpyhv_acc>;
+defm : T_VVV_pat <V6_vmpyiewh_acc, int_hexagon_V6_vmpyiewh_acc>;
+defm : T_VVV_pat <V6_vmpyiewuh_acc, int_hexagon_V6_vmpyiewuh_acc>;
+defm : T_VVV_pat <V6_vmpyih_acc, int_hexagon_V6_vmpyih_acc>;
+defm : T_VVV_pat <V6_vmpyowh_rnd_sacc, int_hexagon_V6_vmpyowh_rnd_sacc>;
+defm : T_VVV_pat <V6_vmpyowh_sacc, int_hexagon_V6_vmpyowh_sacc>;
+defm : T_WVV_pat <V6_vmpyubv_acc, int_hexagon_V6_vmpyubv_acc>;
+defm : T_WVV_pat <V6_vmpyuhv_acc, int_hexagon_V6_vmpyuhv_acc>;
+defm : T_VVV_pat <V6_vrmpybusv_acc, int_hexagon_V6_vrmpybusv_acc>;
+defm : T_VVV_pat <V6_vrmpybv_acc, int_hexagon_V6_vrmpybv_acc>;
+defm : T_VVV_pat <V6_vrmpyubv_acc, int_hexagon_V6_vrmpyubv_acc>;
+
+// Compare instructions
+defm : T_QVV_pat <V6_veqb_and, int_hexagon_V6_veqb_and>;
+defm : T_QVV_pat <V6_veqh_and, int_hexagon_V6_veqh_and>;
+defm : T_QVV_pat <V6_veqw_and, int_hexagon_V6_veqw_and>;
+defm : T_QVV_pat <V6_vgtb_and, int_hexagon_V6_vgtb_and>;
+defm : T_QVV_pat <V6_vgth_and, int_hexagon_V6_vgth_and>;
+defm : T_QVV_pat <V6_vgtw_and, int_hexagon_V6_vgtw_and>;
+defm : T_QVV_pat <V6_vgtub_and, int_hexagon_V6_vgtub_and>;
+defm : T_QVV_pat <V6_vgtuh_and, int_hexagon_V6_vgtuh_and>;
+defm : T_QVV_pat <V6_vgtuw_and, int_hexagon_V6_vgtuw_and>;
+defm : T_QVV_pat <V6_veqb_or, int_hexagon_V6_veqb_or>;
+defm : T_QVV_pat <V6_veqh_or, int_hexagon_V6_veqh_or>;
+defm : T_QVV_pat <V6_veqw_or, int_hexagon_V6_veqw_or>;
+defm : T_QVV_pat <V6_vgtb_or, int_hexagon_V6_vgtb_or>;
+defm : T_QVV_pat <V6_vgth_or, int_hexagon_V6_vgth_or>;
+defm : T_QVV_pat <V6_vgtw_or, int_hexagon_V6_vgtw_or>;
+defm : T_QVV_pat <V6_vgtub_or, int_hexagon_V6_vgtub_or>;
+defm : T_QVV_pat <V6_vgtuh_or, int_hexagon_V6_vgtuh_or>;
+defm : T_QVV_pat <V6_vgtuw_or, int_hexagon_V6_vgtuw_or>;
+defm : T_QVV_pat <V6_veqb_xor, int_hexagon_V6_veqb_xor>;
+defm : T_QVV_pat <V6_veqh_xor, int_hexagon_V6_veqh_xor>;
+defm : T_QVV_pat <V6_veqw_xor, int_hexagon_V6_veqw_xor>;
+defm : T_QVV_pat <V6_vgtb_xor, int_hexagon_V6_vgtb_xor>;
+defm : T_QVV_pat <V6_vgth_xor, int_hexagon_V6_vgth_xor>;
+defm : T_QVV_pat <V6_vgtw_xor, int_hexagon_V6_vgtw_xor>;
+defm : T_QVV_pat <V6_vgtub_xor, int_hexagon_V6_vgtub_xor>;
+defm : T_QVV_pat <V6_vgtuh_xor, int_hexagon_V6_vgtuh_xor>;
+defm : T_QVV_pat <V6_vgtuw_xor, int_hexagon_V6_vgtuw_xor>;
+
+defm : T_VV_pat <V6_vminub, int_hexagon_V6_vminub>;
+defm : T_VV_pat <V6_vminuh, int_hexagon_V6_vminuh>;
+defm : T_VV_pat <V6_vminh, int_hexagon_V6_vminh>;
+defm : T_VV_pat <V6_vminw, int_hexagon_V6_vminw>;
+defm : T_VV_pat <V6_vmaxub, int_hexagon_V6_vmaxub>;
+defm : T_VV_pat <V6_vmaxuh, int_hexagon_V6_vmaxuh>;
+defm : T_VV_pat <V6_vmaxh, int_hexagon_V6_vmaxh>;
+defm : T_VV_pat <V6_vmaxw, int_hexagon_V6_vmaxw>;
+defm : T_VV_pat <V6_vdelta, int_hexagon_V6_vdelta>;
+defm : T_VV_pat <V6_vrdelta, int_hexagon_V6_vrdelta>;
+defm : T_VV_pat <V6_vdealb4w, int_hexagon_V6_vdealb4w>;
+defm : T_VV_pat <V6_vmpyowh_rnd, int_hexagon_V6_vmpyowh_rnd>;
+defm : T_VV_pat <V6_vshuffeb, int_hexagon_V6_vshuffeb>;
+defm : T_VV_pat <V6_vshuffob, int_hexagon_V6_vshuffob>;
+defm : T_VV_pat <V6_vshufeh, int_hexagon_V6_vshufeh>;
+defm : T_VV_pat <V6_vshufoh, int_hexagon_V6_vshufoh>;
+defm : T_VV_pat <V6_vshufoeh, int_hexagon_V6_vshufoeh>;
+defm : T_VV_pat <V6_vshufoeb, int_hexagon_V6_vshufoeb>;
+defm : T_VV_pat <V6_vcombine, int_hexagon_V6_vcombine>;
+defm : T_VV_pat <V6_vmpyieoh, int_hexagon_V6_vmpyieoh>;
+defm : T_VV_pat <V6_vsathub, int_hexagon_V6_vsathub>;
+defm : T_VV_pat <V6_vsatwh, int_hexagon_V6_vsatwh>;
+defm : T_VV_pat <V6_vroundwh, int_hexagon_V6_vroundwh>;
+defm : T_VV_pat <V6_vroundwuh, int_hexagon_V6_vroundwuh>;
+defm : T_VV_pat <V6_vroundhb, int_hexagon_V6_vroundhb>;
+defm : T_VV_pat <V6_vroundhub, int_hexagon_V6_vroundhub>;
+defm : T_VV_pat <V6_vasrwv, int_hexagon_V6_vasrwv>;
+defm : T_VV_pat <V6_vlsrwv, int_hexagon_V6_vlsrwv>;
+defm : T_VV_pat <V6_vlsrhv, int_hexagon_V6_vlsrhv>;
+defm : T_VV_pat <V6_vasrhv, int_hexagon_V6_vasrhv>;
+defm : T_VV_pat <V6_vaslwv, int_hexagon_V6_vaslwv>;
+defm : T_VV_pat <V6_vaslhv, int_hexagon_V6_vaslhv>;
+defm : T_VV_pat <V6_vaddb, int_hexagon_V6_vaddb>;
+defm : T_VV_pat <V6_vaddh, int_hexagon_V6_vaddh>;
+defm : T_VV_pat <V6_vmpyiewuh, int_hexagon_V6_vmpyiewuh>;
+defm : T_VV_pat <V6_vmpyiowh, int_hexagon_V6_vmpyiowh>;
+defm : T_VV_pat <V6_vpackeb, int_hexagon_V6_vpackeb>;
+defm : T_VV_pat <V6_vpackeh, int_hexagon_V6_vpackeh>;
+defm : T_VV_pat <V6_vpackhub_sat, int_hexagon_V6_vpackhub_sat>;
+defm : T_VV_pat <V6_vpackhb_sat, int_hexagon_V6_vpackhb_sat>;
+defm : T_VV_pat <V6_vpackwuh_sat, int_hexagon_V6_vpackwuh_sat>;
+defm : T_VV_pat <V6_vpackwh_sat, int_hexagon_V6_vpackwh_sat>;
+defm : T_VV_pat <V6_vpackob, int_hexagon_V6_vpackob>;
+defm : T_VV_pat <V6_vpackoh, int_hexagon_V6_vpackoh>;
+defm : T_VV_pat <V6_vmpyewuh, int_hexagon_V6_vmpyewuh>;
+defm : T_VV_pat <V6_vmpyowh, int_hexagon_V6_vmpyowh>;
+
+defm : T_QVV_pat <V6_vaddbq, int_hexagon_V6_vaddbq>;
+defm : T_QVV_pat <V6_vaddhq, int_hexagon_V6_vaddhq>;
+defm : T_QVV_pat <V6_vaddwq, int_hexagon_V6_vaddwq>;
+defm : T_QVV_pat <V6_vaddbnq, int_hexagon_V6_vaddbnq>;
+defm : T_QVV_pat <V6_vaddhnq, int_hexagon_V6_vaddhnq>;
+defm : T_QVV_pat <V6_vaddwnq, int_hexagon_V6_vaddwnq>;
+defm : T_QVV_pat <V6_vsubbq, int_hexagon_V6_vsubbq>;
+defm : T_QVV_pat <V6_vsubhq, int_hexagon_V6_vsubhq>;
+defm : T_QVV_pat <V6_vsubwq, int_hexagon_V6_vsubwq>;
+defm : T_QVV_pat <V6_vsubbnq, int_hexagon_V6_vsubbnq>;
+defm : T_QVV_pat <V6_vsubhnq, int_hexagon_V6_vsubhnq>;
+defm : T_QVV_pat <V6_vsubwnq, int_hexagon_V6_vsubwnq>;
+
+defm : T_V_pat <V6_vabsh, int_hexagon_V6_vabsh>;
+defm : T_V_pat <V6_vabsw, int_hexagon_V6_vabsw>;
+defm : T_V_pat <V6_vabsw_sat, int_hexagon_V6_vabsw_sat>;
+defm : T_V_pat <V6_vabsh_sat, int_hexagon_V6_vabsh_sat>;
+defm : T_V_pat <V6_vnot, int_hexagon_V6_vnot>;
+defm : T_V_pat <V6_vassign, int_hexagon_V6_vassign>;
+defm : T_V_pat <V6_vzb, int_hexagon_V6_vzb>;
+defm : T_V_pat <V6_vzh, int_hexagon_V6_vzh>;
+defm : T_V_pat <V6_vsb, int_hexagon_V6_vsb>;
+defm : T_V_pat <V6_vsh, int_hexagon_V6_vsh>;
+defm : T_V_pat <V6_vdealh, int_hexagon_V6_vdealh>;
+defm : T_V_pat <V6_vdealb, int_hexagon_V6_vdealb>;
+defm : T_V_pat <V6_vunpackub, int_hexagon_V6_vunpackub>;
+defm : T_V_pat <V6_vunpackuh, int_hexagon_V6_vunpackuh>;
+defm : T_V_pat <V6_vunpackb, int_hexagon_V6_vunpackb>;
+defm : T_V_pat <V6_vunpackh, int_hexagon_V6_vunpackh>;
+defm : T_V_pat <V6_vshuffh, int_hexagon_V6_vshuffh>;
+defm : T_V_pat <V6_vshuffb, int_hexagon_V6_vshuffb>;
+defm : T_V_pat <V6_vcl0w, int_hexagon_V6_vcl0w>;
+defm : T_V_pat <V6_vpopcounth, int_hexagon_V6_vpopcounth>;
+defm : T_V_pat <V6_vcl0h, int_hexagon_V6_vcl0h>;
+defm : T_V_pat <V6_vnormamtw, int_hexagon_V6_vnormamtw>;
+defm : T_V_pat <V6_vnormamth, int_hexagon_V6_vnormamth>;
+
+defm : T_WRI_pat <V6_vrmpybusi, int_hexagon_V6_vrmpybusi>;
+defm : T_WRI_pat <V6_vrsadubi, int_hexagon_V6_vrsadubi>;
+defm : T_WRI_pat <V6_vrmpyubi, int_hexagon_V6_vrmpyubi>;
+
+defm : T_WWRI_pat <V6_vrmpybusi_acc, int_hexagon_V6_vrmpybusi_acc>;
+defm : T_WWRI_pat <V6_vrsadubi_acc, int_hexagon_V6_vrsadubi_acc>;
+defm : T_WWRI_pat <V6_vrmpyubi_acc, int_hexagon_V6_vrmpyubi_acc>;
+
+// Assembler mapped.
+//defm : T_V_pat <V6_vtran2x2, int_hexagon_V6_vtran2x2>;
+// Not present earlier; the intrinsic still needs to be added.
+defm : T_VVR_pat <V6_valignb, int_hexagon_V6_valignb>;
+defm : T_VVR_pat <V6_vlalignb, int_hexagon_V6_vlalignb>;
+defm : T_VVR_pat <V6_vasrwh, int_hexagon_V6_vasrwh>;
+defm : T_VVR_pat <V6_vasrwhsat, int_hexagon_V6_vasrwhsat>;
+defm : T_VVR_pat <V6_vasrwhrndsat, int_hexagon_V6_vasrwhrndsat>;
+defm : T_VVR_pat <V6_vasrwuhsat, int_hexagon_V6_vasrwuhsat>;
+defm : T_VVR_pat <V6_vasrhubsat, int_hexagon_V6_vasrhubsat>;
+defm : T_VVR_pat <V6_vasrhubrndsat, int_hexagon_V6_vasrhubrndsat>;
+defm : T_VVR_pat <V6_vasrhbrndsat, int_hexagon_V6_vasrhbrndsat>;
+
+defm : T_VVR_pat <V6_vshuffvdd, int_hexagon_V6_vshuffvdd>;
+defm : T_VVR_pat <V6_vdealvdd, int_hexagon_V6_vdealvdd>;
+
+defm : T_WV_pat <V6_vunpackob, int_hexagon_V6_vunpackob>;
+defm : T_WV_pat <V6_vunpackoh, int_hexagon_V6_vunpackoh>;
+defm : T_VVI_pat <V6_valignbi, int_hexagon_V6_valignbi>;
+defm : T_VVI_pat <V6_vlalignbi, int_hexagon_V6_vlalignbi>;
+
+defm : T_QVV_pat <V6_vswap, int_hexagon_V6_vswap>;
+defm : T_QVV_pat <V6_vmux, int_hexagon_V6_vmux>;
+defm : T_QQ_pat <V6_pred_and, int_hexagon_V6_pred_and>;
+defm : T_QQ_pat <V6_pred_or, int_hexagon_V6_pred_or>;
+defm : T_Q_pat <V6_pred_not, int_hexagon_V6_pred_not>;
+defm : T_QQ_pat <V6_pred_xor, int_hexagon_V6_pred_xor>;
+defm : T_QQ_pat <V6_pred_or_n, int_hexagon_V6_pred_or_n>;
+defm : T_QQ_pat <V6_pred_and_n, int_hexagon_V6_pred_and_n>;
+defm : T_VV_pat <V6_veqb, int_hexagon_V6_veqb>;
+defm : T_VV_pat <V6_veqh, int_hexagon_V6_veqh>;
+defm : T_VV_pat <V6_veqw, int_hexagon_V6_veqw>;
+defm : T_VV_pat <V6_vgtb, int_hexagon_V6_vgtb>;
+defm : T_VV_pat <V6_vgth, int_hexagon_V6_vgth>;
+defm : T_VV_pat <V6_vgtw, int_hexagon_V6_vgtw>;
+defm : T_VV_pat <V6_vgtub, int_hexagon_V6_vgtub>;
+defm : T_VV_pat <V6_vgtuh, int_hexagon_V6_vgtuh>;
+defm : T_VV_pat <V6_vgtuw, int_hexagon_V6_vgtuw>;
+
+defm : T_VQR_pat <V6_vandqrt_acc, int_hexagon_V6_vandqrt_acc>;
+defm : T_QVR_pat <V6_vandvrt_acc, int_hexagon_V6_vandvrt_acc>;
+defm : T_QR_pat <V6_vandqrt, int_hexagon_V6_vandqrt>;
+defm : T_R_pat <V6_lvsplatw, int_hexagon_V6_lvsplatw>;
+defm : T_R_pat <V6_pred_scalar2, int_hexagon_V6_pred_scalar2>;
+defm : T_VR_pat <V6_vandvrt, int_hexagon_V6_vandvrt>;
+
+defm : T_VVR_pat <V6_vlutvvb, int_hexagon_V6_vlutvvb>;
+defm : T_VVR_pat <V6_vlutvwh, int_hexagon_V6_vlutvwh>;
+defm : T_VVVR_pat <V6_vlutvvb_oracc, int_hexagon_V6_vlutvvb_oracc>;
+defm : T_WVVR_pat <V6_vlutvwh_oracc, int_hexagon_V6_vlutvwh_oracc>;
+
+defm : T_QVR_pat <V6_vandvrt_acc, int_hexagon_V6_vandvrt_acc>;
+def : T_PI_pat <S6_rol_i_p, int_hexagon_S6_rol_i_p>;
+def : T_RI_pat <S6_rol_i_r, int_hexagon_S6_rol_i_r>;
+def : T_PPI_pat <S6_rol_i_p_nac, int_hexagon_S6_rol_i_p_nac>;
+def : T_PPI_pat <S6_rol_i_p_acc, int_hexagon_S6_rol_i_p_acc>;
+def : T_PPI_pat <S6_rol_i_p_and, int_hexagon_S6_rol_i_p_and>;
+def : T_PPI_pat <S6_rol_i_p_or, int_hexagon_S6_rol_i_p_or>;
+def : T_PPI_pat <S6_rol_i_p_xacc, int_hexagon_S6_rol_i_p_xacc>;
+def : T_RRI_pat <S6_rol_i_r_nac, int_hexagon_S6_rol_i_r_nac>;
+def : T_RRI_pat <S6_rol_i_r_acc, int_hexagon_S6_rol_i_r_acc>;
+def : T_RRI_pat <S6_rol_i_r_and, int_hexagon_S6_rol_i_r_and>;
+def : T_RRI_pat <S6_rol_i_r_or, int_hexagon_S6_rol_i_r_or>;
+def : T_RRI_pat <S6_rol_i_r_xacc, int_hexagon_S6_rol_i_r_xacc>;
+
+defm : T_VR_pat <V6_extractw, int_hexagon_V6_extractw>;
+defm : T_VR_pat <V6_vinsertwr, int_hexagon_V6_vinsertwr>;
+
+def : T_PPQ_pat <S2_cabacencbin, int_hexagon_S2_cabacencbin>;
+
+def: Pat<(v64i16 (trunc v64i32:$Vdd)),
+ (v64i16 (V6_vpackwh_sat_128B
+ (v32i32 (HEXAGON_V6_hi_128B VecDblRegs128B:$Vdd)),
+ (v32i32 (HEXAGON_V6_lo_128B VecDblRegs128B:$Vdd))))>,
+ Requires<[UseHVXDbl]>;
+
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td b/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td
new file mode 100644
index 0000000..0ca95e9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td
@@ -0,0 +1,728 @@
+//=- HexagonIsetDx.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon duplex instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// SA1_combine1i: Combines.
+let isCodeGenOnly = 1, hasSideEffects = 0 in
+def V4_SA1_combine1i: SUBInst <
+ (outs DoubleRegs:$Rdd),
+ (ins u2Imm:$u2),
+ "$Rdd = combine(#1, #$u2)"> {
+ bits<3> Rdd;
+ bits<2> u2;
+
+ let Inst{12-10} = 0b111;
+ let Inst{8} = 0b0;
+ let Inst{4-3} = 0b01;
+ let Inst{2-0} = Rdd;
+ let Inst{6-5} = u2;
+ }
+
+// SL2_jumpr31_f: Indirect conditional jump if false.
+// SL2_jumpr31_f -> SL2_jumpr31_fnew
+let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
+def V4_SL2_jumpr31_f: SUBInst <
+ (outs ),
+ (ins ),
+ "if (!p0) jumpr r31"> {
+ let Inst{12-6} = 0b1111111;
+ let Inst{2-0} = 0b101;
+ }
+
+// SL2_deallocframe: Deallocate stack frame.
+let Defs = [R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in
+def V4_SL2_deallocframe: SUBInst <
+ (outs ),
+ (ins ),
+ "deallocframe"> {
+ let Inst{12-6} = 0b1111100;
+ let Inst{2} = 0b0;
+ }
+
+// SL2_return_f: Deallocate stack frame and return.
+// SL2_return_f -> SL2_return_fnew
+let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
+def V4_SL2_return_f: SUBInst <
+ (outs ),
+ (ins ),
+ "if (!p0) dealloc_return"> {
+ let Inst{12-6} = 0b1111101;
+ let Inst{2-0} = 0b101;
+ }
+
+// SA1_combine3i: Combines.
+let isCodeGenOnly = 1, hasSideEffects = 0 in
+def V4_SA1_combine3i: SUBInst <
+ (outs DoubleRegs:$Rdd),
+ (ins u2Imm:$u2),
+ "$Rdd = combine(#3, #$u2)"> {
+ bits<3> Rdd;
+ bits<2> u2;
+
+ let Inst{12-10} = 0b111;
+ let Inst{8} = 0b0;
+ let Inst{4-3} = 0b11;
+ let Inst{2-0} = Rdd;
+ let Inst{6-5} = u2;
+ }
+
+// SS2_storebi0: Store byte.
+let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in
+def V4_SS2_storebi0: SUBInst <
+ (outs ),
+ (ins IntRegs:$Rs, u4_0Imm:$u4_0),
+ "memb($Rs + #$u4_0)=#0"> {
+ bits<4> Rs;
+ bits<4> u4_0;
+
+ let Inst{12-8} = 0b10010;
+ let Inst{7-4} = Rs;
+ let Inst{3-0} = u4_0;
+ }
+
+// SA1_clrtnew: Clear if true.
+let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_clrtnew: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins ),
+ "if (p0.new) $Rd = #0"> {
+ bits<4> Rd;
+
+ let Inst{12-9} = 0b1101;
+ let Inst{6-4} = 0b100;
+ let Inst{3-0} = Rd;
+ }
+
+// SL2_loadruh_io: Load half.
+let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in
+def V4_SL2_loadruh_io: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, u3_1Imm:$u3_1),
+ "$Rd = memuh($Rs + #$u3_1)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ bits<4> u3_1;
+
+ let Inst{12-11} = 0b01;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ let Inst{10-8} = u3_1{3-1};
+ }
+
+// SL2_jumpr31_tnew: Indirect conditional jump if true.
+let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
+def V4_SL2_jumpr31_tnew: SUBInst <
+ (outs ),
+ (ins ),
+ "if (p0.new) jumpr:nt r31"> {
+ let Inst{12-6} = 0b1111111;
+ let Inst{2-0} = 0b110;
+ }
+
+// SA1_addi: Add.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 1, opExtentBits = 7, opExtendable = 2 in
+def V4_SA1_addi: SUBInst <
+ (outs IntRegs:$Rx),
+ (ins IntRegs:$_src_, s7Ext:$s7),
+ "$Rx = add($_src_, #$s7)" ,
+ [] ,
+ "$_src_ = $Rx"> {
+ bits<4> Rx;
+ bits<7> s7;
+
+ let Inst{12-11} = 0b00;
+ let Inst{3-0} = Rx;
+ let Inst{10-4} = s7;
+ }
+
+// SL1_loadrub_io: Load byte.
+let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in
+def V4_SL1_loadrub_io: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, u4_0Imm:$u4_0),
+ "$Rd = memub($Rs + #$u4_0)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ bits<4> u4_0;
+
+ let Inst{12} = 0b1;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ let Inst{11-8} = u4_0;
+ }
+
+// SL1_loadri_io: Load word.
+let isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in
+def V4_SL1_loadri_io: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, u4_2Imm:$u4_2),
+ "$Rd = memw($Rs + #$u4_2)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ bits<6> u4_2;
+
+ let Inst{12} = 0b0;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ let Inst{11-8} = u4_2{5-2};
+ }
+
+// SA1_cmpeqi: Compare immediate.
+let Defs = [P0], isCodeGenOnly = 1, hasSideEffects = 0 in
+def V4_SA1_cmpeqi: SUBInst <
+ (outs ),
+ (ins IntRegs:$Rs, u2Imm:$u2),
+ "p0 = cmp.eq($Rs, #$u2)"> {
+ bits<4> Rs;
+ bits<2> u2;
+
+ let Inst{12-8} = 0b11001;
+ let Inst{7-4} = Rs;
+ let Inst{1-0} = u2;
+ }
+
+// SA1_combinerz: Combines.
+let isCodeGenOnly = 1, hasSideEffects = 0 in
+def V4_SA1_combinerz: SUBInst <
+ (outs DoubleRegs:$Rdd),
+ (ins IntRegs:$Rs),
+ "$Rdd = combine($Rs, #0)"> {
+ bits<3> Rdd;
+ bits<4> Rs;
+
+ let Inst{12-10} = 0b111;
+ let Inst{8} = 0b1;
+ let Inst{3} = 0b1;
+ let Inst{2-0} = Rdd;
+ let Inst{7-4} = Rs;
+ }
+
+// SL2_return_t: Deallocate stack frame and return.
+// SL2_return_t -> SL2_return_tnew
+let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
+def V4_SL2_return_t: SUBInst <
+ (outs ),
+ (ins ),
+ "if (p0) dealloc_return"> {
+ let Inst{12-6} = 0b1111101;
+ let Inst{2-0} = 0b100;
+ }
+
+// SS2_allocframe: Allocate stack frame.
+let Defs = [R29, R30], Uses = [R30, R31, R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in
+def V4_SS2_allocframe: SUBInst <
+ (outs ),
+ (ins u5_3Imm:$u5_3),
+ "allocframe(#$u5_3)"> {
+ bits<8> u5_3;
+
+ let Inst{12-9} = 0b1110;
+ let Inst{8-4} = u5_3{7-3};
+ }
+
+// SS2_storeh_io: Store half.
+let isCodeGenOnly = 1, mayStore = 1, accessSize = HalfWordAccess in
+def V4_SS2_storeh_io: SUBInst <
+ (outs ),
+ (ins IntRegs:$Rs, u3_1Imm:$u3_1, IntRegs:$Rt),
+ "memh($Rs + #$u3_1) = $Rt"> {
+ bits<4> Rs;
+ bits<4> u3_1;
+ bits<4> Rt;
+
+ let Inst{12-11} = 0b00;
+ let Inst{7-4} = Rs;
+ let Inst{10-8} = u3_1{3-1};
+ let Inst{3-0} = Rt;
+ }
+
+// SS2_storewi0: Store word.
+let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
+def V4_SS2_storewi0: SUBInst <
+ (outs ),
+ (ins IntRegs:$Rs, u4_2Imm:$u4_2),
+ "memw($Rs + #$u4_2)=#0"> {
+ bits<4> Rs;
+ bits<6> u4_2;
+
+ let Inst{12-8} = 0b10000;
+ let Inst{7-4} = Rs;
+ let Inst{3-0} = u4_2{5-2};
+ }
+
+// SS2_storewi1: Store word.
+let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
+def V4_SS2_storewi1: SUBInst <
+ (outs ),
+ (ins IntRegs:$Rs, u4_2Imm:$u4_2),
+ "memw($Rs + #$u4_2)=#1"> {
+ bits<4> Rs;
+ bits<6> u4_2;
+
+ let Inst{12-8} = 0b10001;
+ let Inst{7-4} = Rs;
+ let Inst{3-0} = u4_2{5-2};
+ }
+
+// SL2_jumpr31: Indirect unconditional jump.
+let Defs = [PC], Uses = [R31], isCodeGenOnly = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
+def V4_SL2_jumpr31: SUBInst <
+ (outs ),
+ (ins ),
+ "jumpr r31"> {
+ let Inst{12-6} = 0b1111111;
+ let Inst{2} = 0b0;
+ }
+
+// SA1_combinezr: Combines.
+let isCodeGenOnly = 1, hasSideEffects = 0 in
+def V4_SA1_combinezr: SUBInst <
+ (outs DoubleRegs:$Rdd),
+ (ins IntRegs:$Rs),
+ "$Rdd = combine(#0, $Rs)"> {
+ bits<3> Rdd;
+ bits<4> Rs;
+
+ let Inst{12-10} = 0b111;
+ let Inst{8} = 0b1;
+ let Inst{3} = 0b0;
+ let Inst{2-0} = Rdd;
+ let Inst{7-4} = Rs;
+ }
+
+// SL2_loadrh_io: Load half.
+let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in
+def V4_SL2_loadrh_io: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, u3_1Imm:$u3_1),
+ "$Rd = memh($Rs + #$u3_1)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ bits<4> u3_1;
+
+ let Inst{12-11} = 0b00;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ let Inst{10-8} = u3_1{3-1};
+ }
+
+// SA1_addrx: Add.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_addrx: SUBInst <
+ (outs IntRegs:$Rx),
+ (ins IntRegs:$_src_, IntRegs:$Rs),
+ "$Rx = add($_src_, $Rs)" ,
+ [] ,
+ "$_src_ = $Rx"> {
+ bits<4> Rx;
+ bits<4> Rs;
+
+ let Inst{12-8} = 0b11000;
+ let Inst{3-0} = Rx;
+ let Inst{7-4} = Rs;
+ }
+
+// SA1_setin1: Set to -1.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_setin1: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins ),
+ "$Rd = #-1"> {
+ bits<4> Rd;
+
+ let Inst{12-9} = 0b1101;
+ let Inst{6} = 0b0;
+ let Inst{3-0} = Rd;
+ }
+
+// SA1_sxth: Sxth.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_sxth: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs),
+ "$Rd = sxth($Rs)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+
+ let Inst{12-8} = 0b10100;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ }
+
+// SA1_combine0i: Combines.
+let isCodeGenOnly = 1, hasSideEffects = 0 in
+def V4_SA1_combine0i: SUBInst <
+ (outs DoubleRegs:$Rdd),
+ (ins u2Imm:$u2),
+ "$Rdd = combine(#0, #$u2)"> {
+ bits<3> Rdd;
+ bits<2> u2;
+
+ let Inst{12-10} = 0b111;
+ let Inst{8} = 0b0;
+ let Inst{4-3} = 0b00;
+ let Inst{2-0} = Rdd;
+ let Inst{6-5} = u2;
+ }
+
+// SA1_combine2i: Combines.
+let isCodeGenOnly = 1, hasSideEffects = 0 in
+def V4_SA1_combine2i: SUBInst <
+ (outs DoubleRegs:$Rdd),
+ (ins u2Imm:$u2),
+ "$Rdd = combine(#2, #$u2)"> {
+ bits<3> Rdd;
+ bits<2> u2;
+
+ let Inst{12-10} = 0b111;
+ let Inst{8} = 0b0;
+ let Inst{4-3} = 0b10;
+ let Inst{2-0} = Rdd;
+ let Inst{6-5} = u2;
+ }
+
+// SA1_sxtb: Sxtb.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_sxtb: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs),
+ "$Rd = sxtb($Rs)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+
+ let Inst{12-8} = 0b10101;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ }
+
+// SA1_clrf: Clear if false.
+// SA1_clrf -> SA1_clrfnew
+let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_clrf: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins ),
+ "if (!p0) $Rd = #0"> {
+ bits<4> Rd;
+
+ let Inst{12-9} = 0b1101;
+ let Inst{6-4} = 0b111;
+ let Inst{3-0} = Rd;
+ }
+
+// SL2_loadrb_io: Load byte.
+let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in
+def V4_SL2_loadrb_io: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, u3_0Imm:$u3_0),
+ "$Rd = memb($Rs + #$u3_0)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+ bits<3> u3_0;
+
+ let Inst{12-11} = 0b10;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ let Inst{10-8} = u3_0;
+ }
+
+// SA1_tfr: Tfr.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_tfr: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs),
+ "$Rd = $Rs"> {
+ bits<4> Rd;
+ bits<4> Rs;
+
+ let Inst{12-8} = 0b10000;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ }
+
+// SL2_loadrd_sp: Load dword.
+let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in
+def V4_SL2_loadrd_sp: SUBInst <
+ (outs DoubleRegs:$Rdd),
+ (ins u5_3Imm:$u5_3),
+ "$Rdd = memd(r29 + #$u5_3)"> {
+ bits<3> Rdd;
+ bits<8> u5_3;
+
+ let Inst{12-8} = 0b11110;
+ let Inst{2-0} = Rdd;
+ let Inst{7-3} = u5_3{7-3};
+ }
+
+// SA1_and1: And #1.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_and1: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs),
+ "$Rd = and($Rs, #1)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+
+ let Inst{12-8} = 0b10010;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ }
+
+// SS2_storebi1: Store byte.
+let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in
+def V4_SS2_storebi1: SUBInst <
+ (outs ),
+ (ins IntRegs:$Rs, u4_0Imm:$u4_0),
+ "memb($Rs + #$u4_0)=#1"> {
+ bits<4> Rs;
+ bits<4> u4_0;
+
+ let Inst{12-8} = 0b10011;
+ let Inst{7-4} = Rs;
+ let Inst{3-0} = u4_0;
+ }
+
+// SA1_inc: Inc.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_inc: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs),
+ "$Rd = add($Rs, #1)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+
+ let Inst{12-8} = 0b10001;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ }
+
+// SS2_stored_sp: Store dword.
+let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in
+def V4_SS2_stored_sp: SUBInst <
+ (outs ),
+ (ins s6_3Imm:$s6_3, DoubleRegs:$Rtt),
+ "memd(r29 + #$s6_3) = $Rtt"> {
+ bits<9> s6_3;
+ bits<3> Rtt;
+
+ let Inst{12-9} = 0b0101;
+ let Inst{8-3} = s6_3{8-3};
+ let Inst{2-0} = Rtt;
+ }
+
+// SS2_storew_sp: Store word.
+let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
+def V4_SS2_storew_sp: SUBInst <
+ (outs ),
+ (ins u5_2Imm:$u5_2, IntRegs:$Rt),
+ "memw(r29 + #$u5_2) = $Rt"> {
+ bits<7> u5_2;
+ bits<4> Rt;
+
+ let Inst{12-9} = 0b0100;
+ let Inst{8-4} = u5_2{6-2};
+ let Inst{3-0} = Rt;
+ }
+
+// SL2_jumpr31_fnew: Indirect conditional jump if false.
+let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
+def V4_SL2_jumpr31_fnew: SUBInst <
+ (outs ),
+ (ins ),
+ "if (!p0.new) jumpr:nt r31"> {
+ let Inst{12-6} = 0b1111111;
+ let Inst{2-0} = 0b111;
+ }
+
+// SA1_clrt: Clear if true.
+// SA1_clrt -> SA1_clrtnew
+let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_clrt: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins ),
+ "if (p0) $Rd = #0"> {
+ bits<4> Rd;
+
+ let Inst{12-9} = 0b1101;
+ let Inst{6-4} = 0b110;
+ let Inst{3-0} = Rd;
+ }
+
+// SL2_return: Deallocate stack frame and return.
+let Defs = [PC, R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
+def V4_SL2_return: SUBInst <
+ (outs ),
+ (ins ),
+ "dealloc_return"> {
+ let Inst{12-6} = 0b1111101;
+ let Inst{2} = 0b0;
+ }
+
+// SA1_dec: Dec.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_dec: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs),
+ "$Rd = add($Rs,#-1)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+
+ let Inst{12-8} = 0b10011;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ }
+
+// SA1_seti: Set immed.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 0, opExtentBits = 6, opExtendable = 1 in
+def V4_SA1_seti: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins u6Ext:$u6),
+ "$Rd = #$u6"> {
+ bits<4> Rd;
+ bits<6> u6;
+
+ let Inst{12-10} = 0b010;
+ let Inst{3-0} = Rd;
+ let Inst{9-4} = u6;
+ }
+
+// SL2_jumpr31_t: Indirect conditional jump if true.
+// SL2_jumpr31_t -> SL2_jumpr31_tnew
+let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
+def V4_SL2_jumpr31_t: SUBInst <
+ (outs ),
+ (ins ),
+ "if (p0) jumpr r31"> {
+ let Inst{12-6} = 0b1111111;
+ let Inst{2-0} = 0b100;
+ }
+
+// SA1_clrfnew: Clear if false.
+let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_clrfnew: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins ),
+ "if (!p0.new) $Rd = #0"> {
+ bits<4> Rd;
+
+ let Inst{12-9} = 0b1101;
+ let Inst{6-4} = 0b101;
+ let Inst{3-0} = Rd;
+ }
+
+// SS1_storew_io: Store word.
+let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
+def V4_SS1_storew_io: SUBInst <
+ (outs ),
+ (ins IntRegs:$Rs, u4_2Imm:$u4_2, IntRegs:$Rt),
+ "memw($Rs + #$u4_2) = $Rt"> {
+ bits<4> Rs;
+ bits<6> u4_2;
+ bits<4> Rt;
+
+ let Inst{12} = 0b0;
+ let Inst{7-4} = Rs;
+ let Inst{11-8} = u4_2{5-2};
+ let Inst{3-0} = Rt;
+ }
+
+// SA1_zxtb: Zxtb.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_zxtb: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs),
+ "$Rd = and($Rs, #255)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+
+ let Inst{12-8} = 0b10111;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ }
+
+// SA1_addsp: Add.
+let Uses = [R29], isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_addsp: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins u6_2Imm:$u6_2),
+ "$Rd = add(r29, #$u6_2)"> {
+ bits<4> Rd;
+ bits<8> u6_2;
+
+ let Inst{12-10} = 0b011;
+ let Inst{3-0} = Rd;
+ let Inst{9-4} = u6_2{7-2};
+ }
+
+// SL2_loadri_sp: Load word.
+let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in
+def V4_SL2_loadri_sp: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins u5_2Imm:$u5_2),
+ "$Rd = memw(r29 + #$u5_2)"> {
+ bits<4> Rd;
+ bits<7> u5_2;
+
+ let Inst{12-9} = 0b1110;
+ let Inst{3-0} = Rd;
+ let Inst{8-4} = u5_2{6-2};
+ }
+
+// SS1_storeb_io: Store byte.
+let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in
+def V4_SS1_storeb_io: SUBInst <
+ (outs ),
+ (ins IntRegs:$Rs, u4_0Imm:$u4_0, IntRegs:$Rt),
+ "memb($Rs + #$u4_0) = $Rt"> {
+ bits<4> Rs;
+ bits<4> u4_0;
+ bits<4> Rt;
+
+ let Inst{12} = 0b1;
+ let Inst{7-4} = Rs;
+ let Inst{11-8} = u4_0;
+ let Inst{3-0} = Rt;
+ }
+
+// SL2_return_tnew: Deallocate stack frame and return.
+let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
+def V4_SL2_return_tnew: SUBInst <
+ (outs ),
+ (ins ),
+ "if (p0.new) dealloc_return:nt"> {
+ let Inst{12-6} = 0b1111101;
+ let Inst{2-0} = 0b110;
+ }
+
+// SL2_return_fnew: Deallocate stack frame and return.
+let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
+def V4_SL2_return_fnew: SUBInst <
+ (outs ),
+ (ins ),
+ "if (!p0.new) dealloc_return:nt"> {
+ let Inst{12-6} = 0b1111101;
+ let Inst{2-0} = 0b111;
+ }
+
+// SA1_zxth: Zxth.
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+def V4_SA1_zxth: SUBInst <
+ (outs IntRegs:$Rd),
+ (ins IntRegs:$Rs),
+ "$Rd = zxth($Rs)"> {
+ bits<4> Rd;
+ bits<4> Rs;
+
+ let Inst{12-8} = 0b10110;
+ let Inst{3-0} = Rd;
+ let Inst{7-4} = Rs;
+ }
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
new file mode 100644
index 0000000..624c0f6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -0,0 +1,146 @@
+//===- HexagonMCInstLower.cpp - Convert Hexagon MachineInstr to an MCInst -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower Hexagon MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonAsmPrinter.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+
+using namespace llvm;
+
+namespace llvm {
+ void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
+ MCInst &MCB, HexagonAsmPrinter &AP);
+}
+
+static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
+ HexagonAsmPrinter &Printer) {
+ MCContext &MC = Printer.OutContext;
+ const MCExpr *ME;
+
+ // Populate the relocation type based on Hexagon target flags
+ // set on an operand
+ MCSymbolRefExpr::VariantKind RelocationType;
+ switch (MO.getTargetFlags()) {
+ default:
+ RelocationType = MCSymbolRefExpr::VK_None;
+ break;
+ case HexagonII::MO_PCREL:
+ RelocationType = MCSymbolRefExpr::VK_Hexagon_PCREL;
+ break;
+ case HexagonII::MO_GOT:
+ RelocationType = MCSymbolRefExpr::VK_GOT;
+ break;
+ case HexagonII::MO_LO16:
+ RelocationType = MCSymbolRefExpr::VK_Hexagon_LO16;
+ break;
+ case HexagonII::MO_HI16:
+ RelocationType = MCSymbolRefExpr::VK_Hexagon_HI16;
+ break;
+ case HexagonII::MO_GPREL:
+ RelocationType = MCSymbolRefExpr::VK_Hexagon_GPREL;
+ break;
+ }
+
+ ME = MCSymbolRefExpr::create(Symbol, RelocationType, MC);
+
+ if (!MO.isJTI() && MO.getOffset())
+ ME = MCBinaryExpr::createAdd(ME, MCConstantExpr::create(MO.getOffset(), MC),
+ MC);
+
+ return MCOperand::createExpr(ME);
+}
+
+// Create an MCInst from a MachineInstr
+void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
+ MCInst &MCB, HexagonAsmPrinter &AP) {
+ if (MI->getOpcode() == Hexagon::ENDLOOP0) {
+ HexagonMCInstrInfo::setInnerLoop(MCB);
+ return;
+ }
+ if (MI->getOpcode() == Hexagon::ENDLOOP1) {
+ HexagonMCInstrInfo::setOuterLoop(MCB);
+ return;
+ }
+ MCInst *MCI = new (AP.OutContext) MCInst;
+ MCI->setOpcode(MI->getOpcode());
+ assert(MCI->getOpcode() == static_cast<unsigned>(MI->getOpcode()) &&
+ "MCI opcode should have been set on construction");
+ bool MustExtend = false;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCO;
+ if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+ MustExtend = true;
+
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCO = MCOperand::createReg(MO.getReg());
+ break;
+ case MachineOperand::MO_FPImmediate: {
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ // FP immediates are used only when setting GPRs, so they may be dealt
+ // with like regular immediates from this point on.
+ MCO = MCOperand::createExpr(
+ MCConstantExpr::create(*Val.bitcastToAPInt().getRawData(),
+ AP.OutContext));
+ break;
+ }
+ case MachineOperand::MO_Immediate:
+ MCO = MCOperand::createExpr(
+ MCConstantExpr::create(MO.getImm(), AP.OutContext));
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCO = MCOperand::createExpr
+ (MCSymbolRefExpr::create(MO.getMBB()->getSymbol(),
+ AP.OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCO = GetSymbolRef(MO, AP.getSymbol(MO.getGlobal()), AP);
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCO = GetSymbolRef(MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()),
+ AP);
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP);
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP);
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP);
+ break;
+ }
+
+ MCI->addOperand(MCO);
+ }
+ AP.HexagonProcessInstruction(*MCI, *MI);
+ HexagonMCInstrInfo::extendIfNeeded(AP.OutContext, MCII, MCB, *MCI,
+ MustExtend);
+ MCB.addOperand(MCOperand::createInst(MCI));
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp
new file mode 100644
index 0000000..9579c8b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp
@@ -0,0 +1,16 @@
+//= HexagonMachineFunctionInfo.cpp - Hexagon machine function info *- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMachineFunctionInfo.h"
+
+using namespace llvm;
+
+// Pin the vtable to this file.
+void HexagonMachineFunctionInfo::anchor() {}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
new file mode 100644
index 0000000..7672358
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -0,0 +1,85 @@
+//=- HexagonMachineFunctionInfo.h - Hexagon machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include <map>
+
+namespace llvm {
+
+ namespace Hexagon {
+ const unsigned int StartPacket = 0x1;
+ const unsigned int EndPacket = 0x2;
+ }
+
+
+/// Hexagon target-specific information for each MachineFunction.
+class HexagonMachineFunctionInfo : public MachineFunctionInfo {
+ // SRetReturnReg - Some subtargets require that sret lowering includes
+ // returning the value of the returned struct in a register. This field
+ // holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+ unsigned StackAlignBaseReg;
+ std::vector<MachineInstr*> AllocaAdjustInsts;
+ int VarArgsFrameIndex;
+ bool HasClobberLR;
+ bool HasEHReturn;
+ std::map<const MachineInstr*, unsigned> PacketInfo;
+ virtual void anchor();
+
+public:
+ HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseReg(0),
+ HasClobberLR(0), HasEHReturn(false) {}
+
+ HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0),
+ StackAlignBaseReg(0),
+ HasClobberLR(0),
+ HasEHReturn(false) {}
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ void addAllocaAdjustInst(MachineInstr* MI) {
+ AllocaAdjustInsts.push_back(MI);
+ }
+ const std::vector<MachineInstr*>& getAllocaAdjustInsts() {
+ return AllocaAdjustInsts;
+ }
+
+ void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; }
+ int getVarArgsFrameIndex() { return VarArgsFrameIndex; }
+
+ void setStartPacket(MachineInstr* MI) {
+ PacketInfo[MI] |= Hexagon::StartPacket;
+ }
+ void setEndPacket(MachineInstr* MI) {
+ PacketInfo[MI] |= Hexagon::EndPacket;
+ }
+ bool isStartPacket(const MachineInstr* MI) const {
+ return (PacketInfo.count(MI) &&
+ (PacketInfo.find(MI)->second & Hexagon::StartPacket));
+ }
+ bool isEndPacket(const MachineInstr* MI) const {
+ return (PacketInfo.count(MI) &&
+ (PacketInfo.find(MI)->second & Hexagon::EndPacket));
+ }
+ void setHasClobberLR(bool v) { HasClobberLR = v; }
+ bool hasClobberLR() const { return HasClobberLR; }
+
+  bool hasEHReturn() const { return HasEHReturn; }
+  void setHasEHReturn(bool H = true) { HasEHReturn = H; }
+
+ void setStackAlignBaseVReg(unsigned R) { StackAlignBaseReg = R; }
+ unsigned getStackAlignBaseVReg() const { return StackAlignBaseReg; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
new file mode 100644
index 0000000..7a52d68
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -0,0 +1,699 @@
+//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineScheduler schedules machine instructions after phi elimination. It
+// preserves LiveIntervals so it can be invoked before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMachineScheduler.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/IR/Function.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+/// Platform-specific modifications to DAG.
+void VLIWMachineScheduler::postprocessDAG() {
+ SUnit* LastSequentialCall = nullptr;
+ // Currently we only catch the situation when compare gets scheduled
+ // before preceding call.
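+  // The artificial barrier edge keeps the compare from being scheduled
+  // before the call it follows.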
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+ // Remember the call.
+ if (SUnits[su].getInstr()->isCall())
+ LastSequentialCall = &(SUnits[su]);
+ // Look for a compare that defines a predicate.
+ else if (SUnits[su].getInstr()->isCompare() && LastSequentialCall)
+ SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier));
+ }
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+/// It is _not_ precise (stateful); it is more like
+/// another heuristic. Many corner cases are figured
+/// out empirically.
+bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
+ if (!SU || !SU->getInstr())
+ return false;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ switch (SU->getInstr()->getOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(SU->getInstr()))
+ return false;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ break;
+ }
+
+  // Now check that there are no dependencies on instructions already
+  // in the packet.
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
+ if (Packet[i]->Succs.size() == 0)
+ continue;
+ for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
+ E = Packet[i]->Succs.end(); I != E; ++I) {
+ // Since we do not add pseudos to packets, might as well
+ // ignore order dependencies.
+ if (I->isCtrl())
+ continue;
+
+ if (I->getSUnit() == SU)
+ return false;
+ }
+ }
+ return true;
+}
+
+/// Keep track of available resources.
+bool VLIWResourceModel::reserveResources(SUnit *SU) {
+ bool startNewCycle = false;
+ // Artificially reset state.
+ if (!SU) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ TotalPackets++;
+ return false;
+ }
+ // If this SU does not fit in the packet
+ // start a new one.
+ if (!isResourceAvailable(SU)) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ TotalPackets++;
+ startNewCycle = true;
+ }
+
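+  // Pseudo and bookkeeping opcodes consume no machine resources, so only
+  // real instructions reserve slots in the DFA.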
+ switch (SU->getInstr()->getOpcode()) {
+ default:
+ ResourcesModel->reserveResources(SU->getInstr());
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::CFI_INSTRUCTION:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ break;
+ }
+ Packet.push_back(SU);
+
+#ifndef NDEBUG
+ DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
+ DEBUG(dbgs() << "\t[" << i << "] SU(");
+ DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
+ DEBUG(Packet[i]->getInstr()->dump());
+ }
+#endif
+
+ // If packet is now full, reset the state so in the next cycle
+ // we start fresh.
+ if (Packet.size() >= SchedModel->getIssueWidth()) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ TotalPackets++;
+ startNewCycle = true;
+ }
+
+ return startNewCycle;
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
+void VLIWMachineScheduler::schedule() {
+ DEBUG(dbgs()
+ << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
+ << " " << BB->getName()
+ << " in_func " << BB->getParent()->getFunction()->getName()
+ << " at loop depth " << MLI->getLoopDepth(BB)
+ << " \n");
+
+ buildDAGWithRegPressure();
+
+ // Postprocess the DAG to add platform-specific artificial dependencies.
+ postprocessDAG();
+
+ SmallVector<SUnit*, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ // Initialize the strategy before modifying the DAG.
+ SchedImpl->initialize(this);
+
+ // To view Height/Depth correctly, they should be accessed at least once.
+ //
+ // FIXME: SUnit::dumpAll always recompute depth and height now. The max
+ // depth/height could be computed directly from the roots and leaves.
+ DEBUG(unsigned maxH = 0;
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ if (SUnits[su].getHeight() > maxH)
+ maxH = SUnits[su].getHeight();
+ dbgs() << "Max Height " << maxH << "\n";);
+ DEBUG(unsigned maxD = 0;
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ if (SUnits[su].getDepth() > maxD)
+ maxD = SUnits[su].getDepth();
+ dbgs() << "Max Depth " << maxD << "\n";);
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ initQueues(TopRoots, BotRoots);
+
+ bool IsTopNode = false;
+ while (true) {
+ DEBUG(dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU) break;
+
+ if (!checkSchedLimit())
+ break;
+
+ scheduleMI(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
+
+ // Notify the scheduling strategy after updating the DAG.
+ SchedImpl->schedNode(SU, IsTopNode);
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+}
+
+void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
+ DAG = static_cast<VLIWMachineScheduler*>(dag);
+ SchedModel = DAG->getSchedModel();
+
+ Top.init(DAG, SchedModel);
+ Bot.init(DAG, SchedModel);
+
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
+ // are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
+ const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
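+  // initialize() is called once per scheduling region; drop any recognizers
+  // and resource models left over from the previous region before creating
+  // fresh ones.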
+ delete Top.HazardRec;
+ delete Bot.HazardRec;
+ Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
+ Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
+
+ delete Top.ResourceModel;
+ delete Bot.ResourceModel;
+ Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
+ Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
+
+ assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+}
+
+void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
+ if (SU->isScheduled)
+ return;
+
+ for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
+ unsigned MinLatency = I->getLatency();
+#ifndef NDEBUG
+ Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
+#endif
+ if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
+ SU->TopReadyCycle = PredReadyCycle + MinLatency;
+ }
+ Top.releaseNode(SU, SU->TopReadyCycle);
+}
+
+void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
+ if (SU->isScheduled)
+ return;
+
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+ unsigned MinLatency = I->getLatency();
+#ifndef NDEBUG
+ Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
+#endif
+ if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
+ SU->BotReadyCycle = SuccReadyCycle + MinLatency;
+ }
+ Bot.releaseNode(SU, SU->BotReadyCycle);
+}
+
+/// Does this SU have a hazard within the current instruction group.
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops that
+/// can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
+ if (HazardRec->isEnabled())
+ return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
+
+ unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
+ if (IssueCount + uops > SchedModel->getIssueWidth())
+ return true;
+
+ return false;
+}
+
+void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(SUnit *SU,
+ unsigned ReadyCycle) {
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ // Check for interlocks first. For the purpose of other heuristics, an
+ // instruction that cannot issue appears as if it's not in the ReadyQueue.
+  if (ReadyCycle > CurrCycle || checkHazard(SU))
+    Pending.push(SU);
+ else
+ Available.push(SU);
+}
+
+/// Move the boundary of scheduled code by one cycle.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
+ unsigned Width = SchedModel->getIssueWidth();
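+  // Carry any micro-ops issued beyond the packet width over into the next
+  // cycle.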
+ IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
+
+ assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+ unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
+
+ if (!HazardRec->isEnabled()) {
+ // Bypass HazardRec virtual calls.
+ CurrCycle = NextCycle;
+ } else {
+ // Bypass getHazardType calls in case of long latency.
+ for (; CurrCycle != NextCycle; ++CurrCycle) {
+ if (isTop())
+ HazardRec->AdvanceCycle();
+ else
+ HazardRec->RecedeCycle();
+ }
+ }
+ CheckPending = true;
+
+ DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
+ << CurrCycle << '\n');
+}
+
+/// Move the boundary of scheduled code by one SUnit.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
+ bool startNewCycle = false;
+
+ // Update the reservation table.
+ if (HazardRec->isEnabled()) {
+ if (!isTop() && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+ HazardRec->EmitInstruction(SU);
+ }
+
+ // Update DFA model.
+ startNewCycle = ResourceModel->reserveResources(SU);
+
+ // Check the instruction group dispatch limit.
+ // TODO: Check if this SU must end a dispatch group.
+ IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
+ if (startNewCycle) {
+ DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+ bumpCycle();
+ }
+ else
+ DEBUG(dbgs() << "*** IssueCount " << IssueCount
+ << " at cycle " << CurrCycle << '\n');
+}
+
+/// Release pending ready nodes into the available queue. This makes them
+/// visible to heuristics.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
+ // If the available queue is empty, it is safe to reset MinReadyCycle.
+ if (Available.empty())
+ MinReadyCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+ SUnit *SU = *(Pending.begin()+i);
+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ if (ReadyCycle > CurrCycle)
+ continue;
+
+ if (checkHazard(SU))
+ continue;
+
+ Available.push(SU);
+ Pending.remove(Pending.begin()+i);
+ --i; --e;
+ }
+ CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
+ if (Available.isInQueue(SU))
+ Available.remove(Available.find(SU));
+ else {
+ assert(Pending.isInQueue(SU) && "bad ready count");
+ Pending.remove(Pending.find(SU));
+ }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// advance the cycle until at least one node is ready. If multiple instructions
+/// are ready, return NULL.
+SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
+ if (CheckPending)
+ releasePending();
+
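+  // Advance cycles until at least one instruction becomes available; the
+  // assert below guards against a hazard that can never clear.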
+ for (unsigned i = 0; Available.empty(); ++i) {
+ assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+ "permanent hazard"); (void)i;
+ ResourceModel->reserveResources(nullptr);
+ bumpCycle();
+ releasePending();
+ }
+ if (Available.size() == 1)
+ return *Available.begin();
+ return nullptr;
+}
+
+#ifndef NDEBUG
+void ConvergingVLIWScheduler::traceCandidate(const char *Label,
+ const ReadyQueue &Q,
+ SUnit *SU, PressureChange P) {
+ dbgs() << Label << " " << Q.getName() << " ";
+ if (P.isValid())
+ dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
+ << P.getUnitInc() << " ";
+ else
+ dbgs() << " ";
+ SU->dump(DAG);
+}
+#endif
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+static SUnit *getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = nullptr;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return nullptr;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+ return OnlyAvailablePred;
+}
+
+/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor
+/// of SU, return it, otherwise return null.
+static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
+ SUnit *OnlyAvailableSucc = nullptr;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ SUnit &Succ = *I->getSUnit();
+ if (!Succ.isScheduled) {
+ // We found an available, but not scheduled, successor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ)
+ return nullptr;
+ OnlyAvailableSucc = &Succ;
+ }
+ }
+ return OnlyAvailableSucc;
+}
+
+// Constants used to denote relative importance of
+// heuristic components for cost computation.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 50;
+static const unsigned ScaleTwo = 10;
+static const unsigned FactorOne = 2;
+
+/// Single point to compute overall scheduling cost.
+/// TODO: More heuristics will be used soon.
+int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
+ SchedCandidate &Candidate,
+ RegPressureDelta &Delta,
+ bool verbose) {
+ // Initial trivial priority.
+ int ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (!SU || SU->isScheduled)
+ return ResCount;
+
+ // Forced priority is high.
+ if (SU->isScheduleHigh)
+ ResCount += PriorityOne;
+
+ // Critical path first.
+ if (Q.getID() == TopQID) {
+ ResCount += (SU->getHeight() * ScaleTwo);
+
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (Top.ResourceModel->isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+ } else {
+ ResCount += (SU->getDepth() * ScaleTwo);
+
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (Bot.ResourceModel->isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+ }
+
+ unsigned NumNodesBlocking = 0;
+ if (Q.getID() == TopQID) {
+ // How many SUs does it block from scheduling?
+ // Look at all of the successors of this node.
+ // Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+ } else {
+ // How many unscheduled predecessors block this node?
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (getSingleUnscheduledSucc(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+ }
+ ResCount += (NumNodesBlocking * ScaleTwo);
+
+ // Factor in reg pressure as a heuristic.
+ ResCount -= (Delta.Excess.getUnitInc()*PriorityTwo);
+ ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityTwo);
+
+ DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")");
+
+ return ResCount;
+}
+
+/// Pick the best candidate from the top queue.
+///
+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
+/// DAG building. To adjust for the current scheduling location we need to
+/// maintain the number of vreg uses remaining to be top-scheduled.
+ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
+pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate) {
+ DEBUG(Q.dump());
+
+ // getMaxPressureDelta temporarily modifies the tracker.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+
+ // BestSU remains NULL if no top candidates beat the best existing candidate.
+ CandResult FoundCandidate = NoCand;
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ RegPressureDelta RPDelta;
+ TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+
+ int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
+
+ // Initialize the candidate if needed.
+ if (!Candidate.SU) {
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
+ continue;
+ }
+
+ // Best cost.
+ if (CurrentCost > Candidate.SCost) {
+ DEBUG(traceCandidate("CCAND", Q, *I));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = BestCost;
+ continue;
+ }
+
+ // Fall through to original instruction order.
+ // Only consider node order if Candidate was chosen from this Q.
+ if (FoundCandidate == NoCand)
+ continue;
+ }
+ return FoundCandidate;
+}
+
+/// Pick the best candidate node from either the top or bottom queue.
+SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
+ // Schedule as far as possible in the direction of no choice. This is most
+ // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) {
+ IsTopNode = false;
+ return SU;
+ }
+ if (SUnit *SU = Top.pickOnlyChoice()) {
+ IsTopNode = true;
+ return SU;
+ }
+ SchedCandidate BotCand;
+ // Prefer bottom scheduling when heuristics are silent.
+ CandResult BotResult = pickNodeFromQueue(Bot.Available,
+ DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+
+ // If either Q has a single candidate that provides the least increase in
+ // Excess pressure, we can immediately schedule from that Q.
+ //
+ // RegionCriticalPSets summarizes the pressure within the scheduled region and
+ // affects picking from either Q. If scheduling in one direction must
+ // increase pressure for one of the excess PSets, then schedule in that
+ // direction first to provide more freedom in the other direction.
+ if (BotResult == SingleExcess || BotResult == SingleCritical) {
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ // Check if the top Q has a better candidate.
+ SchedCandidate TopCand;
+ CandResult TopResult = pickNodeFromQueue(Top.Available,
+ DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+
+ if (TopResult == SingleExcess || TopResult == SingleCritical) {
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // If either Q has a single candidate that minimizes pressure above the
+ // original region's pressure pick it.
+ if (BotResult == SingleMax) {
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ if (TopResult == SingleMax) {
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ if (TopCand.SCost > BotCand.SCost) {
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // Otherwise prefer the bottom candidate in node order.
+ IsTopNode = false;
+ return BotCand.SU;
+}
+
+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
+SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) {
+ assert(Top.Available.empty() && Top.Pending.empty() &&
+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+ return nullptr;
+ }
+ SUnit *SU;
+ if (llvm::ForceTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate TopCand;
+ CandResult TopResult =
+ pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+ (void)TopResult;
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
+ } else if (llvm::ForceBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate BotCand;
+ CandResult BotResult =
+ pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+ (void)BotResult;
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
+ } else {
+ SU = pickNodeBidrectional(IsTopNode);
+ }
+ if (SU->isTopReady())
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
+
+ DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+ << " Scheduling Instruction in cycle "
+ << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
+ SU->dump(DAG));
+ return SU;
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
+/// to update its state based on the current cycle before MachineSchedStrategy
+/// does.
+void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+ if (IsTopNode) {
+ SU->TopReadyCycle = Top.CurrCycle;
+ Top.bumpNode(SU);
+ } else {
+ SU->BotReadyCycle = Bot.CurrCycle;
+ Bot.bumpNode(SU);
+ }
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
new file mode 100644
index 0000000..6034344
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -0,0 +1,244 @@
+//===-- HexagonMachineScheduler.h - Custom Hexagon MI scheduler. ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom Hexagon MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H
+
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+//===----------------------------------------------------------------------===//
+// ConvergingVLIWScheduler - Implementation of the standard
+// MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+class VLIWResourceModel {
+ /// ResourcesModel - Represents VLIW state.
+  /// Not limited to VLIW targets per se, but assumes
+  /// definition of a DFA by the target.
+ DFAPacketizer *ResourcesModel;
+
+ const TargetSchedModel *SchedModel;
+
+  /// Local packet/bundle model. Purely
+  /// internal to the MI scheduler at this time.
+ std::vector<SUnit*> Packet;
+
+ /// Total packets created.
+ unsigned TotalPackets;
+
+public:
+ VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM)
+ : SchedModel(SM), TotalPackets(0) {
+ ResourcesModel = STI.getInstrInfo()->CreateTargetScheduleState(STI);
+
+ // This hard requirement could be relaxed,
+ // but for now do not let it proceed.
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
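+    // resize() followed by clear() leaves Packet empty but with capacity for
+    // a full packet's worth of SUnits.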
+ Packet.resize(SchedModel->getIssueWidth());
+ Packet.clear();
+ ResourcesModel->clearResources();
+ }
+
+ ~VLIWResourceModel() {
+ delete ResourcesModel;
+ }
+
+ void resetPacketState() {
+ Packet.clear();
+ }
+
+ void resetDFA() {
+ ResourcesModel->clearResources();
+ }
+
+ void reset() {
+ Packet.clear();
+ ResourcesModel->clearResources();
+ }
+
+ bool isResourceAvailable(SUnit *SU);
+ bool reserveResources(SUnit *SU);
+ unsigned getTotalPackets() const { return TotalPackets; }
+};
+
+/// Extend the standard ScheduleDAGMI to provide more context and override the
+/// top-level schedule() driver.
+class VLIWMachineScheduler : public ScheduleDAGMILive {
+public:
+ VLIWMachineScheduler(MachineSchedContext *C,
+ std::unique_ptr<MachineSchedStrategy> S)
+ : ScheduleDAGMILive(C, std::move(S)) {}
+
+ /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
+ /// time to do some work.
+ void schedule() override;
+ /// Perform platform-specific DAG postprocessing.
+ void postprocessDAG();
+};
+
+/// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics
+/// to balance the schedule.
+class ConvergingVLIWScheduler : public MachineSchedStrategy {
+
+ /// Store the state used by ConvergingVLIWScheduler heuristics, required
+ /// for the lifetime of one invocation of pickNode().
+ struct SchedCandidate {
+ // The best SUnit candidate.
+ SUnit *SU;
+
+ // Register pressure values for the best candidate.
+ RegPressureDelta RPDelta;
+
+ // Best scheduling cost.
+ int SCost;
+
+ SchedCandidate(): SU(nullptr), SCost(0) {}
+ };
+ /// Represent the type of SchedCandidate found within a single queue.
+ enum CandResult {
+ NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure,
+ BestCost};
+
+  /// Each scheduling boundary is associated with ready queues. It tracks the
+  /// current cycle in whichever direction it has moved, and maintains the state
+ /// of "hazards" and other interlocks at the current cycle.
+ struct VLIWSchedBoundary {
+ VLIWMachineScheduler *DAG;
+ const TargetSchedModel *SchedModel;
+
+ ReadyQueue Available;
+ ReadyQueue Pending;
+ bool CheckPending;
+
+ ScheduleHazardRecognizer *HazardRec;
+ VLIWResourceModel *ResourceModel;
+
+ unsigned CurrCycle;
+ unsigned IssueCount;
+
+ /// MinReadyCycle - Cycle of the soonest available instruction.
+ unsigned MinReadyCycle;
+
+ // Remember the greatest min operand latency.
+ unsigned MaxMinLatency;
+
+ /// Pending queues extend the ready queues with the same ID and the
+ /// PendingFlag set.
+ VLIWSchedBoundary(unsigned ID, const Twine &Name):
+ DAG(nullptr), SchedModel(nullptr), Available(ID, Name+".A"),
+ Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P"),
+ CheckPending(false), HazardRec(nullptr), ResourceModel(nullptr),
+ CurrCycle(0), IssueCount(0),
+ MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
+
+ ~VLIWSchedBoundary() {
+ delete ResourceModel;
+ delete HazardRec;
+ }
+
+ void init(VLIWMachineScheduler *dag, const TargetSchedModel *smodel) {
+ DAG = dag;
+ SchedModel = smodel;
+ }
+
+ bool isTop() const {
+ return Available.getID() == ConvergingVLIWScheduler::TopQID;
+ }
+
+ bool checkHazard(SUnit *SU);
+
+ void releaseNode(SUnit *SU, unsigned ReadyCycle);
+
+ void bumpCycle();
+
+ void bumpNode(SUnit *SU);
+
+ void releasePending();
+
+ void removeReady(SUnit *SU);
+
+ SUnit *pickOnlyChoice();
+ };
+
+ VLIWMachineScheduler *DAG;
+ const TargetSchedModel *SchedModel;
+
+ // State of the top and bottom scheduled instruction boundaries.
+ VLIWSchedBoundary Top;
+ VLIWSchedBoundary Bot;
+
+public:
+ /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
+ enum {
+ TopQID = 1,
+ BotQID = 2,
+ LogMaxQID = 2
+ };
+
+ ConvergingVLIWScheduler()
+ : DAG(nullptr), SchedModel(nullptr), Top(TopQID, "TopQ"),
+ Bot(BotQID, "BotQ") {}
+
+ void initialize(ScheduleDAGMI *dag) override;
+
+ SUnit *pickNode(bool &IsTopNode) override;
+
+ void schedNode(SUnit *SU, bool IsTopNode) override;
+
+ void releaseTopNode(SUnit *SU) override;
+
+ void releaseBottomNode(SUnit *SU) override;
+
+ unsigned ReportPackets() {
+ return Top.ResourceModel->getTotalPackets() +
+ Bot.ResourceModel->getTotalPackets();
+ }
+
+protected:
+ SUnit *pickNodeBidrectional(bool &IsTopNode);
+
+ int SchedulingCost(ReadyQueue &Q,
+ SUnit *SU, SchedCandidate &Candidate,
+ RegPressureDelta &Delta, bool verbose);
+
+ CandResult pickNodeFromQueue(ReadyQueue &Q,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate);
+#ifndef NDEBUG
+ void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU,
+ PressureChange P = PressureChange());
+#endif
+};
+
+} // namespace llvm
+
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
new file mode 100644
index 0000000..20c4ab1
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -0,0 +1,689 @@
+//===----- HexagonNewValueJump.cpp - Hexagon Backend New Value Jump -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the NewValueJump pass in Hexagon.
+// Ideally, we should merge this as a peephole pass prior to register
+// allocation, but because we have a spill in between the feeder and new value
+// jump instructions, we are forced to run after register allocation.
+// Having said that, we should re-attempt to pull this earlier at some point
+// in the future.
+
+// The basic approach looks for a sequence of a predicated jump, the compare
+// instruction that generates the predicate, and the feeder to the predicate.
+// Once it finds all three, it collapses the compare and jump instructions into
+// a new value jump instruction.
+//
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/PassSupport.h"
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <map>
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-nvj"
+
+STATISTIC(NumNVJGenerated, "Number of New Value Jump Instructions created");
+
+static cl::opt<int>
+DbgNVJCount("nvj-count", cl::init(-1), cl::Hidden, cl::desc(
+ "Maximum number of predicated jumps to be converted to New Value Jump"));
+
+static cl::opt<bool> DisableNewValueJumps("disable-nvjump", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable New Value Jumps"));
+
+namespace llvm {
+ FunctionPass *createHexagonNewValueJump();
+ void initializeHexagonNewValueJumpPass(PassRegistry&);
+}
+
+
+namespace {
+ struct HexagonNewValueJump : public MachineFunctionPass {
+ const HexagonInstrInfo *QII;
+ const HexagonRegisterInfo *QRI;
+
+ public:
+ static char ID;
+
+ HexagonNewValueJump() : MachineFunctionPass(ID) {
+ initializeHexagonNewValueJumpPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const override {
+ return "Hexagon NewValueJump";
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ private:
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+
+ bool isNewValueJumpCandidate(const MachineInstr *MI) const;
+ };
+
+} // end of anonymous namespace
+
+char HexagonNewValueJump::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HexagonNewValueJump, "hexagon-nvj",
+ "Hexagon NewValueJump", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(HexagonNewValueJump, "hexagon-nvj",
+ "Hexagon NewValueJump", false, false)
+
+
+// We have identified that this II could be a feeder to NVJ;
+// verify that it can be.
+static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII,
+ const TargetRegisterInfo *TRI,
+ MachineBasicBlock::iterator II,
+ MachineBasicBlock::iterator end,
+ MachineBasicBlock::iterator skip,
+ MachineFunction &MF) {
+
+  // A predicated instruction cannot be a feeder to NVJ.
+ if (QII->isPredicated(II))
+ return false;
+
+ // Bail out if feederReg is a paired register (double regs in
+  // our case). One would think that we could check whether a given
+  // register cmpReg1 or cmpReg2 is a subregister of feederReg
+  // using -- if (QRI->isSubRegister(feederReg, cmpReg1)) -- logic
+  // before the call site of this function.
+  // But we cannot, as the code comes in the following fashion:
+ // %D0<def> = Hexagon_S2_lsr_r_p %D0<kill>, %R2<kill>
+ // %R0<def> = KILL %R0, %D0<imp-use,kill>
+ // %P0<def> = CMPEQri %R0<kill>, 0
+ // Hence, we need to check if it's a KILL instruction.
+ if (II->getOpcode() == TargetOpcode::KILL)
+ return false;
+
+
+  // Make sure there is no 'def' or 'use' of any of the uses of the
+  // feeder insn between its definition, this MI, and the jump, jmpInst,
+  // skipping the compare, cmpInst.
+ // Here's the example.
+ // r21=memub(r22+r24<<#0)
+ // p0 = cmp.eq(r21, #0)
+ // r4=memub(r3+r21<<#0)
+ // if (p0.new) jump:t .LBB29_45
+ // Without this check, it will be converted into
+ // r4=memub(r3+r21<<#0)
+ // r21=memub(r22+r24<<#0)
+ // p0 = cmp.eq(r21, #0)
+ // if (p0.new) jump:t .LBB29_45
+  // and result in WAR hazards if converted to a New Value Jump.
+
+ for (unsigned i = 0; i < II->getNumOperands(); ++i) {
+ if (II->getOperand(i).isReg() &&
+ (II->getOperand(i).isUse() || II->getOperand(i).isDef())) {
+ MachineBasicBlock::iterator localII = II;
+ ++localII;
+ unsigned Reg = II->getOperand(i).getReg();
+ for (MachineBasicBlock::iterator localBegin = localII;
+ localBegin != end; ++localBegin) {
+ if (localBegin == skip ) continue;
+ // Check for Subregisters too.
+ if (localBegin->modifiesRegister(Reg, TRI) ||
+ localBegin->readsRegister(Reg, TRI))
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+// These are the common checks that need to be performed
+// to determine whether
+// 1. the compare instruction can be moved before the jump.
+// 2. the feeder to the compare instruction can be moved before the jump.
+static bool commonChecksToProhibitNewValueJump(bool afterRA,
+ MachineBasicBlock::iterator MII) {
+
+  // If a store is in the path, bail out.
+ if (MII->getDesc().mayStore())
+ return false;
+
+  // If a call is in the path, bail out.
+ if (MII->getOpcode() == Hexagon::J2_call)
+ return false;
+
+ // if NVJ is running prior to RA, do the following checks.
+ if (!afterRA) {
+ // The following Target Opcode instructions are spurious
+ // to new value jump. If they are in the path, bail out.
+ // KILL sets kill flag on the opcode. It also sets up a
+ // single register, out of pair.
+ // %D0<def> = Hexagon_S2_lsr_r_p %D0<kill>, %R2<kill>
+ // %R0<def> = KILL %R0, %D0<imp-use,kill>
+ // %P0<def> = CMPEQri %R0<kill>, 0
+ // PHI can be anything after RA.
+    // COPY can rematerialize things in between the feeder, compare, and nvj.
+ if (MII->getOpcode() == TargetOpcode::KILL ||
+ MII->getOpcode() == TargetOpcode::PHI ||
+ MII->getOpcode() == TargetOpcode::COPY)
+ return false;
+
+    // The following pseudo Hexagon instructions set "use" and "def"
+ // of registers by individual passes in the backend. At this time,
+ // we don't know the scope of usage and definitions of these
+ // instructions.
+ if (MII->getOpcode() == Hexagon::LDriw_pred ||
+ MII->getOpcode() == Hexagon::STriw_pred)
+ return false;
+ }
+
+ return true;
+}
+
+static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
+ const TargetRegisterInfo *TRI,
+ MachineBasicBlock::iterator II,
+ unsigned pReg,
+ bool secondReg,
+ bool optLocation,
+ MachineBasicBlock::iterator end,
+ MachineFunction &MF) {
+
+ MachineInstr *MI = II;
+
+ // If the second operand of the compare is an imm, make sure it's in the
+ // range specified by the arch.
+ if (!secondReg) {
+ int64_t v = MI->getOperand(2).getImm();
+
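+    // New-value compare jumps encode a u5 immediate; cmp.eq and cmp.gt also
+    // allow -1 via the dedicated n1 instruction forms.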
+ if (!(isUInt<5>(v) ||
+ ((MI->getOpcode() == Hexagon::C2_cmpeqi ||
+ MI->getOpcode() == Hexagon::C2_cmpgti) &&
+ (v == -1))))
+ return false;
+ }
+
+ unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning.
+ cmpReg1 = MI->getOperand(1).getReg();
+
+ if (secondReg) {
+ cmpOp2 = MI->getOperand(2).getReg();
+
+    // Make sure that the second register is not defined by a COPY.
+    // At the machine code level we don't need this, but if we decide
+    // to move new value jump prior to RA, we will need it.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) {
+ MachineInstr *def = MRI.getVRegDef(cmpOp2);
+ if (def->getOpcode() == TargetOpcode::COPY)
+ return false;
+ }
+ }
+
+  // Walk the instructions after the compare (predicate def) to the jump,
+  // and make sure the following conditions are satisfied.
+  ++II;
+ for (MachineBasicBlock::iterator localII = II; localII != end;
+ ++localII) {
+
+ // Check 1.
+ // If "common" checks fail, bail out.
+ if (!commonChecksToProhibitNewValueJump(optLocation, localII))
+ return false;
+
+ // Check 2.
+ // If there is a def or use of predicate (result of compare), bail out.
+ if (localII->modifiesRegister(pReg, TRI) ||
+ localII->readsRegister(pReg, TRI))
+ return false;
+
+ // Check 3.
+    // If there is a def of any of the uses of the compare (operands of the
+    // compare), bail out.
+ // bail out.
+ // Eg.
+ // p0 = cmp.eq(r2, r0)
+ // r2 = r4
+ // if (p0.new) jump:t .LBB28_3
+ if (localII->modifiesRegister(cmpReg1, TRI) ||
+ (secondReg && localII->modifiesRegister(cmpOp2, TRI)))
+ return false;
+ }
+ return true;
+}
+
+
+// Given a compare operator, return a matching New Value Jump compare operator.
+// Make sure that MI here is included in isNewValueJumpCandidate.
+static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
+ bool secondRegNewified,
+ MachineBasicBlock *jmpTarget,
+ const MachineBranchProbabilityInfo
+ *MBPI) {
+ bool taken = false;
+ MachineBasicBlock *Src = MI->getParent();
+ const BranchProbability Prediction =
+ MBPI->getEdgeProbability(Src, jmpTarget);
+
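+  // Treat the branch as taken when its probability is at least 1/2; this
+  // selects the taken (:t) vs. not-taken (:nt) hint on the new-value jump.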
+ if (Prediction >= BranchProbability(1,2))
+ taken = true;
+
+ switch (MI->getOpcode()) {
+ case Hexagon::C2_cmpeq:
+ return taken ? Hexagon::J4_cmpeq_t_jumpnv_t
+ : Hexagon::J4_cmpeq_t_jumpnv_nt;
+
+ case Hexagon::C2_cmpeqi: {
+ if (reg >= 0)
+ return taken ? Hexagon::J4_cmpeqi_t_jumpnv_t
+ : Hexagon::J4_cmpeqi_t_jumpnv_nt;
+ else
+ return taken ? Hexagon::J4_cmpeqn1_t_jumpnv_t
+ : Hexagon::J4_cmpeqn1_t_jumpnv_nt;
+ }
+
+ case Hexagon::C2_cmpgt: {
+ if (secondRegNewified)
+ return taken ? Hexagon::J4_cmplt_t_jumpnv_t
+ : Hexagon::J4_cmplt_t_jumpnv_nt;
+ else
+ return taken ? Hexagon::J4_cmpgt_t_jumpnv_t
+ : Hexagon::J4_cmpgt_t_jumpnv_nt;
+ }
+
+ case Hexagon::C2_cmpgti: {
+ if (reg >= 0)
+ return taken ? Hexagon::J4_cmpgti_t_jumpnv_t
+ : Hexagon::J4_cmpgti_t_jumpnv_nt;
+ else
+ return taken ? Hexagon::J4_cmpgtn1_t_jumpnv_t
+ : Hexagon::J4_cmpgtn1_t_jumpnv_nt;
+ }
+
+ case Hexagon::C2_cmpgtu: {
+ if (secondRegNewified)
+ return taken ? Hexagon::J4_cmpltu_t_jumpnv_t
+ : Hexagon::J4_cmpltu_t_jumpnv_nt;
+ else
+ return taken ? Hexagon::J4_cmpgtu_t_jumpnv_t
+ : Hexagon::J4_cmpgtu_t_jumpnv_nt;
+ }
+
+ case Hexagon::C2_cmpgtui:
+ return taken ? Hexagon::J4_cmpgtui_t_jumpnv_t
+ : Hexagon::J4_cmpgtui_t_jumpnv_nt;
+
+ case Hexagon::C4_cmpneq:
+ return taken ? Hexagon::J4_cmpeq_f_jumpnv_t
+ : Hexagon::J4_cmpeq_f_jumpnv_nt;
+
+ case Hexagon::C4_cmplte:
+ if (secondRegNewified)
+ return taken ? Hexagon::J4_cmplt_f_jumpnv_t
+ : Hexagon::J4_cmplt_f_jumpnv_nt;
+ return taken ? Hexagon::J4_cmpgt_f_jumpnv_t
+ : Hexagon::J4_cmpgt_f_jumpnv_nt;
+
+ case Hexagon::C4_cmplteu:
+ if (secondRegNewified)
+ return taken ? Hexagon::J4_cmpltu_f_jumpnv_t
+ : Hexagon::J4_cmpltu_f_jumpnv_nt;
+ return taken ? Hexagon::J4_cmpgtu_f_jumpnv_t
+ : Hexagon::J4_cmpgtu_f_jumpnv_nt;
+
+ default:
+ llvm_unreachable("Could not find matching New Value Jump instruction.");
+ }
+ // return *some value* to avoid compiler warning
+ return 0;
+}
+
+bool HexagonNewValueJump::isNewValueJumpCandidate(const MachineInstr *MI)
+ const {
+ switch (MI->getOpcode()) {
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+
+bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
+
+ DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
+ << "********** Function: "
+ << MF.getName() << "\n");
+
+ // If we move NewValueJump before register allocation we'll need live variable
+ // analysis here too.
+
+ QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ QRI = static_cast<const HexagonRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+
+ if (DisableNewValueJumps) {
+ return false;
+ }
+
+ int nvjCount = DbgNVJCount;
+ int nvjGenerated = 0;
+
+ // Loop through all the bb's of the function
+ for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock *MBB = &*MBBb;
+
+ DEBUG(dbgs() << "** dumping bb ** "
+ << MBB->getNumber() << "\n");
+ DEBUG(MBB->dump());
+ DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n");
+ bool foundJump = false;
+ bool foundCompare = false;
+ bool invertPredicate = false;
+ unsigned predReg = 0; // predicate reg of the jump.
+ unsigned cmpReg1 = 0;
+ int cmpOp2 = 0;
+ bool MO1IsKill = false;
+ bool MO2IsKill = false;
+ MachineBasicBlock::iterator jmpPos;
+ MachineBasicBlock::iterator cmpPos;
+ MachineInstr *cmpInstr = nullptr, *jmpInstr = nullptr;
+ MachineBasicBlock *jmpTarget = nullptr;
+ bool afterRA = false;
+ bool isSecondOpReg = false;
+ bool isSecondOpNewified = false;
+ // Traverse the basic block - bottom up
+ for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin();
+ MII != E;) {
+ MachineInstr *MI = --MII;
+ if (MI->isDebugValue()) {
+ continue;
+ }
+
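+      // Honor the -nvj-count debug option: stop converting once the requested
+      // number of new value jumps has been generated.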
+ if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated))
+ break;
+
+ DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n");
+
+ if (!foundJump &&
+ (MI->getOpcode() == Hexagon::J2_jumpt ||
+ MI->getOpcode() == Hexagon::J2_jumpf ||
+ MI->getOpcode() == Hexagon::J2_jumptnewpt ||
+ MI->getOpcode() == Hexagon::J2_jumptnew ||
+ MI->getOpcode() == Hexagon::J2_jumpfnewpt ||
+ MI->getOpcode() == Hexagon::J2_jumpfnew)) {
+        // This is where the compare and the instruction that
+        // feeds the compare will be inserted.
+ jmpPos = MII;
+ jmpInstr = MI;
+ predReg = MI->getOperand(0).getReg();
+ afterRA = TargetRegisterInfo::isPhysicalRegister(predReg);
+
+ // If ifconverter had not messed up with the kill flags of the
+ // operands, the following check on the kill flag would suffice.
+ // if(!jmpInstr->getOperand(0).isKill()) break;
+
+        // This predicate register is live out of the BB.
+        // This would only work if we could actually use live
+        // variable analysis on phys regs - but LLVM does not
+        // provide LV analysis on phys regs.
+ //if(LVs.isLiveOut(predReg, *MBB)) break;
+
+        // Get all the successors of this block - which will always
+        // be 2. Check if the predicate register is live-in in those
+        // successors. If yes, we cannot delete the predicate -
+        // this is done only because LLVM does not provide LiveOut
+        // information at the BB level.
+ bool predLive = false;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SIE = MBB->succ_end(); SI != SIE; ++SI) {
+ MachineBasicBlock* succMBB = *SI;
+ if (succMBB->isLiveIn(predReg)) {
+ predLive = true;
+ }
+ }
+ if (predLive)
+ break;
+
+ jmpTarget = MI->getOperand(1).getMBB();
+ foundJump = true;
+ if (MI->getOpcode() == Hexagon::J2_jumpf ||
+ MI->getOpcode() == Hexagon::J2_jumpfnewpt ||
+ MI->getOpcode() == Hexagon::J2_jumpfnew) {
+ invertPredicate = true;
+ }
+ continue;
+ }
+
+ // No new value jump if there is a barrier. A barrier has to be in its
+ // own packet. A barrier has zero operands. We conservatively bail out
+ // here if we see any instruction with zero operands.
+ if (foundJump && MI->getNumOperands() == 0)
+ break;
+
+ if (foundJump &&
+ !foundCompare &&
+ MI->getOperand(0).isReg() &&
+ MI->getOperand(0).getReg() == predReg) {
+
+ // Not all compares can be new value compare. Arch Spec: 7.6.1.1
+ if (isNewValueJumpCandidate(MI)) {
+
+ assert((MI->getDesc().isCompare()) &&
+ "Only compare instruction can be collapsed into New Value Jump");
+ isSecondOpReg = MI->getOperand(2).isReg();
+
+ if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg,
+ afterRA, jmpPos, MF))
+ break;
+
+ cmpInstr = MI;
+ cmpPos = MII;
+ foundCompare = true;
+
+ // We need cmpReg1 and cmpOp2(imm or reg) while building
+ // new value jump instruction.
+ cmpReg1 = MI->getOperand(1).getReg();
+ if (MI->getOperand(1).isKill())
+ MO1IsKill = true;
+
+ if (isSecondOpReg) {
+ cmpOp2 = MI->getOperand(2).getReg();
+ if (MI->getOperand(2).isKill())
+ MO2IsKill = true;
+ } else
+ cmpOp2 = MI->getOperand(2).getImm();
+ continue;
+ }
+ }
+
+ if (foundCompare && foundJump) {
+
+ // If "common" checks fail, bail out on this BB.
+ if (!commonChecksToProhibitNewValueJump(afterRA, MII))
+ break;
+
+ bool foundFeeder = false;
+ MachineBasicBlock::iterator feederPos = MII;
+ if (MI->getOperand(0).isReg() &&
+ MI->getOperand(0).isDef() &&
+ (MI->getOperand(0).getReg() == cmpReg1 ||
+ (isSecondOpReg &&
+ MI->getOperand(0).getReg() == (unsigned) cmpOp2))) {
+
+ unsigned feederReg = MI->getOperand(0).getReg();
+
+ // First try to see if we can get the feeder from the first operand
+          // of the compare. If we cannot, and if secondOpReg is true
+ // (second operand of the compare is also register), try that one.
+ // TODO: Try to come up with some heuristic to figure out which
+ // feeder would benefit.
+
+ if (feederReg == cmpReg1) {
+ if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) {
+ if (!isSecondOpReg)
+ break;
+ else
+ continue;
+ } else
+ foundFeeder = true;
+ }
+
+ if (!foundFeeder &&
+ isSecondOpReg &&
+ feederReg == (unsigned) cmpOp2)
+ if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF))
+ break;
+
+ if (isSecondOpReg) {
+ // In case of CMPLT, or CMPLTU, or EQ with the second register
+ // to newify, swap the operands.
+ if (cmpInstr->getOpcode() == Hexagon::C2_cmpeq &&
+ feederReg == (unsigned) cmpOp2) {
+ unsigned tmp = cmpReg1;
+ bool tmpIsKill = MO1IsKill;
+ cmpReg1 = cmpOp2;
+ MO1IsKill = MO2IsKill;
+ cmpOp2 = tmp;
+ MO2IsKill = tmpIsKill;
+ }
+
+ // Now we have swapped the operands, all we need to check is,
+ // if the second operand (after swap) is the feeder.
+ // And if it is, make a note.
+ if (feederReg == (unsigned)cmpOp2)
+ isSecondOpNewified = true;
+ }
+
+          // Now that we are moving the feeder closer to the jump,
+          // make sure we are respecting the kill flags of
+          // the operands of the feeder.
+
+ bool updatedIsKill = false;
+ for (unsigned i = 0; i < MI->getNumOperands(); i++) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ unsigned feederReg = MO.getReg();
+ for (MachineBasicBlock::iterator localII = feederPos,
+ end = jmpPos; localII != end; localII++) {
+ MachineInstr *localMI = localII;
+ for (unsigned j = 0; j < localMI->getNumOperands(); j++) {
+ MachineOperand &localMO = localMI->getOperand(j);
+ if (localMO.isReg() && localMO.isUse() &&
+ localMO.isKill() && feederReg == localMO.getReg()) {
+                  // We found a kill of a register that the feeder also uses.
+                  // Transfer the kill flag to the feeder's operand.
+ localMO.setIsKill(false);
+ MO.setIsKill();
+ updatedIsKill = true;
+ break;
+ }
+ }
+ if (updatedIsKill) break;
+ }
+ }
+ if (updatedIsKill) break;
+ }
+
+ MBB->splice(jmpPos, MI->getParent(), MI);
+ MBB->splice(jmpPos, MI->getParent(), cmpInstr);
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstr *NewMI;
+
+ assert((isNewValueJumpCandidate(cmpInstr)) &&
+ "This compare is not a New Value Jump candidate.");
+ unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2,
+ isSecondOpNewified,
+ jmpTarget, MBPI);
+ if (invertPredicate)
+ opc = QII->getInvertedPredicatedOpcode(opc);
+
+ if (isSecondOpReg)
+ NewMI = BuildMI(*MBB, jmpPos, dl,
+ QII->get(opc))
+ .addReg(cmpReg1, getKillRegState(MO1IsKill))
+ .addReg(cmpOp2, getKillRegState(MO2IsKill))
+ .addMBB(jmpTarget);
+
+ else if ((cmpInstr->getOpcode() == Hexagon::C2_cmpeqi ||
+ cmpInstr->getOpcode() == Hexagon::C2_cmpgti) &&
+ cmpOp2 == -1 )
+ // Corresponding new-value compare jump instructions don't have the
+ // operand for -1 immediate value.
+ NewMI = BuildMI(*MBB, jmpPos, dl,
+ QII->get(opc))
+ .addReg(cmpReg1, getKillRegState(MO1IsKill))
+ .addMBB(jmpTarget);
+
+ else
+ NewMI = BuildMI(*MBB, jmpPos, dl,
+ QII->get(opc))
+ .addReg(cmpReg1, getKillRegState(MO1IsKill))
+ .addImm(cmpOp2)
+ .addMBB(jmpTarget);
+
+ assert(NewMI && "New Value Jump Instruction Not created!");
+ (void)NewMI;
+ if (cmpInstr->getOperand(0).isReg() &&
+ cmpInstr->getOperand(0).isKill())
+ cmpInstr->getOperand(0).setIsKill(false);
+ if (cmpInstr->getOperand(1).isReg() &&
+ cmpInstr->getOperand(1).isKill())
+ cmpInstr->getOperand(1).setIsKill(false);
+ cmpInstr->eraseFromParent();
+ jmpInstr->eraseFromParent();
+ ++nvjGenerated;
+ ++NumNVJGenerated;
+ break;
+ }
+ }
+ }
+ }
+
+ return true;
+
+}
+
+FunctionPass *llvm::createHexagonNewValueJump() {
+ return new HexagonNewValueJump();
+}
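The kill-flag bookkeeping above can be stated independently of the MachineInstr API: when the feeder is moved down past an instruction that killed one of the feeder's source registers, that kill must migrate to the feeder, which is now the last use. The following standalone C++ sketch is a simplified model of that rule (an illustration, not the pass itself):

    #include <vector>

    struct Use { unsigned Reg; bool Kill; };
    struct Instr { std::vector<Use> Uses; };

    // Scan the instructions the feeder is moved past; if one of them killed a
    // register the feeder reads, hand the kill flag to the feeder instead.
    // Like the pass above, stop after the first transfer.
    void transferKills(Instr &Feeder, std::vector<Instr> &Between) {
      for (Use &FU : Feeder.Uses)
        for (Instr &I : Between)
          for (Use &U : I.Uses)
            if (U.Kill && U.Reg == FU.Reg) {
              U.Kill = false;  // the intervening use is no longer the last one
              FU.Kill = true;  // the moved feeder now carries the kill
              return;
            }
    }
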
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
new file mode 100644
index 0000000..fbd29cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
@@ -0,0 +1,603 @@
+//===- HexagonOperands.td - Hexagon immediate processing ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def s32ImmOperand : AsmOperandClass { let Name = "s32Imm"; }
+def s8ImmOperand : AsmOperandClass { let Name = "s8Imm"; }
+def s8Imm64Operand : AsmOperandClass { let Name = "s8Imm64"; }
+def s6ImmOperand : AsmOperandClass { let Name = "s6Imm"; }
+def s4ImmOperand : AsmOperandClass { let Name = "s4Imm"; }
+def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; }
+def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; }
+def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; }
+def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; }
+def s4_6ImmOperand : AsmOperandClass { let Name = "s4_6Imm"; }
+def s3_6ImmOperand : AsmOperandClass { let Name = "s3_6Imm"; }
+def u64ImmOperand : AsmOperandClass { let Name = "u64Imm"; }
+def u32ImmOperand : AsmOperandClass { let Name = "u32Imm"; }
+def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; }
+def u16ImmOperand : AsmOperandClass { let Name = "u16Imm"; }
+def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; }
+def u16_1ImmOperand : AsmOperandClass { let Name = "u16_1Imm"; }
+def u16_2ImmOperand : AsmOperandClass { let Name = "u16_2Imm"; }
+def u16_3ImmOperand : AsmOperandClass { let Name = "u16_3Imm"; }
+def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; }
+def u10ImmOperand : AsmOperandClass { let Name = "u10Imm"; }
+def u9ImmOperand : AsmOperandClass { let Name = "u9Imm"; }
+def u8ImmOperand : AsmOperandClass { let Name = "u8Imm"; }
+def u7ImmOperand : AsmOperandClass { let Name = "u7Imm"; }
+def u6ImmOperand : AsmOperandClass { let Name = "u6Imm"; }
+def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; }
+def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; }
+def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; }
+def u6_3ImmOperand : AsmOperandClass { let Name = "u6_3Imm"; }
+def u5ImmOperand : AsmOperandClass { let Name = "u5Imm"; }
+def u4ImmOperand : AsmOperandClass { let Name = "u4Imm"; }
+def u3ImmOperand : AsmOperandClass { let Name = "u3Imm"; }
+def u2ImmOperand : AsmOperandClass { let Name = "u2Imm"; }
+def u1ImmOperand : AsmOperandClass { let Name = "u1Imm"; }
+def n8ImmOperand : AsmOperandClass { let Name = "n8Imm"; }
+// Immediate operands.
+
+let OperandType = "OPERAND_IMMEDIATE",
+ DecoderMethod = "unsignedImmDecoder" in {
+ def s32Imm : Operand<i32> { let ParserMatchClass = s32ImmOperand;
+ let DecoderMethod = "s32ImmDecoder"; }
+ def s8Imm : Operand<i32> { let ParserMatchClass = s8ImmOperand;
+ let DecoderMethod = "s8ImmDecoder"; }
+ def s8Imm64 : Operand<i64> { let ParserMatchClass = s8Imm64Operand;
+ let DecoderMethod = "s8ImmDecoder"; }
+ def s6Imm : Operand<i32> { let ParserMatchClass = s6ImmOperand;
+ let DecoderMethod = "s6_0ImmDecoder"; }
+ def s6_3Imm : Operand<i32>;
+ def s4Imm : Operand<i32> { let ParserMatchClass = s4ImmOperand;
+ let DecoderMethod = "s4_0ImmDecoder"; }
+ def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand;
+ let DecoderMethod = "s4_0ImmDecoder"; }
+ def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand;
+ let DecoderMethod = "s4_1ImmDecoder"; }
+ def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand;
+ let DecoderMethod = "s4_2ImmDecoder"; }
+ def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand;
+ let DecoderMethod = "s4_3ImmDecoder"; }
+ def u64Imm : Operand<i64> { let ParserMatchClass = u64ImmOperand; }
+ def u32Imm : Operand<i32> { let ParserMatchClass = u32ImmOperand; }
+ def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; }
+ def u16Imm : Operand<i32> { let ParserMatchClass = u16ImmOperand; }
+ def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; }
+ def u16_1Imm : Operand<i32> { let ParserMatchClass = u16_1ImmOperand; }
+ def u16_2Imm : Operand<i32> { let ParserMatchClass = u16_2ImmOperand; }
+ def u16_3Imm : Operand<i32> { let ParserMatchClass = u16_3ImmOperand; }
+ def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; }
+ def u10Imm : Operand<i32> { let ParserMatchClass = u10ImmOperand; }
+ def u9Imm : Operand<i32> { let ParserMatchClass = u9ImmOperand; }
+ def u8Imm : Operand<i32> { let ParserMatchClass = u8ImmOperand; }
+ def u7Imm : Operand<i32> { let ParserMatchClass = u7ImmOperand; }
+ def u6Imm : Operand<i32> { let ParserMatchClass = u6ImmOperand; }
+ def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; }
+ def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; }
+ def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; }
+ def u6_3Imm : Operand<i32> { let ParserMatchClass = u6_3ImmOperand; }
+ def u5Imm : Operand<i32> { let ParserMatchClass = u5ImmOperand; }
+ def u5_0Imm : Operand<i32>;
+ def u5_1Imm : Operand<i32>;
+ def u5_2Imm : Operand<i32>;
+ def u5_3Imm : Operand<i32>;
+ def u4Imm : Operand<i32> { let ParserMatchClass = u4ImmOperand; }
+ def u4_0Imm : Operand<i32>;
+ def u4_1Imm : Operand<i32>;
+ def u4_2Imm : Operand<i32>;
+ def u4_3Imm : Operand<i32>;
+ def u3Imm : Operand<i32> { let ParserMatchClass = u3ImmOperand; }
+ def u3_0Imm : Operand<i32>;
+ def u3_1Imm : Operand<i32>;
+ def u3_2Imm : Operand<i32>;
+ def u3_3Imm : Operand<i32>;
+ def u2Imm : Operand<i32> { let ParserMatchClass = u2ImmOperand; }
+ def u1Imm : Operand<i32> { let ParserMatchClass = u1ImmOperand; }
+ def n8Imm : Operand<i32> { let ParserMatchClass = n8ImmOperand; }
+}
+
+let OperandType = "OPERAND_IMMEDIATE" in {
+ def s4_6Imm : Operand<i32> { let ParserMatchClass = s4_6ImmOperand;
+ let PrintMethod = "prints4_6ImmOperand";
+ let DecoderMethod = "s4_6ImmDecoder";}
+ def s4_7Imm : Operand<i32> { let PrintMethod = "prints4_7ImmOperand";
+ let DecoderMethod = "s4_6ImmDecoder";}
+ def s3_6Imm : Operand<i32> { let ParserMatchClass = s3_6ImmOperand;
+ let PrintMethod = "prints3_6ImmOperand";
+ let DecoderMethod = "s3_6ImmDecoder";}
+ def s3_7Imm : Operand<i32> { let PrintMethod = "prints3_7ImmOperand";
+ let DecoderMethod = "s3_6ImmDecoder";}
+}
+
+//
+// Immediate predicates
+//
+def s32ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<32>(v);
+}]>;
+
+def s32_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<32>(v);
+}]>;
+
+def s31_1ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<31,1>(v);
+}]>;
+
+def s30_2ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<30,2>(v);
+}]>;
+
+def s29_3ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<29,3>(v);
+}]>;
+
+def s16ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<16>(v);
+}]>;
+
+def s11_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<11>(v);
+}]>;
+
+def s11_1ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,1>(v);
+}]>;
+
+def s11_2ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,2>(v);
+}]>;
+
+def s11_3ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,3>(v);
+}]>;
+
+def s10ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<10>(v);
+}]>;
+
+def s8ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+def s8Imm64Pred : PatLeaf<(i64 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+def s6ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<6>(v);
+}]>;
+
+def s4_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<4>(v);
+}]>;
+
+def s4_1ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,1>(v);
+}]>;
+
+def s4_2ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,2>(v);
+}]>;
+
+def s4_3ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,3>(v);
+}]>;
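Most of the signed predicates above reduce to two helpers from llvm/Support/MathExtras.h: isInt<N> (the value fits in N signed bits) and isShiftedInt<N,S> (the value is an N-bit signed quantity scaled by 2^S, so its low S bits are zero). A standalone sketch of those semantics, assuming the usual MathExtras behaviour rather than quoting its implementation:

    #include <cassert>
    #include <cstdint>

    // Fits in N signed bits.
    template <unsigned N> bool fitsSigned(int64_t v) {
      return v >= -(INT64_C(1) << (N - 1)) && v < (INT64_C(1) << (N - 1));
    }

    // An N-bit signed value shifted left by S: the low S bits are zero and the
    // scaled-down value fits in N signed bits.
    template <unsigned N, unsigned S> bool fitsShiftedSigned(int64_t v) {
      return (v & ((INT64_C(1) << S) - 1)) == 0 &&
             fitsSigned<N>(v / (INT64_C(1) << S));
    }

    int main() {
      assert(fitsShiftedSigned<4, 3>(56));   // 7 * 8, accepted by s4_3ImmPred
      assert(!fitsShiftedSigned<4, 3>(60));  // not 8-byte aligned
      assert(!fitsShiftedSigned<4, 3>(64));  // 8 * 8, multiplier out of range
    }
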
+
+def u64ImmPred : PatLeaf<(i64 imm), [{
+ // Adding "N ||" to suppress gcc unused warning.
+ return (N || true);
+}]>;
+
+def u32ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+}]>;
+
+def u32_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+}]>;
+
+def u31_1ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<31,1>(v);
+}]>;
+
+def u30_2ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<30,2>(v);
+}]>;
+
+def u29_3ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<29,3>(v);
+}]>;
+
+def u26_6ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<26,6>(v);
+}]>;
+
+def u16_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<16>(v);
+}]>;
+
+def u16_1ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<16,1>(v);
+}]>;
+
+def u16_2ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<16,2>(v);
+}]>;
+
+def u11_3ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<11,3>(v);
+}]>;
+
+def u10ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<10>(v);
+}]>;
+
+def u9ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<9>(v);
+}]>;
+
+def u8ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<8>(v);
+}]>;
+
+def u7StrictPosImmPred : ImmLeaf<i32, [{
+  // u7StrictPosImmPred predicate - True if the immediate fits in a 7-bit
+ // unsigned field and is strictly greater than 0.
+ return isUInt<7>(Imm) && Imm > 0;
+}]>;
+
+def u7ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<7>(v);
+}]>;
+
+def u6ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_1ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,1>(v);
+}]>;
+
+def u6_2ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,2>(v);
+}]>;
+
+def u6_3ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,3>(v);
+}]>;
+
+def u5ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<5>(v);
+}]>;
+
+def u4ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<4>(v);
+}]>;
+
+def u3ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<3>(v);
+}]>;
+
+def u2ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<2>(v);
+}]>;
+
+def u1ImmPred : PatLeaf<(i1 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<1>(v);
+}]>;
+
+def u1ImmPred32 : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<1>(v);
+}]>;
+
+def m5BImmPred : PatLeaf<(i32 imm), [{
+  // m5BImmPred predicate - True if the (char) number is in the range -31 .. -1
+  // and will fit in a 5 bit field when made positive, for use in memops.
+  // This is specific to the zero extending of a negative by CombineInstr.
+ int8_t v = (int8_t)N->getSExtValue();
+ return (-31 <= v && v <= -1);
+}]>;
+
+def m5HImmPred : PatLeaf<(i32 imm), [{
+  // m5HImmPred predicate - True if the (short) number is in the range -31 .. -1
+  // and will fit in a 5 bit field when made positive, for use in memops.
+  // This is specific to the zero extending of a negative by CombineInstr.
+ int16_t v = (int16_t)N->getSExtValue();
+ return (-31 <= v && v <= -1);
+}]>;
+
+def m5ImmPred : PatLeaf<(i32 imm), [{
+  // m5ImmPred predicate - True if the number is in the range -31 .. -1
+ // and will fit in a 5 bit field when made positive, for use in memops.
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-31 <= v && v <= -1);
+}]>;
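The three m5 predicates above describe memop immediates that subtract a small value: the hardware field is a 5-bit unsigned magnitude, so the acceptable source-level constants are -31 through -1. A standalone sketch of that encoding, under the assumption that this is the intended reading of the comments:

    #include <cassert>
    #include <cstdint>

    bool isM5Imm(int64_t v) { return v >= -31 && v <= -1; }

    // The instruction stores the magnitude in a 5-bit unsigned field.
    uint32_t encodeM5(int64_t v) { return static_cast<uint32_t>(-v); }

    int main() {
      assert(isM5Imm(-8) && encodeM5(-8) == 8);
      assert(!isM5Imm(0));    // zero is not a negative addend
      assert(!isM5Imm(-32));  // magnitude no longer fits in 5 bits
    }
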
+
+// nN means negative integers in [-(2^N - 1), 0].
+def n8ImmPred : PatLeaf<(i32 imm), [{
+  // n8ImmPred predicate - True if the immediate is a negative integer
+  // in the range [-255, 0].
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-255 <= v && v <= 0);
+}]>;
+
+def nOneImmPred : PatLeaf<(i32 imm), [{
+ // nOneImmPred predicate - True if the immediate is -1.
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-1 == v);
+}]>;
+
+def Set5ImmPred : PatLeaf<(i32 imm), [{
+ // Set5ImmPred predicate - True if the number is in the series of values.
+ // [ 2^0, 2^1, ... 2^31 ]
+ // For use in setbit immediate.
+ uint32_t v = (int32_t)N->getSExtValue();
+ // Constrain to 32 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def Clr5ImmPred : PatLeaf<(i32 imm), [{
+ // Clr5ImmPred predicate - True if the number is in the series of
+ // bit negated values.
+ // [ 2^0, 2^1, ... 2^31 ]
+ // For use in clrbit immediate.
+ // Note: we are bit NOTing the value.
+ uint32_t v = ~ (int32_t)N->getSExtValue();
+ // Constrain to 32 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def SetClr5ImmPred : PatLeaf<(i32 imm), [{
+ // True if the immediate is in range 0..31.
+ int32_t v = (int32_t)N->getSExtValue();
+ return (v >= 0 && v <= 31);
+}]>;
+
+def Set4ImmPred : PatLeaf<(i32 imm), [{
+ // Set4ImmPred predicate - True if the number is in the series of values:
+ // [ 2^0, 2^1, ... 2^15 ].
+ // For use in setbit immediate.
+ uint16_t v = (int16_t)N->getSExtValue();
+ // Constrain to 16 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def Clr4ImmPred : PatLeaf<(i32 imm), [{
+ // Clr4ImmPred predicate - True if the number is in the series of
+ // bit negated values:
+ // [ 2^0, 2^1, ... 2^15 ].
+ // For use in setbit and clrbit immediate.
+ uint16_t v = ~ (int16_t)N->getSExtValue();
+ // Constrain to 16 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def SetClr4ImmPred : PatLeaf<(i32 imm), [{
+ // True if the immediate is in the range 0..15.
+ int16_t v = (int16_t)N->getSExtValue();
+ return (v >= 0 && v <= 15);
+}]>;
+
+def Set3ImmPred : PatLeaf<(i32 imm), [{
+ // True if the number is in the series of values: [ 2^0, 2^1, ... 2^7 ].
+ // For use in setbit immediate.
+ uint8_t v = (int8_t)N->getSExtValue();
+ // Constrain to 8 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def Clr3ImmPred : PatLeaf<(i32 imm), [{
+ // True if the number is in the series of bit negated values: [ 2^0, 2^1, ... 2^7 ].
+ // For use in setbit and clrbit immediate.
+ uint8_t v = ~ (int8_t)N->getSExtValue();
+ // Constrain to 8 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def SetClr3ImmPred : PatLeaf<(i32 imm), [{
+ // True if the immediate is in the range 0..7.
+ int8_t v = (int8_t)N->getSExtValue();
+ return (v >= 0 && v <= 7);
+}]>;
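All of the Set*/Clr* predicates above bottom out in ImmIsSingleBit, which is provided elsewhere in the Hexagon backend; the intent is a plain power-of-two test on the (possibly truncated, possibly complemented) value. A minimal equivalent check, assuming that reading and not quoting the backend's definition:

    #include <cassert>
    #include <cstdint>

    bool immIsSingleBit(uint32_t v) { return v != 0 && (v & (v - 1)) == 0; }

    int main() {
      assert(immIsSingleBit(1u << 5));          // valid setbit immediate
      assert(!immIsSingleBit(0));               // no bit set
      assert(!immIsSingleBit((1u << 5) | 1u));  // more than one bit set
    }
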
+
+
+// Extendable immediate operands.
+def f32ExtOperand : AsmOperandClass { let Name = "f32Ext"; }
+def s16ExtOperand : AsmOperandClass { let Name = "s16Ext"; }
+def s12ExtOperand : AsmOperandClass { let Name = "s12Ext"; }
+def s10ExtOperand : AsmOperandClass { let Name = "s10Ext"; }
+def s9ExtOperand : AsmOperandClass { let Name = "s9Ext"; }
+def s8ExtOperand : AsmOperandClass { let Name = "s8Ext"; }
+def s7ExtOperand : AsmOperandClass { let Name = "s7Ext"; }
+def s6ExtOperand : AsmOperandClass { let Name = "s6Ext"; }
+def s11_0ExtOperand : AsmOperandClass { let Name = "s11_0Ext"; }
+def s11_1ExtOperand : AsmOperandClass { let Name = "s11_1Ext"; }
+def s11_2ExtOperand : AsmOperandClass { let Name = "s11_2Ext"; }
+def s11_3ExtOperand : AsmOperandClass { let Name = "s11_3Ext"; }
+def u6ExtOperand : AsmOperandClass { let Name = "u6Ext"; }
+def u7ExtOperand : AsmOperandClass { let Name = "u7Ext"; }
+def u8ExtOperand : AsmOperandClass { let Name = "u8Ext"; }
+def u9ExtOperand : AsmOperandClass { let Name = "u9Ext"; }
+def u10ExtOperand : AsmOperandClass { let Name = "u10Ext"; }
+def u6_0ExtOperand : AsmOperandClass { let Name = "u6_0Ext"; }
+def u6_1ExtOperand : AsmOperandClass { let Name = "u6_1Ext"; }
+def u6_2ExtOperand : AsmOperandClass { let Name = "u6_2Ext"; }
+def u6_3ExtOperand : AsmOperandClass { let Name = "u6_3Ext"; }
+def u32MustExtOperand : AsmOperandClass { let Name = "u32MustExt"; }
+
+
+
+let OperandType = "OPERAND_IMMEDIATE", PrintMethod = "printExtOperand",
+ DecoderMethod = "unsignedImmDecoder" in {
+ def f32Ext : Operand<f32> { let ParserMatchClass = f32ExtOperand; }
+ def s16Ext : Operand<i32> { let ParserMatchClass = s16ExtOperand;
+ let DecoderMethod = "s16ImmDecoder"; }
+ def s12Ext : Operand<i32> { let ParserMatchClass = s12ExtOperand;
+ let DecoderMethod = "s12ImmDecoder"; }
+ def s11_0Ext : Operand<i32> { let ParserMatchClass = s11_0ExtOperand;
+ let DecoderMethod = "s11_0ImmDecoder"; }
+ def s11_1Ext : Operand<i32> { let ParserMatchClass = s11_1ExtOperand;
+ let DecoderMethod = "s11_1ImmDecoder"; }
+ def s11_2Ext : Operand<i32> { let ParserMatchClass = s11_2ExtOperand;
+ let DecoderMethod = "s11_2ImmDecoder"; }
+ def s11_3Ext : Operand<i32> { let ParserMatchClass = s11_3ExtOperand;
+ let DecoderMethod = "s11_3ImmDecoder"; }
+ def s10Ext : Operand<i32> { let ParserMatchClass = s10ExtOperand;
+ let DecoderMethod = "s10ImmDecoder"; }
+ def s9Ext : Operand<i32> { let ParserMatchClass = s9ExtOperand;
+ let DecoderMethod = "s90ImmDecoder"; }
+ def s8Ext : Operand<i32> { let ParserMatchClass = s8ExtOperand;
+ let DecoderMethod = "s8ImmDecoder"; }
+ def s7Ext : Operand<i32> { let ParserMatchClass = s7ExtOperand; }
+ def s6Ext : Operand<i32> { let ParserMatchClass = s6ExtOperand;
+ let DecoderMethod = "s6_0ImmDecoder"; }
+ def u6Ext : Operand<i32> { let ParserMatchClass = u6ExtOperand; }
+ def u7Ext : Operand<i32> { let ParserMatchClass = u7ExtOperand; }
+ def u8Ext : Operand<i32> { let ParserMatchClass = u8ExtOperand; }
+ def u9Ext : Operand<i32> { let ParserMatchClass = u9ExtOperand; }
+ def u10Ext : Operand<i32> { let ParserMatchClass = u10ExtOperand; }
+ def u6_0Ext : Operand<i32> { let ParserMatchClass = u6_0ExtOperand; }
+ def u6_1Ext : Operand<i32> { let ParserMatchClass = u6_1ExtOperand; }
+ def u6_2Ext : Operand<i32> { let ParserMatchClass = u6_2ExtOperand; }
+ def u6_3Ext : Operand<i32> { let ParserMatchClass = u6_3ExtOperand; }
+ def u32MustExt : Operand<i32> { let ParserMatchClass = u32MustExtOperand; }
+}
+
+
+def s4_7ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (HST->hasV60TOps())
+    // Return true if the immediate can fit in an 11-bit sign extended field and
+ // is 128-byte aligned.
+ return isShiftedInt<4,7>(v);
+ return false;
+}]>;
+
+def s3_7ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (HST->hasV60TOps())
+    // Return true if the immediate can fit in a 10-bit sign extended field and
+ // is 128-byte aligned.
+ return isShiftedInt<3,7>(v);
+ return false;
+}]>;
+
+def s4_6ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (HST->hasV60TOps())
+ // Return true if the immediate can fit in a 10-bit sign extended field and
+ // is 64-byte aligned.
+ return isShiftedInt<4,6>(v);
+ return false;
+}]>;
+
+def s3_6ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (HST->hasV60TOps())
+ // Return true if the immediate can fit in a 9-bit sign extended field and
+ // is 64-byte aligned.
+ return isShiftedInt<3,6>(v);
+ return false;
+}]>;
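Concretely, these V60 predicates accept small signed multiples of the vector access size: isShiftedInt<4,7> is a 4-bit signed multiplier scaled by 128 bytes and isShiftedInt<3,6> a 3-bit multiplier scaled by 64. A quick standalone check of the s4_7 case, using the same simplified MathExtras semantics sketched earlier:

    #include <cassert>
    #include <cstdint>

    // s4_7Imm: a 4-bit signed multiplier times 128, i.e. -1024..896 in steps
    // of 128 bytes.
    bool isS4_7Imm(int64_t v) {
      return (v & 127) == 0 && v / 128 >= -8 && v / 128 <= 7;
    }

    int main() {
      assert(isS4_7Imm(-1024) && isS4_7Imm(896));
      assert(!isS4_7Imm(64));    // not 128-byte aligned
      assert(!isS4_7Imm(1024));  // multiplier 8 exceeds the 4-bit signed range
    }
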
+
+
+// This complex pattern exists only to create a machine instruction operand
+// of type "frame index". There doesn't seem to be a way to do that directly
+// in the patterns.
+def AddrFI : ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>;
+
+// These complex patterns are not strictly necessary, since global address
+// folding will happen during DAG combining. For distinguishing between GA
+// and GP, pat frags with HexagonCONST32 and HexagonCONST32_GP can be used.
+def AddrGA : ComplexPattern<i32, 1, "SelectAddrGA", [], []>;
+def AddrGP : ComplexPattern<i32, 1, "SelectAddrGP", [], []>;
+
+// Address operands.
+
+let PrintMethod = "printGlobalOperand" in {
+ def globaladdress : Operand<i32>;
+ def globaladdressExt : Operand<i32>;
+}
+
+let PrintMethod = "printJumpTable" in
+def jumptablebase : Operand<i32>;
+
+def brtarget : Operand<OtherVT> {
+ let DecoderMethod = "brtargetDecoder";
+ let PrintMethod = "printBrtarget";
+}
+def brtargetExt : Operand<OtherVT> {
+ let DecoderMethod = "brtargetDecoder";
+ let PrintMethod = "printBrtarget";
+}
+def calltarget : Operand<i32> {
+ let DecoderMethod = "brtargetDecoder";
+ let PrintMethod = "printBrtarget";
+}
+
+def bblabel : Operand<i32>;
+def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">;
+
+// Return true if this is a 32 to 64-bit sign-extended load.
+def is_sext_i32 : PatLeaf<(i64 DoubleRegs:$src1), [{
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
+ if (!LD)
+ return false;
+ return LD->getExtensionType() == ISD::SEXTLOAD &&
+ LD->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
new file mode 100644
index 0000000..1723771
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
@@ -0,0 +1,150 @@
+//===- HexagonOptimizeSZextends.cpp - Remove unnecessary argument extends -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass that removes sign extends for function parameters. These parameters
+// are already sign extended by the caller per Hexagon's ABI.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+
+#include "Hexagon.h"
+
+using namespace llvm;
+
+namespace llvm {
+ FunctionPass *createHexagonOptimizeSZextends();
+ void initializeHexagonOptimizeSZextendsPass(PassRegistry&);
+}
+
+namespace {
+ struct HexagonOptimizeSZextends : public FunctionPass {
+ public:
+ static char ID;
+ HexagonOptimizeSZextends() : FunctionPass(ID) {
+ initializeHexagonOptimizeSZextendsPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+
+ const char *getPassName() const override {
+ return "Remove sign extends";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addPreserved<StackProtector>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool intrinsicAlreadySextended(Intrinsic::ID IntID);
+ };
+}
+
+char HexagonOptimizeSZextends::ID = 0;
+
+INITIALIZE_PASS(HexagonOptimizeSZextends, "reargs",
+ "Remove Sign and Zero Extends for Args", false, false)
+
+bool HexagonOptimizeSZextends::intrinsicAlreadySextended(Intrinsic::ID IntID) {
+ switch(IntID) {
+ case llvm::Intrinsic::hexagon_A2_addh_l16_sat_ll:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+bool HexagonOptimizeSZextends::runOnFunction(Function &F) {
+ unsigned Idx = 1;
+  // Try to optimize sign extends in formal parameters. This relies on the
+  // caller already having sign extended the values; it is not clear whether
+  // the ABI strictly requires the caller to do so.
+ for (auto &Arg : F.args()) {
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) {
+ if (!isa<PointerType>(Arg.getType())) {
+ for (auto UI = Arg.use_begin(); UI != Arg.use_end();) {
+ if (isa<SExtInst>(*UI)) {
+ Instruction* Use = cast<Instruction>(*UI);
+ SExtInst* SI = new SExtInst(&Arg, Use->getType());
+ assert (EVT::getEVT(SI->getType()) ==
+ (EVT::getEVT(Use->getType())));
+ ++UI;
+ Use->replaceAllUsesWith(SI);
+ Instruction* First = &F.getEntryBlock().front();
+ SI->insertBefore(First);
+ Use->eraseFromParent();
+ } else {
+ ++UI;
+ }
+ }
+ }
+ }
+ ++Idx;
+ }
+
+ // Try to remove redundant sext operations on Hexagon. The hardware
+ // already sign extends many 16 bit intrinsic operations to 32 bits.
+ // For example:
+ // %34 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32 %x, i32 %y)
+ // %sext233 = shl i32 %34, 16
+ // %conv52 = ashr exact i32 %sext233, 16
+ for (auto &B : F) {
+ for (auto &I : B) {
+ // Look for arithmetic shift right by 16.
+ BinaryOperator *Ashr = dyn_cast<BinaryOperator>(&I);
+ if (!(Ashr && Ashr->getOpcode() == Instruction::AShr))
+ continue;
+ Value *AshrOp1 = Ashr->getOperand(1);
+ ConstantInt *C = dyn_cast<ConstantInt>(AshrOp1);
+ // Right shifted by 16.
+ if (!(C && C->getSExtValue() == 16))
+ continue;
+
+ // The first operand of Ashr comes from logical shift left.
+ Instruction *Shl = dyn_cast<Instruction>(Ashr->getOperand(0));
+ if (!(Shl && Shl->getOpcode() == Instruction::Shl))
+ continue;
+ Value *Intr = Shl->getOperand(0);
+ Value *ShlOp1 = Shl->getOperand(1);
+ C = dyn_cast<ConstantInt>(ShlOp1);
+ // Left shifted by 16.
+ if (!(C && C->getSExtValue() == 16))
+ continue;
+
+ // The first operand of Shl comes from an intrinsic.
+ if (IntrinsicInst *I = dyn_cast<IntrinsicInst>(Intr)) {
+ if (!intrinsicAlreadySextended(I->getIntrinsicID()))
+ continue;
+ // All is well. Replace all uses of AShr with I.
+ for (auto UI = Ashr->user_begin(), UE = Ashr->user_end();
+ UI != UE; ++UI) {
+ const Use &TheUse = UI.getUse();
+ if (Instruction *J = dyn_cast<Instruction>(TheUse.getUser())) {
+ J->replaceUsesOfWith(Ashr, I);
+ }
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+
+FunctionPass *llvm::createHexagonOptimizeSZextends() {
+ return new HexagonOptimizeSZextends();
+}
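The hand-rolled operand walk in runOnFunction above recognizes the idiom ashr (shl X, 16), 16. Assuming the pattern-matching helpers from llvm/IR/PatternMatch.h are acceptable in this pass, the same recognition can be written more compactly; this is only a sketch of an equivalent check, not the committed code:

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/PatternMatch.h"

    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Returns true and sets Src when I is "ashr (shl Src, 16), 16", the
    // sign-extend-from-16-bit idiom the pass tries to remove.
    static bool isRedundantSext16(Instruction &I, Value *&Src) {
      ConstantInt *ShlAmt = nullptr, *AshrAmt = nullptr;
      return match(&I, m_AShr(m_Shl(m_Value(Src), m_ConstantInt(ShlAmt)),
                              m_ConstantInt(AshrAmt))) &&
             ShlAmt->equalsInt(16) && AshrAmt->equalsInt(16);
    }
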
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
new file mode 100644
index 0000000..e68ff85
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -0,0 +1,338 @@
+//===-- HexagonPeephole.cpp - Hexagon Peephole Optimizations --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// This peephole pass optimizes the following cases.
+// 1. Optimizes redundant sign extends for the following case
+// Transform the following pattern
+// %vreg170<def> = SXTW %vreg166
+// ...
+// %vreg176<def> = COPY %vreg170:subreg_loreg
+//
+// Into
+//    %vreg176<def> = COPY %vreg166
+//
+// 2. Optimizes redundant negation of predicates.
+// %vreg15<def> = CMPGTrr %vreg6, %vreg2
+// ...
+// %vreg16<def> = NOT_p %vreg15<kill>
+// ...
+// JMP_c %vreg16<kill>, <BB#1>, %PC<imp-def,dead>
+//
+// Into
+// %vreg15<def> = CMPGTrr %vreg6, %vreg2;
+// ...
+// JMP_cNot %vreg15<kill>, <BB#1>, %PC<imp-def,dead>;
+//
+// Note: The peephole pass makes instructions like
+//    %vreg170<def> = SXTW %vreg166 or %vreg16<def> = NOT_p %vreg15<kill>
+//    redundant and relies on some form of dead code removal, such as
+//    DCE or DIE, to actually eliminate them.
+
+
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-peephole"
+
+static cl::opt<bool> DisableHexagonPeephole("disable-hexagon-peephole",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Peephole Optimization"));
+
+static cl::opt<bool> DisablePNotP("disable-hexagon-pnotp",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Optimization of PNotP"));
+
+static cl::opt<bool> DisableOptSZExt("disable-hexagon-optszext",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Optimization of Sign/Zero Extends"));
+
+static cl::opt<bool> DisableOptExtTo64("disable-hexagon-opt-ext-to-64",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Optimization of extensions to i64."));
+
+namespace llvm {
+ FunctionPass *createHexagonPeephole();
+ void initializeHexagonPeepholePass(PassRegistry&);
+}
+
+namespace {
+ struct HexagonPeephole : public MachineFunctionPass {
+ const HexagonInstrInfo *QII;
+ const HexagonRegisterInfo *QRI;
+ const MachineRegisterInfo *MRI;
+
+ public:
+ static char ID;
+ HexagonPeephole() : MachineFunctionPass(ID) {
+ initializeHexagonPeepholePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+      return "Hexagon optimize redundant zero and sign extends";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ void ChangeOpInto(MachineOperand &Dst, MachineOperand &Src);
+ };
+}
+
+char HexagonPeephole::ID = 0;
+
+INITIALIZE_PASS(HexagonPeephole, "hexagon-peephole", "Hexagon Peephole",
+ false, false)
+
+bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
+ QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ QRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ DenseMap<unsigned, unsigned> PeepholeMap;
+ DenseMap<unsigned, std::pair<unsigned, unsigned> > PeepholeDoubleRegsMap;
+
+ if (DisableHexagonPeephole) return false;
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock *MBB = &*MBBb;
+ PeepholeMap.clear();
+ PeepholeDoubleRegsMap.clear();
+
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ // Look for sign extends:
+ // %vreg170<def> = SXTW %vreg166
+ if (!DisableOptSZExt && MI->getOpcode() == Hexagon::A2_sxtw) {
+ assert (MI->getNumOperands() == 2);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src.getReg();
+ // Just handle virtual registers.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Map the following:
+ // %vreg170<def> = SXTW %vreg166
+ // PeepholeMap[170] = vreg166
+ PeepholeMap[DstReg] = SrcReg;
+ }
+ }
+
+ // Look for %vreg170<def> = COMBINE_ir_V4 (0, %vreg169)
+      // %vreg170:DoubleRegs, %vreg169:IntRegs
+ if (!DisableOptExtTo64 &&
+ MI->getOpcode () == Hexagon::A4_combineir) {
+ assert (MI->getNumOperands() == 3);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src1 = MI->getOperand(1);
+ MachineOperand &Src2 = MI->getOperand(2);
+ if (Src1.getImm() != 0)
+ continue;
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src2.getReg();
+ PeepholeMap[DstReg] = SrcReg;
+ }
+
+ // Look for this sequence below
+ // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32
+ // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg.
+ // and convert into
+ // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg.
+ if (MI->getOpcode() == Hexagon::S2_lsr_i_p) {
+ assert(MI->getNumOperands() == 3);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src1 = MI->getOperand(1);
+ MachineOperand &Src2 = MI->getOperand(2);
+ if (Src2.getImm() != 32)
+ continue;
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src1.getReg();
+ PeepholeDoubleRegsMap[DstReg] =
+          std::make_pair(SrcReg, Hexagon::subreg_hireg);
+ }
+
+ // Look for P=NOT(P).
+ if (!DisablePNotP &&
+ (MI->getOpcode() == Hexagon::C2_not)) {
+ assert (MI->getNumOperands() == 2);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src.getReg();
+ // Just handle virtual registers.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Map the following:
+ // %vreg170<def> = NOT_xx %vreg166
+ // PeepholeMap[170] = vreg166
+ PeepholeMap[DstReg] = SrcReg;
+ }
+ }
+
+ // Look for copy:
+ // %vreg176<def> = COPY %vreg170:subreg_loreg
+ if (!DisableOptSZExt && MI->isCopy()) {
+ assert (MI->getNumOperands() == 2);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+
+ // Make sure we are copying the lower 32 bits.
+ if (Src.getSubReg() != Hexagon::subreg_loreg)
+ continue;
+
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Try to find in the map.
+ if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) {
+ // Change the 1st operand.
+ MI->RemoveOperand(1);
+ MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false));
+ } else {
+ DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI =
+ PeepholeDoubleRegsMap.find(SrcReg);
+ if (DI != PeepholeDoubleRegsMap.end()) {
+ std::pair<unsigned,unsigned> PeepholeSrc = DI->second;
+ MI->RemoveOperand(1);
+ MI->addOperand(MachineOperand::CreateReg(PeepholeSrc.first,
+ false /*isDef*/,
+ false /*isImp*/,
+ false /*isKill*/,
+ false /*isDead*/,
+ false /*isUndef*/,
+ false /*isEarlyClobber*/,
+ PeepholeSrc.second));
+ }
+ }
+ }
+ }
+
+ // Look for Predicated instructions.
+ if (!DisablePNotP) {
+ bool Done = false;
+ if (QII->isPredicated(MI)) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ unsigned Reg0 = Op0.getReg();
+ const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0);
+ if (RC0->getID() == Hexagon::PredRegsRegClassID) {
+          // Handle instructions that have a predicate register in op0
+ // (most cases of predicable instructions).
+ if (TargetRegisterInfo::isVirtualRegister(Reg0)) {
+ // Try to find in the map.
+ if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) {
+              // Change the 1st operand and flip the opcode.
+ MI->getOperand(0).setReg(PeepholeSrc);
+ int NewOp = QII->getInvertedPredicatedOpcode(MI->getOpcode());
+ MI->setDesc(QII->get(NewOp));
+ Done = true;
+ }
+ }
+ }
+ }
+
+ if (!Done) {
+ // Handle special instructions.
+ unsigned Op = MI->getOpcode();
+ unsigned NewOp = 0;
+ unsigned PR = 1, S1 = 2, S2 = 3; // Operand indices.
+
+ switch (Op) {
+ case Hexagon::C2_mux:
+ case Hexagon::C2_muxii:
+ NewOp = Op;
+ break;
+ case Hexagon::C2_muxri:
+ NewOp = Hexagon::C2_muxir;
+ break;
+ case Hexagon::C2_muxir:
+ NewOp = Hexagon::C2_muxri;
+ break;
+ }
+ if (NewOp) {
+ unsigned PSrc = MI->getOperand(PR).getReg();
+ if (unsigned POrig = PeepholeMap.lookup(PSrc)) {
+ MI->getOperand(PR).setReg(POrig);
+ MI->setDesc(QII->get(NewOp));
+ // Swap operands S1 and S2.
+ MachineOperand Op1 = MI->getOperand(S1);
+ MachineOperand Op2 = MI->getOperand(S2);
+ ChangeOpInto(MI->getOperand(S1), Op2);
+ ChangeOpInto(MI->getOperand(S2), Op1);
+ }
+ } // if (NewOp)
+ } // if (!Done)
+
+ } // if (!DisablePNotP)
+
+ } // Instruction
+ } // Basic Block
+ return true;
+}
+
+void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) {
+ assert (&Dst != &Src && "Cannot duplicate into itself");
+ switch (Dst.getType()) {
+ case MachineOperand::MO_Register:
+ if (Src.isReg()) {
+ Dst.setReg(Src.getReg());
+ } else if (Src.isImm()) {
+ Dst.ChangeToImmediate(Src.getImm());
+ } else {
+ llvm_unreachable("Unexpected src operand type");
+ }
+ break;
+
+ case MachineOperand::MO_Immediate:
+ if (Src.isImm()) {
+ Dst.setImm(Src.getImm());
+ } else if (Src.isReg()) {
+ Dst.ChangeToRegister(Src.getReg(), Src.isDef(), Src.isImplicit(),
+ Src.isKill(), Src.isDead(), Src.isUndef(),
+ Src.isDebug());
+ } else {
+ llvm_unreachable("Unexpected src operand type");
+ }
+ break;
+
+ default:
+ llvm_unreachable("Unexpected dst operand type");
+ break;
+ }
+}
+
+FunctionPass *llvm::createHexagonPeephole() {
+ return new HexagonPeephole();
+}
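The first peephole in this file (an SXTW feeding a COPY of subreg_loreg) rests on a simple arithmetic fact: the low 32-bit half of a 64-bit sign extension is the original 32-bit value, so the copy may read the original register directly. A standalone check of that fact, assuming nothing beyond ordinary two's-complement conversions:

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t x = -12345;
      int64_t wide = static_cast<int64_t>(x);                 // what SXTW computes
      int32_t lo = static_cast<int32_t>(wide & 0xffffffff);   // the :subreg_loreg copy
      assert(lo == x);  // the copy can use the original register instead
    }
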
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
new file mode 100644
index 0000000..61c0589
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -0,0 +1,203 @@
+//===-- HexagonRegisterInfo.cpp - Hexagon Register Information ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonRegisterInfo.h"
+#include "Hexagon.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+HexagonRegisterInfo::HexagonRegisterInfo()
+ : HexagonGenRegisterInfo(Hexagon::R31) {}
+
+
+bool HexagonRegisterInfo::isEHReturnCalleeSaveReg(unsigned R) const {
+ return R == Hexagon::R0 || R == Hexagon::R1 || R == Hexagon::R2 ||
+ R == Hexagon::R3 || R == Hexagon::D0 || R == Hexagon::D1;
+}
+
+bool HexagonRegisterInfo::isCalleeSaveReg(unsigned Reg) const {
+ return Hexagon::R16 <= Reg && Reg <= Hexagon::R27;
+}
+
+
+const MCPhysReg *
+HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF) const {
+ static const MCPhysReg CallerSavedRegsV4[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9,
+ Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14,
+ Hexagon::R15, 0
+ };
+
+ auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget());
+ switch (HST.getHexagonArchVersion()) {
+ case HexagonSubtarget::V4:
+ case HexagonSubtarget::V5:
+ case HexagonSubtarget::V55:
+ case HexagonSubtarget::V60:
+ return CallerSavedRegsV4;
+ }
+ llvm_unreachable(
+    "Caller saved registers requested for unknown architecture version");
+}
+
+
+const MCPhysReg *
+HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const MCPhysReg CalleeSavedRegsV3[] = {
+ Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
+ Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23,
+ Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
+ };
+
+ switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) {
+ case HexagonSubtarget::V4:
+ case HexagonSubtarget::V5:
+ case HexagonSubtarget::V55:
+ case HexagonSubtarget::V60:
+ return CalleeSavedRegsV3;
+ }
+ llvm_unreachable("Callee saved registers requested for unknown architecture "
+ "version");
+}
+
+BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
+ const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(HEXAGON_RESERVED_REG_1);
+ Reserved.set(HEXAGON_RESERVED_REG_2);
+ Reserved.set(Hexagon::R29);
+ Reserved.set(Hexagon::R30);
+ Reserved.set(Hexagon::R31);
+ Reserved.set(Hexagon::PC);
+ Reserved.set(Hexagon::D15);
+ Reserved.set(Hexagon::LC0);
+ Reserved.set(Hexagon::LC1);
+ Reserved.set(Hexagon::SA0);
+ Reserved.set(Hexagon::SA1);
+ return Reserved;
+}
+
+
+void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOp,
+ RegScavenger *RS) const {
+ //
+ // Hexagon_TODO: Do we need to enforce this for Hexagon?
+ assert(SPAdj == 0 && "Unexpected");
+
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MB = *MI.getParent();
+ MachineFunction &MF = *MB.getParent();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ auto &HII = *HST.getInstrInfo();
+ auto &HFI = *HST.getFrameLowering();
+
+ unsigned BP = 0;
+ int FI = MI.getOperand(FIOp).getIndex();
+ // Select the base pointer (BP) and calculate the actual offset from BP
+ // to the beginning of the object at index FI.
+ int Offset = HFI.getFrameIndexReference(MF, FI, BP);
+ // Add the offset from the instruction.
+ int RealOffset = Offset + MI.getOperand(FIOp+1).getImm();
+
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case Hexagon::TFR_FIA:
+ MI.setDesc(HII.get(Hexagon::A2_addi));
+ MI.getOperand(FIOp).ChangeToImmediate(RealOffset);
+ MI.RemoveOperand(FIOp+1);
+ return;
+ case Hexagon::TFR_FI:
+ // Set up the instruction for updating below.
+ MI.setDesc(HII.get(Hexagon::A2_addi));
+ break;
+ }
+
+ if (HII.isValidOffset(Opc, RealOffset)) {
+ MI.getOperand(FIOp).ChangeToRegister(BP, false);
+ MI.getOperand(FIOp+1).ChangeToImmediate(RealOffset);
+ return;
+ }
+
+#ifndef NDEBUG
+ const Function *F = MF.getFunction();
+ dbgs() << "In function ";
+ if (F) dbgs() << F->getName();
+ else dbgs() << "<?>";
+ dbgs() << ", BB#" << MB.getNumber() << "\n" << MI;
+#endif
+ llvm_unreachable("Unhandled instruction");
+}
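The frame-index rewrite above folds two numbers: the offset of the object relative to the chosen base pointer (from getFrameIndexReference) and the immediate already present on the instruction. A worked example with made-up values:

    #include <cassert>

    int main() {
      int Offset = -24;    // object at frame index FI lives at BP - 24
      int InstrImm = 8;    // the instruction accesses object + 8
      int RealOffset = Offset + InstrImm;
      assert(RealOffset == -16);  // rewritten as BP with immediate -16
    }
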
+
+
+unsigned HexagonRegisterInfo::getRARegister() const {
+ return Hexagon::R31;
+}
+
+
+unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
+ &MF) const {
+ const HexagonFrameLowering *TFI = getFrameLowering(MF);
+ if (TFI->hasFP(MF))
+ return getFrameRegister();
+ return getStackRegister();
+}
+
+
+unsigned HexagonRegisterInfo::getFrameRegister() const {
+ return Hexagon::R30;
+}
+
+
+unsigned HexagonRegisterInfo::getStackRegister() const {
+ return Hexagon::R29;
+}
+
+
+bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF)
+ const {
+ return MF.getSubtarget<HexagonSubtarget>().getFrameLowering()->hasFP(MF);
+}
+
+
+unsigned HexagonRegisterInfo::getFirstCallerSavedNonParamReg() const {
+ return Hexagon::R6;
+}
+
+
+#define GET_REGINFO_TARGET_DESC
+#include "HexagonGenRegisterInfo.inc"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
new file mode 100644
index 0000000..db7e0f2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -0,0 +1,89 @@
+//==- HexagonRegisterInfo.h - Hexagon Register Information Impl --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONREGISTERINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONREGISTERINFO_H
+
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "HexagonGenRegisterInfo.inc"
+
+//
+// We try not to hard code the reserved registers in our code,
+// so the following two macros were defined. However, there
+// are still a few places where R11 and R10 are hard wired.
+// See below. If, in the future, we decide to change the reserved
+// registers, don't forget to change the following places.
+//
+// 1. the "Defs" set of STriw_pred in HexagonInstrInfo.td
+// 2. the "Defs" set of LDri_pred in HexagonInstrInfo.td
+// 3. the definition of "IntRegs" in HexagonRegisterInfo.td
+// 4. the definition of "DoubleRegs" in HexagonRegisterInfo.td
+//
+#define HEXAGON_RESERVED_REG_1 Hexagon::R10
+#define HEXAGON_RESERVED_REG_2 Hexagon::R11
+
+namespace llvm {
+class HexagonRegisterInfo : public HexagonGenRegisterInfo {
+public:
+ HexagonRegisterInfo();
+
+ /// Code Generation virtual methods...
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF)
+ const override;
+
+
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum, RegScavenger *RS = nullptr) const override;
+
+ /// Returns true since we may need scavenging for a temporary register
+ /// when generating hardware loop instructions.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override {
+ return true;
+ }
+
+ /// Returns true. Spill code for predicate registers might need an extra
+ /// register.
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
+ return true;
+ }
+
+ /// Returns true if the frame pointer is valid.
+ bool useFPForScavengingIndex(const MachineFunction &MF) const override;
+
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
+ return true;
+ }
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
+ unsigned getFrameRegister() const;
+ unsigned getStackRegister() const;
+
+ const MCPhysReg *getCallerSavedRegs(const MachineFunction *MF) const;
+
+ unsigned getFirstCallerSavedNonParamReg() const;
+
+ bool isEHReturnCalleeSaveReg(unsigned Reg) const;
+ bool isCalleeSaveReg(unsigned Reg) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
new file mode 100644
index 0000000..81629dc
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -0,0 +1,270 @@
+//===-- HexagonRegisterInfo.td - Hexagon Register defs -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Hexagon register file.
+//===----------------------------------------------------------------------===//
+
+let Namespace = "Hexagon" in {
+
+ class HexagonReg<bits<5> num, string n, list<string> alt = [],
+ list<Register> alias = []> : Register<n> {
+ field bits<5> Num;
+ let Aliases = alias;
+ let HWEncoding{4-0} = num;
+ }
+
+ class HexagonDoubleReg<bits<5> num, string n, list<Register> subregs,
+ list<string> alt = []> :
+ RegisterWithSubRegs<n, subregs> {
+ field bits<5> Num;
+
+ let AltNames = alt;
+ let HWEncoding{4-0} = num;
+ }
+
+ // Registers are identified with 5-bit ID numbers.
+ // Ri - 32-bit integer registers.
+ class Ri<bits<5> num, string n, list<string> alt = []> : HexagonReg<num, n, alt> {
+ let Num = num;
+ }
+
+ // Rf - 32-bit floating-point registers.
+ class Rf<bits<5> num, string n> : HexagonReg<num, n> {
+ let Num = num;
+ }
+
+
+ // Rd - 64-bit registers.
+ class Rd<bits<5> num, string n, list<Register> subregs> :
+ HexagonDoubleReg<num, n, subregs> {
+ let Num = num;
+ let SubRegs = subregs;
+ }
+
+ // Rp - predicate registers
+ class Rp<bits<5> num, string n> : HexagonReg<num, n> {
+ let Num = num;
+ }
+
+
+ // Rq - vector predicate registers
+ class Rq<bits<3> num, string n> : Register<n, []> {
+ let HWEncoding{2-0} = num;
+ }
+
+ // Rc - control registers
+ class Rc<bits<5> num, string n,
+ list<string> alt = [], list<Register> alias = []> :
+ HexagonReg<num, n, alt, alias> {
+ let Num = num;
+ }
+
+ // Rcc - 64-bit control registers.
+ class Rcc<bits<5> num, string n, list<Register> subregs,
+ list<string> alt = []> :
+ HexagonDoubleReg<num, n, subregs, alt> {
+ let Num = num;
+ let SubRegs = subregs;
+ }
+
+ // Mx - address modifier registers
+ class Mx<bits<1> num, string n> : HexagonReg<{0b0000, num}, n> {
+ let Num = !cast<bits<5>>(num);
+ }
+
+ def subreg_loreg : SubRegIndex<32>;
+ def subreg_hireg : SubRegIndex<32, 32>;
+ def subreg_overflow : SubRegIndex<1, 0>;
+
+ // Integer registers.
+ foreach i = 0-28 in {
+ def R#i : Ri<i, "r"#i>, DwarfRegNum<[i]>;
+ }
+
+ def R29 : Ri<29, "r29", ["sp"]>, DwarfRegNum<[29]>;
+ def R30 : Ri<30, "r30", ["fp"]>, DwarfRegNum<[30]>;
+ def R31 : Ri<31, "r31", ["lr"]>, DwarfRegNum<[31]>;
+
+ // Aliases of the R* registers used to hold 64-bit int values (doubles).
+ let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in {
+ def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>;
+ def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>;
+ def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>;
+ def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>;
+ def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>;
+ def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>;
+ def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>;
+ def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>;
+ def D8 : Rd<16, "r17:16", [R16, R17]>, DwarfRegNum<[48]>;
+ def D9 : Rd<18, "r19:18", [R18, R19]>, DwarfRegNum<[50]>;
+ def D10 : Rd<20, "r21:20", [R20, R21]>, DwarfRegNum<[52]>;
+ def D11 : Rd<22, "r23:22", [R22, R23]>, DwarfRegNum<[54]>;
+ def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>;
+ def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>;
+ def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>;
+ def D15 : Rd<30, "r31:30", [R30, R31]>, DwarfRegNum<[62]>;
+ }
+
+ // Predicate registers.
+ def P0 : Rp<0, "p0">, DwarfRegNum<[63]>;
+ def P1 : Rp<1, "p1">, DwarfRegNum<[64]>;
+ def P2 : Rp<2, "p2">, DwarfRegNum<[65]>;
+ def P3 : Rp<3, "p3">, DwarfRegNum<[66]>;
+
+ // Modifier registers.
+ // C6 and C7 can also be M0 and M1, but register names must be unique, even
+ // if belonging to different register classes.
+ def M0 : Mx<0, "m0">, DwarfRegNum<[72]>;
+ def M1 : Mx<1, "m1">, DwarfRegNum<[73]>;
+
+  // Fake register to represent USR.OVF bit. Arithmetic/saturating instruc-
+ // tions modify this bit, and multiple such instructions are allowed in the
+ // same packet. We need to ignore output dependencies on this bit, but not
+ // on the entire USR.
+ def USR_OVF : Rc<?, "usr.ovf">;
+
+ // Control registers.
+ def SA0 : Rc<0, "sa0", ["c0"]>, DwarfRegNum<[67]>;
+ def LC0 : Rc<1, "lc0", ["c1"]>, DwarfRegNum<[68]>;
+ def SA1 : Rc<2, "sa1", ["c2"]>, DwarfRegNum<[69]>;
+ def LC1 : Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>;
+ def P3_0 : Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>,
+ DwarfRegNum<[71]>;
+ def C5 : Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>; // future use
+ def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[73]>;
+ def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[74]>;
+
+ def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[75]> {
+ let SubRegIndices = [subreg_overflow];
+ let SubRegs = [USR_OVF];
+ }
+ def PC : Rc<9, "pc">, DwarfRegNum<[76]>;
+ def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>;
+ def GP : Rc<11, "gp">, DwarfRegNum<[78]>;
+ def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>;
+ def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>;
+ def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>;
+ def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[82]>;
+}
+
+ // Control registers pairs.
+ let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in {
+ def C1_0 : Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>;
+ def C3_2 : Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>;
+ def C7_6 : Rcc<6, "c7:6", [C6, C7], ["m1:0"]>, DwarfRegNum<[72]>;
+ def C9_8 : Rcc<8, "c9:8", [USR, PC]>, DwarfRegNum<[74]>;
+ def C11_10 : Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>;
+ def CS : Rcc<12, "c13:12", [CS0, CS1], ["cs1:0"]>, DwarfRegNum<[78]>;
+ def UPC : Rcc<14, "c15:14", [UPCL, UPCH]>, DwarfRegNum<[80]>;
+ }
+
+ foreach i = 0-31 in {
+ def V#i : Ri<i, "v"#i>, DwarfRegNum<[!add(i, 99)]>;
+ }
+
+ // Aliases of the V* registers used to hold double vec values.
+ let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in {
+ def W0 : Rd< 0, "v1:0", [V0, V1]>, DwarfRegNum<[99]>;
+ def W1 : Rd< 2, "v3:2", [V2, V3]>, DwarfRegNum<[101]>;
+ def W2 : Rd< 4, "v5:4", [V4, V5]>, DwarfRegNum<[103]>;
+ def W3 : Rd< 6, "v7:6", [V6, V7]>, DwarfRegNum<[105]>;
+ def W4 : Rd< 8, "v9:8", [V8, V9]>, DwarfRegNum<[107]>;
+ def W5 : Rd<10, "v11:10", [V10, V11]>, DwarfRegNum<[109]>;
+ def W6 : Rd<12, "v13:12", [V12, V13]>, DwarfRegNum<[111]>;
+ def W7 : Rd<14, "v15:14", [V14, V15]>, DwarfRegNum<[113]>;
+ def W8 : Rd<16, "v17:16", [V16, V17]>, DwarfRegNum<[115]>;
+ def W9 : Rd<18, "v19:18", [V18, V19]>, DwarfRegNum<[117]>;
+ def W10 : Rd<20, "v21:20", [V20, V21]>, DwarfRegNum<[119]>;
+ def W11 : Rd<22, "v23:22", [V22, V23]>, DwarfRegNum<[121]>;
+ def W12 : Rd<24, "v25:24", [V24, V25]>, DwarfRegNum<[123]>;
+ def W13 : Rd<26, "v27:26", [V26, V27]>, DwarfRegNum<[125]>;
+ def W14 : Rd<28, "v29:28", [V28, V29]>, DwarfRegNum<[127]>;
+ def W15 : Rd<30, "v31:30", [V30, V31]>, DwarfRegNum<[129]>;
+ }
+
+ // Vector Predicate registers.
+ def Q0 : Rq<0, "q0">, DwarfRegNum<[131]>;
+ def Q1 : Rq<1, "q1">, DwarfRegNum<[132]>;
+ def Q2 : Rq<2, "q2">, DwarfRegNum<[133]>;
+ def Q3 : Rq<3, "q3">, DwarfRegNum<[134]>;
+
+// Register classes.
+//
+// FIXME: the register order should be defined in terms of the preferred
+// allocation order...
+//
+def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
+ (add (sequence "R%u", 0, 9),
+ (sequence "R%u", 12, 28),
+ R10, R11, R29, R30, R31)> {
+}
+
+// Registers are listed in reverse order for allocation preference reasons.
+def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32,
+ (add R7, R6, R5, R4, R3, R2, R1, R0)> ;
+
+def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
+ (add (sequence "D%u", 0, 4),
+ (sequence "D%u", 6, 13), D5, D14, D15)>;
+
+def VectorRegs : RegisterClass<"Hexagon", [v64i8, v32i16, v16i32, v8i64], 512,
+ (add (sequence "V%u", 0, 31))>;
+
+def VecDblRegs : RegisterClass<"Hexagon",
+ [v128i8, v64i16, v32i32, v16i64], 1024,
+ (add (sequence "W%u", 0, 15))>;
+
+def VectorRegs128B : RegisterClass<"Hexagon",
+ [v128i8, v64i16, v32i32, v16i64], 1024,
+ (add (sequence "V%u", 0, 31))>;
+
+def VecDblRegs128B : RegisterClass<"Hexagon",
+ [v256i8,v128i16,v64i32,v32i64], 2048,
+ (add (sequence "W%u", 0, 15))>;
+
+def VecPredRegs : RegisterClass<"Hexagon", [v512i1], 512,
+ (add (sequence "Q%u", 0, 3))>;
+
+def VecPredRegs128B : RegisterClass<"Hexagon", [v1024i1], 1024,
+ (add (sequence "Q%u", 0, 3))>;
+
+def PredRegs : RegisterClass<"Hexagon",
+ [i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32,
+ (add (sequence "P%u", 0, 3))>
+{
+ let Size = 32;
+}
+
+let Size = 32 in
+def ModRegs : RegisterClass<"Hexagon", [i32], 32, (add M0, M1)>;
+
+let Size = 32, isAllocatable = 0 in
+def CtrRegs : RegisterClass<"Hexagon", [i32], 32,
+ (add LC0, SA0, LC1, SA1,
+ P3_0,
+ M0, M1, C6, C7, CS0, CS1, UPCL, UPCH,
+ USR, USR_OVF, UGP, GP, PC)>;
+
+let Size = 64, isAllocatable = 0 in
+def CtrRegs64 : RegisterClass<"Hexagon", [i64], 64,
+ (add C1_0, C3_2, C7_6, C9_8, C11_10, CS, UPC)>;
+
+def VolatileV3 {
+ list<Register> Regs = [D0, D1, D2, D3, D4, D5, D6, D7,
+ R28, R31,
+ P0, P1, P2, P3,
+ M0, M1,
+ LC0, LC1, SA0, SA1, USR, USR_OVF];
+}
+
+def PositiveHalfWord : PatLeaf<(i32 IntRegs:$a),
+[{
+ return isPositiveHalfWord(N);
+}]>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
new file mode 100644
index 0000000..6e4987b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
@@ -0,0 +1,24 @@
+//===- HexagonSchedule.td - Hexagon Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V4 Machine Info +
+//===----------------------------------------------------------------------===//
+
+include "HexagonScheduleV4.td"
+
+// V55 Machine Info +
+include "HexagonScheduleV55.td"
+
+//===----------------------------------------------------------------------===//
+// V60 Machine Info -
+//===----------------------------------------------------------------------===//
+
+include "HexagonScheduleV60.td"
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td
new file mode 100644
index 0000000..67af147
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -0,0 +1,206 @@
+//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// There are four SLOTS (four parallel pipelines) in the Hexagon V4 machine.
+// This file describes that machine information.
+
+//
+// |===========|==================================================|
+// | PIPELINE | Instruction Classes |
+// |===========|==================================================|
+// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM |
+// |-----------|--------------------------------------------------|
+// | SLOT1 | LD ST ALU32 |
+// |-----------|--------------------------------------------------|
+// | SLOT2 | XTYPE ALU32 J JR |
+// |-----------|--------------------------------------------------|
+// | SLOT3 | XTYPE ALU32 J CR |
+// |===========|==================================================|
+
+// Functional Units.
+def SLOT0 : FuncUnit;
+def SLOT1 : FuncUnit;
+def SLOT2 : FuncUnit;
+def SLOT3 : FuncUnit;
+// Endloop is a pseudo instruction that is encoded with 2 bits in a packet
+// rather than taking an execution slot. This special unit is needed
+// to schedule an ENDLOOP with 4 other instructions.
+def SLOT_ENDLOOP: FuncUnit;
+
+// Itinerary classes.
+def PSEUDO : InstrItinClass;
+def PSEUDOM : InstrItinClass;
+// ALU64/M/S Instruction classes of V2 are collectively known as XTYPE in V4.
+def DUPLEX : InstrItinClass;
+def PREFIX : InstrItinClass;
+def COMPOUND_CJ_ARCHDEPSLOT : InstrItinClass;
+def COMPOUND : InstrItinClass;
+
+def ALU32_2op_tc_1_SLOT0123 : InstrItinClass;
+def ALU32_2op_tc_2early_SLOT0123 : InstrItinClass;
+def ALU32_3op_tc_2early_SLOT0123 : InstrItinClass;
+def ALU32_3op_tc_1_SLOT0123 : InstrItinClass;
+def ALU32_3op_tc_2_SLOT0123 : InstrItinClass;
+def ALU32_ADDI_tc_1_SLOT0123 : InstrItinClass;
+def ALU64_tc_1_SLOT23 : InstrItinClass;
+def ALU64_tc_1or2_SLOT23 : InstrItinClass;
+def ALU64_tc_2_SLOT23 : InstrItinClass;
+def ALU64_tc_2early_SLOT23 : InstrItinClass;
+def ALU64_tc_3x_SLOT23 : InstrItinClass;
+def CR_tc_2_SLOT3 : InstrItinClass;
+def CR_tc_2early_SLOT23 : InstrItinClass;
+def CR_tc_2early_SLOT3 : InstrItinClass;
+def CR_tc_3x_SLOT23 : InstrItinClass;
+def CR_tc_3x_SLOT3 : InstrItinClass;
+def J_tc_2early_SLOT23 : InstrItinClass;
+def J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT : InstrItinClass;
+def J_tc_2early_SLOT2 : InstrItinClass;
+def LD_tc_ld_SLOT01 : InstrItinClass;
+def LD_tc_ld_SLOT0 : InstrItinClass;
+def LD_tc_3or4stall_SLOT0 : InstrItinClass;
+def M_tc_1_SLOT23 : InstrItinClass;
+def M_tc_1or2_SLOT23 : InstrItinClass;
+def M_tc_2_SLOT23 : InstrItinClass;
+def M_tc_3_SLOT23 : InstrItinClass;
+def M_tc_3x_SLOT23 : InstrItinClass;
+def M_tc_3or4x_SLOT23 : InstrItinClass;
+def ST_tc_st_SLOT01 : InstrItinClass;
+def ST_tc_st_SLOT0 : InstrItinClass;
+def ST_tc_ld_SLOT0 : InstrItinClass;
+def ST_tc_3stall_SLOT0 : InstrItinClass;
+def S_2op_tc_1_SLOT23 : InstrItinClass;
+def S_2op_tc_2_SLOT23 : InstrItinClass;
+def S_2op_tc_2early_SLOT23 : InstrItinClass;
+def S_2op_tc_3or4x_SLOT23 : InstrItinClass;
+def S_3op_tc_1_SLOT23 : InstrItinClass;
+def S_3op_tc_1or2_SLOT23 : InstrItinClass;
+def S_3op_tc_2_SLOT23 : InstrItinClass;
+def S_3op_tc_2early_SLOT23 : InstrItinClass;
+def S_3op_tc_3_SLOT23 : InstrItinClass;
+def S_3op_tc_3x_SLOT23 : InstrItinClass;
+def NCJ_tc_3or4stall_SLOT0 : InstrItinClass;
+def V2LDST_tc_ld_SLOT01 : InstrItinClass;
+def V2LDST_tc_st_SLOT0 : InstrItinClass;
+def V2LDST_tc_st_SLOT01 : InstrItinClass;
+def V4LDST_tc_ld_SLOT01 : InstrItinClass;
+def V4LDST_tc_st_SLOT0 : InstrItinClass;
+def V4LDST_tc_st_SLOT01 : InstrItinClass;
+def J_tc_2early_SLOT0123 : InstrItinClass;
+def EXTENDER_tc_1_SLOT0123 : InstrItinClass;
+def S_3op_tc_3stall_SLOT23 : InstrItinClass;
+
+
+def HexagonItinerariesV4 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [
+ // ALU32
+ InstrItinData<ALU32_2op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_2op_tc_2early_SLOT0123,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2early_SLOT0123,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_ADDI_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // ALU64
+ InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_1or2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+
+ // CR -> System
+ InstrItinData<CR_tc_2_SLOT3 , [InstrStage<1, [SLOT3]>]>,
+ InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<1, [SLOT3]>]>,
+ InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<1, [SLOT3]>]>,
+
+ // Jump (conditional/unconditional/return etc)
+ // CR
+ InstrItinData<CR_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ // J
+ InstrItinData<J_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ // JR
+ InstrItinData<J_tc_2early_SLOT2 , [InstrStage<1, [SLOT2]>]>,
+
+ //Load
+ InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<LD_tc_3or4stall_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+
+ // M
+ InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_1or2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+
+ // Store
+ // ST
+ InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ // ST0
+ InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+
+ // S
+ InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_1or2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+
+ // SYS
+ InstrItinData<ST_tc_3stall_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+
+ // New Value Compare Jump
+ InstrItinData<NCJ_tc_3or4stall_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+
+ // Mem ops - MEM_V4
+ InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+ InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>]>,
+
+ // ENDLOOP
+ InstrItinData<J_tc_2early_SLOT0123 , [InstrStage<1, [SLOT_ENDLOOP]>]>,
+
+ // Extender/PREFIX
+ InstrItinData<EXTENDER_tc_1_SLOT0123,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [SLOT2, SLOT3]>]>
+ ]>;
+
+def HexagonModelV4 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV4;
+ let LoadLatency = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V4 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td
new file mode 100644
index 0000000..d9ad25d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td
@@ -0,0 +1,170 @@
+//=-HexagonScheduleV55.td - HexagonV55 Scheduling Definitions *- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// There are four SLOTS (four parallel pipelines) in the Hexagon V55 machine.
+// This file describes that machine information.
+
+//
+// |===========|==================================================|
+// | PIPELINE | Instruction Classes |
+// |===========|==================================================|
+// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM |
+// |-----------|--------------------------------------------------|
+// | SLOT1 | LD ST ALU32 |
+// |-----------|--------------------------------------------------|
+// | SLOT2 | XTYPE ALU32 J JR |
+// |-----------|--------------------------------------------------|
+// | SLOT3 | XTYPE ALU32 J CR |
+// |===========|==================================================|
+
+def CJ_tc_1_SLOT23 : InstrItinClass;
+def CJ_tc_2early_SLOT23 : InstrItinClass;
+def COPROC_VMEM_vtc_long_SLOT01 : InstrItinClass;
+def COPROC_VX_vtc_long_SLOT23 : InstrItinClass;
+def COPROC_VX_vtc_SLOT23 : InstrItinClass;
+def J_tc_3stall_SLOT2 : InstrItinClass;
+def MAPPING_tc_1_SLOT0123 : InstrItinClass;
+def M_tc_3stall_SLOT23 : InstrItinClass;
+def SUBINSN_tc_1_SLOT01 : InstrItinClass;
+def SUBINSN_tc_2early_SLOT0 : InstrItinClass;
+def SUBINSN_tc_2early_SLOT01 : InstrItinClass;
+def SUBINSN_tc_3stall_SLOT0 : InstrItinClass;
+def SUBINSN_tc_ld_SLOT0 : InstrItinClass;
+def SUBINSN_tc_ld_SLOT01 : InstrItinClass;
+def SUBINSN_tc_st_SLOT01 : InstrItinClass;
+
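+// Unlike the V4 itineraries, the InstrStage cycle counts below roughly track
+// the _tc_<n> latency class encoded in each itinerary class name.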
+def HexagonItinerariesV55 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [
+ // ALU32
+ InstrItinData<ALU32_2op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_2op_tc_2early_SLOT0123,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2_SLOT0123 ,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2early_SLOT0123,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_ADDI_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // ALU64
+ InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // CR -> System
+ InstrItinData<CR_tc_2_SLOT3 , [InstrStage<2, [SLOT3]>]>,
+ InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<2, [SLOT3]>]>,
+ InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<3, [SLOT3]>]>,
+
+ // Jump (conditional/unconditional/return etc)
+ InstrItinData<CR_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<CJ_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<CJ_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<J_tc_2early_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>,
+
+ // JR
+ InstrItinData<J_tc_2early_SLOT2 , [InstrStage<2, [SLOT2]>]>,
+ InstrItinData<J_tc_3stall_SLOT2 , [InstrStage<3, [SLOT2]>]>,
+
+ // Extender
+ InstrItinData<EXTENDER_tc_1_SLOT0123,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // Load
+ InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+
+ // M
+ InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // Store
+ InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+
+ // Subinsn
+ InstrItinData<SUBINSN_tc_2early_SLOT0, [InstrStage<2, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_1_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_2early_SLOT01,
+ [InstrStage<2, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+ // S
+ InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // New Value Compare Jump
+ InstrItinData<NCJ_tc_3or4stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+
+ // Mem ops
+ InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<2, [SLOT0, SLOT1]>]>,
+ InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+ // Endloop
+ InstrItinData<J_tc_2early_SLOT0123, [InstrStage<2, [SLOT_ENDLOOP]>]>,
+
+ // Vector
+ InstrItinData<COPROC_VMEM_vtc_long_SLOT01,
+ [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<COPROC_VX_vtc_long_SLOT23 ,
+ [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<COPROC_VX_vtc_SLOT23 ,
+ [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<MAPPING_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // Misc
+ InstrItinData<COMPOUND_CJ_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [SLOT2, SLOT3]>]>
+
+ ]>;
+
+def HexagonModelV55 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV55;
+ let LoadLatency = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V55 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
new file mode 100644
index 0000000..2ccff82
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
@@ -0,0 +1,310 @@
+//=-HexagonScheduleV60.td - HexagonV60 Scheduling Definitions *- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// CVI pipes from the "Hexagon Multimedia Co-Processor Extensions Arch Spec".
+def CVI_ST : FuncUnit;
+def CVI_XLANE : FuncUnit;
+def CVI_SHIFT : FuncUnit;
+def CVI_MPY0 : FuncUnit;
+def CVI_MPY1 : FuncUnit;
+def CVI_LD : FuncUnit;
+
+// Combined functional units.
+def CVI_XLSHF : FuncUnit;
+def CVI_MPY01 : FuncUnit;
+def CVI_ALL : FuncUnit;
+
+// Combined functional unit data.
+def HexagonComboFuncsV60 :
+ ComboFuncUnits<[
+ ComboFuncData<CVI_XLSHF , [CVI_XLANE, CVI_SHIFT]>,
+ ComboFuncData<CVI_MPY01 , [CVI_MPY0, CVI_MPY1]>,
+ ComboFuncData<CVI_ALL , [CVI_ST, CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1, CVI_LD]>
+ ]>;
+
+// Note: When adding additional vector scheduling classes, add the
+// corresponding methods to the class HexagonInstrInfo.
+def CVI_VA : InstrItinClass;
+def CVI_VA_DV : InstrItinClass;
+def CVI_VX_LONG : InstrItinClass;
+def CVI_VX_LATE : InstrItinClass;
+def CVI_VX : InstrItinClass;
+def CVI_VX_DV_LONG : InstrItinClass;
+def CVI_VX_DV : InstrItinClass;
+def CVI_VX_DV_SLOT2 : InstrItinClass;
+def CVI_VP : InstrItinClass;
+def CVI_VP_LONG : InstrItinClass;
+def CVI_VP_VS_EARLY : InstrItinClass;
+def CVI_VP_VS_LONG_EARLY : InstrItinClass;
+def CVI_VP_VS_LONG : InstrItinClass;
+def CVI_VP_VS : InstrItinClass;
+def CVI_VP_DV : InstrItinClass;
+def CVI_VS : InstrItinClass;
+def CVI_VINLANESAT : InstrItinClass;
+def CVI_VM_LD : InstrItinClass;
+def CVI_VM_TMP_LD : InstrItinClass;
+def CVI_VM_CUR_LD : InstrItinClass;
+def CVI_VM_VP_LDU : InstrItinClass;
+def CVI_VM_ST : InstrItinClass;
+def CVI_VM_NEW_ST : InstrItinClass;
+def CVI_VM_STU : InstrItinClass;
+def CVI_HIST : InstrItinClass;
+def CVI_VA_EXT : InstrItinClass;
+
+// There are four SLOTS (four parallel pipelines) in the Hexagon V60 machine.
+// This file describes that machine information.
+//
+// |===========|==================================================|
+// | PIPELINE | Instruction Classes |
+// |===========|==================================================|
+// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM |
+// |-----------|--------------------------------------------------|
+// | SLOT1 | LD ST ALU32 |
+// |-----------|--------------------------------------------------|
+// | SLOT2 | XTYPE ALU32 J JR |
+// |-----------|--------------------------------------------------|
+// | SLOT3 | XTYPE ALU32 J CR |
+// |===========|==================================================|
+//
+//
+// In addition to using the above SLOTS, there are also six vector pipelines
+// in the CVI co-processor in the Hexagon V60 machine.
+//
+// |=========| |=========| |=========| |=========| |=========| |=========|
+// SLOT | CVI_LD | |CVI_MPY3 | |CVI_MPY2 | |CVI_SHIFT| |CVI_XLANE| | CVI_ST |
+// ==== |=========| |=========| |=========| |=========| |=========| |=========|
+// S0-3 | | | CVI_VA | | CVI_VA | | CVI_VA | | CVI_VA | | |
+// S2-3 | | | CVI_VX | | CVI_VX | | | | | | |
+// S0-3 | | | | | | | | | CVI_VP | | |
+// S0-3 | | | | | | | CVI_VS | | | | |
+// S0-1 |(CVI_LD) | | CVI_LD | | CVI_LD | | CVI_LD | | CVI_LD | | |
+// S0-1 |(C*TMP_LD) | | | | | | | | | |
+// S01 |(C*_LDU) | | | | | | | | C*_LDU | | |
+// S0 | | | CVI_ST | | CVI_ST | | CVI_ST | | CVI_ST | |(CVI_ST) |
+// S0 | | | | | | | | | | |(C*TMP_ST)
+// S01 | | | | | | | | | VSTU | |(C*_STU) |
+// |=========| |=========| |=========| |=========| |=========| |=========|
+// |=====================| |=====================|
+// | CVI_MPY2 & CVI_MPY3 | |CVI_XLANE & CVI_SHIFT|
+// |=====================| |=====================|
+// S0-3 | CVI_VA_DV | | CVI_VA_DV |
+// S0-3 | | | CVI_VP_DV |
+// S2-3 | CVI_VX_DV | | |
+// |=====================| |=====================|
+// |=====================================================================|
+// S0-3 | CVI_HIST Histogram |
+// S0123| CVI_VA_EXT Extract |
+// |=====================================================================|
+
+def HexagonItinerariesV60 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
+ CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
+ CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL], [], [
+ // ALU32
+ InstrItinData<ALU32_2op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_2op_tc_2early_SLOT0123,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2_SLOT0123 ,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2early_SLOT0123,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_ADDI_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // ALU64
+ InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // CR -> System
+ InstrItinData<CR_tc_2_SLOT3 , [InstrStage<2, [SLOT3]>]>,
+ InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<2, [SLOT3]>]>,
+ InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<3, [SLOT3]>]>,
+
+ // Jump (conditional/unconditional/return etc)
+ InstrItinData<CR_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<CJ_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<CJ_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<J_tc_2early_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>,
+
+ // JR
+ InstrItinData<J_tc_2early_SLOT2 , [InstrStage<2, [SLOT2]>]>,
+ InstrItinData<J_tc_3stall_SLOT2 , [InstrStage<3, [SLOT2]>]>,
+
+ // Extender
+ InstrItinData<EXTENDER_tc_1_SLOT0123, [InstrStage<1,
+ [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // Load
+ InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<4, [SLOT0]>]>,
+ InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+
+ // M
+ InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<4, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // Store
+ InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+
+ // Subinsn
+ InstrItinData<SUBINSN_tc_2early_SLOT0, [InstrStage<2, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_1_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_2early_SLOT01,
+ [InstrStage<2, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+ // S
+ InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ // The S_2op_tc_3x_SLOT23 slots are 4 cycles on v60.
+ InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<4, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // New Value Compare Jump
+ InstrItinData<NCJ_tc_3or4stall_SLOT0, [InstrStage<4, [SLOT0]>]>,
+
+ // Mem ops
+ InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<2, [SLOT0, SLOT1]>]>,
+ InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+ // Endloop
+ InstrItinData<J_tc_2early_SLOT0123, [InstrStage<2, [SLOT_ENDLOOP]>]>,
+
+ // Vector
+ InstrItinData<COPROC_VMEM_vtc_long_SLOT01,
+ [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<COPROC_VX_vtc_long_SLOT23 ,
+ [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<COPROC_VX_vtc_SLOT23 ,
+ [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<MAPPING_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // Duplex and Compound
+ InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<COMPOUND_CJ_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ // Misc
+ InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDOM , [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [SLOT2, SLOT3]>]>,
+
+ // Latest CVI spec definitions.
+ InstrItinData<CVI_VA,[InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE,CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VA_DV,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF, CVI_MPY01]>]>,
+ InstrItinData<CVI_VX_LONG, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VX_LATE, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VX,[InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VX_DV_LONG,
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>]>,
+ InstrItinData<CVI_VX_DV,
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>]>,
+ InstrItinData<CVI_VX_DV_SLOT2,
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>]>,
+ InstrItinData<CVI_VP, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>]>,
+ InstrItinData<CVI_VP_LONG, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>]>,
+ InstrItinData<CVI_VP_VS_EARLY,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>]>,
+ InstrItinData<CVI_VP_VS_LONG,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>]>,
+ InstrItinData<CVI_VP_VS,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>]>,
+ InstrItinData<CVI_VP_VS_LONG_EARLY,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>]>,
+ InstrItinData<CVI_VP_DV , [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>]>,
+ InstrItinData<CVI_VS,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>]>,
+ InstrItinData<CVI_VINLANESAT,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>]>,
+ InstrItinData<CVI_VM_LD , [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VM_TMP_LD,[InstrStage<1,[SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>]>,
+ InstrItinData<CVI_VM_CUR_LD,[InstrStage<1,[SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VM_VP_LDU,[InstrStage<1,[SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>]>,
+ InstrItinData<CVI_VM_ST , [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VM_NEW_ST,[InstrStage<1,[SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>]>,
+ InstrItinData<CVI_VM_STU , [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>]>,
+ InstrItinData<CVI_HIST , [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>]>
+ ]>;
+
+def HexagonModelV60 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV60;
+ let LoadLatency = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V60 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td
new file mode 100644
index 0000000..d8feb89
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td
@@ -0,0 +1,121 @@
+//===-- HexagonSelectCCInfo.td - Selectcc mappings ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+//
+// selectcc mappings.
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETEQ)),
+ (i32 (MUX_rr (i1 (CMPEQrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETNE)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPEQrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETGT)),
+ (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETUGT)),
+ (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+
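+// Less-than is synthesized from the available greater-than compares:
+// (lhs < rhs) becomes !(lhs > rhs-1), and likewise for the unsigned form.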
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETULT)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs,
+ (ADD_ri IntRegs:$rhs, -1)))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETLT)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs,
+ (ADD_ri IntRegs:$rhs, -1)))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETLE)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETULE)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+//
+// selectcc mappings for greater-equal-to Rs => greater-than Rs-1.
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETGE)),
+ (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETUGE)),
+ (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+
+//
+// selectcc mappings for predicate comparisons.
+//
+// Convert Rd = selectcc(p0, p1, true_val, false_val, SETEQ) into:
+// pt = not(p1 xor p2)
+// Rd = mux(pt, true_val, false_val)
+// and similarly for SETNE
+//
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETNE)),
+ (i32 (MUX_rr (i1 (XOR_pp PredRegs:$lhs, PredRegs:$rhs)), IntRegs:$tval,
+ IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETEQ)),
+ (i32 (MUX_rr (i1 (NOT_p (XOR_pp PredRegs:$lhs, PredRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+//
+// selectcc mappings for 64-bit operands are messy. Hexagon does not have a
+// MUX64 operation, so use this:
+// selectcc(Rss, Rdd, tval, fval, cond) ->
+// combine(mux(cmp_cond(Rss, Rdd), tval.hi, fval.hi),
+// mux(cmp_cond(Rss, Rdd), tval.lo, fval.lo))
+
+// setgt-64.
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+ DoubleRegs:$fval, SETGT)),
+ (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+ (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
+
+
+// setlt-64 -> setgt-64.
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+ DoubleRegs:$fval, SETLT)),
+ (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs,
+ (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+ (MUX_rr (CMPGT64rr DoubleRegs:$lhs,
+ (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
new file mode 100644
index 0000000..239dbda
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -0,0 +1,63 @@
+//===-- HexagonSelectionDAGInfo.cpp - Hexagon SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HexagonSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-selectiondag-info"
+
+SDValue
+HexagonSelectionDAGInfo::
+EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
+ SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (AlwaysInline || (Align & 0x3) != 0 || !ConstantSize)
+ return SDValue();
+
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (SizeVal < 32 || (SizeVal % 8) != 0)
+ return SDValue();
+
+ // Special case aligned memcpys with size >= 32 bytes and a multiple of 8.
+ //
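+  // Rather than expanding the copy inline, emit a call to the specialized
+  // runtime routine named below, passing (dst, src, size) with the memcpy
+  // libcall calling convention and discarding the result.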
+ const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+ Entry.Node = Src;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ const char *SpecialMemcpyName =
+ "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getTargetExternalSymbol(
+ SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0)
+ .setDiscardResult();
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+ return CallResult.second;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
new file mode 100644
index 0000000..80ac5d7
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -0,0 +1,35 @@
+//===-- HexagonSelectionDAGInfo.h - Hexagon SelectionDAG Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Hexagon subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const override;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
new file mode 100644
index 0000000..10fe606
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -0,0 +1,168 @@
+//=== HexagonSplitConst32AndConst64.cpp - split CONST32/Const64 into HI/LO ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// When the compiler is invoked with no small data, for instance, with the -G0
+// command line option, then all CONST32_* opcodes should be broken down into
+// appropriate LO and HI instructions. This splitting is done by this pass.
+// The only reason this is not done in the DAG lowering itself is that there
+// is no simple way of getting the register allocator to allot the same hard
+// register to the result of LO and HI instructions. This pass is always
+// scheduled after register allocation.
+//
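+// As a rough sketch of the rewriting performed below:
+//   CONST32 Rd, #imm        =>  Rd = A2_tfrsi #imm
+//   CONST32 Rd, @blockaddr  =>  Rd = LO(@blockaddr); Rd = HI(@blockaddr)
+//   CONST64 Rdd, #imm       =>  Rdd.lo = A2_tfrsi #lo32(imm)
+//                               Rdd.hi = A2_tfrsi #hi32(imm)
+//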
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonTargetObjectFile.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <map>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "xfer"
+
+namespace llvm {
+ FunctionPass *createHexagonSplitConst32AndConst64();
+ void initializeHexagonSplitConst32AndConst64Pass(PassRegistry&);
+}
+
+namespace {
+
+class HexagonSplitConst32AndConst64 : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonSplitConst32AndConst64() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const override {
+ return "Hexagon Split Const32s and Const64s";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+};
+
+
+char HexagonSplitConst32AndConst64::ID = 0;
+
+
+bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
+
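+  // Splitting is only needed when small-data is unavailable (e.g. with -G0);
+  // otherwise the CONST32/CONST64 pseudos are handled elsewhere and the
+  // function is left untouched.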
+ const HexagonTargetObjectFile &TLOF =
+ *static_cast<const HexagonTargetObjectFile *>(
+ Fn.getTarget().getObjFileLowering());
+ if (TLOF.IsSmallDataEnabled())
+ return true;
+
+ const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+
+ // Loop over all of the basic blocks
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock *MBB = &*MBBb;
+ // Traverse the basic block
+ MachineBasicBlock::iterator MII = MBB->begin();
+ MachineBasicBlock::iterator MIE = MBB->end ();
+ while (MII != MIE) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (Opc == Hexagon::CONST32_Int_Real &&
+ MI->getOperand(1).isBlockAddress()) {
+ int DestReg = MI->getOperand(0).getReg();
+ MachineOperand &Symbol = MI->getOperand (1);
+
+ BuildMI (*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::LO), DestReg).addOperand(Symbol);
+ BuildMI (*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::HI), DestReg).addOperand(Symbol);
+ // MBB->erase returns the iterator to the next instruction, which is the
+ // one we want to process next
+ MII = MBB->erase (MI);
+ continue;
+ }
+
+ else if (Opc == Hexagon::CONST32_Int_Real ||
+ Opc == Hexagon::CONST32_Float_Real) {
+ int DestReg = MI->getOperand(0).getReg();
+
+ // We have to convert an FP immediate into its corresponding integer
+ // representation
+ int64_t ImmValue;
+ if (Opc == Hexagon::CONST32_Float_Real) {
+ APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF();
+ ImmValue = *Val.bitcastToAPInt().getRawData();
+ }
+ else
+ ImmValue = MI->getOperand(1).getImm();
+
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::A2_tfrsi), DestReg).addImm(ImmValue);
+ MII = MBB->erase (MI);
+ continue;
+ }
+ else if (Opc == Hexagon::CONST64_Int_Real ||
+ Opc == Hexagon::CONST64_Float_Real) {
+ int DestReg = MI->getOperand(0).getReg();
+
+ // We have to convert an FP immediate into its corresponding integer
+ // representation
+ int64_t ImmValue;
+ if (Opc == Hexagon::CONST64_Float_Real) {
+ APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF();
+ ImmValue = *Val.bitcastToAPInt().getRawData();
+ }
+ else
+ ImmValue = MI->getOperand(1).getImm();
+
+ unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::subreg_loreg);
+ unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::subreg_hireg);
+
+ int32_t LowWord = (ImmValue & 0xFFFFFFFF);
+ int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF;
+
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::A2_tfrsi), DestLo).addImm(LowWord);
+ BuildMI (*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::A2_tfrsi), DestHi).addImm(HighWord);
+ MII = MBB->erase (MI);
+ continue;
+ }
+ ++MII;
+ }
+ }
+
+ return true;
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *
+llvm::createHexagonSplitConst32AndConst64() {
+ return new HexagonSplitConst32AndConst64();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
new file mode 100644
index 0000000..d4e95b0d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -0,0 +1,1209 @@
+//===--- HexagonSplitDouble.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hsdr"
+
+#include "HexagonRegisterInfo.h"
+#include "HexagonTargetMachine.h"
+
+#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+namespace llvm {
+ FunctionPass *createHexagonSplitDoubleRegs();
+ void initializeHexagonSplitDoubleRegsPass(PassRegistry&);
+}
+
+namespace {
+ static cl::opt<int> MaxHSDR("max-hsdr", cl::Hidden, cl::init(-1),
+ cl::desc("Maximum number of split partitions"));
+ static cl::opt<bool> MemRefsFixed("hsdr-no-mem", cl::Hidden, cl::init(true),
+ cl::desc("Do not split loads or stores"));
+
+ class HexagonSplitDoubleRegs : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonSplitDoubleRegs() : MachineFunctionPass(ID), TRI(nullptr),
+ TII(nullptr) {
+ initializeHexagonSplitDoubleRegsPass(*PassRegistry::getPassRegistry());
+ }
+ const char *getPassName() const override {
+ return "Hexagon Split Double Registers";
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ static const TargetRegisterClass *const DoubleRC;
+
+ const HexagonRegisterInfo *TRI;
+ const HexagonInstrInfo *TII;
+ const MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+
+ typedef std::set<unsigned> USet;
+ typedef std::map<unsigned,USet> UUSetMap;
+ typedef std::pair<unsigned,unsigned> UUPair;
+ typedef std::map<unsigned,UUPair> UUPairMap;
+ typedef std::map<const MachineLoop*,USet> LoopRegMap;
+
+ bool isInduction(unsigned Reg, LoopRegMap &IRM) const;
+ bool isVolatileInstr(const MachineInstr *MI) const;
+ bool isFixedInstr(const MachineInstr *MI) const;
+ void partitionRegisters(UUSetMap &P2Rs);
+ int32_t profit(const MachineInstr *MI) const;
+ bool isProfitable(const USet &Part, LoopRegMap &IRM) const;
+
+ void collectIndRegsForLoop(const MachineLoop *L, USet &Rs);
+ void collectIndRegs(LoopRegMap &IRM);
+
+ void createHalfInstr(unsigned Opc, MachineInstr *MI,
+ const UUPairMap &PairMap, unsigned SubR);
+ void splitMemRef(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitImmediate(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitCombine(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitExt(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitShift(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitAslOr(MachineInstr *MI, const UUPairMap &PairMap);
+ bool splitInstr(MachineInstr *MI, const UUPairMap &PairMap);
+ void replaceSubregUses(MachineInstr *MI, const UUPairMap &PairMap);
+ void collapseRegPairs(MachineInstr *MI, const UUPairMap &PairMap);
+ bool splitPartition(const USet &Part);
+
+ static int Counter;
+ static void dump_partition(raw_ostream&, const USet&,
+ const TargetRegisterInfo&);
+ };
+ char HexagonSplitDoubleRegs::ID;
+ int HexagonSplitDoubleRegs::Counter = 0;
+ const TargetRegisterClass *const HexagonSplitDoubleRegs::DoubleRC
+ = &Hexagon::DoubleRegsRegClass;
+}
+
+INITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double",
+ "Hexagon Split Double Registers", false, false)
+
+
+static inline uint32_t getRegState(const MachineOperand &R) {
+ assert(R.isReg());
+ return getDefRegState(R.isDef()) |
+ getImplRegState(R.isImplicit()) |
+ getKillRegState(R.isKill()) |
+ getDeadRegState(R.isDead()) |
+ getUndefRegState(R.isUndef()) |
+ getInternalReadRegState(R.isInternalRead()) |
+ (R.isDebug() ? RegState::Debug : 0);
+}
+
+
+void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os,
+ const USet &Part, const TargetRegisterInfo &TRI) {
+ dbgs() << '{';
+ for (auto I : Part)
+ dbgs() << ' ' << PrintReg(I, &TRI);
+ dbgs() << " }";
+}
+
+
+bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const {
+ for (auto I : IRM) {
+ const USet &Rs = I.second;
+ if (Rs.find(Reg) != Rs.end())
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const {
+ for (auto &I : MI->memoperands())
+ if (I->isVolatile())
+ return true;
+ return false;
+}
+
+
+bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const {
+ if (MI->mayLoad() || MI->mayStore())
+ if (MemRefsFixed || isVolatileInstr(MI))
+ return true;
+ if (MI->isDebugValue())
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ return true;
+
+ case TargetOpcode::PHI:
+ case TargetOpcode::COPY:
+ break;
+
+ case Hexagon::L2_loadrd_io:
+    // Not handling stack loads (only reg-based addresses).
+ if (MI->getOperand(1).isReg())
+ break;
+ return true;
+ case Hexagon::S2_storerd_io:
+ // Not handling stack stores (only reg-based addresses).
+ if (MI->getOperand(0).isReg())
+ break;
+ return true;
+ case Hexagon::L2_loadrd_pi:
+ case Hexagon::S2_storerd_pi:
+
+ case Hexagon::A2_tfrpi:
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineir:
+ case Hexagon::A4_combineii:
+ case Hexagon::A4_combineri:
+ case Hexagon::A2_combinew:
+ case Hexagon::CONST64_Int_Real:
+
+ case Hexagon::A2_sxtw:
+
+ case Hexagon::A2_andp:
+ case Hexagon::A2_orp:
+ case Hexagon::A2_xorp:
+ case Hexagon::S2_asl_i_p_or:
+ case Hexagon::S2_asl_i_p:
+ case Hexagon::S2_asr_i_p:
+ case Hexagon::S2_lsr_i_p:
+ break;
+ }
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ return true;
+ }
+ return false;
+}
+
+
+void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
+ typedef std::map<unsigned,unsigned> UUMap;
+ typedef std::vector<unsigned> UVect;
+
+ unsigned NumRegs = MRI->getNumVirtRegs();
+ BitVector DoubleRegs(NumRegs);
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ unsigned R = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->getRegClass(R) == DoubleRC)
+ DoubleRegs.set(i);
+ }
+
+ BitVector FixedRegs(NumRegs);
+ for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
+ unsigned R = TargetRegisterInfo::index2VirtReg(x);
+ MachineInstr *DefI = MRI->getVRegDef(R);
+ // In some cases a register may exist, but never be defined or used.
+ // It should never appear anywhere, but mark it as "fixed", just to be
+ // safe.
+ if (!DefI || isFixedInstr(DefI))
+ FixedRegs.set(x);
+ }
+
+ UUSetMap AssocMap;
+ for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
+ if (FixedRegs[x])
+ continue;
+ unsigned R = TargetRegisterInfo::index2VirtReg(x);
+ DEBUG(dbgs() << PrintReg(R, TRI) << " ~~");
+ USet &Asc = AssocMap[R];
+ for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end();
+ U != Z; ++U) {
+ MachineOperand &Op = *U;
+ MachineInstr *UseI = Op.getParent();
+ if (isFixedInstr(UseI))
+ continue;
+ for (unsigned i = 0, n = UseI->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = UseI->getOperand(i);
+ // Skip non-registers or registers with subregisters.
+ if (&MO == &Op || !MO.isReg() || MO.getSubReg())
+ continue;
+ unsigned T = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(T)) {
+ FixedRegs.set(x);
+ continue;
+ }
+ if (MRI->getRegClass(T) != DoubleRC)
+ continue;
+ unsigned u = TargetRegisterInfo::virtReg2Index(T);
+ if (FixedRegs[u])
+ continue;
+ DEBUG(dbgs() << ' ' << PrintReg(T, TRI));
+ Asc.insert(T);
+ // Make it symmetric.
+ AssocMap[T].insert(R);
+ }
+ }
+ DEBUG(dbgs() << '\n');
+ }
+
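+  // Assign the double registers to partitions by collecting the connected
+  // components of the association map built above; partition 0 groups the
+  // registers pinned by fixed instructions.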
+ UUMap R2P;
+ unsigned NextP = 1;
+ USet Visited;
+ for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
+ unsigned R = TargetRegisterInfo::index2VirtReg(x);
+ if (Visited.count(R))
+ continue;
+ // Create a new partition for R.
+ unsigned ThisP = FixedRegs[x] ? 0 : NextP++;
+ UVect WorkQ;
+ WorkQ.push_back(R);
+ for (unsigned i = 0; i < WorkQ.size(); ++i) {
+ unsigned T = WorkQ[i];
+ if (Visited.count(T))
+ continue;
+ R2P[T] = ThisP;
+ Visited.insert(T);
+ // Add all registers associated with T.
+ USet &Asc = AssocMap[T];
+ for (USet::iterator J = Asc.begin(), F = Asc.end(); J != F; ++J)
+ WorkQ.push_back(*J);
+ }
+ }
+
+ for (auto I : R2P)
+ P2Rs[I.second].insert(I.first);
+}
+
+
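+// Heuristic benefit of rewriting a 64-bit immediate as two 32-bit transfers:
+// a half that is all zeros or all ones is cheap on its own, and two identical
+// (but otherwise arbitrary) halves are still slightly better than unrelated
+// ones.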
+static inline int32_t profitImm(unsigned Lo, unsigned Hi) {
+ int32_t P = 0;
+ bool LoZ1 = false, HiZ1 = false;
+ if (Lo == 0 || Lo == 0xFFFFFFFF)
+ P += 10, LoZ1 = true;
+ if (Hi == 0 || Hi == 0xFFFFFFFF)
+ P += 10, HiZ1 = true;
+ if (!LoZ1 && !HiZ1 && Lo == Hi)
+ P += 3;
+ return P;
+}
+
+
+int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
+ unsigned ImmX = 0;
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case TargetOpcode::PHI:
+ for (const auto &Op : MI->operands())
+ if (!Op.getSubReg())
+ return 0;
+ return 10;
+ case TargetOpcode::COPY:
+ if (MI->getOperand(1).getSubReg() != 0)
+ return 10;
+ return 0;
+
+ case Hexagon::L2_loadrd_io:
+ case Hexagon::S2_storerd_io:
+ return -1;
+ case Hexagon::L2_loadrd_pi:
+ case Hexagon::S2_storerd_pi:
+ return 2;
+
+ case Hexagon::A2_tfrpi:
+ case Hexagon::CONST64_Int_Real: {
+ uint64_t D = MI->getOperand(1).getImm();
+ unsigned Lo = D & 0xFFFFFFFFULL;
+ unsigned Hi = D >> 32;
+ return profitImm(Lo, Hi);
+ }
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineii:
+ return profitImm(MI->getOperand(1).getImm(),
+ MI->getOperand(2).getImm());
+ case Hexagon::A4_combineri:
+ ImmX++;
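+    // Fall through into A4_combineir: the two differ only in which operand
+    // carries the immediate (tracked by ImmX).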
+ case Hexagon::A4_combineir: {
+ ImmX++;
+ int64_t V = MI->getOperand(ImmX).getImm();
+ if (V == 0 || V == -1)
+ return 10;
+ // Fall through into A2_combinew.
+ }
+ case Hexagon::A2_combinew:
+ return 2;
+
+ case Hexagon::A2_sxtw:
+ return 3;
+
+ case Hexagon::A2_andp:
+ case Hexagon::A2_orp:
+ case Hexagon::A2_xorp:
+ return 1;
+
+ case Hexagon::S2_asl_i_p_or: {
+ unsigned S = MI->getOperand(3).getImm();
+ if (S == 0 || S == 32)
+ return 10;
+ return -1;
+ }
+ case Hexagon::S2_asl_i_p:
+ case Hexagon::S2_asr_i_p:
+ case Hexagon::S2_lsr_i_p:
+ unsigned S = MI->getOperand(2).getImm();
+ if (S == 0 || S == 32)
+ return 10;
+ if (S == 16)
+ return 5;
+ if (S == 48)
+ return 7;
+ return -10;
+ }
+
+ return 0;
+}
+
+
+bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM)
+ const {
+ unsigned FixedNum = 0, SplitNum = 0, LoopPhiNum = 0;
+ int32_t TotalP = 0;
+
+ for (unsigned DR : Part) {
+ MachineInstr *DefI = MRI->getVRegDef(DR);
+ int32_t P = profit(DefI);
+ if (P == INT_MIN)
+ return false;
+ TotalP += P;
+ // Reduce the profitability of splitting induction registers.
+ if (isInduction(DR, IRM))
+ TotalP -= 30;
+
+ for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
+ U != W; ++U) {
+ MachineInstr *UseI = U->getParent();
+ if (isFixedInstr(UseI)) {
+ FixedNum++;
+ // Calculate the cost of generating REG_SEQUENCE instructions.
+ for (auto &Op : UseI->operands()) {
+ if (Op.isReg() && Part.count(Op.getReg()))
+ if (Op.getSubReg())
+ TotalP -= 2;
+ }
+ continue;
+ }
+ // If a register from this partition is used in a fixed instruction,
+ // and there is also a register in this partition that is used in
+ // a loop phi node, then decrease the splitting profit as this can
+ // confuse the modulo scheduler.
+ if (UseI->isPHI()) {
+ const MachineBasicBlock *PB = UseI->getParent();
+ const MachineLoop *L = MLI->getLoopFor(PB);
+ if (L && L->getHeader() == PB)
+ LoopPhiNum++;
+ }
+ // Splittable instruction.
+ SplitNum++;
+ int32_t P = profit(UseI);
+ if (P == INT_MIN)
+ return false;
+ TotalP += P;
+ }
+ }
+
+ if (FixedNum > 0 && LoopPhiNum > 0)
+ TotalP -= 20*LoopPhiNum;
+
+ DEBUG(dbgs() << "Partition profit: " << TotalP << '\n');
+ return TotalP > 0;
+}
+
+
+void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
+ USet &Rs) {
+ const MachineBasicBlock *HB = L->getHeader();
+ const MachineBasicBlock *LB = L->getLoopLatch();
+ if (!HB || !LB)
+ return;
+
+ // Examine the latch branch. Expect it to be a conditional branch to
+ // the header (either "br-cond header" or "br-cond exit; br header").
+ MachineBasicBlock *TB = 0, *FB = 0;
+ MachineBasicBlock *TmpLB = const_cast<MachineBasicBlock*>(LB);
+ SmallVector<MachineOperand,2> Cond;
+ bool BadLB = TII->AnalyzeBranch(*TmpLB, TB, FB, Cond, false);
+ // Only analyzable conditional branches. HII::AnalyzeBranch will put
+ // the branch opcode as the first element of Cond, and the predicate
+ // operand as the second.
+ if (BadLB || Cond.size() != 2)
+ return;
+ // Only simple jump-conditional (with or without negation).
+ if (!TII->PredOpcodeHasJMP_c(Cond[0].getImm()))
+ return;
+ // Must go to the header.
+ if (TB != HB && FB != HB)
+ return;
+ assert(Cond[1].isReg() && "Unexpected Cond vector from AnalyzeBranch");
+ // Expect a predicate register.
+ unsigned PR = Cond[1].getReg();
+ assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass);
+
+ // Get the registers on which the loop controlling compare instruction
+ // depends.
+ unsigned CmpR1 = 0, CmpR2 = 0;
+ const MachineInstr *CmpI = MRI->getVRegDef(PR);
+ while (CmpI->getOpcode() == Hexagon::C2_not)
+ CmpI = MRI->getVRegDef(CmpI->getOperand(1).getReg());
+
+ int Mask = 0, Val = 0;
+ bool OkCI = TII->analyzeCompare(CmpI, CmpR1, CmpR2, Mask, Val);
+ if (!OkCI)
+ return;
+ // Eliminate non-double input registers.
+ if (CmpR1 && MRI->getRegClass(CmpR1) != DoubleRC)
+ CmpR1 = 0;
+ if (CmpR2 && MRI->getRegClass(CmpR2) != DoubleRC)
+ CmpR2 = 0;
+ if (!CmpR1 && !CmpR2)
+ return;
+
+ // Now examine the top of the loop: the phi nodes that could potentially
+ // define loop induction registers. The registers defined by such a phi
+ // node would be used in a 64-bit add, which then would be used in the
+ // loop compare instruction.
+
+ // Get the set of all double registers defined by phi nodes in the
+ // loop header.
+ typedef std::vector<unsigned> UVect;
+ UVect DP;
+ for (auto &MI : *HB) {
+ if (!MI.isPHI())
+ break;
+ const MachineOperand &MD = MI.getOperand(0);
+ unsigned R = MD.getReg();
+ if (MRI->getRegClass(R) == DoubleRC)
+ DP.push_back(R);
+ }
+ if (DP.empty())
+ return;
+
+ auto NoIndOp = [this, CmpR1, CmpR2] (unsigned R) -> bool {
+ for (auto I = MRI->use_nodbg_begin(R), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ const MachineInstr *UseI = I->getParent();
+ if (UseI->getOpcode() != Hexagon::A2_addp)
+ continue;
+ // Get the output from the add. If it is one of the inputs to the
+ // loop-controlling compare instruction, then R is likely an
+ // induction register.
+ unsigned T = UseI->getOperand(0).getReg();
+ if (T == CmpR1 || T == CmpR2)
+ return false;
+ }
+ return true;
+ };
+ UVect::iterator End = std::remove_if(DP.begin(), DP.end(), NoIndOp);
+ Rs.insert(DP.begin(), End);
+ Rs.insert(CmpR1);
+ Rs.insert(CmpR2);
+
+ DEBUG({
+ dbgs() << "For loop at BB#" << HB->getNumber() << " ind regs: ";
+ dump_partition(dbgs(), Rs, *TRI);
+ dbgs() << '\n';
+ });
+}
+
+
+void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) {
+ typedef std::vector<MachineLoop*> LoopVector;
+ LoopVector WorkQ;
+
+ for (auto I : *MLI)
+ WorkQ.push_back(I);
+ for (unsigned i = 0; i < WorkQ.size(); ++i) {
+ for (auto I : *WorkQ[i])
+ WorkQ.push_back(I);
+ }
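+ // The traversal above visits the loop tree breadth-first, so a nest
+ // A { B { C } } contributes A, B and C to WorkQ.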
+
+ USet Rs;
+ for (unsigned i = 0, n = WorkQ.size(); i < n; ++i) {
+ MachineLoop *L = WorkQ[i];
+ Rs.clear();
+ collectIndRegsForLoop(L, Rs);
+ if (!Rs.empty())
+ IRM.insert(std::make_pair(L, Rs));
+ }
+}
+
+
+void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI,
+ const UUPairMap &PairMap, unsigned SubR) {
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineInstr *NewI = BuildMI(B, MI, DL, TII->get(Opc));
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg()) {
+ NewI->addOperand(Op);
+ continue;
+ }
+ // For register operands, set the subregister.
+ unsigned R = Op.getReg();
+ unsigned SR = Op.getSubReg();
+ bool isVirtReg = TargetRegisterInfo::isVirtualRegister(R);
+ bool isKill = Op.isKill();
+ if (isVirtReg && MRI->getRegClass(R) == DoubleRC) {
+ isKill = false;
+ UUPairMap::const_iterator F = PairMap.find(R);
+ if (F == PairMap.end()) {
+ SR = SubR;
+ } else {
+ const UUPair &P = F->second;
+ R = (SubR == Hexagon::subreg_loreg) ? P.first : P.second;
+ SR = 0;
+ }
+ }
+ auto CO = MachineOperand::CreateReg(R, Op.isDef(), Op.isImplicit(), isKill,
+ Op.isDead(), Op.isUndef(), Op.isEarlyClobber(), SR, Op.isDebug(),
+ Op.isInternalRead());
+ NewI->addOperand(CO);
+ }
+}
+
+
+void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ bool Load = MI->mayLoad();
+ unsigned OrigOpc = MI->getOpcode();
+ bool PostInc = (OrigOpc == Hexagon::L2_loadrd_pi ||
+ OrigOpc == Hexagon::S2_storerd_pi);
+ MachineInstr *LowI, *HighI;
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Index of the base-address-register operand.
+ unsigned AdrX = PostInc ? (Load ? 2 : 1)
+ : (Load ? 1 : 0);
+ MachineOperand &AdrOp = MI->getOperand(AdrX);
+ unsigned RSA = getRegState(AdrOp);
+ MachineOperand &ValOp = Load ? MI->getOperand(0)
+ : (PostInc ? MI->getOperand(3)
+ : MI->getOperand(2));
+ UUPairMap::const_iterator F = PairMap.find(ValOp.getReg());
+ assert(F != PairMap.end());
+
+ if (Load) {
+ const UUPair &P = F->second;
+ int64_t Off = PostInc ? 0 : MI->getOperand(2).getImm();
+ LowI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.first)
+ .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
+ .addImm(Off);
+ HighI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.second)
+ .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
+ .addImm(Off+4);
+ } else {
+ const UUPair &P = F->second;
+ int64_t Off = PostInc ? 0 : MI->getOperand(1).getImm();
+ LowI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io))
+ .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
+ .addImm(Off)
+ .addReg(P.first);
+ HighI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io))
+ .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
+ .addImm(Off+4)
+ .addReg(P.second);
+ }
+
+ if (PostInc) {
+ // Create the increment of the address register.
+ int64_t Inc = Load ? MI->getOperand(3).getImm()
+ : MI->getOperand(2).getImm();
+ MachineOperand &UpdOp = Load ? MI->getOperand(1) : MI->getOperand(0);
+ const TargetRegisterClass *RC = MRI->getRegClass(UpdOp.getReg());
+ unsigned NewR = MRI->createVirtualRegister(RC);
+ assert(!UpdOp.getSubReg() && "Def operand with subreg");
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_addi), NewR)
+ .addReg(AdrOp.getReg(), RSA)
+ .addImm(Inc);
+ MRI->replaceRegWith(UpdOp.getReg(), NewR);
+ // The original instruction will be deleted later.
+ }
+
+ // Generate a new pair of memory-operands.
+ MachineFunction &MF = *B.getParent();
+ for (auto &MO : MI->memoperands()) {
+ const MachinePointerInfo &Ptr = MO->getPointerInfo();
+ unsigned F = MO->getFlags();
+ int A = MO->getAlignment();
+
+ auto *Tmp1 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, A);
+ LowI->addMemOperand(MF, Tmp1);
+ auto *Tmp2 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, std::min(A, 4));
+ HighI->addMemOperand(MF, Tmp2);
+ }
+}
+
+
+void HexagonSplitDoubleRegs::splitImmediate(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg() && Op1.isImm());
+ uint64_t V = Op1.getImm();
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+
+ // The operand to A2_tfrsi can only have 32 significant bits. Immediate
+ // values in MachineOperand are stored as 64-bit integers, and so the
+ // value -1 may be represented either as 64-bit -1, or 4294967295. Both
+ // will have the 32 higher bits truncated in the end, but -1 will remain
+ // as -1, while the latter may appear to be a large unsigned value
+ // requiring a constant extender. The casting to int32_t will select the
+ // former representation. (The same reasoning applies to all 32-bit
+ // values.)
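+ // For example, for V = 0xFFFFFFFFFFFFFFFF both halves below become the
+ // int32_t value -1, which A2_tfrsi can take without a constant extender.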
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first)
+ .addImm(int32_t(V & 0xFFFFFFFFULL));
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second)
+ .addImm(int32_t(V >> 32));
+}
+
+
+void HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ assert(Op0.isReg());
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+
+ if (Op1.isImm()) {
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second)
+ .addImm(Op1.getImm());
+ } else if (Op1.isReg()) {
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.second)
+ .addReg(Op1.getReg(), getRegState(Op1), Op1.getSubReg());
+ } else
+ llvm_unreachable("Unexpected operand");
+
+ if (Op2.isImm()) {
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first)
+ .addImm(Op2.getImm());
+ } else if (Op2.isReg()) {
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first)
+ .addReg(Op2.getReg(), getRegState(Op2), Op2.getSubReg());
+ } else
+ llvm_unreachable("Unexpected operand");
+}
+
+
+void HexagonSplitDoubleRegs::splitExt(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg() && Op1.isReg());
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+ unsigned RS = getRegState(Op1);
+
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, Op1.getSubReg());
+ BuildMI(B, MI, DL, TII->get(Hexagon::S2_asr_i_r), P.second)
+ .addReg(Op1.getReg(), RS, Op1.getSubReg())
+ .addImm(31);
+}
+
+
+void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ assert(Op0.isReg() && Op1.isReg() && Op2.isImm());
+ int64_t Sh64 = Op2.getImm();
+ assert(Sh64 >= 0 && Sh64 < 64);
+ unsigned S = Sh64;
+
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+ unsigned LoR = P.first;
+ unsigned HiR = P.second;
+ using namespace Hexagon;
+
+ unsigned Opc = MI->getOpcode();
+ bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p);
+ bool Left = !Right;
+ bool Signed = (Opc == S2_asr_i_p);
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned RS = getRegState(Op1);
+ unsigned ShiftOpc = Left ? S2_asl_i_r
+ : (Signed ? S2_asr_i_r : S2_lsr_i_r);
+ unsigned LoSR = subreg_loreg;
+ unsigned HiSR = subreg_hireg;
+
+ if (S == 0) {
+ // No shift, subregister copy.
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), HiR)
+ .addReg(Op1.getReg(), RS, HiSR);
+ } else if (S < 32) {
+ const TargetRegisterClass *IntRC = &IntRegsRegClass;
+ unsigned TmpR = MRI->createVirtualRegister(IntRC);
+ // Expansion:
+ // Shift left: DR = shl R, #s
+ // LoR = shl R.lo, #s
+ // TmpR = extractu R.lo, #s, #32-s
+ // HiR = or (TmpR, asl(R.hi, #s))
+ // Shift right: DR = shr R, #s
+ // HiR = shr R.hi, #s
+ // TmpR = shr R.lo, #s
+ // LoR = insert TmpR, R.hi, #s, #32-s
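+ // A concrete left-shift instance (for illustration), with #s = 8:
+ // LoR = asl(R.lo, #8)
+ // TmpR = extractu(R.lo, #8, #24) ; top 8 bits of R.lo
+ // HiR = or(TmpR, asl(R.hi, #8))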
+
+ // Shift left:
+ // LoR = shl R.lo, #s
+ // Shift right:
+ // TmpR = shr R.lo, #s
+
+ // Make a special case for A2_aslh and A2_asrh (they are predicable as
+ // opposed to S2_asl_i_r/S2_asr_i_r).
+ if (S == 16 && Left)
+ BuildMI(B, MI, DL, TII->get(A2_aslh), LoR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
+ else if (S == 16 && Signed)
+ BuildMI(B, MI, DL, TII->get(A2_asrh), TmpR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
+ else
+ BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? LoR : TmpR))
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR)
+ .addImm(S);
+
+ if (Left) {
+ // TmpR = extractu R.lo, #s, #32-s
+ BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR)
+ .addImm(S)
+ .addImm(32-S);
+ // HiR = or (TmpR, asl(R.hi, #s))
+ BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
+ .addReg(TmpR)
+ .addReg(Op1.getReg(), RS, HiSR)
+ .addImm(S);
+ } else {
+ // HiR = shr R.hi, #s
+ BuildMI(B, MI, DL, TII->get(ShiftOpc), HiR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR)
+ .addImm(S);
+ // LoR = insert TmpR, R.hi, #s, #32-s
+ BuildMI(B, MI, DL, TII->get(S2_insert), LoR)
+ .addReg(TmpR)
+ .addReg(Op1.getReg(), RS, HiSR)
+ .addImm(S)
+ .addImm(32-S);
+ }
+ } else if (S == 32) {
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), (Left ? HiR : LoR))
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR));
+ if (!Signed)
+ BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR))
+ .addImm(0);
+ else // Must be right shift.
+ BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR)
+ .addReg(Op1.getReg(), RS, HiSR)
+ .addImm(31);
+ } else if (S < 64) {
+ S -= 32;
+ if (S == 16 && Left)
+ BuildMI(B, MI, DL, TII->get(A2_aslh), HiR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
+ else if (S == 16 && Signed)
+ BuildMI(B, MI, DL, TII->get(A2_asrh), LoR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR);
+ else
+ BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? HiR : LoR))
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR))
+ .addImm(S);
+
+ if (Signed)
+ BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR)
+ .addReg(Op1.getReg(), RS, HiSR)
+ .addImm(31);
+ else
+ BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR))
+ .addImm(0);
+ }
+}
+
+
+void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op3 = MI->getOperand(3);
+ assert(Op0.isReg() && Op1.isReg() && Op2.isReg() && Op3.isImm());
+ int64_t Sh64 = Op3.getImm();
+ assert(Sh64 >= 0 && Sh64 < 64);
+ unsigned S = Sh64;
+
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+ unsigned LoR = P.first;
+ unsigned HiR = P.second;
+ using namespace Hexagon;
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned RS1 = getRegState(Op1);
+ unsigned RS2 = getRegState(Op2);
+ const TargetRegisterClass *IntRC = &IntRegsRegClass;
+
+ unsigned LoSR = subreg_loreg;
+ unsigned HiSR = subreg_hireg;
+
+ // Op0 = S2_asl_i_p_or Op1, Op2, Op3
+ // means: Op0 = or (Op1, asl(Op2, Op3))
+
+ // Expansion of
+ // DR = or (R1, asl(R2, #s))
+ //
+ // LoR = or (R1.lo, asl(R2.lo, #s))
+ // Tmp1 = extractu R2.lo, #s, #32-s
+ // Tmp2 = or R1.hi, Tmp1
+ // HiR = or (Tmp2, asl(R2.hi, #s))
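+ // For illustration, with #s = 4 this becomes:
+ // LoR = or(R1.lo, asl(R2.lo, #4))
+ // Tmp1 = extractu(R2.lo, #4, #28) ; top 4 bits of R2.lo
+ // Tmp2 = or(R1.hi, Tmp1)
+ // HiR = or(Tmp2, asl(R2.hi, #4))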
+
+ if (S == 0) {
+ // DR = or (R1, asl(R2, #0))
+ // -> or (R1, R2)
+ // i.e. LoR = or R1.lo, R2.lo
+ // HiR = or R1.hi, R2.hi
+ BuildMI(B, MI, DL, TII->get(A2_or), LoR)
+ .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR)
+ .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR);
+ BuildMI(B, MI, DL, TII->get(A2_or), HiR)
+ .addReg(Op1.getReg(), RS1, HiSR)
+ .addReg(Op2.getReg(), RS2, HiSR);
+ } else if (S < 32) {
+ BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), LoR)
+ .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR)
+ .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR)
+ .addImm(S);
+ unsigned TmpR1 = MRI->createVirtualRegister(IntRC);
+ BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR1)
+ .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR)
+ .addImm(S)
+ .addImm(32-S);
+ unsigned TmpR2 = MRI->createVirtualRegister(IntRC);
+ BuildMI(B, MI, DL, TII->get(A2_or), TmpR2)
+ .addReg(Op1.getReg(), RS1, HiSR)
+ .addReg(TmpR1);
+ BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
+ .addReg(TmpR2)
+ .addReg(Op2.getReg(), RS2, HiSR)
+ .addImm(S);
+ } else if (S == 32) {
+ // DR = or (R1, asl(R2, #32))
+ // -> or R1, R2.lo
+ // LoR = R1.lo
+ // HiR = or R1.hi, R2.lo
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
+ .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR);
+ BuildMI(B, MI, DL, TII->get(A2_or), HiR)
+ .addReg(Op1.getReg(), RS1, HiSR)
+ .addReg(Op2.getReg(), RS2, LoSR);
+ } else if (S < 64) {
+ // DR = or (R1, asl(R2, #s))
+ //
+ // LoR = R1:lo
+ // HiR = or (R1:hi, asl(R2:lo, #s-32))
+ S -= 32;
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
+ .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR);
+ BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
+ .addReg(Op1.getReg(), RS1, HiSR)
+ .addReg(Op2.getReg(), RS2, LoSR)
+ .addImm(S);
+ }
+}
+
+
+bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ DEBUG(dbgs() << "Splitting: " << *MI);
+ bool Split = false;
+ unsigned Opc = MI->getOpcode();
+ using namespace Hexagon;
+
+ switch (Opc) {
+ case TargetOpcode::PHI:
+ case TargetOpcode::COPY: {
+ unsigned DstR = MI->getOperand(0).getReg();
+ if (MRI->getRegClass(DstR) == DoubleRC) {
+ createHalfInstr(Opc, MI, PairMap, subreg_loreg);
+ createHalfInstr(Opc, MI, PairMap, subreg_hireg);
+ Split = true;
+ }
+ break;
+ }
+ case A2_andp:
+ createHalfInstr(A2_and, MI, PairMap, subreg_loreg);
+ createHalfInstr(A2_and, MI, PairMap, subreg_hireg);
+ Split = true;
+ break;
+ case A2_orp:
+ createHalfInstr(A2_or, MI, PairMap, subreg_loreg);
+ createHalfInstr(A2_or, MI, PairMap, subreg_hireg);
+ Split = true;
+ break;
+ case A2_xorp:
+ createHalfInstr(A2_xor, MI, PairMap, subreg_loreg);
+ createHalfInstr(A2_xor, MI, PairMap, subreg_hireg);
+ Split = true;
+ break;
+
+ case L2_loadrd_io:
+ case L2_loadrd_pi:
+ case S2_storerd_io:
+ case S2_storerd_pi:
+ splitMemRef(MI, PairMap);
+ Split = true;
+ break;
+
+ case A2_tfrpi:
+ case CONST64_Int_Real:
+ splitImmediate(MI, PairMap);
+ Split = true;
+ break;
+
+ case A2_combineii:
+ case A4_combineir:
+ case A4_combineii:
+ case A4_combineri:
+ case A2_combinew:
+ splitCombine(MI, PairMap);
+ Split = true;
+ break;
+
+ case A2_sxtw:
+ splitExt(MI, PairMap);
+ Split = true;
+ break;
+
+ case S2_asl_i_p:
+ case S2_asr_i_p:
+ case S2_lsr_i_p:
+ splitShift(MI, PairMap);
+ Split = true;
+ break;
+
+ case S2_asl_i_p_or:
+ splitAslOr(MI, PairMap);
+ Split = true;
+ break;
+
+ default:
+ llvm_unreachable("Instruction not splittable");
+ return false;
+ }
+
+ return Split;
+}
+
+
+void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || !Op.getSubReg())
+ continue;
+ unsigned R = Op.getReg();
+ UUPairMap::const_iterator F = PairMap.find(R);
+ if (F == PairMap.end())
+ continue;
+ const UUPair &P = F->second;
+ switch (Op.getSubReg()) {
+ case Hexagon::subreg_loreg:
+ Op.setReg(P.first);
+ break;
+ case Hexagon::subreg_hireg:
+ Op.setReg(P.second);
+ break;
+ }
+ Op.setSubReg(0);
+ }
+}
+
+
+void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ if (MRI->getRegClass(R) != DoubleRC || Op.getSubReg())
+ continue;
+ UUPairMap::const_iterator F = PairMap.find(R);
+ if (F == PairMap.end())
+ continue;
+ const UUPair &Pr = F->second;
+ unsigned NewDR = MRI->createVirtualRegister(DoubleRC);
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR)
+ .addReg(Pr.first)
+ .addImm(Hexagon::subreg_loreg)
+ .addReg(Pr.second)
+ .addImm(Hexagon::subreg_hireg);
+ Op.setReg(NewDR);
+ }
+}
+
+
+bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) {
+ const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
+ typedef std::set<MachineInstr*> MISet;
+ bool Changed = false;
+
+ DEBUG(dbgs() << "Splitting partition: "; dump_partition(dbgs(), Part, *TRI);
+ dbgs() << '\n');
+
+ UUPairMap PairMap;
+
+ MISet SplitIns;
+ for (unsigned DR : Part) {
+ MachineInstr *DefI = MRI->getVRegDef(DR);
+ SplitIns.insert(DefI);
+
+ // Collect all instructions, including fixed ones. We won't split them,
+ // but we need to visit them again to insert the REG_SEQUENCE instructions.
+ for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
+ U != W; ++U)
+ SplitIns.insert(U->getParent());
+
+ unsigned LoR = MRI->createVirtualRegister(IntRC);
+ unsigned HiR = MRI->createVirtualRegister(IntRC);
+ DEBUG(dbgs() << "Created mapping: " << PrintReg(DR, TRI) << " -> "
+ << PrintReg(HiR, TRI) << ':' << PrintReg(LoR, TRI) << '\n');
+ PairMap.insert(std::make_pair(DR, UUPair(LoR, HiR)));
+ }
+
+ MISet Erase;
+ for (auto MI : SplitIns) {
+ if (isFixedInstr(MI)) {
+ collapseRegPairs(MI, PairMap);
+ } else {
+ bool Done = splitInstr(MI, PairMap);
+ if (Done)
+ Erase.insert(MI);
+ Changed |= Done;
+ }
+ }
+
+ for (unsigned DR : Part) {
+ // Before erasing the "double" instructions, revisit all uses of the double
+ // registers in this partition, and replace every use made through a
+ // subregister with the corresponding single register.
+ MISet Uses;
+ for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
+ U != W; ++U)
+ Uses.insert(U->getParent());
+ for (auto M : Uses)
+ replaceSubregUses(M, PairMap);
+ }
+
+ for (auto MI : Erase) {
+ MachineBasicBlock *B = MI->getParent();
+ B->erase(MI);
+ }
+
+ return Changed;
+}
+
+
+bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "Splitting double registers in function: "
+ << MF.getName() << '\n');
+
+ auto &ST = MF.getSubtarget<HexagonSubtarget>();
+ TRI = ST.getRegisterInfo();
+ TII = ST.getInstrInfo();
+ MRI = &MF.getRegInfo();
+ MLI = &getAnalysis<MachineLoopInfo>();
+
+ UUSetMap P2Rs;
+ LoopRegMap IRM;
+
+ collectIndRegs(IRM);
+ partitionRegisters(P2Rs);
+
+ DEBUG({
+ dbgs() << "Register partitioning: (partition #0 is fixed)\n";
+ for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) {
+ dbgs() << '#' << I->first << " -> ";
+ dump_partition(dbgs(), I->second, *TRI);
+ dbgs() << '\n';
+ }
+ });
+
+ bool Changed = false;
+ int Limit = MaxHSDR;
+
+ for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) {
+ if (I->first == 0)
+ continue;
+ if (Limit >= 0 && Counter >= Limit)
+ break;
+ USet &Part = I->second;
+ DEBUG(dbgs() << "Calculating profit for partition #" << I->first << '\n');
+ if (!isProfitable(Part, IRM))
+ continue;
+ Counter++;
+ Changed |= splitPartition(Part);
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createHexagonSplitDoubleRegs() {
+ return new HexagonSplitDoubleRegs();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
new file mode 100644
index 0000000..b5339ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -0,0 +1,616 @@
+//===--- HexagonStoreWidening.cpp -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Replace sequences of "narrow" stores to adjacent memory locations with
+// fewer "wide" stores that have the same effect.
+// For example, replace:
+// S4_storeirb_io %vreg100, 0, 0 ; store-immediate-byte
+// S4_storeirb_io %vreg100, 1, 0 ; store-immediate-byte
+// with
+// S4_storeirh_io %vreg100, 0, 0 ; store-immediate-halfword
+// The above is the general idea. The actual cases handled by the code
+// may be a bit more complex.
+// The purpose of this pass is to reduce the number of outstanding stores,
+// or as one could say, "reduce store queue pressure". Also, wide stores
+// mean fewer stores, and since there are only two memory instructions allowed
+// per packet, it also means fewer packets, and ultimately fewer cycles.
+//===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-widen-stores"
+
+#include "HexagonTargetMachine.h"
+
+#include "llvm/PassSupport.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#include <algorithm>
+
+
+using namespace llvm;
+
+namespace llvm {
+ FunctionPass *createHexagonStoreWidening();
+ void initializeHexagonStoreWideningPass(PassRegistry&);
+}
+
+namespace {
+ struct HexagonStoreWidening : public MachineFunctionPass {
+ const HexagonInstrInfo *TII;
+ const HexagonRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ AliasAnalysis *AA;
+ MachineFunction *MF;
+
+ public:
+ static char ID;
+ HexagonStoreWidening() : MachineFunctionPass(ID) {
+ initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "Hexagon Store Widening";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static bool handledStoreType(const MachineInstr *MI);
+
+ private:
+ static const int MaxWideSize = 4;
+
+ typedef std::vector<MachineInstr*> InstrGroup;
+ typedef std::vector<InstrGroup> InstrGroupList;
+
+ bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO);
+ bool instrAliased(InstrGroup &Stores, const MachineInstr *MI);
+ void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin,
+ InstrGroup::iterator End, InstrGroup &Group);
+ void createStoreGroups(MachineBasicBlock &MBB,
+ InstrGroupList &StoreGroups);
+ bool processBasicBlock(MachineBasicBlock &MBB);
+ bool processStoreGroup(InstrGroup &Group);
+ bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End,
+ InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
+ bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
+ bool replaceStores(InstrGroup &OG, InstrGroup &NG);
+ bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2);
+ };
+
+} // namespace
+
+
+namespace {
+
+// Some local helper functions...
+unsigned getBaseAddressRegister(const MachineInstr *MI) {
+ const MachineOperand &MO = MI->getOperand(0);
+ assert(MO.isReg() && "Expecting register operand");
+ return MO.getReg();
+}
+
+int64_t getStoreOffset(const MachineInstr *MI) {
+ unsigned OpC = MI->getOpcode();
+ assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode");
+
+ switch (OpC) {
+ case Hexagon::S4_storeirb_io:
+ case Hexagon::S4_storeirh_io:
+ case Hexagon::S4_storeiri_io: {
+ const MachineOperand &MO = MI->getOperand(1);
+ assert(MO.isImm() && "Expecting immediate offset");
+ return MO.getImm();
+ }
+ }
+ dbgs() << *MI;
+ llvm_unreachable("Store offset calculation missing for a handled opcode");
+ return 0;
+}
+
+const MachineMemOperand &getStoreTarget(const MachineInstr *MI) {
+ assert(!MI->memoperands_empty() && "Expecting memory operands");
+ return **MI->memoperands_begin();
+}
+
+} // namespace
+
+
+char HexagonStoreWidening::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
+ "Hexason Store Widening", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
+ "Hexagon Store Widening", false, false)
+
+
+// Filtering function: any stores whose opcodes are not "approved" of by
+// this function will not be subjected to widening.
+inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) {
+ // For now, only handle stores of immediate values.
+ // Also, reject stores to stack slots.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::S4_storeirb_io:
+ case Hexagon::S4_storeirh_io:
+ case Hexagon::S4_storeiri_io:
+ // Base address must be a register. (Implement FI later.)
+ return MI->getOperand(0).isReg();
+ default:
+ return false;
+ }
+}
+
+
+// Check if the machine memory operand MMO is aliased with any of the
+// stores in the store group Stores.
+bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
+ const MachineMemOperand &MMO) {
+ if (!MMO.getValue())
+ return true;
+
+ MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo());
+
+ for (auto SI : Stores) {
+ const MachineMemOperand &SMO = getStoreTarget(SI);
+ if (!SMO.getValue())
+ return true;
+
+ MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo());
+ if (AA->alias(L, SL))
+ return true;
+ }
+
+ return false;
+}
+
+
+// Check if the machine instruction MI accesses any storage aliased with
+// any store in the group Stores.
+bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
+ const MachineInstr *MI) {
+ for (auto &I : MI->memoperands())
+ if (instrAliased(Stores, *I))
+ return true;
+ return false;
+}
+
+
+// Inspect a machine basic block, and generate store groups out of stores
+// encountered in the block.
+//
+// A store group is a group of stores that use the same base register,
+// and which can be reordered within that group without altering the
+// semantics of the program. A single store group could be widened as
+// a whole, if there existed a single store instruction with the same
+// semantics as the entire group. In many cases, a single store group
+// may need more than one wide store.
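+// For example, two S4_storeirb_io stores through the same base register R,
+// to [R+0] and [R+1], belong to the same group, while a store through a
+// different base register starts a group of its own.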
+void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB,
+ InstrGroupList &StoreGroups) {
+ InstrGroup AllInsns;
+
+ // Copy all instruction pointers from the basic block to a temporary
+ // list. This will allow operating on the list, and modifying its
+ // elements without affecting the basic block.
+ for (auto &I : MBB)
+ AllInsns.push_back(&I);
+
+ // Traverse all instructions in the AllInsns list, and if we encounter
+ // a store, then try to create a store group starting at that instruction
+ // i.e. a sequence of independent stores that can be widened.
+ for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) {
+ MachineInstr *MI = *I;
+ // Skip null pointers (processed instructions).
+ if (!MI || !handledStoreType(MI))
+ continue;
+
+ // Found a store. Try to create a store group.
+ InstrGroup G;
+ createStoreGroup(MI, I+1, E, G);
+ if (G.size() > 1)
+ StoreGroups.push_back(G);
+ }
+}
+
+
+// Create a single store group. The stores need to be independent of each
+// other, and there cannot be other instructions between them that could
+// read or modify the storage being stored into.
+void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore,
+ InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) {
+ assert(handledStoreType(BaseStore) && "Unexpected instruction");
+ unsigned BaseReg = getBaseAddressRegister(BaseStore);
+ InstrGroup Other;
+
+ Group.push_back(BaseStore);
+
+ for (auto I = Begin; I != End; ++I) {
+ MachineInstr *MI = *I;
+ if (!MI)
+ continue;
+
+ if (handledStoreType(MI)) {
+ // If this store instruction is aliased with anything already in the
+ // group, terminate the group now.
+ if (instrAliased(Group, getStoreTarget(MI)))
+ return;
+ // If this store is aliased to any of the memory instructions we have
+ // seen so far (that are not a part of this group), terminate the group.
+ if (instrAliased(Other, getStoreTarget(MI)))
+ return;
+
+ unsigned BR = getBaseAddressRegister(MI);
+ if (BR == BaseReg) {
+ Group.push_back(MI);
+ *I = 0;
+ continue;
+ }
+ }
+
+ // Assume calls are aliased to everything.
+ if (MI->isCall() || MI->hasUnmodeledSideEffects())
+ return;
+
+ if (MI->mayLoad() || MI->mayStore()) {
+ if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI))
+ return;
+ Other.push_back(MI);
+ }
+ } // for
+}
+
+
+// Check if store instructions S1 and S2 are adjacent. More precisely,
+// S2 has to access memory immediately following that accessed by S1.
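+// For example, a one-byte store at offset 3 is adjacent to a store at
+// offset 4, but not to one at offset 5.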
+bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1,
+ const MachineInstr *S2) {
+ if (!handledStoreType(S1) || !handledStoreType(S2))
+ return false;
+
+ const MachineMemOperand &S1MO = getStoreTarget(S1);
+
+ // Currently only handling immediate stores.
+ int Off1 = S1->getOperand(1).getImm();
+ int Off2 = S2->getOperand(1).getImm();
+
+ return (Off1 >= 0) ? Off1+S1MO.getSize() == unsigned(Off2)
+ : int(Off1+S1MO.getSize()) == Off2;
+}
+
+
+/// Given a sequence of adjacent stores, and a maximum size of a single wide
+/// store, pick a group of stores that can be replaced by a single store
+/// of size not exceeding MaxSize. The selected sequence will be recorded
+/// in OG ("old group" of instructions).
+/// OG should be empty on entry, and should be left empty if the function
+/// fails.
+bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
+ InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize,
+ unsigned MaxSize) {
+ assert(Begin != End && "No instructions to analyze");
+ assert(OG.empty() && "Old group not empty on entry");
+
+ if (std::distance(Begin, End) <= 1)
+ return false;
+
+ MachineInstr *FirstMI = *Begin;
+ assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
+ const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI);
+ unsigned Alignment = FirstMMO.getAlignment();
+ unsigned SizeAccum = FirstMMO.getSize();
+ unsigned FirstOffset = getStoreOffset(FirstMI);
+
+ // The initial value of SizeAccum should always be a power of 2.
+ assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");
+
+ // If the size of the first store equals to or exceeds the limit, do nothing.
+ if (SizeAccum >= MaxSize)
+ return false;
+
+ // If the size of the first store is greater than or equal to the alignment
+ // of the stored address, then the store cannot be made any wider.
+ if (SizeAccum >= Alignment)
+ return false;
+
+ // The offset of a store will put restrictions on how wide the store can be.
+ // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0.
+ // If the first store already exhausts the offset limits, quit. Test this
+ // by checking if the next wider size would exceed the limit.
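+ // For example, a byte store at offset 3 cannot seed a wider store: the
+ // next size up (a halfword) needs an even offset, and (2*1-1) & 3 != 0.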
+ if ((2*SizeAccum-1) & FirstOffset)
+ return false;
+
+ OG.push_back(FirstMI);
+ MachineInstr *S1 = FirstMI, *S2 = *(Begin+1);
+ InstrGroup::iterator I = Begin+1;
+
+ // Pow2Num will be the largest number of elements in OG such that the sum
+ // of sizes of stores 0...Pow2Num-1 will be a power of 2.
+ unsigned Pow2Num = 1;
+ unsigned Pow2Size = SizeAccum;
+
+ // Be greedy: keep accumulating stores as long as they are to adjacent
+ // memory locations, and as long as the total number of bytes stored
+ // does not exceed the limit (MaxSize).
+ // Keep track of when the total size covered is a power of 2, since
+ // this is a size a single store can cover.
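+ // For example, three adjacent byte stores accumulate to 3 bytes; only the
+ // first two (Pow2Size == 2) can be covered by a single halfword store, and
+ // OG is trimmed back to that prefix below.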
+ while (I != End) {
+ S2 = *I;
+ // Stores are sorted, so if S1 and S2 are not adjacent, there won't be
+ // any other store to fill the "hole".
+ if (!storesAreAdjacent(S1, S2))
+ break;
+
+ unsigned S2Size = getStoreTarget(S2).getSize();
+ if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
+ break;
+
+ OG.push_back(S2);
+ SizeAccum += S2Size;
+ if (isPowerOf2_32(SizeAccum)) {
+ Pow2Num = OG.size();
+ Pow2Size = SizeAccum;
+ }
+ if ((2*Pow2Size-1) & FirstOffset)
+ break;
+
+ S1 = S2;
+ ++I;
+ }
+
+ // The stores don't add up to anything that can be widened. Clean up.
+ if (Pow2Num <= 1) {
+ OG.clear();
+ return false;
+ }
+
+ // Only leave the stores being widened.
+ OG.resize(Pow2Num);
+ TotalSize = Pow2Size;
+ return true;
+}
+
+
+/// Given an "old group" OG of stores, create a "new group" NG of instructions
+/// to replace them. Ideally, NG would only have a single instruction in it,
+/// but that may only be possible for store-immediate.
+bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
+ unsigned TotalSize) {
+ // XXX Current limitations:
+ // - only expect stores of immediate values in OG,
+ // - only handle a TotalSize of up to 4.
+
+ if (TotalSize > 4)
+ return false;
+
+ unsigned Acc = 0; // Value accumulator.
+ unsigned Shift = 0;
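+ // For illustration: two byte stores of #0x12 (offset 0) and #0x34
+ // (offset 1) accumulate Acc = 0x3412, which, stored as a little-endian
+ // halfword, puts 0x12 at offset 0 and 0x34 at offset 1 as required.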
+
+ for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) {
+ MachineInstr *MI = *I;
+ const MachineMemOperand &MMO = getStoreTarget(MI);
+ MachineOperand &SO = MI->getOperand(2); // Source.
+ assert(SO.isImm() && "Expecting an immediate operand");
+
+ unsigned NBits = MMO.getSize()*8;
+ unsigned Mask = (0xFFFFFFFFU >> (32-NBits));
+ unsigned Val = (SO.getImm() & Mask) << Shift;
+ Acc |= Val;
+ Shift += NBits;
+ }
+
+
+ MachineInstr *FirstSt = OG.front();
+ DebugLoc DL = OG.back()->getDebugLoc();
+ const MachineMemOperand &OldM = getStoreTarget(FirstSt);
+ MachineMemOperand *NewM =
+ MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
+ TotalSize, OldM.getAlignment(),
+ OldM.getAAInfo());
+
+ if (Acc < 0x10000) {
+ // Create mem[hw] = #Acc
+ unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io :
+ (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0;
+ assert(WOpc && "Unexpected size");
+
+ int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc);
+ const MCInstrDesc &StD = TII->get(WOpc);
+ MachineOperand &MR = FirstSt->getOperand(0);
+ int64_t Off = FirstSt->getOperand(1).getImm();
+ MachineInstr *StI = BuildMI(*MF, DL, StD)
+ .addReg(MR.getReg(), getKillRegState(MR.isKill()))
+ .addImm(Off)
+ .addImm(Val);
+ StI->addMemOperand(*MF, NewM);
+ NG.push_back(StI);
+ } else {
+ // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
+ const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
+ const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
+ unsigned VReg = MF->getRegInfo().createVirtualRegister(RC);
+ MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg)
+ .addImm(int(Acc));
+ NG.push_back(TfrI);
+
+ unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io :
+ (TotalSize == 4) ? Hexagon::S2_storeri_io : 0;
+ assert(WOpc && "Unexpected size");
+
+ const MCInstrDesc &StD = TII->get(WOpc);
+ MachineOperand &MR = FirstSt->getOperand(0);
+ int64_t Off = FirstSt->getOperand(1).getImm();
+ MachineInstr *StI = BuildMI(*MF, DL, StD)
+ .addReg(MR.getReg(), getKillRegState(MR.isKill()))
+ .addImm(Off)
+ .addReg(VReg, RegState::Kill);
+ StI->addMemOperand(*MF, NewM);
+ NG.push_back(StI);
+ }
+
+ return true;
+}
+
+
+// Replace instructions from the old group OG with instructions from the
+// new group NG. Conceptually, remove all instructions in OG, and then
+// insert all instructions in NG, starting at where the first instruction
+// from OG was (in the order in which they appeared in the basic block).
+// (The ordering in OG does not have to match the order in the basic block.)
+bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) {
+ DEBUG({
+ dbgs() << "Replacing:\n";
+ for (auto I : OG)
+ dbgs() << " " << *I;
+ dbgs() << "with\n";
+ for (auto I : NG)
+ dbgs() << " " << *I;
+ });
+
+ MachineBasicBlock *MBB = OG.back()->getParent();
+ MachineBasicBlock::iterator InsertAt = MBB->end();
+
+ // Need to establish the insertion point. The best one is right before
+ // the first store in the OG, but in the order in which the stores occur
+ // in the program list. Since the ordering in OG does not correspond
+ // to the order in the program list, we need to do some work to find
+ // the insertion point.
+
+ // Create a set of all instructions in OG (for quick lookup).
+ SmallPtrSet<MachineInstr*, 4> InstrSet;
+ for (auto I : OG)
+ InstrSet.insert(I);
+
+ // Traverse the block, until we hit an instruction from OG.
+ for (auto &I : *MBB) {
+ if (InstrSet.count(&I)) {
+ InsertAt = I;
+ break;
+ }
+ }
+
+ assert((InsertAt != MBB->end()) && "Cannot locate any store from the group");
+
+ bool AtBBStart = false;
+
+ // InsertAt points at the first instruction that will be removed. We need
+ // to move it out of the way, so it remains valid after removing all the
+ // old stores, and so we are able to recover it back to the proper insertion
+ // position.
+ if (InsertAt != MBB->begin())
+ --InsertAt;
+ else
+ AtBBStart = true;
+
+ for (auto I : OG)
+ I->eraseFromParent();
+
+ if (!AtBBStart)
+ ++InsertAt;
+ else
+ InsertAt = MBB->begin();
+
+ for (auto I : NG)
+ MBB->insert(InsertAt, I);
+
+ return true;
+}
+
+
+// Break up the group into smaller groups, each of which can be replaced by
+// a single wide store. Widen each such smaller group and replace the old
+// instructions with the widened ones.
+bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) {
+ bool Changed = false;
+ InstrGroup::iterator I = Group.begin(), E = Group.end();
+ InstrGroup OG, NG; // Old and new groups.
+ unsigned CollectedSize;
+
+ while (I != E) {
+ OG.clear();
+ NG.clear();
+
+ bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) &&
+ createWideStores(OG, NG, CollectedSize) &&
+ replaceStores(OG, NG);
+ if (!Succ)
+ continue;
+
+ assert(OG.size() > 1 && "Created invalid group");
+ assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements");
+ I += OG.size()-1;
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+
+// Process a single basic block: create the store groups, and replace them
+// with the widened stores, if possible. Processing of each basic block
+// is independent from the processing of any other basic block. This
+// transformation could be stopped after having processed any basic block
+// without any ill effects (other than not having performed widening in the
+// unprocessed blocks). Also, the basic blocks can be processed in any order.
+bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
+ InstrGroupList SGs;
+ bool Changed = false;
+
+ createStoreGroups(MBB, SGs);
+
+ auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool {
+ return getStoreOffset(A) < getStoreOffset(B);
+ };
+ for (auto &G : SGs) {
+ assert(G.size() > 1 && "Store group with fewer than 2 elements");
+ std::sort(G.begin(), G.end(), Less);
+
+ Changed |= processStoreGroup(G);
+ }
+
+ return Changed;
+}
+
+
+bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) {
+ MF = &MFn;
+ auto &ST = MFn.getSubtarget<HexagonSubtarget>();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
+ MRI = &MFn.getRegInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+ bool Changed = false;
+
+ for (auto &B : MFn)
+ Changed |= processBasicBlock(B);
+
+ return Changed;
+}
+
+
+FunctionPass *llvm::createHexagonStoreWidening() {
+ return new HexagonStoreWidening();
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
new file mode 100644
index 0000000..aa0efd4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -0,0 +1,125 @@
+//===-- HexagonSubtarget.cpp - Hexagon Subtarget Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonSubtarget.h"
+#include "Hexagon.h"
+#include "HexagonRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <map>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-subtarget"
+
+#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_TARGET_DESC
+#include "HexagonGenSubtargetInfo.inc"
+
+static cl::opt<bool> EnableMemOps("enable-hexagon-memops",
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true),
+ cl::desc("Generate V4 MEMOP in code generation for Hexagon target"));
+
+static cl::opt<bool> DisableMemOps("disable-hexagon-memops",
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false),
+ cl::desc("Do not generate V4 MEMOP in code generation for Hexagon target"));
+
+static cl::opt<bool> EnableIEEERndNear("enable-hexagon-ieee-rnd-near",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Generate non-chopped conversion from fp to int."));
+
+static cl::opt<bool> EnableBSBSched("enable-bsb-sched",
+ cl::Hidden, cl::ZeroOrMore, cl::init(true));
+
+static cl::opt<bool> EnableHexagonHVXDouble("enable-hexagon-hvx-double",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable Hexagon Double Vector eXtensions"));
+
+static cl::opt<bool> EnableHexagonHVX("enable-hexagon-hvx",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable Hexagon Vector eXtensions"));
+
+static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon MI Scheduling"));
+
+void HexagonSubtarget::initializeEnvironment() {
+ UseMemOps = false;
+ ModeIEEERndNear = false;
+ UseBSBScheduling = false;
+}
+
+HexagonSubtarget &
+HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
+ CPUString = HEXAGON_MC::selectHexagonCPU(getTargetTriple(), CPU);
+
+ static std::map<StringRef, HexagonArchEnum> CpuTable {
+ { "hexagonv4", V4 },
+ { "hexagonv5", V5 },
+ { "hexagonv55", V55 },
+ { "hexagonv60", V60 },
+ };
+
+ auto foundIt = CpuTable.find(CPUString);
+ if (foundIt != CpuTable.end())
+ HexagonArchVersion = foundIt->second;
+ else
+ llvm_unreachable("Unrecognized Hexagon processor version");
+
+ UseHVXOps = false;
+ UseHVXDblOps = false;
+ ParseSubtargetFeatures(CPUString, FS);
+
+ if (EnableHexagonHVX.getPosition())
+ UseHVXOps = EnableHexagonHVX;
+ if (EnableHexagonHVXDouble.getPosition())
+ UseHVXDblOps = EnableHexagonHVXDouble;
+
+ return *this;
+}
+
+HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetMachine &TM)
+ : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
+ FrameLowering() {
+
+ initializeEnvironment();
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUString);
+
+ // UseMemOps is on by default unless disabled explicitly.
+ if (DisableMemOps)
+ UseMemOps = false;
+ else if (EnableMemOps)
+ UseMemOps = true;
+ else
+ UseMemOps = false;
+
+ if (EnableIEEERndNear)
+ ModeIEEERndNear = true;
+ else
+ ModeIEEERndNear = false;
+
+ UseBSBScheduling = hasV60TOps() && EnableBSBSched;
+}
+
+// Pin the vtable to this file.
+void HexagonSubtarget::anchor() {}
+
+bool HexagonSubtarget::enableMachineScheduler() const {
+ if (DisableHexagonMISched.getNumOccurrences())
+ return !DisableHexagonMISched;
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
new file mode 100644
index 0000000..c7ae139
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -0,0 +1,121 @@
+//===-- HexagonSubtarget.h - Define Subtarget for the Hexagon ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H
+
+#include "HexagonFrameLowering.h"
+#include "HexagonISelLowering.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "HexagonGenSubtargetInfo.inc"
+
+#define Hexagon_SMALL_DATA_THRESHOLD 8
+#define Hexagon_SLOTS 4
+
+namespace llvm {
+
+class HexagonSubtarget : public HexagonGenSubtargetInfo {
+ virtual void anchor();
+
+ bool UseMemOps, UseHVXOps, UseHVXDblOps;
+ bool ModeIEEERndNear;
+
+public:
+ enum HexagonArchEnum {
+ V4, V5, V55, V60
+ };
+
+ HexagonArchEnum HexagonArchVersion;
+ /// True if the target should use Back-Skip-Back scheduling. This is the
+ /// default for V60.
+ bool UseBSBScheduling;
+
+private:
+ std::string CPUString;
+ HexagonInstrInfo InstrInfo;
+ HexagonTargetLowering TLInfo;
+ HexagonSelectionDAGInfo TSInfo;
+ HexagonFrameLowering FrameLowering;
+ InstrItineraryData InstrItins;
+ void initializeEnvironment();
+
+public:
+ HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ const TargetMachine &TM);
+
+ /// getInstrItins - Return the instruction itineraries based on subtarget
+ /// selection.
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+ const HexagonInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const HexagonRegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
+ }
+ const HexagonTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const HexagonFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const HexagonSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+
+ HexagonSubtarget &initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool useMemOps() const { return UseMemOps; }
+ bool hasV5TOps() const { return getHexagonArchVersion() >= V5; }
+ bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; }
+ bool hasV55TOps() const { return getHexagonArchVersion() >= V55; }
+ bool hasV55TOpsOnly() const { return getHexagonArchVersion() == V55; }
+ bool hasV60TOps() const { return getHexagonArchVersion() >= V60; }
+ bool hasV60TOpsOnly() const { return getHexagonArchVersion() == V60; }
+ bool modeIEEERndNear() const { return ModeIEEERndNear; }
+ bool useHVXOps() const { return UseHVXOps; }
+ bool useHVXDblOps() const { return UseHVXOps && UseHVXDblOps; }
+ bool useHVXSglOps() const { return UseHVXOps && !UseHVXDblOps; }
+
+ bool useBSBScheduling() const { return UseBSBScheduling; }
+ bool enableMachineScheduler() const override;
+ // Always use the TargetLowering default scheduler.
+ // FIXME: This will use the vliw scheduler which is probably just hurting
+ // compiler time and will be removed eventually anyway.
+ bool enableMachineSchedDefaultSched() const override { return false; }
+
+ const std::string &getCPUString () const { return CPUString; }
+
+ // Threshold for small data section
+ unsigned getSmallDataThreshold() const {
+ return Hexagon_SMALL_DATA_THRESHOLD;
+ }
+ const HexagonArchEnum &getHexagonArchVersion() const {
+ return HexagonArchVersion;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
new file mode 100644
index 0000000..9dccd69
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -0,0 +1,299 @@
+//===-- HexagonTargetMachine.cpp - Define TargetMachine for Hexagon -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about Hexagon target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetMachine.h"
+#include "Hexagon.h"
+#include "HexagonISelLowering.h"
+#include "HexagonMachineScheduler.h"
+#include "HexagonTargetObjectFile.h"
+#include "HexagonTargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
+ cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
+
+static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon CFG Optimization"));
+
+static cl::opt<bool> DisableStoreWidening("disable-store-widen",
+ cl::Hidden, cl::init(false), cl::desc("Disable store widening"));
+
+static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets",
+ cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Early expansion of MUX"));
+
+static cl::opt<bool> EnableEarlyIf("hexagon-eif", cl::init(true), cl::Hidden,
+ cl::ZeroOrMore, cl::desc("Enable early if-conversion"));
+
+static cl::opt<bool> EnableGenInsert("hexagon-insert", cl::init(true),
+ cl::Hidden, cl::desc("Generate \"insert\" instructions"));
+
+static cl::opt<bool> EnableCommGEP("hexagon-commgep", cl::init(true),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Enable commoning of GEP instructions"));
+
+static cl::opt<bool> EnableGenExtract("hexagon-extract", cl::init(true),
+ cl::Hidden, cl::desc("Generate \"extract\" instructions"));
+
+static cl::opt<bool> EnableGenMux("hexagon-mux", cl::init(true), cl::Hidden,
+ cl::desc("Enable converting conditional transfers into MUX instructions"));
+
+static cl::opt<bool> EnableGenPred("hexagon-gen-pred", cl::init(true),
+ cl::Hidden, cl::desc("Enable conversion of arithmetic operations to "
+ "predicate instructions"));
+
+static cl::opt<bool> DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden,
+ cl::desc("Disable splitting double registers"));
+
+static cl::opt<bool> EnableBitSimplify("hexagon-bit", cl::init(true),
+ cl::Hidden, cl::desc("Bit simplification"));
+
+static cl::opt<bool> EnableLoopResched("hexagon-loop-resched", cl::init(true),
+ cl::Hidden, cl::desc("Loop rescheduling"));
+
+/// HexagonTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int HexagonTargetMachineModule;
+int HexagonTargetMachineModule = 0;
+
+extern "C" void LLVMInitializeHexagonTarget() {
+ // Register the target.
+ RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget);
+}
+
+static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) {
+ return new VLIWMachineScheduler(C, make_unique<ConvergingVLIWScheduler>());
+}
+
+static MachineSchedRegistry
+SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
+ createVLIWMachineSched);
+
+namespace llvm {
+ FunctionPass *createHexagonBitSimplify();
+ FunctionPass *createHexagonCallFrameInformation();
+ FunctionPass *createHexagonCFGOptimizer();
+ FunctionPass *createHexagonCommonGEP();
+ FunctionPass *createHexagonCopyToCombine();
+ FunctionPass *createHexagonEarlyIfConversion();
+ FunctionPass *createHexagonExpandCondsets();
+ FunctionPass *createHexagonExpandPredSpillCode();
+ FunctionPass *createHexagonFixupHwLoops();
+ FunctionPass *createHexagonGenExtract();
+ FunctionPass *createHexagonGenInsert();
+ FunctionPass *createHexagonGenMux();
+ FunctionPass *createHexagonGenPredicate();
+ FunctionPass *createHexagonHardwareLoops();
+ FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+ FunctionPass *createHexagonLoopRescheduling();
+ FunctionPass *createHexagonNewValueJump();
+ FunctionPass *createHexagonOptimizeSZextends();
+ FunctionPass *createHexagonPacketizer();
+ FunctionPass *createHexagonPeephole();
+ FunctionPass *createHexagonSplitConst32AndConst64();
+ FunctionPass *createHexagonSplitDoubleRegs();
+ FunctionPass *createHexagonStoreWidening();
+} // end namespace llvm
+
+/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
+///
+/// Hexagon_TODO: Do I need an aggregate alignment?
+///
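+/// The data layout string below describes a little-endian target ("e") with
+/// 32-bit pointers, 64-bit alignment for i64/f64, and native integer widths
+/// of 16 and 32 bits ("n16:32").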
+HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, "e-m:e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-"
+ "i1:8:8-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-"
+ "n16:32", TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<HexagonTargetObjectFile>()) {
+ initAsmInfo();
+}
+
+const HexagonSubtarget *
+HexagonTargetMachine::getSubtargetImpl(const Function &F) const {
+ AttributeSet FnAttrs = F.getAttributes();
+ Attribute CPUAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ Attribute FSAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+ auto &I = SubtargetMap[CPU + FS];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<HexagonSubtarget>(TargetTriple, CPU, FS, *this);
+ }
+ return I.get();
+}
+
+TargetIRAnalysis HexagonTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](const Function &F) {
+ return TargetTransformInfo(HexagonTTIImpl(this, F));
+ });
+}
+
+
+HexagonTargetMachine::~HexagonTargetMachine() {}
+
+namespace {
+/// Hexagon Code Generator Pass Configuration Options.
+class HexagonPassConfig : public TargetPassConfig {
+public:
+ HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {
+ bool NoOpt = (TM->getOptLevel() == CodeGenOpt::None);
+ if (!NoOpt) {
+ if (EnableExpandCondsets) {
+ Pass *Exp = createHexagonExpandCondsets();
+ insertPass(&RegisterCoalescerID, IdentifyingPassPtr(Exp));
+ }
+ }
+ }
+
+ HexagonTargetMachine &getHexagonTargetMachine() const {
+ return getTM<HexagonTargetMachine>();
+ }
+
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ return createVLIWMachineSched(C);
+ }
+
+ void addIRPasses() override;
+ bool addInstSelector() override;
+ void addPreRegAlloc() override;
+ void addPostRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
+};
+} // namespace
+
+TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new HexagonPassConfig(this, PM);
+}
+
+void HexagonPassConfig::addIRPasses() {
+ TargetPassConfig::addIRPasses();
+ bool NoOpt = (getOptLevel() == CodeGenOpt::None);
+
+ addPass(createAtomicExpandPass(TM));
+ if (!NoOpt) {
+ if (EnableCommGEP)
+ addPass(createHexagonCommonGEP());
+ // Replace certain combinations of shifts and ands with extracts.
+ if (EnableGenExtract)
+ addPass(createHexagonGenExtract());
+ }
+}
+
+bool HexagonPassConfig::addInstSelector() {
+ HexagonTargetMachine &TM = getHexagonTargetMachine();
+ bool NoOpt = (getOptLevel() == CodeGenOpt::None);
+
+ if (!NoOpt)
+ addPass(createHexagonOptimizeSZextends());
+
+ addPass(createHexagonISelDag(TM, getOptLevel()));
+
+ if (!NoOpt) {
+ // Create logical operations on predicate registers.
+ if (EnableGenPred)
+ addPass(createHexagonGenPredicate(), false);
+ // Rotate loops to expose bit-simplification opportunities.
+ if (EnableLoopResched)
+ addPass(createHexagonLoopRescheduling(), false);
+ // Split double registers.
+ if (!DisableHSDR)
+ addPass(createHexagonSplitDoubleRegs());
+ // Bit simplification.
+ if (EnableBitSimplify)
+ addPass(createHexagonBitSimplify(), false);
+ addPass(createHexagonPeephole());
+ printAndVerify("After hexagon peephole pass");
+ if (EnableGenInsert)
+ addPass(createHexagonGenInsert(), false);
+ if (EnableEarlyIf)
+ addPass(createHexagonEarlyIfConversion(), false);
+ }
+
+ return false;
+}
+
+void HexagonPassConfig::addPreRegAlloc() {
+ if (getOptLevel() != CodeGenOpt::None) {
+ if (!DisableStoreWidening)
+ addPass(createHexagonStoreWidening(), false);
+ if (!DisableHardwareLoops)
+ addPass(createHexagonHardwareLoops(), false);
+ }
+}
+
+void HexagonPassConfig::addPostRegAlloc() {
+ if (getOptLevel() != CodeGenOpt::None)
+ if (!DisableHexagonCFGOpt)
+ addPass(createHexagonCFGOptimizer(), false);
+}
+
+void HexagonPassConfig::addPreSched2() {
+ addPass(createHexagonCopyToCombine(), false);
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(&IfConverterID, false);
+ addPass(createHexagonSplitConst32AndConst64());
+}
+
+void HexagonPassConfig::addPreEmitPass() {
+ bool NoOpt = (getOptLevel() == CodeGenOpt::None);
+
+ if (!NoOpt)
+ addPass(createHexagonNewValueJump(), false);
+
+ // Expand Spill code for predicate registers.
+ addPass(createHexagonExpandPredSpillCode(), false);
+
+ // Create Packets.
+ if (!NoOpt) {
+ if (!DisableHardwareLoops)
+ addPass(createHexagonFixupHwLoops(), false);
+ // Generate MUX from pairs of conditional transfers.
+ if (EnableGenMux)
+ addPass(createHexagonGenMux(), false);
+
+ addPass(createHexagonPacketizer(), false);
+ }
+
+ // Add CFI instructions if necessary.
+ addPass(createHexagonCallFrameInformation(), false);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
new file mode 100644
index 0000000..968814b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -0,0 +1,50 @@
+//=-- HexagonTargetMachine.h - Define TargetMachine for Hexagon ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETMACHINE_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETMACHINE_H
+
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class Module;
+
+class HexagonTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ mutable StringMap<std::unique_ptr<HexagonSubtarget>> SubtargetMap;
+
+public:
+ HexagonTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+ ~HexagonTargetMachine() override;
+ const HexagonSubtarget *getSubtargetImpl(const Function &F) const override;
+
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
+
+ HexagonTargetObjectFile *getObjFileLowering() const override {
+ return static_cast<HexagonTargetObjectFile*>(TLOF.get());
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
new file mode 100644
index 0000000..ccca620
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -0,0 +1,98 @@
+//===-- HexagonTargetObjectFile.cpp - Hexagon asm properties --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the HexagonTargetObjectFile members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetObjectFile.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+
+using namespace llvm;
+
+static cl::opt<int> SmallDataThreshold("hexagon-small-data-threshold",
+ cl::init(8), cl::Hidden,
+ cl::desc("The maximum size of an object in the sdata section"));
+
+void HexagonTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+
+ SmallDataSection = getContext().getELFSection(
+ ".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC);
+}
+
+// sdata/sbss support taken largely from the MIPS Backend.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= (uint64_t)SmallDataThreshold;
+}
+
+bool HexagonTargetObjectFile::IsSmallDataEnabled() const {
+ return SmallDataThreshold > 0;
+}
+
+/// IsGlobalInSmallSection - Return true if this global value should be
+/// placed into small data/bss section.
+bool HexagonTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ // If the primary definition of this global value is outside the current
+ // translation unit or the global value is available for inspection but not
+ // emission, then do nothing.
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+ // Otherwise, check if GV should be in sdata/sbss based on the section kind
+ // it would normally get from getKindForGlobal(GV, TM).
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global value should be
+/// placed into small data/bss section.
+bool HexagonTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ if (Kind.isBSS() || Kind.isData() || Kind.isCommon()) {
+ Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(
+ GV->getParent()->getDataLayout().getTypeAllocSize(Ty));
+ }
+
+ return false;
+}
+
+MCSection *
+HexagonTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isData() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
new file mode 100644
index 0000000..da0eeeb
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -0,0 +1,41 @@
+//===-- HexagonTargetObjectFile.h - Hexagon asm properties -----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCSectionELF.h"
+
+namespace llvm {
+
+ class HexagonTargetObjectFile : public TargetLoweringObjectFileELF {
+ MCSectionELF *SmallDataSection;
+ MCSectionELF *SmallBSSSection;
+
+ public:
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM,
+ SectionKind Kind) const;
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ bool IsSmallDataEnabled() const;
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h
new file mode 100644
index 0000000..e19c404
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h
@@ -0,0 +1,31 @@
+//===-- HexagonTargetStreamer.h - Hexagon Target Streamer ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONTARGETSTREAMER_H
+#define HEXAGONTARGETSTREAMER_H
+
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+class HexagonTargetStreamer : public MCTargetStreamer {
+public:
+ HexagonTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0){};
+ virtual void emitFAlign(unsigned Size, unsigned MaxBytesToEmit){};
+ virtual void EmitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment,
+ unsigned AccessGranularity){};
+ virtual void EmitLocalCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlign,
+ unsigned AccessGranularity){};
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
new file mode 100644
index 0000000..a05443e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -0,0 +1,38 @@
+//===-- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// Hexagon target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagontti"
+
+TargetTransformInfo::PopcntSupportKind
+HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
+ // Return Fast Hardware support as every input < 64 bits will be promoted
+ // to 64 bits.
+ return TargetTransformInfo::PSK_FastHardware;
+}
+
+// The Hexagon target can unroll loops with run-time trip counts.
+void HexagonTTIImpl::getUnrollingPreferences(Loop *L,
+ TTI::UnrollingPreferences &UP) {
+ UP.Runtime = UP.Partial = true;
+}
+
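+// Hexagon has 32 general purpose 32-bit registers (r0-r31). Vector registers
+// are not modeled by this TTI implementation, so report 0 for them.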
+unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const {
+ return vector ? 0 : 32;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
new file mode 100644
index 0000000..71ae17a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -0,0 +1,70 @@
+//===-- HexagonTargetTransformInfo.h - Hexagon specific TTI pass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// Hexagon target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H
+
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
+ typedef BasicTTIImplBase<HexagonTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const HexagonSubtarget *ST;
+ const HexagonTargetLowering *TLI;
+
+ const HexagonSubtarget *getST() const { return ST; }
+ const HexagonTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit HexagonTTIImpl(const HexagonTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ HexagonTTIImpl(const HexagonTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ HexagonTTIImpl(HexagonTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
+
+ // The Hexagon target can unroll loops with run-time trip counts.
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool vector) const;
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
new file mode 100644
index 0000000..8185054
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -0,0 +1,1601 @@
+//===----- HexagonPacketizer.cpp - vliw packetizer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a simple VLIW packetizer using DFA. The packetizer works on
+// machine basic blocks. For each instruction I in BB, the packetizer consults
+// the DFA to see if machine resources are available to execute I. If so, the
+// packetizer checks if I depends on any instruction J in the current packet.
+// If no dependency is found, I is added to the current packet and the machine
+// resources are marked as taken. If any dependency is found, a target API call
+// is made to prune the dependence.
+//
+//===----------------------------------------------------------------------===//
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonVLIWPacketizer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include <map>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "packets"
+
+static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon packetizer pass"));
+
+static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles",
+ cl::ZeroOrMore, cl::Hidden, cl::init(true),
+ cl::desc("Allow non-solo packetization of volatile memory references"));
+
+static cl::opt<bool> EnableGenAllInsnClass("enable-gen-insn", cl::init(false),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC"));
+
+static cl::opt<bool> DisableVecDblNVStores("disable-vecdbl-nv-stores",
+ cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Disable vector double new-value-stores"));
+
+extern cl::opt<bool> ScheduleInlineAsm;
+
+namespace llvm {
+ FunctionPass *createHexagonPacketizer();
+ void initializeHexagonPacketizerPass(PassRegistry&);
+}
+
+
+namespace {
+ class HexagonPacketizer : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonPacketizer() : MachineFunctionPass(ID) {
+ initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ const char *getPassName() const override {
+ return "Hexagon Packetizer";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ private:
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
+ };
+
+ char HexagonPacketizer::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer",
+ false, false)
+
+
+HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
+ MachineLoopInfo &MLI, AliasAnalysis *AA,
+ const MachineBranchProbabilityInfo *MBPI)
+ : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) {
+ HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+}
+
+// Check if FirstI modifies a register that SecondI reads.
+static bool hasWriteToReadDep(const MachineInstr *FirstI,
+ const MachineInstr *SecondI, const TargetRegisterInfo *TRI) {
+ for (auto &MO : FirstI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned R = MO.getReg();
+ if (SecondI->readsRegister(R, TRI))
+ return true;
+ }
+ return false;
+}
+
+
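+// Move MI out of the bundle that contains it, placing it either immediately
+// before or immediately after the bundle. If the bundle is left with a single
+// instruction, unbundle that instruction and delete the BUNDLE header.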
+static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI,
+ MachineBasicBlock::iterator BundleIt, bool Before) {
+ MachineBasicBlock::instr_iterator InsertPt;
+ if (Before)
+ InsertPt = BundleIt.getInstrIterator();
+ else
+ InsertPt = std::next(BundleIt).getInstrIterator();
+
+ MachineBasicBlock &B = *MI->getParent();
+ // The instruction should at least be bundled with the preceding instruction
+ // (there will always be one, i.e. BUNDLE, if nothing else).
+ assert(MI->isBundledWithPred());
+ if (MI->isBundledWithSucc()) {
+ MI->clearFlag(MachineInstr::BundledSucc);
+ MI->clearFlag(MachineInstr::BundledPred);
+ } else {
+ // If it's not bundled with the successor (i.e. it is the last one
+ // in the bundle), then we can simply unbundle it from the predecessor,
+ // which will take care of updating the predecessor's flag.
+ MI->unbundleFromPred();
+ }
+ B.splice(InsertPt, &B, MI);
+
+ // Get the size of the bundle without asserting.
+ MachineBasicBlock::const_instr_iterator I(BundleIt);
+ MachineBasicBlock::const_instr_iterator E = B.instr_end();
+ unsigned Size = 0;
+ for (++I; I != E && I->isBundledWithPred(); ++I)
+ ++Size;
+
+ // If there are still two or more instructions, then there is nothing
+ // else to be done.
+ if (Size > 1)
+ return BundleIt;
+
+ // Otherwise, extract the single instruction out and delete the bundle.
+ MachineBasicBlock::iterator NextIt = std::next(BundleIt);
+ MachineInstr *SingleI = BundleIt->getNextNode();
+ SingleI->unbundleFromPred();
+ assert(!SingleI->isBundledWithSucc());
+ BundleIt->eraseFromParent();
+ return NextIt;
+}
+
+
+bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
+ if (DisablePacketizer)
+ return false;
+
+ HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+ auto &MLI = getAnalysis<MachineLoopInfo>();
+ auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+
+ if (EnableGenAllInsnClass)
+ HII->genAllInsnTimingClasses(MF);
+
+ // Instantiate the packetizer.
+ HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI);
+
+ // DFA state table should not be empty.
+ assert(Packetizer.getResourceTracker() && "Empty DFA table!");
+
+ //
+ // Loop over all basic blocks and remove KILL pseudo-instructions.
+ // These instructions confuse the dependence analysis. Consider:
+ // D0 = ... (Insn 0)
+ // R0 = KILL R0, D0 (Insn 1)
+ // R0 = ... (Insn 2)
+ // Here, Insn 1 will result in the dependence graph not emitting an output
+ // dependence between Insn 0 and Insn 2. This can lead to incorrect
+ // packetization.
+ //
+ for (auto &MB : MF) {
+ auto End = MB.end();
+ auto MI = MB.begin();
+ while (MI != End) {
+ auto NextI = std::next(MI);
+ if (MI->isKill()) {
+ MB.erase(MI);
+ End = MB.end();
+ }
+ MI = NextI;
+ }
+ }
+
+ // Loop over all of the basic blocks.
+ for (auto &MB : MF) {
+ auto Begin = MB.begin(), End = MB.end();
+ while (Begin != End) {
+ // Find the first non-boundary instruction, starting from the end of the
+ // last scheduling region.
+ MachineBasicBlock::iterator RB = Begin;
+ while (RB != End && HII->isSchedulingBoundary(RB, &MB, MF))
+ ++RB;
+ // Find the first boundary instruction, starting from the beginning of the
+ // new region.
+ MachineBasicBlock::iterator RE = RB;
+ while (RE != End && !HII->isSchedulingBoundary(RE, &MB, MF))
+ ++RE;
+ // Add the scheduling boundary if it's not block end.
+ if (RE != End)
+ ++RE;
+ // If RB == End, then RE == End.
+ if (RB != End)
+ Packetizer.PacketizeMIs(&MB, RB, RE);
+
+ Begin = RE;
+ }
+ }
+
+ Packetizer.unpacketizeSoloInstrs(MF);
+ return true;
+}
+
+
+// Reserve resources for a constant extender. Trigger an assertion if the
+// reservation fails.
+void HexagonPacketizerList::reserveResourcesForConstExt() {
+ if (!tryAllocateResourcesForConstExt(true))
+ llvm_unreachable("Resources not available");
+}
+
+bool HexagonPacketizerList::canReserveResourcesForConstExt() {
+ return tryAllocateResourcesForConstExt(false);
+}
+
+// Allocate resources (i.e. 4 bytes) for a constant extender. If successful,
+// return true; otherwise, return false.
+bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) {
+ auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc());
+ bool Avail = ResourceTracker->canReserveResources(ExtMI);
+ if (Reserve && Avail)
+ ResourceTracker->reserveResources(ExtMI);
+ MF.DeleteMachineInstr(ExtMI);
+ return Avail;
+}
+
+
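+// Check whether a dependence on DepReg is significant for the call-like
+// instruction MI: dependences on the return address register (LR), on the
+// frame/stack registers for a dealloc_return, on a predicate register, or on
+// the address operand of an indirect call are all treated as call dependences.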
+bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI,
+ SDep::Kind DepType, unsigned DepReg) {
+ // Check for LR dependence.
+ if (DepReg == HRI->getRARegister())
+ return true;
+
+ if (HII->isDeallocRet(MI))
+ if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister())
+ return true;
+
+ // Check if this is a predicate dependence.
+ const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg);
+ if (RC == &Hexagon::PredRegsRegClass)
+ return true;
+
+ // Assumes that the first operand of the CALLr is the function address.
+ if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) {
+ MachineOperand MO = MI->getOperand(0);
+ if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg))
+ return true;
+ }
+
+ return false;
+}
+
+static bool isRegDependence(const SDep::Kind DepType) {
+ return DepType == SDep::Data || DepType == SDep::Anti ||
+ DepType == SDep::Output;
+}
+
+static bool isDirectJump(const MachineInstr* MI) {
+ return MI->getOpcode() == Hexagon::J2_jump;
+}
+
+static bool isSchedBarrier(const MachineInstr* MI) {
+ switch (MI->getOpcode()) {
+ case Hexagon::Y2_barrier:
+ return true;
+ }
+ return false;
+}
+
+static bool isControlFlow(const MachineInstr* MI) {
+ return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
+}
+
+
+/// Returns true if the instruction modifies a callee-saved register.
+static bool doesModifyCalleeSavedReg(const MachineInstr *MI,
+ const TargetRegisterInfo *TRI) {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
+ if (MI->modifiesRegister(*CSR, TRI))
+ return true;
+ return false;
+}
+
+// TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
+// Returns true if an instruction can be promoted to .new predicate or
+// new-value store.
+bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) {
+ return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI);
+}
+
+// Promote an instruction to its .cur form.
+// At this time, we have already made a call to canPromoteToDotCur and made
+// sure that it can *indeed* be promoted.
+bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI,
+ SDep::Kind DepType, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC) {
+ assert(DepType == SDep::Data);
+ int CurOpcode = HII->getDotCurOp(MI);
+ MI->setDesc(HII->get(CurOpcode));
+ return true;
+}
+
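+// If a load was promoted to its .cur form but no other instruction in the
+// current packet actually uses the loaded value, demote it back to the
+// regular load form.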
+void HexagonPacketizerList::cleanUpDotCur() {
+ MachineInstr *MI = NULL;
+ for (auto BI : CurrentPacketMIs) {
+ DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
+ if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) {
+ MI = BI;
+ continue;
+ }
+ if (MI) {
+ for (auto &MO : BI->operands())
+ if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg())
+ return;
+ }
+ }
+ if (!MI)
+ return;
+ // We did not find a use of the CUR, so de-cur it.
+ MI->setDesc(HII->get(Hexagon::V6_vL32b_ai));
+ DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
+}
+
+// Check to see if an instruction can be dot cur.
+bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI,
+ const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass *RC) {
+ if (!HII->isV60VectorInstruction(MI))
+ return false;
+ if (!HII->isV60VectorInstruction(MII))
+ return false;
+
+ // Already a dot cur instruction.
+ if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI))
+ return false;
+
+ if (!HII->mayBeCurLoad(MI))
+ return false;
+
+ // The "cur value" cannot come from inline asm.
+ if (PacketSU->getInstr()->isInlineAsm())
+ return false;
+
+ // Make sure candidate instruction uses cur.
+ DEBUG(dbgs() << "Can we DOT Cur Vector MI\n";
+ MI->dump();
+ dbgs() << "in packet\n";);
+ MachineInstr *MJ = MII;
+ DEBUG(dbgs() << "Checking CUR against "; MJ->dump(););
+ unsigned DestReg = MI->getOperand(0).getReg();
+ bool FoundMatch = false;
+ for (auto &MO : MJ->operands())
+ if (MO.isReg() && MO.getReg() == DestReg)
+ FoundMatch = true;
+ if (!FoundMatch)
+ return false;
+
+ // Check for existing uses of a vector register within the packet which
+ // would be affected by converting a vector load into .cur form.
+ for (auto BI : CurrentPacketMIs) {
+ DEBUG(dbgs() << "packet has "; BI->dump(););
+ if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo()))
+ return false;
+ }
+
+ DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump(););
+ // We can convert the opcode into a .cur.
+ return true;
+}
+
+// Promote an instruction to its .new form. At this time, we have already
+// made a call to canPromoteToDotNew and made sure that it can *indeed* be
+// promoted.
+bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI,
+ SDep::Kind DepType, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC) {
+ assert (DepType == SDep::Data);
+ int NewOpcode;
+ if (RC == &Hexagon::PredRegsRegClass)
+ NewOpcode = HII->getDotNewPredOp(MI, MBPI);
+ else
+ NewOpcode = HII->getDotNewOp(MI);
+ MI->setDesc(HII->get(NewOpcode));
+ return true;
+}
+
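+// Revert an instruction from its .new form back to its original (.old) form.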
+bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) {
+ int NewOpcode = HII->getDotOldOp(MI->getOpcode());
+ MI->setDesc(HII->get(NewOpcode));
+ return true;
+}
+
+enum PredicateKind {
+ PK_False,
+ PK_True,
+ PK_Unknown
+};
+
+/// Returns PK_True if an instruction is predicated on the true sense of its
+/// predicate (e.g. p0), PK_False if predicated on the negated sense (!p0),
+/// and PK_Unknown if the instruction is not predicated.
+static PredicateKind getPredicateSense(const MachineInstr *MI,
+ const HexagonInstrInfo *HII) {
+ if (!HII->isPredicated(MI))
+ return PK_Unknown;
+ if (HII->isPredicatedTrue(MI))
+ return PK_True;
+ return PK_False;
+}
+
+static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI,
+ const HexagonInstrInfo *HII) {
+ assert(HII->isPostIncrement(MI) && "Not a post increment operation.");
+#ifndef NDEBUG
+ // A post-increment operation duplicates a register: it appears both as a def
+ // and as a use. Use a DenseSet to find the duplicate among the operands.
+ // Caution: DenseSet initializes with a minimum of 64 buckets, whereas there
+ // are at most 5 operands in the post increment.
+ DenseSet<unsigned> DefRegsSet;
+ for (auto &MO : MI->operands())
+ if (MO.isReg() && MO.isDef())
+ DefRegsSet.insert(MO.getReg());
+
+ for (auto &MO : MI->operands())
+ if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg()))
+ return MO;
+#else
+ if (MI->mayLoad()) {
+ const MachineOperand &Op1 = MI->getOperand(1);
+ // The 2nd operand is always the post increment operand in load.
+ assert(Op1.isReg() && "Post increment operand has to be a register.");
+ return Op1;
+ }
+ if (MI->getDesc().mayStore()) {
+ const MachineOperand &Op0 = MI->getOperand(0);
+ // The 1st operand is always the post increment operand in store.
+ assert(Op0.isReg() && "Post increment operand has to be a register.");
+ return Op0;
+ }
+#endif
+ // We should never get here.
+ llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
+}
+
+// Get the value being stored.
+static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) {
+ // value being stored is always the last operand.
+ return MI->getOperand(MI->getNumOperands()-1);
+}
+
+static bool isLoadAbsSet(const MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::L4_loadrd_ap:
+ case Hexagon::L4_loadrb_ap:
+ case Hexagon::L4_loadrh_ap:
+ case Hexagon::L4_loadrub_ap:
+ case Hexagon::L4_loadruh_ap:
+ case Hexagon::L4_loadri_ap:
+ return true;
+ }
+ return false;
+}
+
+static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) {
+ assert(isLoadAbsSet(MI));
+ return MI->getOperand(1);
+}
+
+
+// Can this be a new value store?
+// The following restrictions must be respected when converting a store into
+// a new value store.
+// 1. If an instruction uses auto-increment, its address register cannot
+// be a new-value register. Arch Spec 5.4.2.1
+// 2. If an instruction uses absolute-set addressing mode, its address
+// register cannot be a new-value register. Arch Spec 5.4.2.1.
+// 3. If an instruction produces a 64-bit result, its registers cannot be used
+// as new-value registers. Arch Spec 5.4.2.2.
+// 4. If the instruction that sets the new-value register is conditional, then
+// the instruction that uses the new-value register must also be conditional,
+// and both must always have their predicates evaluate identically.
+// Arch Spec 5.4.2.3.
+// 5. There is an implied restriction that a packet cannot have another store,
+// if there is a new value store in the packet. Corollary: if there is
+// already a store in a packet, there can not be a new value store.
+// Arch Spec: 3.4.4.2
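+// For example, in the packet
+// { r2 = add(r1, #1)
+// memw(r0+#0) = r2.new }
+// the store reads r2 "new", i.e. the value produced by the add in the same
+// packet.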
+bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
+ const MachineInstr *PacketMI, unsigned DepReg) {
+ // Make sure we are looking at a store that can be promoted.
+ if (!HII->mayBeNewStore(MI))
+ return false;
+
+ // Make sure the dependence is on the value being stored, so it can be
+ // new-value'd.
+ const MachineOperand &Val = getStoreValueOperand(MI);
+ if (Val.isReg() && Val.getReg() != DepReg)
+ return false;
+
+ const MCInstrDesc& MCID = PacketMI->getDesc();
+
+ // First operand is always the result.
+ const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF);
+ // Double regs can not feed into new value store: PRM section: 5.4.2.2.
+ if (PacketRC == &Hexagon::DoubleRegsRegClass)
+ return false;
+
+ // New-value stores are of class NV (slot 0), dual stores require class ST
+ // in slot 0 (PRM 5.5).
+ for (auto I : CurrentPacketMIs) {
+ SUnit *PacketSU = MIToSUnit.find(I)->second;
+ if (PacketSU->getInstr()->mayStore())
+ return false;
+ }
+
+ // Make sure it's NOT the post increment register that we are going to
+ // new value.
+ if (HII->isPostIncrement(MI) &&
+ getPostIncrementOperand(MI, HII).getReg() == DepReg) {
+ return false;
+ }
+
+ if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() &&
+ getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) {
+ // If the source uses post-increment or absolute-set addressing, it cannot
+ // feed into a new value store:
+ // r3 = memw(r2++#4)
+ // memw(r30 + #-1404) = r2.new -> cannot be a new value store
+ // Arch spec section: 5.4.2.1.
+ return false;
+ }
+
+ if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg)
+ return false;
+
+ // If the source that feeds the store is predicated, new value store must
+ // also be predicated.
+ if (HII->isPredicated(PacketMI)) {
+ if (!HII->isPredicated(MI))
+ return false;
+
+ // Check to make sure that they both will have their predicates
+ // evaluate identically.
+ unsigned predRegNumSrc = 0;
+ unsigned predRegNumDst = 0;
+ const TargetRegisterClass* predRegClass = nullptr;
+
+ // Get predicate register used in the source instruction.
+ for (auto &MO : PacketMI->operands()) {
+ if (!MO.isReg())
+ continue;
+ predRegNumSrc = MO.getReg();
+ predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc);
+ if (predRegClass == &Hexagon::PredRegsRegClass)
+ break;
+ }
+ assert((predRegClass == &Hexagon::PredRegsRegClass) &&
+ "predicate register not found in a predicated PacketMI instruction");
+
+ // Get predicate register used in new-value store instruction.
+ for (auto &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ predRegNumDst = MO.getReg();
+ predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst);
+ if (predRegClass == &Hexagon::PredRegsRegClass)
+ break;
+ }
+ assert((predRegClass == &Hexagon::PredRegsRegClass) &&
+ "predicate register not found in a predicated MI instruction");
+
+ // New-value register producer and user (store) need to satisfy these
+ // constraints:
+ // 1) Both instructions should be predicated on the same register.
+ // 2) If producer of the new-value register is .new predicated then store
+ // should also be .new predicated and if producer is not .new predicated
+ // then store should not be .new predicated.
+ // 3) Both new-value register producer and user should have same predicate
+ // sense, i.e, either both should be negated or both should be non-negated.
+ if (predRegNumDst != predRegNumSrc ||
+ HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) ||
+ getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII))
+ return false;
+ }
+
+ // Make sure that, other than the new-value register, no other register used
+ // by the store instruction has been modified in the same packet. Predicate
+ // registers can be modified, but they should not be modified between the
+ // producer and the store instruction, as that would make the two conditional
+ // on different values. We already know this to be true for all the
+ // instructions up to and including PacketMI. However, we need to perform the
+ // check for the remaining instructions in the packet.
+
+ unsigned StartCheck = 0;
+
+ for (auto I : CurrentPacketMIs) {
+ SUnit *TempSU = MIToSUnit.find(I)->second;
+ MachineInstr* TempMI = TempSU->getInstr();
+
+ // Following condition is true for all the instructions until PacketMI is
+ // reached (StartCheck is set to 0 before the for loop).
+ // StartCheck flag is 1 for all the instructions after PacketMI.
+ if (TempMI != PacketMI && !StartCheck) // Start processing only after
+ continue; // encountering PacketMI.
+
+ StartCheck = 1;
+ if (TempMI == PacketMI) // We don't want to check PacketMI for dependence.
+ continue;
+
+ for (auto &MO : MI->operands())
+ if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI))
+ return false;
+ }
+
+ // Make sure that for non-POST_INC stores:
+ // 1. The only use of DepReg is as the stored value, not as any other
+ // register operand (e.g. a base or index register). This handles V4
+ // base+index registers. The following store cannot be dot new:
+ // Eg. r0 = add(r0, #3)
+ // memw(r1+r0<<#2) = r0
+ if (!HII->isPostIncrement(MI)) {
+ for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isReg() && MO.getReg() == DepReg)
+ return false;
+ }
+ }
+
+ // If data definition is because of implicit definition of the register,
+ // do not newify the store. Eg.
+ // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
+ // S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
+ for (auto &MO : PacketMI->operands()) {
+ if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
+ continue;
+ unsigned R = MO.getReg();
+ if (R == DepReg || HRI->isSuperRegister(DepReg, R))
+ return false;
+ }
+
+ // Handle the imp-use of a super register case. There is a target-independent
+ // change that should prevent this situation, but handle it here just in case.
+ // For example, we cannot newify R2 in the following case:
+ // %R3<def> = A2_tfrsi 0;
+ // S2_storeri_io %R0<kill>, 0, %R2<kill>, %D1<imp-use,kill>;
+ for (auto &MO : MI->operands()) {
+ if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
+ return false;
+ }
+
+ // Can be dot new store.
+ return true;
+}
+
+// Can this MI be promoted to either a new value store or a new value jump?
+bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI,
+ const SUnit *PacketSU, unsigned DepReg,
+ MachineBasicBlock::iterator &MII) {
+ if (!HII->mayBeNewStore(MI))
+ return false;
+
+ // Check to see the store can be new value'ed.
+ MachineInstr *PacketMI = PacketSU->getInstr();
+ if (canPromoteToNewValueStore(MI, PacketMI, DepReg))
+ return true;
+
+ // Check to see the compare/jump can be new value'ed.
+ // This is done as a pass on its own. Don't need to check it here.
+ return false;
+}
+
+static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) {
+ for (auto &MO : I->operands())
+ if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit())
+ return true;
+ return false;
+}
+
+// Check to see if an instruction can be dot new
+// There are three kinds.
+// 1. dot new on predicate - V2/V3/V4
+// 2. dot new on stores NV/ST - V4
+// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
+bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI,
+ const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC) {
+ // Already a dot new instruction.
+ if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI))
+ return false;
+
+ if (!isNewifiable(MI))
+ return false;
+
+ const MachineInstr *PI = PacketSU->getInstr();
+
+ // The "new value" cannot come from inline asm.
+ if (PI->isInlineAsm())
+ return false;
+
+ // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no
+ // sense.
+ if (PI->isImplicitDef())
+ return false;
+
+ // If the dependency is through an implicitly defined register, we should not
+ // newify the use.
+ if (isImplicitDependency(PI, DepReg))
+ return false;
+
+ const MCInstrDesc& MCID = PI->getDesc();
+ const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF);
+ if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass)
+ return false;
+
+ // predicate .new
+ // bug 5670: until that is fixed
+ // TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
+ if (RC == &Hexagon::PredRegsRegClass)
+ if (HII->isCondInst(MI) || MI->isReturn())
+ return HII->predCanBeUsedAsDotNew(PI, DepReg);
+
+ if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI))
+ return false;
+
+ // Create a dot new machine instruction to see if resources can be
+ // allocated. If not, bail out now.
+ int NewOpcode = HII->getDotNewOp(MI);
+ const MCInstrDesc &D = HII->get(NewOpcode);
+ MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc());
+ bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
+ MF.DeleteMachineInstr(NewMI);
+ if (!ResourcesAvailable)
+ return false;
+
+ // New Value Store only. New Value Jump generated as a separate pass.
+ if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII))
+ return false;
+
+ return true;
+}
+
+// Go through the packet instructions and search for an anti dependency between
+// them and DepReg from MI. Consider this case:
+// Trying to add
+// a) %R1<def> = TFRI_cdNotPt %P3, 2
+// to this packet:
+// {
+// b) %P0<def> = C2_or %P3<kill>, %P0<kill>
+// c) %P3<def> = C2_tfrrp %R23
+// d) %R1<def> = C2_cmovenewit %P3, 4
+// }
+// The P3 from a) and d) will be complements after
+// a)'s P3 is converted to .new form
+// Anti-dep between c) and b) is irrelevant for this case
+bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI,
+ unsigned DepReg) {
+ SUnit *PacketSUDep = MIToSUnit.find(MI)->second;
+
+ for (auto I : CurrentPacketMIs) {
+ // We only care for dependencies to predicated instructions
+ if (!HII->isPredicated(I))
+ continue;
+
+ // Scheduling Unit for current insn in the packet
+ SUnit *PacketSU = MIToSUnit.find(I)->second;
+
+ // Look at dependencies between current members of the packet and
+ // predicate defining instruction MI. Make sure that dependency is
+ // on the exact register we care about.
+ if (PacketSU->isSucc(PacketSUDep)) {
+ for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
+ auto &Dep = PacketSU->Succs[i];
+ if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti &&
+ Dep.getReg() == DepReg)
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+
+/// Gets the predicate register of a predicated instruction.
+static unsigned getPredicatedRegister(MachineInstr *MI,
+ const HexagonInstrInfo *QII) {
+ /// We use the following rule: The first predicate register that is a use is
+ /// the predicate register of a predicated instruction.
+ assert(QII->isPredicated(MI) && "Must be predicated instruction");
+
+ for (auto &Op : MI->operands()) {
+ if (Op.isReg() && Op.getReg() && Op.isUse() &&
+ Hexagon::PredRegsRegClass.contains(Op.getReg()))
+ return Op.getReg();
+ }
+
+ llvm_unreachable("Unknown instruction operand layout");
+ return 0;
+}
+
+// Given two predicated instructions, this function detects whether
+// the predicates are complements.
+bool HexagonPacketizerList::arePredicatesComplements(MachineInstr *MI1,
+ MachineInstr *MI2) {
+ // If we don't know the predicate sense of the instructions, bail out early;
+ // we need it later.
+ if (getPredicateSense(MI1, HII) == PK_Unknown ||
+ getPredicateSense(MI2, HII) == PK_Unknown)
+ return false;
+
+ // Scheduling unit for candidate.
+ SUnit *SU = MIToSUnit[MI1];
+
+ // One corner case deals with the following scenario:
+ // Trying to add
+ // a) %R24<def> = A2_tfrt %P0, %R25
+ // to this packet:
+ // {
+ // b) %R25<def> = A2_tfrf %P0, %R24
+ // c) %P0<def> = C2_cmpeqi %R26, 1
+ // }
+ //
+ // On general check a) and b) are complements, but presence of c) will
+ // convert a) to .new form, and then it is not a complement.
+ // We attempt to detect it by analyzing existing dependencies in the packet.
+
+ // Analyze relationships between all existing members of the packet.
+ // Look for an anti dependency on the same predicate reg as used in the
+ // candidate.
+ for (auto I : CurrentPacketMIs) {
+ // Scheduling Unit for current insn in the packet.
+ SUnit *PacketSU = MIToSUnit.find(I)->second;
+
+ // If this instruction in the packet is succeeded by the candidate...
+ if (PacketSU->isSucc(SU)) {
+ for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
+ auto Dep = PacketSU->Succs[i];
+ // The corner case exist when there is true data dependency between
+ // candidate and one of current packet members, this dep is on
+ // predicate reg, and there already exist anti dep on the same pred in
+ // the packet.
+ if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data &&
+ Hexagon::PredRegsRegClass.contains(Dep.getReg())) {
+ // Here I know that I is predicate setting instruction with true
+ // data dep to candidate on the register we care about - c) in the
+ // above example. Now I need to see if there is an anti dependency
+ // from c) to any other instruction in the same packet on the pred
+ // reg of interest.
+ if (restrictingDepExistInPacket(I, Dep.getReg()))
+ return false;
+ }
+ }
+ }
+ }
+
+ // If the above case does not apply, check regular complement condition.
+ // Check that the predicate register is the same and that the predicate
+ // sense is different. We also need to differentiate .old vs. .new: !p0
+ // is not complementary to p0.new.
+ unsigned PReg1 = getPredicatedRegister(MI1, HII);
+ unsigned PReg2 = getPredicatedRegister(MI2, HII);
+ return PReg1 == PReg2 &&
+ Hexagon::PredRegsRegClass.contains(PReg1) &&
+ Hexagon::PredRegsRegClass.contains(PReg2) &&
+ getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) &&
+ HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2);
+}
+
+// Initialize packetizer flags.
+void HexagonPacketizerList::initPacketizerState() {
+ Dependence = false;
+ PromotedToDotNew = false;
+ GlueToNewValueJump = false;
+ GlueAllocframeStore = false;
+ FoundSequentialDependence = false;
+}
+
+// Ignore bundling of pseudo instructions.
+bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr *MI,
+ const MachineBasicBlock*) {
+ if (MI->isDebugValue())
+ return true;
+
+ if (MI->isCFIInstruction())
+ return false;
+
+ // We must print out inline assembly.
+ if (MI->isInlineAsm())
+ return false;
+
+ if (MI->isImplicitDef())
+ return false;
+
+ // We check if MI has any functional units mapped to it. If it doesn't,
+ // we ignore the instruction.
+ const MCInstrDesc& TID = MI->getDesc();
+ auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass());
+ unsigned FuncUnits = IS->getUnits();
+ return !FuncUnits;
+}
+
+bool HexagonPacketizerList::isSoloInstruction(const MachineInstr *MI) {
+ if (MI->isEHLabel() || MI->isCFIInstruction())
+ return true;
+
+ // Consider inline asm to not be a solo instruction by default.
+ // Inline asm will be put in a packet temporarily, but then it will be
+ // removed, and placed outside of the packet (before or after, depending
+ // on dependencies). This is to reduce the impact of inline asm as a
+ // "packet splitting" instruction.
+ if (MI->isInlineAsm() && !ScheduleInlineAsm)
+ return true;
+
+ // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
+ // trap, pause, barrier, icinva, isync, and syncht are solo instructions.
+ // They must not be grouped with other instructions in a packet.
+ if (isSchedBarrier(MI))
+ return true;
+
+ if (HII->isSolo(MI))
+ return true;
+
+ if (MI->getOpcode() == Hexagon::A2_nop)
+ return true;
+
+ return false;
+}
+
+
+// Quick check if instructions MI and MJ cannot coexist in the same packet.
+// Limit the tests to be "one-way", e.g. "if MI->isBranch and MJ->isInlineAsm",
+// but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm".
+// For full test call this function twice:
+// cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI)
+// Doing the test only one way reduces the amount of code in this function,
+// since every test would need to be repeated with the MI and MJ reversed.
+static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ,
+ const HexagonInstrInfo &HII) {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() &&
+ HII.isHVXMemWithAIndirect(MI, MJ))
+ return true;
+
+ // An inline asm cannot be together with a branch, because we may not be
+ // able to remove the asm out after packetizing (i.e. if the asm must be
+ // moved past the bundle). Similarly, two asms cannot be together to avoid
+ // complications when determining their relative order outside of a bundle.
+ if (MI->isInlineAsm())
+ return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() ||
+ MJ->isCall() || MJ->isTerminator();
+
+ // "False" really means that the quick check failed to determine if
+ // I and J cannot coexist.
+ return false;
+}
+
+
+// Full, symmetric check.
+bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI,
+ const MachineInstr *MJ) {
+ return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII);
+}
+
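+// Move instructions that must remain solo (debug values and, by default,
+// inline asm) out of the bundles they were temporarily placed in during
+// packetization.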
+void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) {
+ for (auto &B : MF) {
+ MachineBasicBlock::iterator BundleIt;
+ MachineBasicBlock::instr_iterator NextI;
+ for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) {
+ NextI = std::next(I);
+ MachineInstr *MI = &*I;
+ if (MI->isBundle())
+ BundleIt = I;
+ if (!MI->isInsideBundle())
+ continue;
+
+ // Decide on where to insert the instruction that we are pulling out.
+ // Debug instructions always go before the bundle, but the placement of
+ // INLINE_ASM depends on potential dependencies. By default, try to
+ // put it before the bundle, but if the asm writes to a register that
+ // other instructions in the bundle read, then we need to place it
+ // after the bundle (to preserve the bundle semantics).
+ bool InsertBeforeBundle;
+ if (MI->isInlineAsm())
+ InsertBeforeBundle = !hasWriteToReadDep(MI, BundleIt, HRI);
+ else if (MI->isDebugValue())
+ InsertBeforeBundle = true;
+ else
+ continue;
+
+ BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle);
+ }
+ }
+}
+
+// Check if a given instruction is of class "system".
+static bool isSystemInstr(const MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::Y2_barrier:
+ case Hexagon::Y2_dcfetchbo:
+ return true;
+ }
+ return false;
+}
+
+bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I,
+ const MachineInstr *J) {
+ // The dependence graph may not include edges between dead definitions,
+ // so without extra checks, we could end up packetizing two instructions
+ // defining the same (dead) register.
+ if (I->isCall() || J->isCall())
+ return false;
+ if (HII->isPredicated(I) || HII->isPredicated(J))
+ return false;
+
+ BitVector DeadDefs(Hexagon::NUM_TARGET_REGS);
+ for (auto &MO : I->operands()) {
+ if (!MO.isReg() || !MO.isDef() || !MO.isDead())
+ continue;
+ DeadDefs[MO.getReg()] = true;
+ }
+
+ for (auto &MO : J->operands()) {
+ if (!MO.isReg() || !MO.isDef() || !MO.isDead())
+ continue;
+ unsigned R = MO.getReg();
+ if (R != Hexagon::USR_OVF && DeadDefs[R])
+ return true;
+ }
+ return false;
+}
+
+bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I,
+ const MachineInstr *J) {
+ // A call that saves the callee-saved registers can only be in a packet
+ // with instructions that don't write to the callee-saved registers.
+ if ((HII->isSaveCalleeSavedRegsCall(I) &&
+ doesModifyCalleeSavedReg(J, HRI)) ||
+ (HII->isSaveCalleeSavedRegsCall(J) &&
+ doesModifyCalleeSavedReg(I, HRI)))
+ return true;
+
+ // Two control flow instructions cannot go in the same packet.
+ if (isControlFlow(I) && isControlFlow(J))
+ return true;
+
+ // Reference manual (7.3.4): a loop setup packet in loopN or spNloop0 cannot
+ // contain a speculative indirect jump, a new-value compare jump, or a
+ // dealloc_return.
+ auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool {
+ if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI))
+ return true;
+ if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI))
+ return true;
+ return false;
+ };
+
+ if (HII->isLoopN(I) && isBadForLoopN(J))
+ return true;
+ if (HII->isLoopN(J) && isBadForLoopN(I))
+ return true;
+
+ // dealloc_return cannot appear in the same packet as a conditional or
+ // unconditional jump.
+ return HII->isDeallocRet(I) &&
+ (J->isBranch() || J->isCall() || J->isBarrier());
+}
+
+bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I,
+ const MachineInstr *J) {
+ bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
+ bool StoreI = I->mayStore(), StoreJ = J->mayStore();
+ if ((SysI && StoreJ) || (SysJ && StoreI))
+ return true;
+
+ if (StoreI && StoreJ) {
+ if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I))
+ return true;
+ } else {
+ // A memop cannot be in the same packet as another memop or a store.
+ // Two stores can be together, but in this branch I and J are not both
+ // stores.
+ bool MopStI = HII->isMemOp(I) || StoreI;
+ bool MopStJ = HII->isMemOp(J) || StoreJ;
+ if (MopStI && MopStJ)
+ return true;
+ }
+
+ return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J));
+}
+
+// SUI is the candidate instruction, currently outside of the packet being
+// built.
+// SUJ is an instruction already inside the current packet, against which SUI
+// is checked for packetization.
+bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
+ MachineInstr *I = SUI->getInstr();
+ MachineInstr *J = SUJ->getInstr();
+ assert(I && J && "Unable to packetize null instruction!");
+
+ // Clear IgnoreDepMIs when Packet starts.
+ if (CurrentPacketMIs.size() == 1)
+ IgnoreDepMIs.clear();
+
+ MachineBasicBlock::iterator II = I;
+ const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
+
+ // Solo instructions cannot go in the packet.
+ assert(!isSoloInstruction(I) && "Unexpected solo instr!");
+
+ if (cannotCoexist(I, J))
+ return false;
+
+ Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J);
+ if (Dependence)
+ return false;
+
+ // V4 allows dual stores. It does not allow a second store if the first
+ // store is not in SLOT0. New-value stores, new-value jumps, dealloc_return
+ // and memops always take SLOT0. Arch spec 3.4.4.2.
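+ // For example, a packet with two plain word stores is legal on V4+, but a
+ // memop (e.g. memw(r0+#0) += r1) or a new-value store cannot share a
+ // packet with any other store or memop, since both must occupy SLOT0.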
+ Dependence = hasV4SpecificDependence(I, J);
+ if (Dependence)
+ return false;
+
+ // If an instruction feeds a new-value jump, glue them into the same packet.
+ MachineBasicBlock::iterator NextMII = I;
+ ++NextMII;
+ if (NextMII != I->getParent()->end() && HII->isNewValueJump(NextMII)) {
+ MachineInstr *NextMI = NextMII;
+
+ bool secondRegMatch = false;
+ const MachineOperand &NOp0 = NextMI->getOperand(0);
+ const MachineOperand &NOp1 = NextMI->getOperand(1);
+
+ if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg())
+ secondRegMatch = true;
+
+ for (auto I : CurrentPacketMIs) {
+ SUnit *PacketSU = MIToSUnit.find(I)->second;
+ MachineInstr *PI = PacketSU->getInstr();
+ // An NVJ cannot be part of a dual jump - Arch Spec: section 7.8.
+ if (PI->isCall()) {
+ Dependence = true;
+ break;
+ }
+ // Validate:
+ // 1. Packet does not have a store in it.
+ // 2. If the first operand of the nvj is newified, and the second
+ // operand is also a reg, it (second reg) is not defined in
+ // the same packet.
+ // 3. If the second operand of the nvj is newified, (which means
+ // first operand is also a reg), first reg is not defined in
+ // the same packet.
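+ // Illustrative case for #2: in "if (cmp.eq(r1.new, r2)) jump:t target",
+ // r1 is the newified operand, so r2 must not be defined by any
+ // instruction already in the packet.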
+ if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() ||
+ HII->isLoopN(PI)) {
+ Dependence = true;
+ break;
+ }
+ // Check #2/#3.
+ const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1;
+ if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) {
+ Dependence = true;
+ break;
+ }
+ }
+
+ if (Dependence)
+ return false;
+ GlueToNewValueJump = true;
+ }
+
+ // There is no dependency between a prolog instruction and its successor.
+ if (!SUJ->isSucc(SUI))
+ return true;
+
+ for (unsigned i = 0; i < SUJ->Succs.size(); ++i) {
+ if (FoundSequentialDependence)
+ break;
+
+ if (SUJ->Succs[i].getSUnit() != SUI)
+ continue;
+
+ SDep::Kind DepType = SUJ->Succs[i].getKind();
+ // For direct calls:
+ // Ignore register dependences for call instructions for packetization
+ // purposes except for those due to r31 and predicate registers.
+ //
+ // For indirect calls:
+ // Same as direct calls + check for true dependences to the register
+ // used in the indirect call.
+ //
+ // We completely ignore Order dependences for call instructions.
+ //
+ // For returns:
+ // Ignore register dependences for return instructions like jumpr,
+ // dealloc return unless we have dependencies on the explicit uses
+ // of the registers used by jumpr (like r31) or dealloc return
+ // (like r29 or r30).
+ //
+ // TODO: Currently, jumpr only handles a return through r31, so the
+ // following logic (specifically isCallDependent) works fine.
+ // Once jumpr is enabled for registers other than r31, the last part of
+ // isCallDependent, which handles the indirect-call case, needs to be
+ // reworked. Bug 6216 is opened for this.
+ unsigned DepReg = 0;
+ const TargetRegisterClass *RC = nullptr;
+ if (DepType == SDep::Data) {
+ DepReg = SUJ->Succs[i].getReg();
+ RC = HRI->getMinimalPhysRegClass(DepReg);
+ }
+
+ if (I->isCall() || I->isReturn()) {
+ if (!isRegDependence(DepType))
+ continue;
+ if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg()))
+ continue;
+ }
+
+ if (DepType == SDep::Data) {
+ if (canPromoteToDotCur(J, SUJ, DepReg, II, RC))
+ if (promoteToDotCur(J, DepType, II, RC))
+ continue;
+ }
+
+ // A data dependence is OK if we have a load.cur.
+ if (DepType == SDep::Data && HII->isDotCurInst(J)) {
+ if (HII->isV60VectorInstruction(I))
+ continue;
+ }
+
+ // For instructions that can be promoted to dot-new, try to promote.
+ if (DepType == SDep::Data) {
+ if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) {
+ if (promoteToDotNew(I, DepType, II, RC)) {
+ PromotedToDotNew = true;
+ continue;
+ }
+ }
+ if (HII->isNewValueJump(I))
+ continue;
+ }
+
+ // For predicated instructions, if the predicates are complements then
+ // there can be no dependence.
+ if (HII->isPredicated(I) && HII->isPredicated(J) &&
+ arePredicatesComplements(I, J)) {
+ // Not always safe to do this translation.
+ // DAG Builder attempts to reduce dependence edges using transitive
+ // nature of dependencies. Here is an example:
+ //
+ // r0 = tfr_pt ... (1)
+ // r0 = tfr_pf ... (2)
+ // r0 = tfr_pt ... (3)
+ //
+ // There will be an output dependence between (1)->(2) and (2)->(3).
+ // However, there is no dependence edge between (1)->(3). This results
+ // in all 3 instructions going in the same packet. We ignore the dependence
+ // only once to avoid this situation.
+ auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J);
+ if (Itr != IgnoreDepMIs.end()) {
+ Dependence = true;
+ return false;
+ }
+ IgnoreDepMIs.push_back(I);
+ continue;
+ }
+
+ // Ignore Order dependences between unconditional direct branches
+ // and non-control-flow instructions.
+ if (isDirectJump(I) && !J->isBranch() && !J->isCall() &&
+ DepType == SDep::Order)
+ continue;
+
+ // Ignore all dependences for jumps except for true and output
+ // dependences.
+ if (I->isConditionalBranch() && DepType != SDep::Data &&
+ DepType != SDep::Output)
+ continue;
+
+ // Ignore output dependences due to superregs. We can write to two
+ // different subregisters of R1:0 for instance in the same cycle.
+
+ // If neither I nor J defines DepReg, then this is a superfluous output
+ // dependence. The dependence must be of the form:
+ // R0 = ...
+ // R1 = ...
+ // and there is an output dependence between the two instructions with
+ // DepReg = D0.
+ // We want to ignore these dependences. Ideally, the dependence
+ // constructor should annotate such dependences. We can then avoid this
+ // relatively expensive check.
+ //
+ if (DepType == SDep::Output) {
+ // DepReg is the register that's responsible for the dependence.
+ unsigned DepReg = SUJ->Succs[i].getReg();
+
+ // Check whether I or J really defines DepReg.
+ if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg))
+ continue;
+ FoundSequentialDependence = true;
+ break;
+ }
+
+ // For Order dependences:
+ // 1. On V4 or later, volatile loads/stores can be packetized together,
+ // unless other rules prevent it.
+ // 2. Store followed by a load is not allowed.
+ // 3. Store followed by a store is only valid on V4 or later.
+ // 4. Load followed by any memory operation is allowed.
+ if (DepType == SDep::Order) {
+ if (!PacketizeVolatiles) {
+ bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef();
+ if (OrdRefs) {
+ FoundSequentialDependence = true;
+ break;
+ }
+ }
+ // J is first, I is second.
+ bool LoadJ = J->mayLoad(), StoreJ = J->mayStore();
+ bool LoadI = I->mayLoad(), StoreI = I->mayStore();
+ if (StoreJ) {
+ // Two stores are only allowed on V4+. Load following store is never
+ // allowed.
+ if (LoadI) {
+ FoundSequentialDependence = true;
+ break;
+ }
+ } else if (!LoadJ || (!LoadI && !StoreI)) {
+ // If J is neither load nor store, assume a dependency.
+ // If J is a load, but I is neither, also assume a dependency.
+ FoundSequentialDependence = true;
+ break;
+ }
+ // Store followed by store: not OK on V2, OK on V4 or later.
+ // Store followed by load: never OK.
+ // Load followed by store: always OK.
+ // Load followed by load: always OK.
+ continue;
+ }
+
+ // For V4, special-case ALLOCFRAME. Even though there is a dependency
+ // between ALLOCFRAME and the subsequent store, allow them to be packetized
+ // in the same packet. This implies that the store is using the caller's
+ // SP, so its offset needs to be updated accordingly.
+ if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) {
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Hexagon::S2_storerd_io:
+ case Hexagon::S2_storeri_io:
+ case Hexagon::S2_storerh_io:
+ case Hexagon::S2_storerb_io:
+ if (I->getOperand(0).getReg() == HRI->getStackRegister()) {
+ int64_t Imm = I->getOperand(1).getImm();
+ int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE);
+ if (HII->isValidOffset(Opc, NewOff)) {
+ GlueAllocframeStore = true;
+ // Since this store is to be glued with allocframe in the same
+ // packet, it will use SP of the previous stack frame, i.e.
+ // caller's SP. Therefore, we need to recalculate offset
+ // according to this change.
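+ // Illustrative arithmetic: with FrameSize = 24 and HEXAGON_LRFP_SIZE = 8,
+ // a store at offset #8 from the new SP is rewritten to #-24 relative to
+ // the caller's SP (8 - (24 + 8)).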
+ I->getOperand(1).setImm(NewOff);
+ continue;
+ }
+ }
+ default:
+ break;
+ }
+ }
+
+ // Skip over anti-dependences. Two instructions that are anti-dependent
+ // can share a packet.
+ if (DepType != SDep::Anti) {
+ FoundSequentialDependence = true;
+ break;
+ }
+ }
+
+ if (FoundSequentialDependence) {
+ Dependence = true;
+ return false;
+ }
+
+ return true;
+}
+
+bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
+ MachineInstr *I = SUI->getInstr();
+ MachineInstr *J = SUJ->getInstr();
+ assert(I && J && "Unable to packetize null instruction!");
+
+ if (cannotCoexist(I, J))
+ return false;
+
+ if (!Dependence)
+ return true;
+
+ // Check if the instruction was promoted to a dot-new. If so, demote it
+ // back into a dot-old.
+ if (PromotedToDotNew)
+ demoteToDotOld(I);
+
+ cleanUpDotCur();
+ // Check if the instruction (must be a store) was glued with an allocframe
+ // instruction. If so, restore its offset to its original value, i.e. use
+ // current SP instead of caller's SP.
+ if (GlueAllocframeStore) {
+ unsigned FrameSize = MF.getFrameInfo()->getStackSize();
+ MachineOperand &MOff = I->getOperand(1);
+ MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE);
+ }
+ return false;
+}
+
+
+MachineBasicBlock::iterator
+HexagonPacketizerList::addToPacket(MachineInstr *MI) {
+ MachineBasicBlock::iterator MII = MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ if (MI->isImplicitDef()) {
+ unsigned R = MI->getOperand(0).getReg();
+ if (Hexagon::IntRegsRegClass.contains(R)) {
+ MCSuperRegIterator S(R, HRI, false);
+ MI->addOperand(MachineOperand::CreateReg(*S, true, true));
+ }
+ return MII;
+ }
+ assert(ResourceTracker->canReserveResources(MI));
+
+ bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI);
+ bool Good = true;
+
+ if (GlueToNewValueJump) {
+ MachineInstr *NvjMI = ++MII;
+ // We need to put both instructions in the same packet: MI and NvjMI.
+ // Either of them can require a constant extender. Try to add both to
+ // the current packet, and if that fails, end the packet and start a
+ // new one.
+ ResourceTracker->reserveResources(MI);
+ if (ExtMI)
+ Good = tryAllocateResourcesForConstExt(true);
+
+ bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI);
+ if (Good) {
+ if (ResourceTracker->canReserveResources(NvjMI))
+ ResourceTracker->reserveResources(NvjMI);
+ else
+ Good = false;
+ }
+ if (Good && ExtNvjMI)
+ Good = tryAllocateResourcesForConstExt(true);
+
+ if (!Good) {
+ endPacket(MBB, MI);
+ assert(ResourceTracker->canReserveResources(MI));
+ ResourceTracker->reserveResources(MI);
+ if (ExtMI) {
+ assert(canReserveResourcesForConstExt());
+ tryAllocateResourcesForConstExt(true);
+ }
+ assert(ResourceTracker->canReserveResources(NvjMI));
+ ResourceTracker->reserveResources(NvjMI);
+ if (ExtNvjMI) {
+ assert(canReserveResourcesForConstExt());
+ reserveResourcesForConstExt();
+ }
+ }
+ CurrentPacketMIs.push_back(MI);
+ CurrentPacketMIs.push_back(NvjMI);
+ return MII;
+ }
+
+ ResourceTracker->reserveResources(MI);
+ if (ExtMI && !tryAllocateResourcesForConstExt(true)) {
+ endPacket(MBB, MI);
+ if (PromotedToDotNew)
+ demoteToDotOld(MI);
+ ResourceTracker->reserveResources(MI);
+ reserveResourcesForConstExt();
+ }
+
+ CurrentPacketMIs.push_back(MI);
+ return MII;
+}
+
+void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ OldPacketMIs = CurrentPacketMIs;
+ VLIWPacketizerList::endPacket(MBB, MI);
+}
+
+bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr *MI) {
+ return !producesStall(MI);
+}
+
+
+// Return true when ConsMI uses a register defined by ProdMI.
+static bool isDependent(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) {
+ if (!ProdMI->getOperand(0).isReg())
+ return false;
+ unsigned DstReg = ProdMI->getOperand(0).getReg();
+
+ for (auto &Op : ConsMI->operands())
+ if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg)
+ // The MIs depend on each other.
+ return true;
+
+ return false;
+}
+
+// V60 forward scheduling.
+bool HexagonPacketizerList::producesStall(const MachineInstr *I) {
+ // Check whether the previous packet is in a different loop. If this is the
+ // case, there is little point in trying to avoid a stall because that would
+ // favor the rare case (loop entry) over the common case (loop iteration).
+ //
+ // TODO: We should really be able to check all the incoming edges if this is
+ // the first packet in a basic block, so we can avoid stalls from the loop
+ // backedge.
+ if (!OldPacketMIs.empty()) {
+ auto *OldBB = OldPacketMIs.front()->getParent();
+ auto *ThisBB = I->getParent();
+ if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB))
+ return false;
+ }
+
+ // Check for stall between two vector instructions.
+ if (HII->isV60VectorInstruction(I)) {
+ for (auto J : OldPacketMIs) {
+ if (!HII->isV60VectorInstruction(J))
+ continue;
+ if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I))
+ return true;
+ }
+ return false;
+ }
+
+ // Check for a stall between two scalar instructions. First, check whether
+ // an operand of I is defined by an instruction in the current packet; if
+ // so, it may be a candidate for .new and is not treated as a stall.
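+ // E.g., if the current packet contains "r2 = add(r0, r1)" and I uses r2,
+ // r2 may be consumed as r2.new, so this is not treated as a stall.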
+ for (auto J : CurrentPacketMIs)
+ if (!HII->isV60VectorInstruction(J) && isDependent(J, I))
+ return false;
+
+ // Check for stall between I and instructions in the previous packet.
+ if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) {
+ for (auto J : OldPacketMIs) {
+ if (HII->isV60VectorInstruction(J))
+ continue;
+ if (!HII->isLateInstrFeedsEarlyInstr(J, I))
+ continue;
+ if (isDependent(J, I) && !HII->canExecuteInBundle(J, I))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonPacketizer() {
+ return new HexagonPacketizer();
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
new file mode 100644
index 0000000..960cf6c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -0,0 +1,114 @@
+#ifndef HEXAGONVLIWPACKETIZER_H
+#define HEXAGONVLIWPACKETIZER_H
+
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+
+namespace llvm {
+class HexagonPacketizerList : public VLIWPacketizerList {
+ // Instructions in the packet that was most recently finalized (the
+ // previous packet).
+ std::vector<MachineInstr*> OldPacketMIs;
+
+ // Has the instruction been promoted to a dot-new instruction.
+ bool PromotedToDotNew;
+
+ // Has the instruction been glued to allocframe.
+ bool GlueAllocframeStore;
+
+ // Has the feeder instruction been glued to new value jump.
+ bool GlueToNewValueJump;
+
+ // Check if there is a dependence between some instruction already in this
+ // packet and this instruction.
+ bool Dependence;
+
+ // Only check for dependence if there are resources available to
+ // schedule this instruction.
+ bool FoundSequentialDependence;
+
+ // Track MIs with ignored dependence.
+ std::vector<MachineInstr*> IgnoreDepMIs;
+
+protected:
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+ const MachineLoopInfo *MLI;
+
+private:
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
+
+public:
+ // Ctor.
+ HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+ AliasAnalysis *AA,
+ const MachineBranchProbabilityInfo *MBPI);
+
+ // initPacketizerState - initialize some internal flags.
+ void initPacketizerState() override;
+
+ // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+ bool ignorePseudoInstruction(const MachineInstr *MI,
+ const MachineBasicBlock *MBB) override;
+
+ // isSoloInstruction - return true if instruction MI cannot be packetized
+ // with any other instruction, which means that MI itself is a packet.
+ bool isSoloInstruction(const MachineInstr *MI) override;
+
+ // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+ // together.
+ bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override;
+
+ // isLegalToPruneDependencies - Is it legal to prune the dependence between
+ // SUI and SUJ?
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override;
+
+ MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override;
+ void endPacket(MachineBasicBlock *MBB, MachineInstr *MI) override;
+ bool shouldAddToPacket(const MachineInstr *MI) override;
+
+ void unpacketizeSoloInstrs(MachineFunction &MF);
+
+protected:
+ bool isCallDependent(const MachineInstr* MI, SDep::Kind DepType,
+ unsigned DepReg);
+ bool promoteToDotCur(MachineInstr* MI, SDep::Kind DepType,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool canPromoteToDotCur(const MachineInstr* MI, const SUnit* PacketSU,
+ unsigned DepReg, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ void cleanUpDotCur();
+
+ bool promoteToDotNew(MachineInstr* MI, SDep::Kind DepType,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool canPromoteToDotNew(const MachineInstr* MI, const SUnit* PacketSU,
+ unsigned DepReg, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool canPromoteToNewValue(const MachineInstr* MI, const SUnit* PacketSU,
+ unsigned DepReg, MachineBasicBlock::iterator &MII);
+ bool canPromoteToNewValueStore(const MachineInstr* MI,
+ const MachineInstr* PacketMI, unsigned DepReg);
+ bool demoteToDotOld(MachineInstr* MI);
+ bool arePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2);
+ bool restrictingDepExistInPacket(MachineInstr*, unsigned);
+ bool isNewifiable(const MachineInstr *MI);
+ bool isCurifiable(MachineInstr* MI);
+ bool cannotCoexist(const MachineInstr *MI, const MachineInstr *MJ);
+ inline bool isPromotedToDotNew() const {
+ return PromotedToDotNew;
+ }
+ bool tryAllocateResourcesForConstExt(bool Reserve);
+ bool canReserveResourcesForConstExt();
+ void reserveResourcesForConstExt();
+ bool hasDeadDependence(const MachineInstr *I, const MachineInstr *J);
+ bool hasControlDependence(const MachineInstr *I, const MachineInstr *J);
+ bool hasV4SpecificDependence(const MachineInstr *I, const MachineInstr *J);
+ bool producesStall(const MachineInstr *MI);
+};
+} // namespace llvm
+#endif // HEXAGONVLIWPACKETIZER_H
+
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
new file mode 100644
index 0000000..b73af82
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -0,0 +1,360 @@
+//===-- HexagonAsmBackend.cpp - Hexagon Assembler Backend -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonFixupKinds.h"
+#include "HexagonMCTargetDesc.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+using namespace Hexagon;
+
+#define DEBUG_TYPE "hexagon-asm-backend"
+
+namespace {
+
+class HexagonAsmBackend : public MCAsmBackend {
+ uint8_t OSABI;
+ StringRef CPU;
+ mutable uint64_t relaxedCnt;
+ std::unique_ptr <MCInstrInfo> MCII;
+ std::unique_ptr <MCInst *> RelaxTarget;
+ MCInst * Extender;
+public:
+ HexagonAsmBackend(Target const &T, uint8_t OSABI, StringRef CPU) :
+ OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *),
+ Extender(nullptr) {}
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ return createHexagonELFObjectWriter(OS, OSABI, CPU);
+ }
+
+ void setExtender(MCContext &Context) const {
+ if (Extender == nullptr)
+ const_cast<HexagonAsmBackend *>(this)->Extender = new (Context) MCInst;
+ }
+
+ MCInst *takeExtender() const {
+ assert(Extender != nullptr);
+ MCInst * Result = Extender;
+ const_cast<HexagonAsmBackend *>(this)->Extender = nullptr;
+ return Result;
+ }
+
+ unsigned getNumFixupKinds() const override {
+ return Hexagon::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
+ const static MCFixupKindInfo Infos[Hexagon::NumTargetFixupKinds] = {
+ // This table *must* be in the same order as the fixup_* kinds in
+ // HexagonFixupKinds.h.
+ //
+ // name offset bits flags
+ {"fixup_Hexagon_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_B15_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_B7_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_LO16", 0, 32, 0},
+ {"fixup_Hexagon_HI16", 0, 32, 0},
+ {"fixup_Hexagon_32", 0, 32, 0},
+ {"fixup_Hexagon_16", 0, 32, 0},
+ {"fixup_Hexagon_8", 0, 32, 0},
+ {"fixup_Hexagon_GPREL16_0", 0, 32, 0},
+ {"fixup_Hexagon_GPREL16_1", 0, 32, 0},
+ {"fixup_Hexagon_GPREL16_2", 0, 32, 0},
+ {"fixup_Hexagon_GPREL16_3", 0, 32, 0},
+ {"fixup_Hexagon_HL16", 0, 32, 0},
+ {"fixup_Hexagon_B13_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_B9_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_B32_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_B22_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_B15_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_B13_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_B9_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_B7_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_16_X", 0, 32, 0},
+ {"fixup_Hexagon_12_X", 0, 32, 0},
+ {"fixup_Hexagon_11_X", 0, 32, 0},
+ {"fixup_Hexagon_10_X", 0, 32, 0},
+ {"fixup_Hexagon_9_X", 0, 32, 0},
+ {"fixup_Hexagon_8_X", 0, 32, 0},
+ {"fixup_Hexagon_7_X", 0, 32, 0},
+ {"fixup_Hexagon_6_X", 0, 32, 0},
+ {"fixup_Hexagon_32_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_COPY", 0, 32, 0},
+ {"fixup_Hexagon_GLOB_DAT", 0, 32, 0},
+ {"fixup_Hexagon_JMP_SLOT", 0, 32, 0},
+ {"fixup_Hexagon_RELATIVE", 0, 32, 0},
+ {"fixup_Hexagon_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_GOTREL_LO16", 0, 32, 0},
+ {"fixup_Hexagon_GOTREL_HI16", 0, 32, 0},
+ {"fixup_Hexagon_GOTREL_32", 0, 32, 0},
+ {"fixup_Hexagon_GOT_LO16", 0, 32, 0},
+ {"fixup_Hexagon_GOT_HI16", 0, 32, 0},
+ {"fixup_Hexagon_GOT_32", 0, 32, 0},
+ {"fixup_Hexagon_GOT_16", 0, 32, 0},
+ {"fixup_Hexagon_DTPMOD_32", 0, 32, 0},
+ {"fixup_Hexagon_DTPREL_LO16", 0, 32, 0},
+ {"fixup_Hexagon_DTPREL_HI16", 0, 32, 0},
+ {"fixup_Hexagon_DTPREL_32", 0, 32, 0},
+ {"fixup_Hexagon_DTPREL_16", 0, 32, 0},
+ {"fixup_Hexagon_GD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_LD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_GD_GOT_LO16", 0, 32, 0},
+ {"fixup_Hexagon_GD_GOT_HI16", 0, 32, 0},
+ {"fixup_Hexagon_GD_GOT_32", 0, 32, 0},
+ {"fixup_Hexagon_GD_GOT_16", 0, 32, 0},
+ {"fixup_Hexagon_LD_GOT_LO16", 0, 32, 0},
+ {"fixup_Hexagon_LD_GOT_HI16", 0, 32, 0},
+ {"fixup_Hexagon_LD_GOT_32", 0, 32, 0},
+ {"fixup_Hexagon_LD_GOT_16", 0, 32, 0},
+ {"fixup_Hexagon_IE_LO16", 0, 32, 0},
+ {"fixup_Hexagon_IE_HI16", 0, 32, 0},
+ {"fixup_Hexagon_IE_32", 0, 32, 0},
+ {"fixup_Hexagon_IE_16", 0, 32, 0},
+ {"fixup_Hexagon_IE_GOT_LO16", 0, 32, 0},
+ {"fixup_Hexagon_IE_GOT_HI16", 0, 32, 0},
+ {"fixup_Hexagon_IE_GOT_32", 0, 32, 0},
+ {"fixup_Hexagon_IE_GOT_16", 0, 32, 0},
+ {"fixup_Hexagon_TPREL_LO16", 0, 32, 0},
+ {"fixup_Hexagon_TPREL_HI16", 0, 32, 0},
+ {"fixup_Hexagon_TPREL_32", 0, 32, 0},
+ {"fixup_Hexagon_TPREL_16", 0, 32, 0},
+ {"fixup_Hexagon_6_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_Hexagon_GOTREL_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_GOTREL_16_X", 0, 32, 0},
+ {"fixup_Hexagon_GOTREL_11_X", 0, 32, 0},
+ {"fixup_Hexagon_GOT_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_GOT_16_X", 0, 32, 0},
+ {"fixup_Hexagon_GOT_11_X", 0, 32, 0},
+ {"fixup_Hexagon_DTPREL_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_DTPREL_16_X", 0, 32, 0},
+ {"fixup_Hexagon_DTPREL_11_X", 0, 32, 0},
+ {"fixup_Hexagon_GD_GOT_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_GD_GOT_16_X", 0, 32, 0},
+ {"fixup_Hexagon_GD_GOT_11_X", 0, 32, 0},
+ {"fixup_Hexagon_LD_GOT_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_LD_GOT_16_X", 0, 32, 0},
+ {"fixup_Hexagon_LD_GOT_11_X", 0, 32, 0},
+ {"fixup_Hexagon_IE_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_IE_16_X", 0, 32, 0},
+ {"fixup_Hexagon_IE_GOT_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_IE_GOT_16_X", 0, 32, 0},
+ {"fixup_Hexagon_IE_GOT_11_X", 0, 32, 0},
+ {"fixup_Hexagon_TPREL_32_6_X", 0, 32, 0},
+ {"fixup_Hexagon_TPREL_16_X", 0, 32, 0},
+ {"fixup_Hexagon_TPREL_11_X", 0, 32, 0}};
+
+ if (Kind < FirstTargetFixupKind) {
+ return MCAsmBackend::getFixupKindInfo(Kind);
+ }
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ void applyFixup(MCFixup const & /*Fixup*/, char * /*Data*/,
+ unsigned /*DataSize*/, uint64_t /*Value*/,
+ bool /*IsPCRel*/) const override {
+ return;
+ }
+
+ bool isInstRelaxable(MCInst const &HMI) const {
+ const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(*MCII, HMI);
+ bool Relaxable = false;
+ // Branches and loop-setup insns are handled as necessary by relaxation.
+ if (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeJ ||
+ (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeNV &&
+ MCID.isBranch()) ||
+ (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeCR &&
+ HMI.getOpcode() != Hexagon::C4_addipc))
+ if (HexagonMCInstrInfo::isExtendable(*MCII, HMI))
+ Relaxable = true;
+
+ return Relaxable;
+ }
+
+ /// MayNeedRelaxation - Check whether the given instruction may need
+ /// relaxation.
+ ///
+ /// \param Inst - The instruction to test.
+ bool mayNeedRelaxation(MCInst const &Inst) const override {
+ assert(HexagonMCInstrInfo::isBundle(Inst));
+ bool PreviousIsExtender = false;
+ for (auto const &I : HexagonMCInstrInfo::bundleInstructions(Inst)) {
+ auto const &Inst = *I.getInst();
+ if (!PreviousIsExtender) {
+ if (isInstRelaxable(Inst))
+ return true;
+ }
+ PreviousIsExtender = HexagonMCInstrInfo::isImmext(Inst);
+ }
+ return false;
+ }
+
+ /// fixupNeedsRelaxation - Target specific predicate for whether a given
+ /// fixup requires the associated instruction to be relaxed.
+ bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ MCInst const &MCB = DF->getInst();
+ assert(HexagonMCInstrInfo::isBundle(MCB));
+
+ *RelaxTarget = nullptr;
+ MCInst &MCI = const_cast<MCInst &>(HexagonMCInstrInfo::instruction(
+ MCB, Fixup.getOffset() / HEXAGON_INSTR_SIZE));
+ // If we cannot resolve the fixup value, it requires relaxation.
+ if (!Resolved) {
+ switch ((unsigned)Fixup.getKind()) {
+ case fixup_Hexagon_B22_PCREL:
+ // GetFixupCount assumes B22 won't relax
+ // Fallthrough
+ default:
+ return false;
+ break;
+ case fixup_Hexagon_B13_PCREL:
+ case fixup_Hexagon_B15_PCREL:
+ case fixup_Hexagon_B9_PCREL:
+ case fixup_Hexagon_B7_PCREL: {
+ if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) {
+ ++relaxedCnt;
+ *RelaxTarget = &MCI;
+ setExtender(Layout.getAssembler().getContext());
+ return true;
+ } else {
+ return false;
+ }
+ break;
+ }
+ }
+ }
+ bool Relaxable = isInstRelaxable(MCI);
+ if (!Relaxable)
+ return false;
+
+ MCFixupKind Kind = Fixup.getKind();
+ int64_t sValue = Value;
+ int64_t maxValue;
+
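+ // Note (informal reading of the limits below): the branch immediates are
+ // scaled by 4, since instructions are word-aligned, so an N-bit signed
+ // field reaches roughly +/-(1 << (N + 1)) bytes (e.g. B7 -> 1 << 8).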
+ switch ((unsigned)Kind) {
+ case fixup_Hexagon_B7_PCREL:
+ maxValue = 1 << 8;
+ break;
+ case fixup_Hexagon_B9_PCREL:
+ maxValue = 1 << 10;
+ break;
+ case fixup_Hexagon_B15_PCREL:
+ maxValue = 1 << 16;
+ break;
+ case fixup_Hexagon_B22_PCREL:
+ maxValue = 1 << 23;
+ break;
+ default:
+ maxValue = INT64_MAX;
+ break;
+ }
+
+ bool isFarAway = -maxValue > sValue || sValue > maxValue - 1;
+
+ if (isFarAway) {
+ if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) {
+ ++relaxedCnt;
+ *RelaxTarget = &MCI;
+ setExtender(Layout.getAssembler().getContext());
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ /// Simple predicate for targets where !Resolved implies requiring relaxation
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced");
+ }
+
+ void relaxInstruction(MCInst const & Inst,
+ MCInst & Res) const override {
+ assert(HexagonMCInstrInfo::isBundle(Inst) &&
+ "Hexagon relaxInstruction only works on bundles");
+
+ Res = HexagonMCInstrInfo::createBundle();
+ // Copy the results into the bundle.
+ bool Update = false;
+ for (auto &I : HexagonMCInstrInfo::bundleInstructions(Inst)) {
+ MCInst &CrntHMI = const_cast<MCInst &>(*I.getInst());
+
+ // If an immediate extender is needed, add it in.
+ if (*RelaxTarget == &CrntHMI) {
+ Update = true;
+ assert((HexagonMCInstrInfo::bundleSize(Res) < HEXAGON_PACKET_SIZE) &&
+ "No room to insert extender for relaxation");
+
+ MCInst *HMIx = takeExtender();
+ *HMIx = HexagonMCInstrInfo::deriveExtender(
+ *MCII, CrntHMI,
+ HexagonMCInstrInfo::getExtendableOperand(*MCII, CrntHMI));
+ Res.addOperand(MCOperand::createInst(HMIx));
+ *RelaxTarget = nullptr;
+ }
+ // Now copy over the original instruction (the one we may have extended).
+ Res.addOperand(MCOperand::createInst(I.getInst()));
+ }
+ (void)Update;
+ assert(Update && "Didn't find relaxation target");
+ }
+
+ bool writeNopData(uint64_t Count,
+ MCObjectWriter * OW) const override {
+ static const uint32_t Nopcode = 0x7f000000, // Hard-coded NOP.
+ ParseIn = 0x00004000, // In packet parse-bits.
+ ParseEnd = 0x0000c000; // End of packet parse-bits.
+
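+ // Illustrative example: for Count == 16 with HEXAGON_PACKET_SIZE == 4,
+ // the first three nops are emitted with ParseIn and the last one with
+ // ParseEnd, closing the packet.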
+ while(Count % HEXAGON_INSTR_SIZE) {
+ DEBUG(dbgs() << "Alignment not a multiple of the instruction size:" <<
+ Count % HEXAGON_INSTR_SIZE << "/" << HEXAGON_INSTR_SIZE << "\n");
+ --Count;
+ OW->write8(0);
+ }
+
+ while(Count) {
+ Count -= HEXAGON_INSTR_SIZE;
+ // Close the packet whenever a multiple of the maximum packet size remains
+ uint32_t ParseBits = (Count % (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE))?
+ ParseIn: ParseEnd;
+ OW->write32(Nopcode | ParseBits);
+ }
+ return true;
+ }
+};
+} // end anonymous namespace
+
+namespace llvm {
+MCAsmBackend *createHexagonAsmBackend(Target const &T,
+ MCRegisterInfo const & /*MRI*/,
+ const Triple &TT, StringRef CPU) {
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
+ return new HexagonAsmBackend(T, OSABI, CPU);
+}
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
new file mode 100644
index 0000000..47a6f86
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -0,0 +1,285 @@
+//===-- HexagonBaseInfo.h - Top level definitions for Hexagon --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the Hexagon target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONBASEINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONBASEINFO_H
+
+#include "HexagonMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <stdint.h>
+
+namespace llvm {
+
+/// HexagonII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace HexagonII {
+ // *** The code below must match HexagonInstrFormat*.td *** //
+
+ // Insn types.
+ // *** Must match HexagonInstrFormat*.td ***
+ enum Type {
+ TypePSEUDO = 0,
+ TypeALU32 = 1,
+ TypeCR = 2,
+ TypeJR = 3,
+ TypeJ = 4,
+ TypeLD = 5,
+ TypeST = 6,
+ TypeSYSTEM = 7,
+ TypeXTYPE = 8,
+ TypeMEMOP = 9,
+ TypeNV = 10,
+ TypeDUPLEX = 11,
+ TypeCOMPOUND = 12,
+ TypeCVI_FIRST = 13,
+ TypeCVI_VA = TypeCVI_FIRST,
+ TypeCVI_VA_DV = 14,
+ TypeCVI_VX = 15,
+ TypeCVI_VX_DV = 16,
+ TypeCVI_VP = 17,
+ TypeCVI_VP_VS = 18,
+ TypeCVI_VS = 19,
+ TypeCVI_VINLANESAT= 20,
+ TypeCVI_VM_LD = 21,
+ TypeCVI_VM_TMP_LD = 22,
+ TypeCVI_VM_CUR_LD = 23,
+ TypeCVI_VM_VP_LDU = 24,
+ TypeCVI_VM_ST = 25,
+ TypeCVI_VM_NEW_ST = 26,
+ TypeCVI_VM_STU = 27,
+ TypeCVI_HIST = 28,
+ TypeCVI_LAST = TypeCVI_HIST,
+ TypePREFIX = 30, // Such as extenders.
+ TypeENDLOOP = 31 // Such as end of a HW loop.
+ };
+
+ enum SubTarget {
+ HasV2SubT = 0xf,
+ HasV2SubTOnly = 0x1,
+ NoV2SubT = 0x0,
+ HasV3SubT = 0xe,
+ HasV3SubTOnly = 0x2,
+ NoV3SubT = 0x1,
+ HasV4SubT = 0xc,
+ NoV4SubT = 0x3,
+ HasV5SubT = 0x8,
+ NoV5SubT = 0x7
+ };
+
+ enum AddrMode {
+ NoAddrMode = 0, // No addressing mode
+ Absolute = 1, // Absolute addressing mode
+ AbsoluteSet = 2, // Absolute set addressing mode
+ BaseImmOffset = 3, // Indirect with offset
+ BaseLongOffset = 4, // Indirect with long offset
+ BaseRegOffset = 5, // Indirect with register offset
+ PostInc = 6 // Post increment addressing mode
+ };
+
+ // MemAccessSize is represented as 1+log2(N), where N is the access size in
+ // bytes (e.g. a word access, 4 bytes, is encoded as 3 = 1 + log2(4)).
+ enum class MemAccessSize {
+ NoMemAccess = 0, // Not a memory access instruction.
+ ByteAccess = 1, // Byte access instruction (memb).
+ HalfWordAccess = 2, // Half word access instruction (memh).
+ WordAccess = 3, // Word access instruction (memw).
+ DoubleWordAccess = 4, // Double word access instruction (memd)
+ // 5, // We do not have a 16 byte vector access.
+ Vector64Access = 7, // 64 Byte vector access instruction (vmem).
+ Vector128Access = 8 // 128 Byte vector access instruction (vmem).
+ };
+
+ // MCInstrDesc TSFlags
+ // *** Must match HexagonInstrFormat*.td ***
+ enum {
+ // This 5-bit field describes the insn type.
+ TypePos = 0,
+ TypeMask = 0x1f,
+
+ // Solo instructions.
+ SoloPos = 5,
+ SoloMask = 0x1,
+ // Packed only with A or X-type instructions.
+ SoloAXPos = 6,
+ SoloAXMask = 0x1,
+ // Only A-type instruction in first slot or nothing.
+ SoloAin1Pos = 7,
+ SoloAin1Mask = 0x1,
+
+ // Predicated instructions.
+ PredicatedPos = 8,
+ PredicatedMask = 0x1,
+ PredicatedFalsePos = 9,
+ PredicatedFalseMask = 0x1,
+ PredicatedNewPos = 10,
+ PredicatedNewMask = 0x1,
+ PredicateLatePos = 11,
+ PredicateLateMask = 0x1,
+
+ // New-Value consumer instructions.
+ NewValuePos = 12,
+ NewValueMask = 0x1,
+ // New-Value producer instructions.
+ hasNewValuePos = 13,
+ hasNewValueMask = 0x1,
+ // Which operand consumes or produces a new value.
+ NewValueOpPos = 14,
+ NewValueOpMask = 0x7,
+ // Stores that can become new-value stores.
+ mayNVStorePos = 17,
+ mayNVStoreMask = 0x1,
+ // New-value store instructions.
+ NVStorePos = 18,
+ NVStoreMask = 0x1,
+ // Loads that can become current-value loads.
+ mayCVLoadPos = 19,
+ mayCVLoadMask = 0x1,
+ // Current-value load instructions.
+ CVLoadPos = 20,
+ CVLoadMask = 0x1,
+
+ // Extendable insns.
+ ExtendablePos = 21,
+ ExtendableMask = 0x1,
+ // Insns must be extended.
+ ExtendedPos = 22,
+ ExtendedMask = 0x1,
+ // Which operand may be extended.
+ ExtendableOpPos = 23,
+ ExtendableOpMask = 0x7,
+ // Signed or unsigned range.
+ ExtentSignedPos = 26,
+ ExtentSignedMask = 0x1,
+ // Number of bits of range before extending operand.
+ ExtentBitsPos = 27,
+ ExtentBitsMask = 0x1f,
+ // Alignment power-of-two before extending operand.
+ ExtentAlignPos = 32,
+ ExtentAlignMask = 0x3,
+
+ // Valid subtargets
+ validSubTargetPos = 34,
+ validSubTargetMask = 0xf,
+
+ // Addressing mode for load/store instructions.
+ AddrModePos = 40,
+ AddrModeMask = 0x7,
+ // Access size for load/store instructions.
+ MemAccessSizePos = 43,
+ MemAccesSizeMask = 0xf,
+
+ // Branch predicted taken.
+ TakenPos = 47,
+ TakenMask = 0x1,
+
+ // Floating-point instructions.
+ FPPos = 48,
+ FPMask = 0x1,
+
+ // New-Value producer-2 instructions.
+ hasNewValuePos2 = 50,
+ hasNewValueMask2 = 0x1,
+
+ // Which operand consumes or produces a new value.
+ NewValueOpPos2 = 51,
+ NewValueOpMask2 = 0x7,
+
+ // Accumulator instructions.
+ AccumulatorPos = 54,
+ AccumulatorMask = 0x1,
+
+ // Complex XU: prevent XU competition by preferring slot 3.
+ PrefersSlot3Pos = 55,
+ PrefersSlot3Mask = 0x1,
+ };
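+ // Each field above is extracted as (TSFlags >> FieldPos) & FieldMask; for
+ // example, the instruction type is (TSFlags >> TypePos) & TypeMask.
+ // (Illustrative usage only.)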
+
+ // *** The code above must match HexagonInstrFormat*.td *** //
+
+ // Hexagon specific MO operand flag mask.
+ enum HexagonMOTargetFlagVal {
+ //===------------------------------------------------------------------===//
+ // Hexagon Specific MachineOperand flags.
+ MO_NO_FLAG,
+
+ HMOTF_ConstExtended = 1,
+
+ /// MO_PCREL - On a symbol operand, indicates a PC-relative relocation,
+ /// used for computing a global address for PIC compilations.
+ MO_PCREL,
+
+ /// MO_GOT - Indicates a GOT-relative relocation
+ MO_GOT,
+
+ // Low or high part of a symbol.
+ MO_LO16, MO_HI16,
+
+ // Offset from the base of the SDA.
+ MO_GPREL
+ };
+
+ // Hexagon Sub-instruction classes.
+ enum SubInstructionGroup {
+ HSIG_None = 0,
+ HSIG_L1,
+ HSIG_L2,
+ HSIG_S1,
+ HSIG_S2,
+ HSIG_A,
+ HSIG_Compound
+ };
+
+ // Hexagon Compound classes.
+ enum CompoundGroup {
+ HCG_None = 0,
+ HCG_A,
+ HCG_B,
+ HCG_C
+ };
+
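+ // The parse bits occupy bits 15:14 of every instruction word: 0b01 marks
+ // a word inside a packet, 0b11 the last word of a packet, 0b10 the end of
+ // a hardware loop, and 0b00 a duplex sub-instruction or an immediate
+ // extender.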
+ enum InstParseBits {
+ INST_PARSE_MASK = 0x0000c000,
+ INST_PARSE_PACKET_END = 0x0000c000,
+ INST_PARSE_LOOP_END = 0x00008000,
+ INST_PARSE_NOT_END = 0x00004000,
+ INST_PARSE_DUPLEX = 0x00000000,
+ INST_PARSE_EXTENDER = 0x00000000
+ };
+
+ enum InstIClassBits : unsigned {
+ INST_ICLASS_MASK = 0xf0000000,
+ INST_ICLASS_EXTENDER = 0x00000000,
+ INST_ICLASS_J_1 = 0x10000000,
+ INST_ICLASS_J_2 = 0x20000000,
+ INST_ICLASS_LD_ST_1 = 0x30000000,
+ INST_ICLASS_LD_ST_2 = 0x40000000,
+ INST_ICLASS_J_3 = 0x50000000,
+ INST_ICLASS_CR = 0x60000000,
+ INST_ICLASS_ALU32_1 = 0x70000000,
+ INST_ICLASS_XTYPE_1 = 0x80000000,
+ INST_ICLASS_LD = 0x90000000,
+ INST_ICLASS_ST = 0xa0000000,
+ INST_ICLASS_ALU32_2 = 0xb0000000,
+ INST_ICLASS_XTYPE_2 = 0xc0000000,
+ INST_ICLASS_XTYPE_3 = 0xd0000000,
+ INST_ICLASS_XTYPE_4 = 0xe0000000,
+ INST_ICLASS_ALU32_3 = 0xf0000000
+ };
+
+} // End namespace HexagonII.
+
+} // End namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
new file mode 100644
index 0000000..da5d4d1
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -0,0 +1,251 @@
+//===-- HexagonELFObjectWriter.cpp - Hexagon Target Descriptions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "MCTargetDesc/HexagonFixupKinds.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "hexagon-elf-writer"
+
+using namespace llvm;
+using namespace Hexagon;
+
+namespace {
+
+class HexagonELFObjectWriter : public MCELFObjectTargetWriter {
+private:
+ StringRef CPU;
+
+public:
+ HexagonELFObjectWriter(uint8_t OSABI, StringRef C);
+
+ unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup,
+ bool IsPCRel) const override;
+};
+}
+
+HexagonELFObjectWriter::HexagonELFObjectWriter(uint8_t OSABI, StringRef C)
+ : MCELFObjectTargetWriter(/*Is64bit*/ false, OSABI, ELF::EM_HEXAGON,
+ /*HasRelocationAddend*/ true),
+ CPU(C) {}
+
+unsigned HexagonELFObjectWriter::GetRelocType(MCValue const & /*Target*/,
+ MCFixup const &Fixup,
+ bool IsPCRel) const {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ DEBUG(dbgs() << "unrecognized relocation " << Fixup.getKind() << "\n");
+ llvm_unreachable("Unimplemented Fixup kind!");
+ return ELF::R_HEX_NONE;
+ case FK_Data_4:
+ return (IsPCRel) ? ELF::R_HEX_32_PCREL : ELF::R_HEX_32;
+ case FK_PCRel_4:
+ return ELF::R_HEX_32_PCREL;
+ case FK_Data_2:
+ return ELF::R_HEX_16;
+ case FK_Data_1:
+ return ELF::R_HEX_8;
+ case fixup_Hexagon_B22_PCREL:
+ return ELF::R_HEX_B22_PCREL;
+ case fixup_Hexagon_B15_PCREL:
+ return ELF::R_HEX_B15_PCREL;
+ case fixup_Hexagon_B7_PCREL:
+ return ELF::R_HEX_B7_PCREL;
+ case fixup_Hexagon_LO16:
+ return ELF::R_HEX_LO16;
+ case fixup_Hexagon_HI16:
+ return ELF::R_HEX_HI16;
+ case fixup_Hexagon_32:
+ return ELF::R_HEX_32;
+ case fixup_Hexagon_16:
+ return ELF::R_HEX_16;
+ case fixup_Hexagon_8:
+ return ELF::R_HEX_8;
+ case fixup_Hexagon_GPREL16_0:
+ return ELF::R_HEX_GPREL16_0;
+ case fixup_Hexagon_GPREL16_1:
+ return ELF::R_HEX_GPREL16_1;
+ case fixup_Hexagon_GPREL16_2:
+ return ELF::R_HEX_GPREL16_2;
+ case fixup_Hexagon_GPREL16_3:
+ return ELF::R_HEX_GPREL16_3;
+ case fixup_Hexagon_HL16:
+ return ELF::R_HEX_HL16;
+ case fixup_Hexagon_B13_PCREL:
+ return ELF::R_HEX_B13_PCREL;
+ case fixup_Hexagon_B9_PCREL:
+ return ELF::R_HEX_B9_PCREL;
+ case fixup_Hexagon_B32_PCREL_X:
+ return ELF::R_HEX_B32_PCREL_X;
+ case fixup_Hexagon_32_6_X:
+ return ELF::R_HEX_32_6_X;
+ case fixup_Hexagon_B22_PCREL_X:
+ return ELF::R_HEX_B22_PCREL_X;
+ case fixup_Hexagon_B15_PCREL_X:
+ return ELF::R_HEX_B15_PCREL_X;
+ case fixup_Hexagon_B13_PCREL_X:
+ return ELF::R_HEX_B13_PCREL_X;
+ case fixup_Hexagon_B9_PCREL_X:
+ return ELF::R_HEX_B9_PCREL_X;
+ case fixup_Hexagon_B7_PCREL_X:
+ return ELF::R_HEX_B7_PCREL_X;
+ case fixup_Hexagon_16_X:
+ return ELF::R_HEX_16_X;
+ case fixup_Hexagon_12_X:
+ return ELF::R_HEX_12_X;
+ case fixup_Hexagon_11_X:
+ return ELF::R_HEX_11_X;
+ case fixup_Hexagon_10_X:
+ return ELF::R_HEX_10_X;
+ case fixup_Hexagon_9_X:
+ return ELF::R_HEX_9_X;
+ case fixup_Hexagon_8_X:
+ return ELF::R_HEX_8_X;
+ case fixup_Hexagon_7_X:
+ return ELF::R_HEX_7_X;
+ case fixup_Hexagon_6_X:
+ return ELF::R_HEX_6_X;
+ case fixup_Hexagon_32_PCREL:
+ return ELF::R_HEX_32_PCREL;
+ case fixup_Hexagon_COPY:
+ return ELF::R_HEX_COPY;
+ case fixup_Hexagon_GLOB_DAT:
+ return ELF::R_HEX_GLOB_DAT;
+ case fixup_Hexagon_JMP_SLOT:
+ return ELF::R_HEX_JMP_SLOT;
+ case fixup_Hexagon_RELATIVE:
+ return ELF::R_HEX_RELATIVE;
+ case fixup_Hexagon_PLT_B22_PCREL:
+ return ELF::R_HEX_PLT_B22_PCREL;
+ case fixup_Hexagon_GOTREL_LO16:
+ return ELF::R_HEX_GOTREL_LO16;
+ case fixup_Hexagon_GOTREL_HI16:
+ return ELF::R_HEX_GOTREL_HI16;
+ case fixup_Hexagon_GOTREL_32:
+ return ELF::R_HEX_GOTREL_32;
+ case fixup_Hexagon_GOT_LO16:
+ return ELF::R_HEX_GOT_LO16;
+ case fixup_Hexagon_GOT_HI16:
+ return ELF::R_HEX_GOT_HI16;
+ case fixup_Hexagon_GOT_32:
+ return ELF::R_HEX_GOT_32;
+ case fixup_Hexagon_GOT_16:
+ return ELF::R_HEX_GOT_16;
+ case fixup_Hexagon_DTPMOD_32:
+ return ELF::R_HEX_DTPMOD_32;
+ case fixup_Hexagon_DTPREL_LO16:
+ return ELF::R_HEX_DTPREL_LO16;
+ case fixup_Hexagon_DTPREL_HI16:
+ return ELF::R_HEX_DTPREL_HI16;
+ case fixup_Hexagon_DTPREL_32:
+ return ELF::R_HEX_DTPREL_32;
+ case fixup_Hexagon_DTPREL_16:
+ return ELF::R_HEX_DTPREL_16;
+ case fixup_Hexagon_GD_PLT_B22_PCREL:
+ return ELF::R_HEX_GD_PLT_B22_PCREL;
+ case fixup_Hexagon_LD_PLT_B22_PCREL:
+ return ELF::R_HEX_LD_PLT_B22_PCREL;
+ case fixup_Hexagon_GD_GOT_LO16:
+ return ELF::R_HEX_GD_GOT_LO16;
+ case fixup_Hexagon_GD_GOT_HI16:
+ return ELF::R_HEX_GD_GOT_HI16;
+ case fixup_Hexagon_GD_GOT_32:
+ return ELF::R_HEX_GD_GOT_32;
+ case fixup_Hexagon_GD_GOT_16:
+ return ELF::R_HEX_GD_GOT_16;
+ case fixup_Hexagon_LD_GOT_LO16:
+ return ELF::R_HEX_LD_GOT_LO16;
+ case fixup_Hexagon_LD_GOT_HI16:
+ return ELF::R_HEX_LD_GOT_HI16;
+ case fixup_Hexagon_LD_GOT_32:
+ return ELF::R_HEX_LD_GOT_32;
+ case fixup_Hexagon_LD_GOT_16:
+ return ELF::R_HEX_LD_GOT_16;
+ case fixup_Hexagon_IE_LO16:
+ return ELF::R_HEX_IE_LO16;
+ case fixup_Hexagon_IE_HI16:
+ return ELF::R_HEX_IE_HI16;
+ case fixup_Hexagon_IE_32:
+ return ELF::R_HEX_IE_32;
+ case fixup_Hexagon_IE_GOT_LO16:
+ return ELF::R_HEX_IE_GOT_LO16;
+ case fixup_Hexagon_IE_GOT_HI16:
+ return ELF::R_HEX_IE_GOT_HI16;
+ case fixup_Hexagon_IE_GOT_32:
+ return ELF::R_HEX_IE_GOT_32;
+ case fixup_Hexagon_IE_GOT_16:
+ return ELF::R_HEX_IE_GOT_16;
+ case fixup_Hexagon_TPREL_LO16:
+ return ELF::R_HEX_TPREL_LO16;
+ case fixup_Hexagon_TPREL_HI16:
+ return ELF::R_HEX_TPREL_HI16;
+ case fixup_Hexagon_TPREL_32:
+ return ELF::R_HEX_TPREL_32;
+ case fixup_Hexagon_TPREL_16:
+ return ELF::R_HEX_TPREL_16;
+ case fixup_Hexagon_6_PCREL_X:
+ return ELF::R_HEX_6_PCREL_X;
+ case fixup_Hexagon_GOTREL_32_6_X:
+ return ELF::R_HEX_GOTREL_32_6_X;
+ case fixup_Hexagon_GOTREL_16_X:
+ return ELF::R_HEX_GOTREL_16_X;
+ case fixup_Hexagon_GOTREL_11_X:
+ return ELF::R_HEX_GOTREL_11_X;
+ case fixup_Hexagon_GOT_32_6_X:
+ return ELF::R_HEX_GOT_32_6_X;
+ case fixup_Hexagon_GOT_16_X:
+ return ELF::R_HEX_GOT_16_X;
+ case fixup_Hexagon_GOT_11_X:
+ return ELF::R_HEX_GOT_11_X;
+ case fixup_Hexagon_DTPREL_32_6_X:
+ return ELF::R_HEX_DTPREL_32_6_X;
+ case fixup_Hexagon_DTPREL_16_X:
+ return ELF::R_HEX_DTPREL_16_X;
+ case fixup_Hexagon_DTPREL_11_X:
+ return ELF::R_HEX_DTPREL_11_X;
+ case fixup_Hexagon_GD_GOT_32_6_X:
+ return ELF::R_HEX_GD_GOT_32_6_X;
+ case fixup_Hexagon_GD_GOT_16_X:
+ return ELF::R_HEX_GD_GOT_16_X;
+ case fixup_Hexagon_GD_GOT_11_X:
+ return ELF::R_HEX_GD_GOT_11_X;
+ case fixup_Hexagon_LD_GOT_32_6_X:
+ return ELF::R_HEX_LD_GOT_32_6_X;
+ case fixup_Hexagon_LD_GOT_16_X:
+ return ELF::R_HEX_LD_GOT_16_X;
+ case fixup_Hexagon_LD_GOT_11_X:
+ return ELF::R_HEX_LD_GOT_11_X;
+ case fixup_Hexagon_IE_32_6_X:
+ return ELF::R_HEX_IE_32_6_X;
+ case fixup_Hexagon_IE_16_X:
+ return ELF::R_HEX_IE_16_X;
+ case fixup_Hexagon_IE_GOT_32_6_X:
+ return ELF::R_HEX_IE_GOT_32_6_X;
+ case fixup_Hexagon_IE_GOT_16_X:
+ return ELF::R_HEX_IE_GOT_16_X;
+ case fixup_Hexagon_IE_GOT_11_X:
+ return ELF::R_HEX_IE_GOT_11_X;
+ case fixup_Hexagon_TPREL_32_6_X:
+ return ELF::R_HEX_TPREL_32_6_X;
+ case fixup_Hexagon_TPREL_16_X:
+ return ELF::R_HEX_TPREL_16_X;
+ case fixup_Hexagon_TPREL_11_X:
+ return ELF::R_HEX_TPREL_11_X;
+ }
+}
+
+MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI,
+ StringRef CPU) {
+ MCELFObjectTargetWriter *MOTW = new HexagonELFObjectWriter(OSABI, CPU);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian*/ true);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h
new file mode 100644
index 0000000..4bbfbec
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h
@@ -0,0 +1,137 @@
+//===-- HexagonFixupKinds.h - Hexagon Specific Fixup Entries --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_HEXAGON_HEXAGONFIXUPKINDS_H
+#define LLVM_HEXAGON_HEXAGONFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace Hexagon {
+enum Fixups {
+ // Branch fixups for R_HEX_B{22,15,7}_PCREL.
+ fixup_Hexagon_B22_PCREL = FirstTargetFixupKind,
+ fixup_Hexagon_B15_PCREL,
+ fixup_Hexagon_B7_PCREL,
+ fixup_Hexagon_LO16,
+ fixup_Hexagon_HI16,
+ fixup_Hexagon_32,
+ fixup_Hexagon_16,
+ fixup_Hexagon_8,
+ fixup_Hexagon_GPREL16_0,
+ fixup_Hexagon_GPREL16_1,
+ fixup_Hexagon_GPREL16_2,
+ fixup_Hexagon_GPREL16_3,
+ fixup_Hexagon_HL16,
+ fixup_Hexagon_B13_PCREL,
+ fixup_Hexagon_B9_PCREL,
+ fixup_Hexagon_B32_PCREL_X,
+ fixup_Hexagon_32_6_X,
+ fixup_Hexagon_B22_PCREL_X,
+ fixup_Hexagon_B15_PCREL_X,
+ fixup_Hexagon_B13_PCREL_X,
+ fixup_Hexagon_B9_PCREL_X,
+ fixup_Hexagon_B7_PCREL_X,
+ fixup_Hexagon_16_X,
+ fixup_Hexagon_12_X,
+ fixup_Hexagon_11_X,
+ fixup_Hexagon_10_X,
+ fixup_Hexagon_9_X,
+ fixup_Hexagon_8_X,
+ fixup_Hexagon_7_X,
+ fixup_Hexagon_6_X,
+ fixup_Hexagon_32_PCREL,
+ fixup_Hexagon_COPY,
+ fixup_Hexagon_GLOB_DAT,
+ fixup_Hexagon_JMP_SLOT,
+ fixup_Hexagon_RELATIVE,
+ fixup_Hexagon_PLT_B22_PCREL,
+ fixup_Hexagon_GOTREL_LO16,
+ fixup_Hexagon_GOTREL_HI16,
+ fixup_Hexagon_GOTREL_32,
+ fixup_Hexagon_GOT_LO16,
+ fixup_Hexagon_GOT_HI16,
+ fixup_Hexagon_GOT_32,
+ fixup_Hexagon_GOT_16,
+ fixup_Hexagon_DTPMOD_32,
+ fixup_Hexagon_DTPREL_LO16,
+ fixup_Hexagon_DTPREL_HI16,
+ fixup_Hexagon_DTPREL_32,
+ fixup_Hexagon_DTPREL_16,
+ fixup_Hexagon_GD_PLT_B22_PCREL,
+ fixup_Hexagon_LD_PLT_B22_PCREL,
+ fixup_Hexagon_GD_GOT_LO16,
+ fixup_Hexagon_GD_GOT_HI16,
+ fixup_Hexagon_GD_GOT_32,
+ fixup_Hexagon_GD_GOT_16,
+ fixup_Hexagon_LD_GOT_LO16,
+ fixup_Hexagon_LD_GOT_HI16,
+ fixup_Hexagon_LD_GOT_32,
+ fixup_Hexagon_LD_GOT_16,
+ fixup_Hexagon_IE_LO16,
+ fixup_Hexagon_IE_HI16,
+ fixup_Hexagon_IE_32,
+ fixup_Hexagon_IE_16,
+ fixup_Hexagon_IE_GOT_LO16,
+ fixup_Hexagon_IE_GOT_HI16,
+ fixup_Hexagon_IE_GOT_32,
+ fixup_Hexagon_IE_GOT_16,
+ fixup_Hexagon_TPREL_LO16,
+ fixup_Hexagon_TPREL_HI16,
+ fixup_Hexagon_TPREL_32,
+ fixup_Hexagon_TPREL_16,
+ fixup_Hexagon_6_PCREL_X,
+ fixup_Hexagon_GOTREL_32_6_X,
+ fixup_Hexagon_GOTREL_16_X,
+ fixup_Hexagon_GOTREL_11_X,
+ fixup_Hexagon_GOT_32_6_X,
+ fixup_Hexagon_GOT_16_X,
+ fixup_Hexagon_GOT_11_X,
+ fixup_Hexagon_DTPREL_32_6_X,
+ fixup_Hexagon_DTPREL_16_X,
+ fixup_Hexagon_DTPREL_11_X,
+ fixup_Hexagon_GD_GOT_32_6_X,
+ fixup_Hexagon_GD_GOT_16_X,
+ fixup_Hexagon_GD_GOT_11_X,
+ fixup_Hexagon_LD_GOT_32_6_X,
+ fixup_Hexagon_LD_GOT_16_X,
+ fixup_Hexagon_LD_GOT_11_X,
+ fixup_Hexagon_IE_32_6_X,
+ fixup_Hexagon_IE_16_X,
+ fixup_Hexagon_IE_GOT_32_6_X,
+ fixup_Hexagon_IE_GOT_16_X,
+ fixup_Hexagon_IE_GOT_11_X,
+ fixup_Hexagon_TPREL_32_6_X,
+ fixup_Hexagon_TPREL_16_X,
+ fixup_Hexagon_TPREL_11_X,
+
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+enum FixupBitmaps : unsigned {
+ Word8 = 0xff,
+ Word16 = 0xffff,
+ Word32 = 0xffffffff,
+ Word32_LO = 0x00c03fff,
+ Word32_HL = 0x0, // Not Implemented
+ Word32_GP = 0x0, // Not Implemented
+ Word32_B7 = 0x00001f18,
+ Word32_B9 = 0x003000fe,
+ Word32_B13 = 0x00202ffe,
+ Word32_B15 = 0x00df20fe,
+ Word32_B22 = 0x01ff3ffe,
+ Word32_R6 = 0x000007e0,
+ Word32_U6 = 0x0, // Not Implemented
+ Word32_U16 = 0x0, // Not Implemented
+ Word32_X26 = 0x0fff3fff
+};
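+// Each Word32_* value above is intended as the mask of instruction-word bits
+// that carry the corresponding immediate field (the 0x0 entries are not
+// implemented yet, as noted).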
+} // namespace Hexagon
+} // namespace llvm
+
+#endif // LLVM_HEXAGON_HEXAGONFIXUPKINDS_H
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
new file mode 100644
index 0000000..06ccec5
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -0,0 +1,233 @@
+//===- HexagonInstPrinter.cpp - Convert Hexagon MCInst to assembly syntax -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Hexagon MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonAsmPrinter.h"
+#include "HexagonInstPrinter.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+#define GET_INSTRUCTION_NAME
+#include "HexagonGenAsmWriter.inc"
+
+HexagonInstPrinter::HexagonInstPrinter(MCAsmInfo const &MAI,
+ MCInstrInfo const &MII,
+ MCRegisterInfo const &MRI)
+ : MCInstPrinter(MAI, MII, MRI), MII(MII), HasExtender(false) {
+}
+
+StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return MII.getName(Opcode);
+}
+
+void HexagonInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
+ O << getRegName(RegNo);
+}
+
+StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const {
+ return getRegisterName(RegNo);
+}
+
+void HexagonInstPrinter::setExtender(MCInst const &MCI) {
+ HasExtender = HexagonMCInstrInfo::isImmext(MCI);
+}
+
+void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot, const MCSubtargetInfo &STI) {
+ assert(HexagonMCInstrInfo::isBundle(*MI));
+ assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE);
+ assert(HexagonMCInstrInfo::bundleSize(*MI) > 0);
+ HasExtender = false;
+ for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MI)) {
+ MCInst const &MCI = *I.getInst();
+ if (HexagonMCInstrInfo::isDuplex(MII, MCI)) {
+ printInstruction(MCI.getOperand(1).getInst(), OS);
+ OS << '\v';
+ HasExtender = false;
+ printInstruction(MCI.getOperand(0).getInst(), OS);
+ } else
+ printInstruction(&MCI, OS);
+ setExtender(MCI);
+ OS << "\n";
+ }
+
+ auto Separator = "";
+ if (HexagonMCInstrInfo::isInnerLoop(*MI)) {
+ OS << Separator;
+ Separator = " ";
+ MCInst ME;
+ ME.setOpcode(Hexagon::ENDLOOP0);
+ printInstruction(&ME, OS);
+ }
+ if (HexagonMCInstrInfo::isOuterLoop(*MI)) {
+ OS << Separator;
+ Separator = " ";
+ MCInst ME;
+ ME.setOpcode(Hexagon::ENDLOOP1);
+ printInstruction(&ME, OS);
+ }
+}
+
+void HexagonInstPrinter::printOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ if (HexagonMCInstrInfo::getExtendableOp(MII, *MI) == OpNo &&
+ (HasExtender || HexagonMCInstrInfo::isConstExtended(MII, *MI)))
+ O << "#";
+ MCOperand const &MO = MI->getOperand(OpNo);
+ if (MO.isReg()) {
+ O << getRegisterName(MO.getReg());
+ } else if (MO.isExpr()) {
+ int64_t Value;
+ if (MO.getExpr()->evaluateAsAbsolute(Value))
+ O << formatImm(Value);
+ else
+ O << *MO.getExpr();
+ } else {
+ llvm_unreachable("Unknown operand");
+ }
+}
+
+void HexagonInstPrinter::printExtOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ printOperand(MI, OpNo, O);
+}
+
+void HexagonInstPrinter::printUnsignedImmOperand(MCInst const *MI,
+ unsigned OpNo,
+ raw_ostream &O) const {
+ O << MI->getOperand(OpNo).getImm();
+}
+
+void HexagonInstPrinter::printNegImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ O << -MI->getOperand(OpNo).getImm();
+}
+
+void HexagonInstPrinter::printNOneImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ O << -1;
+}
+
+void HexagonInstPrinter::prints3_6ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ int64_t Imm;
+ bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
+ Imm = SignExtend64<9>(Imm);
+ assert(Success); (void)Success;
+ assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO.");
+ O << formatImm(Imm/64);
+}
+
+void HexagonInstPrinter::prints3_7ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ int64_t Imm;
+ bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
+ Imm = SignExtend64<10>(Imm);
+ assert(Success); (void)Success;
+ assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO.");
+ O << formatImm(Imm/128);
+}
+
+void HexagonInstPrinter::prints4_6ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ int64_t Imm;
+ bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
+ Imm = SignExtend64<10>(Imm);
+ assert(Success); (void)Success;
+ assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO.");
+ O << formatImm(Imm/64);
+}
+
+void HexagonInstPrinter::prints4_7ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ int64_t Imm;
+ bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
+ Imm = SignExtend64<11>(Imm);
+ assert(Success); (void)Success;
+ assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO.");
+ O << formatImm(Imm/128);
+}
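+
+// Editorial note (illustrative sketch, not part of the original change):
+// the s3_6/s3_7/s4_6/s4_7 printers all handle scaled immediates whose low
+// bits are implied zero, so the printed value is the operand divided by the
+// scale. A standalone model of prints3_6ImmOperand (hypothetical helper):
+//
+//   int64_t scaledS3_6(int64_t Imm) {
+//     Imm = SignExtend64<9>(Imm);           // 9-bit signed field
+//     assert((Imm & 0x3f) == 0);            // low 6 bits must be zero
+//     return Imm / 64;                      // e.g. 192 -> 3, -192 -> -3
+//   }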
+
+void HexagonInstPrinter::printGlobalOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ printOperand(MI, OpNo, O);
+}
+
+void HexagonInstPrinter::printJumpTable(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
+
+ printOperand(MI, OpNo, O);
+}
+
+void HexagonInstPrinter::printConstantPool(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
+
+ printOperand(MI, OpNo, O);
+}
+
+void HexagonInstPrinter::printBranchOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print $+8, an eight-byte displacement from the PC.
+ llvm_unreachable("Unknown branch operand.");
+}
+
+void HexagonInstPrinter::printCallOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {}
+
+void HexagonInstPrinter::printAbsAddrOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {}
+
+void HexagonInstPrinter::printPredicateOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {}
+
+void HexagonInstPrinter::printSymbol(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O, bool hi) const {
+ MCOperand const &MO = MI->getOperand(OpNo);
+
+ O << '#' << (hi ? "HI" : "LO") << '(';
+ if (MO.isImm()) {
+ O << '#';
+ printOperand(MI, OpNo, O);
+ } else {
+ printOperand(MI, OpNo, O);
+ assert("Unknown symbol operand");
+ }
+ O << ')';
+}
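+
+// Editorial note (illustrative, not part of the original change): for an
+// expression operand referring to a symbol `foo', printSymbolHi emits
+// "#HI(foo)" and printSymbolLo emits "#LO(foo)", e.g.
+//   r0.h = #HI(foo)
+//   r0.l = #LO(foo)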
+
+void HexagonInstPrinter::printBrtarget(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ MCOperand const &MO = MI->getOperand(OpNo);
+ assert (MO.isExpr());
+ MCExpr const &Expr = *MO.getExpr();
+ int64_t Value;
+ if (Expr.evaluateAsAbsolute(Value))
+ O << format("0x%" PRIx64, Value);
+ else {
+ if (HasExtender || HexagonMCInstrInfo::isConstExtended(MII, *MI))
+ if (HexagonMCInstrInfo::getExtendableOp(MII, *MI) == OpNo)
+ O << "##";
+ O << Expr;
+ }
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
new file mode 100644
index 0000000..5f42118
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -0,0 +1,92 @@
+//===-- HexagonInstPrinter.h - Convert Hexagon MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_INSTPRINTER_HEXAGONINSTPRINTER_H
+#define LLVM_LIB_TARGET_HEXAGON_INSTPRINTER_HEXAGONINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+/// Prints bundles as a newline-separated list of individual instructions.
+/// Duplexes are separated by a vertical tab (\v) character.
+/// A trailing line includes bundle properties such as endloop0/1.
+///
+/// r0 = add(r1, r2)
+/// r0 = #0 \v jump 0x0
+/// :endloop0 :endloop1
+class HexagonInstPrinter : public MCInstPrinter {
+public:
+ explicit HexagonInstPrinter(MCAsmInfo const &MAI, MCInstrInfo const &MII,
+ MCRegisterInfo const &MRI);
+ void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ void printInstruction(MCInst const *MI, raw_ostream &O);
+
+ StringRef getRegName(unsigned RegNo) const;
+ static char const *getRegisterName(unsigned RegNo);
+ void printRegName(raw_ostream &O, unsigned RegNo) const override;
+
+ void printOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
+ void printExtOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
+ void printUnsignedImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printNegImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printNOneImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void prints3_6ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void prints3_7ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void prints4_6ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void prints4_7ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printBranchOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printCallOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
+ void printAbsAddrOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printPredicateOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printGlobalOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printJumpTable(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
+ void printBrtarget(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
+
+ void printConstantPool(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
+
+ void printSymbolHi(MCInst const *MI, unsigned OpNo, raw_ostream &O) const {
+ printSymbol(MI, OpNo, O, true);
+ }
+ void printSymbolLo(MCInst const *MI, unsigned OpNo, raw_ostream &O) const {
+ printSymbol(MI, OpNo, O, false);
+ }
+
+ MCAsmInfo const &getMAI() const { return MAI; }
+ MCInstrInfo const &getMII() const { return MII; }
+
+protected:
+ void printSymbol(MCInst const *MI, unsigned OpNo, raw_ostream &O,
+ bool hi) const;
+
+private:
+ MCInstrInfo const &MII;
+
+ bool HasExtender;
+ void setExtender(MCInst const &MCI);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
new file mode 100644
index 0000000..51d2f1c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -0,0 +1,37 @@
+//===-- HexagonMCAsmInfo.cpp - Hexagon asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the HexagonMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCAsmInfo.h"
+
+using namespace llvm;
+
+// Pin the vtable to this file.
+void HexagonMCAsmInfo::anchor() {}
+
+HexagonMCAsmInfo::HexagonMCAsmInfo(const Triple &TT) {
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = nullptr; // No 64-bit data directive is emitted.
+ CommentString = "//";
+
+ LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
+ InlineAsmStart = "# InlineAsm Start";
+ InlineAsmEnd = "# InlineAsm End";
+ ZeroDirective = "\t.space\t";
+ AscizDirective = "\t.string\t";
+
+ SupportsDebugInformation = true;
+ UsesELFSectionDirectiveForBSS = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
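+
+// Editorial note (illustrative sketch, not part of the original change):
+// with these properties, emitted assembly data would look roughly like
+//
+//   .half   42              // Data16bitsDirective
+//   .word   305419896       // Data32bitsDirective
+//   .space  16              // ZeroDirective
+//   .string "hello"         // AscizDirective
+//
+// with "//" as the comment string.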
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
new file mode 100644
index 0000000..a8456b4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -0,0 +1,32 @@
+//===-- HexagonTargetAsmInfo.h - Hexagon asm properties --------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the HexagonMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCASMINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfoELF.h"
+
+namespace llvm {
+class Triple;
+
+class HexagonMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit HexagonMCAsmInfo(const Triple &TT);
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
new file mode 100644
index 0000000..46b7b41
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -0,0 +1,581 @@
+//===----- HexagonMCChecker.cpp - Instruction bundle checking -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the checking of insns inside a bundle according to the
+// packet constraint rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCChecker.h"
+
+#include "HexagonBaseInfo.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> RelaxNVChecks("relax-nv-checks", cl::init(false),
+ cl::ZeroOrMore, cl::Hidden, cl::desc("Relax checks of new-value validity"));
+
+const HexagonMCChecker::PredSense
+ HexagonMCChecker::Unconditional(Hexagon::NoRegister, false);
+
+void HexagonMCChecker::init() {
+ // Initialize read-only registers set.
+ ReadOnly.insert(Hexagon::PC);
+
+ // Figure out the loop-registers definitions.
+ if (HexagonMCInstrInfo::isInnerLoop(MCB)) {
+ Defs[Hexagon::SA0].insert(Unconditional); // FIXME: define or change SA0?
+ Defs[Hexagon::LC0].insert(Unconditional);
+ }
+ if (HexagonMCInstrInfo::isOuterLoop(MCB)) {
+ Defs[Hexagon::SA1].insert(Unconditional); // FIXME: define or change SA1?
+ Defs[Hexagon::LC1].insert(Unconditional);
+ }
+
+ if (HexagonMCInstrInfo::isBundle(MCB))
+ // Unfurl a bundle.
+ for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+ init(*I.getInst());
+ }
+ else
+ init(MCB);
+}
+
+void HexagonMCChecker::init(MCInst const& MCI) {
+ const MCInstrDesc& MCID = HexagonMCInstrInfo::getDesc(MCII, MCI);
+ unsigned PredReg = Hexagon::NoRegister;
+ bool isTrue = false;
+
+ // Get used registers.
+ for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i)
+ if (MCI.getOperand(i).isReg()) {
+ unsigned R = MCI.getOperand(i).getReg();
+
+ if (HexagonMCInstrInfo::isPredicated(MCII, MCI) && isPredicateRegister(R)) {
+ // Note a used predicate register.
+ PredReg = R;
+ isTrue = HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI);
+
+ // Note use of new predicate register.
+ if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
+ NewPreds.insert(PredReg);
+ }
+ else
+ // Note register use. Super-registers are not tracked directly;
+ // only their components are.
+ for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
+ SRI.isValid();
+ ++SRI)
+ if (!MCSubRegIterator(*SRI, &RI).isValid())
+ // Skip super-registers used indirectly.
+ Uses.insert(*SRI);
+ }
+
+ // Get implicit register definitions.
+ if (const MCPhysReg *ImpDef = MCID.getImplicitDefs())
+ for (; *ImpDef; ++ImpDef) {
+ unsigned R = *ImpDef;
+
+ if (Hexagon::R31 != R && MCID.isCall())
+ // Any register other than the LR and the PC is a volatile register
+ // as defined by the ABI and is not modified implicitly by the call insn.
+ continue;
+ if (Hexagon::PC == R)
+ // Branches are the only insns that can change the PC,
+ // which is otherwise a read-only register.
+ continue;
+
+ if (Hexagon::USR_OVF == R)
+ // Many insns change the USR implicitly, but only one or another flag.
+ // The instruction table models the USR.OVF flag, which can be implicitly
+ // modified more than once, but cannot be modified in the same packet
+ // with an instruction that modifies it explicitly. Deal with such
+ // situations individually.
+ SoftDefs.insert(R);
+ else if (isPredicateRegister(R) &&
+ HexagonMCInstrInfo::isPredicateLate(MCII, MCI))
+ // Include implicit late predicates.
+ LatePreds.insert(R);
+ else
+ Defs[R].insert(PredSense(PredReg, isTrue));
+ }
+
+ // Figure out explicit register definitions.
+ for (unsigned i = 0; i < MCID.getNumDefs(); ++i) {
+ unsigned R = MCI.getOperand(i).getReg(),
+ S = Hexagon::NoRegister;
+
+ // Note register definitions, direct ones as well as indirect side-effects.
+ // Super-registers are not tracked directly, but their components.
+ for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
+ SRI.isValid();
+ ++SRI) {
+ if (MCSubRegIterator(*SRI, &RI).isValid())
+ // Skip super-registers defined indirectly.
+ continue;
+
+ if (R == *SRI) {
+ if (S == R)
+ // Avoid scoring the defined register multiple times.
+ continue;
+ else
+ // Note that the defined register has already been scored.
+ S = R;
+ }
+
+ if (Hexagon::P3_0 != R && Hexagon::P3_0 == *SRI)
+ // P3:0 is a special case, since multiple predicate register definitions
+ // in a packet are allowed as the equivalent of their logical "and".
+ // Only an explicit definition of P3:0 is noted as such; if a
+ // side-effect, then note as a soft definition.
+ SoftDefs.insert(*SRI);
+ else if (HexagonMCInstrInfo::isPredicateLate(MCII, MCI) && isPredicateRegister(*SRI))
+ // Some insns produce predicates too late to be used in the same packet.
+ LatePreds.insert(*SRI);
+ else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_CUR_LD)
+ // Current loads should be used in the same packet.
+ // TODO: relies on the impossibility of a current and a temporary load
+ // in the same packet.
+ CurDefs.insert(*SRI), Defs[*SRI].insert(PredSense(PredReg, isTrue));
+ else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_TMP_LD)
+ // Temporary loads should be used in the same packet, but don't commit
+ // results, so it should be disregarded if another insn changes the same
+ // register.
+ // TODO: relies on the impossibility of a current and a temporary load
+ // in the same packet.
+ TmpDefs.insert(*SRI);
+ else if (i <= 1 && llvm::HexagonMCInstrInfo::hasNewValue2(MCII, MCI) )
+ // vshuff(Vx, Vy, Rx) <- Vx(0) and Vy(1) are both source and
+ // destination registers with this instruction. Same for vdeal(Vx, Vy, Rx).
+ Uses.insert(*SRI);
+ else
+ Defs[*SRI].insert(PredSense(PredReg, isTrue));
+ }
+ }
+
+ // Figure out register definitions that produce new values.
+ if (HexagonMCInstrInfo::hasNewValue(MCII, MCI)) {
+ unsigned R = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg();
+
+ if (HexagonMCInstrInfo::isCompound(MCII, MCI))
+ compoundRegisterMap(R); // Compound insns have a limited register range.
+
+ for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
+ SRI.isValid();
+ ++SRI)
+ if (!MCSubRegIterator(*SRI, &RI).isValid())
+ // Skip super-registers defined indirectly.
+ NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI),
+ HexagonMCInstrInfo::isFloat(MCII, MCI)));
+
+ // For fairly unique 2-dot-new producers, example:
+ // vdeal(V1, V9, R0) V1.new and V9.new can be used by consumers.
+ if (HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) {
+ unsigned R2 = HexagonMCInstrInfo::getNewValueOperand2(MCII, MCI).getReg();
+
+ for(MCRegAliasIterator SRI(R2, &RI, !MCSubRegIterator(R2, &RI).isValid());
+ SRI.isValid();
+ ++SRI)
+ if (!MCSubRegIterator(*SRI, &RI).isValid())
+ NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI),
+ HexagonMCInstrInfo::isFloat(MCII, MCI)));
+ }
+ }
+
+ // Figure out definitions of new predicate registers.
+ if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
+ for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i)
+ if (MCI.getOperand(i).isReg()) {
+ unsigned P = MCI.getOperand(i).getReg();
+
+ if (isPredicateRegister(P))
+ NewPreds.insert(P);
+ }
+
+ // Figure out uses of new values.
+ if (HexagonMCInstrInfo::isNewValue(MCII, MCI)) {
+ unsigned N = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg();
+
+ if (!MCSubRegIterator(N, &RI).isValid()) {
+ // Super-registers cannot be used as new values.
+ if (MCID.isBranch())
+ NewUses[N] = NewSense::Jmp(llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV);
+ else
+ NewUses[N] = NewSense::Use(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI));
+ }
+ }
+}
+
+HexagonMCChecker::HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &mcb, MCInst &mcbdx,
+ MCRegisterInfo const &ri)
+ : MCB(mcb), MCBDX(mcbdx), RI(ri), MCII(MCII), STI(STI),
+ bLoadErrInfo(false) {
+ init();
+}
+
+bool HexagonMCChecker::check() {
+ bool chkB = checkBranches();
+ bool chkP = checkPredicates();
+ bool chkNV = checkNewValues();
+ bool chkR = checkRegisters();
+ bool chkS = checkSolo();
+ bool chkSh = checkShuffle();
+ bool chkSl = checkSlots();
+ bool chk = chkB && chkP && chkNV && chkR && chkS && chkSh && chkSl;
+
+ return chk;
+}
+
+bool HexagonMCChecker::checkSlots() {
+ unsigned slotsUsed = 0;
+ for (auto HMI: HexagonMCInstrInfo::bundleInstructions(MCBDX)) {
+ MCInst const& MCI = *HMI.getInst();
+ if (HexagonMCInstrInfo::isImmext(MCI))
+ continue;
+ if (HexagonMCInstrInfo::isDuplex(MCII, MCI))
+ slotsUsed += 2;
+ else
+ ++slotsUsed;
+ }
+
+ if (slotsUsed > HEXAGON_PACKET_SIZE) {
+ HexagonMCErrInfo errInfo;
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NOSLOTS);
+ addErrInfo(errInfo);
+ return false;
+ }
+ return true;
+}
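+
+// Editorial note (illustrative, not part of the original change): a packet
+// holding three ordinary insns plus one duplex counts 3 + 2 = 5 slots, which
+// exceeds HEXAGON_PACKET_SIZE (4) and yields CHECK_ERROR_NOSLOTS; immediate
+// extenders are not counted against the slot budget.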
+
+// Check legal use of branches.
+bool HexagonMCChecker::checkBranches() {
+ HexagonMCErrInfo errInfo;
+ if (HexagonMCInstrInfo::isBundle(MCB)) {
+ bool hasConditional = false;
+ unsigned Branches = 0, Returns = 0, NewIndirectBranches = 0,
+ NewValueBranches = 0, Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE,
+ Unconditional = HEXAGON_PRESHUFFLE_PACKET_SIZE;
+
+ for (unsigned i = HexagonMCInstrInfo::bundleInstructionsOffset;
+ i < MCB.size(); ++i) {
+ MCInst const &MCI = *MCB.begin()[i].getInst();
+
+ if (HexagonMCInstrInfo::isImmext(MCI))
+ continue;
+ if (HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch() ||
+ HexagonMCInstrInfo::getDesc(MCII, MCI).isCall()) {
+ ++Branches;
+ if (HexagonMCInstrInfo::getDesc(MCII, MCI).isIndirectBranch() &&
+ HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
+ ++NewIndirectBranches;
+ if (HexagonMCInstrInfo::isNewValue(MCII, MCI))
+ ++NewValueBranches;
+
+ if (HexagonMCInstrInfo::isPredicated(MCII, MCI) ||
+ HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) {
+ hasConditional = true;
+ Conditional = i; // Record the position of the conditional branch.
+ } else {
+ Unconditional = i; // Record the position of the unconditional branch.
+ }
+ }
+ if (HexagonMCInstrInfo::getDesc(MCII, MCI).isReturn() &&
+ HexagonMCInstrInfo::getDesc(MCII, MCI).mayLoad())
+ ++Returns;
+ }
+
+ if (Branches) // FIXME: should "Defs.count(Hexagon::PC)" be here too?
+ if (HexagonMCInstrInfo::isInnerLoop(MCB) ||
+ HexagonMCInstrInfo::isOuterLoop(MCB)) {
+ // Error out if there's any branch in a loop-end packet.
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_ENDLOOP, Hexagon::PC);
+ addErrInfo(errInfo);
+ return false;
+ }
+ if (Branches > 1)
+ if (!hasConditional || Conditional > Unconditional) {
+ // Error out if there is more than one unconditional branch, or if
+ // the conditional branch appears after the unconditional one.
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_BRANCHES);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+
+ return true;
+}
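+
+// Editorial note (illustrative, not part of the original change): examples of
+// the branch rules enforced above, in ordinary packet syntax:
+//   { if (p0) jump .Ltaken; jump .Lfall }  // OK: conditional precedes
+//                                          // the unconditional branch
+//   { jump .L1; jump .L2 }          // rejected: two unconditional branches
+//   { jump .L1 } :endloop0          // rejected: branch in a loop-end packet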
+
+// Check legal use of predicate registers.
+bool HexagonMCChecker::checkPredicates() {
+ HexagonMCErrInfo errInfo;
+ // Check for proper use of new predicate registers.
+ for (const auto& I : NewPreds) {
+ unsigned P = I;
+
+ if (!Defs.count(P) || LatePreds.count(P)) {
+ // Error out if the new predicate register is not defined,
+ // or defined "late"
+ // (e.g., "{ if (p3.new)... ; p3 = sp1loop0(#r7:2, Rs) }").
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWP, P);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+
+ // Check for proper use of auto-anded predicate registers.
+ for (const auto& I : LatePreds) {
+ unsigned P = I;
+
+ if (LatePreds.count(P) > 1 || Defs.count(P)) {
+ // Error out if predicate register defined "late" multiple times or
+ // defined late and regularly defined
+ // (e.g., "{ p3 = sp1loop0(...); p3 = cmp.eq(...) }".
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, P);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Check legal use of new values.
+bool HexagonMCChecker::checkNewValues() {
+ HexagonMCErrInfo errInfo;
+ memset(&errInfo, 0, sizeof(errInfo));
+ for (auto& I : NewUses) {
+ unsigned R = I.first;
+ NewSense &US = I.second;
+
+ if (!hasValidNewValueDef(US, NewDefs[R])) {
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWV, R);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Check for legal register uses and definitions.
+bool HexagonMCChecker::checkRegisters() {
+ HexagonMCErrInfo errInfo;
+ // Check for proper register definitions.
+ for (const auto& I : Defs) {
+ unsigned R = I.first;
+
+ if (ReadOnly.count(R)) {
+ // Error out for definitions of read-only registers.
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_READONLY, R);
+ addErrInfo(errInfo);
+ return false;
+ }
+ if (isLoopRegister(R) && Defs.count(R) > 1 &&
+ (HexagonMCInstrInfo::isInnerLoop(MCB) ||
+ HexagonMCInstrInfo::isOuterLoop(MCB))) {
+ // Error out for definitions of loop registers at the end of a loop.
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_LOOP, R);
+ addErrInfo(errInfo);
+ return false;
+ }
+ if (SoftDefs.count(R)) {
+ // Error out for explicit changes to registers also weakly defined
+ // (e.g., "{ usr = r0; r0 = sfadd(...) }").
+ unsigned UsrR = Hexagon::USR; // Silence warning about mixed types in ?:.
+ unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R;
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR);
+ addErrInfo(errInfo);
+ return false;
+ }
+ if (!isPredicateRegister(R) && Defs[R].size() > 1) {
+ // Check for multiple register definitions.
+ PredSet &PM = Defs[R];
+
+ // Check for multiple unconditional register definitions.
+ if (PM.count(Unconditional)) {
+ // Error out on an unconditional change when there are any other
+ // changes, conditional or not.
+ unsigned UsrR = Hexagon::USR;
+ unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R;
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR);
+ addErrInfo(errInfo);
+ return false;
+ }
+ // Check for multiple conditional register definitions.
+ for (const auto& J : PM) {
+ PredSense P = J;
+
+ // Check for multiple uses of the same condition.
+ if (PM.count(P) > 1) {
+ // Error out on conditional changes based on the same predicate
+ // (e.g., "{ if (!p0) r0 =...; if (!p0) r0 =... }").
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R);
+ addErrInfo(errInfo);
+ return false;
+ }
+ // Check for the use of the complementary condition.
+ P.second = !P.second;
+ if (PM.count(P) && PM.size() > 2) {
+ // Error out on conditional changes based on the same predicate
+ // multiple times
+ // (e.g., "{ if (p0) r0 =...; if (!p0) r0 =... }; if (!p0) r0 =... }").
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+ }
+ }
+
+ // Check for use of current definitions.
+ for (const auto& I : CurDefs) {
+ unsigned R = I;
+
+ if (!Uses.count(R)) {
+ // Warn on an unused current definition.
+ errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_CURRENT, R);
+ addErrInfo(errInfo);
+ return true;
+ }
+ }
+
+ // Check for use of temporary definitions.
+ for (const auto& I : TmpDefs) {
+ unsigned R = I;
+
+ if (!Uses.count(R)) {
+ // special case for vhist
+ bool vHistFound = false;
+ for (auto const&HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+ if(llvm::HexagonMCInstrInfo::getType(MCII, *HMI.getInst()) == HexagonII::TypeCVI_HIST) {
+ vHistFound = true; // vhist() implicitly uses ALL REGxx.tmp
+ break;
+ }
+ }
+ // Warn on an unused temporary definition.
+ if (vHistFound == false) {
+ errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_TEMPORARY, R);
+ addErrInfo(errInfo);
+ return true;
+ }
+ }
+ }
+
+ return true;
+}
+
+// Check for legal use of solo insns.
+bool HexagonMCChecker::checkSolo() {
+ HexagonMCErrInfo errInfo;
+ if (HexagonMCInstrInfo::isBundle(MCB) &&
+ HexagonMCInstrInfo::bundleSize(MCB) > 1) {
+ for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+ if (llvm::HexagonMCInstrInfo::isSolo(MCII, *I.getInst())) {
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SOLO);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool HexagonMCChecker::checkShuffle() {
+ HexagonMCErrInfo errInfo;
+ // Branch info is lost when duplexing. The unduplexed insns must be
+ // checked and only branch errors matter for this case.
+ HexagonMCShuffler MCS(MCII, STI, MCB);
+ if (!MCS.check()) {
+ if (MCS.getError() == HexagonShuffler::SHUFFLE_ERROR_BRANCHES) {
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE);
+ errInfo.setShuffleError(MCS.getError());
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+ HexagonMCShuffler MCSDX(MCII, STI, MCBDX);
+ if (!MCSDX.check()) {
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE);
+ errInfo.setShuffleError(MCSDX.getError());
+ addErrInfo(errInfo);
+ return false;
+ }
+ return true;
+}
+
+void HexagonMCChecker::compoundRegisterMap(unsigned& Register) {
+ switch (Register) {
+ default:
+ break;
+ case Hexagon::R15:
+ Register = Hexagon::R23;
+ break;
+ case Hexagon::R14:
+ Register = Hexagon::R22;
+ break;
+ case Hexagon::R13:
+ Register = Hexagon::R21;
+ break;
+ case Hexagon::R12:
+ Register = Hexagon::R20;
+ break;
+ case Hexagon::R11:
+ Register = Hexagon::R19;
+ break;
+ case Hexagon::R10:
+ Register = Hexagon::R18;
+ break;
+ case Hexagon::R9:
+ Register = Hexagon::R17;
+ break;
+ case Hexagon::R8:
+ Register = Hexagon::R16;
+ break;
+ }
+}
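+
+// Editorial note (illustrative, not part of the original change): compound
+// (new-value producing) encodings only carry a 3-bit register field covering
+// r0-r7 and r16-r23, so operand values R8-R15 actually denote R16-R23 here.
+// For example, compoundRegisterMap rewrites Hexagon::R9 to Hexagon::R17 and
+// leaves registers outside R8-R15 unchanged.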
+
+bool HexagonMCChecker::hasValidNewValueDef(const NewSense &Use,
+ const NewSenseList &Defs) const {
+ bool Strict = !RelaxNVChecks;
+
+ for (unsigned i = 0, n = Defs.size(); i < n; ++i) {
+ const NewSense &Def = Defs[i];
+ // NVJ cannot use a new FP value [7.6.1]
+ if (Use.IsNVJ && (Def.IsFloat || Def.PredReg != 0))
+ continue;
+ // If the definition was not predicated, then it does not matter if
+ // the use is.
+ if (Def.PredReg == 0)
+ return true;
+ // With the strict checks, both the definition and the use must be
+ // predicated on the same register and condition.
+ if (Strict) {
+ if (Def.PredReg == Use.PredReg && Def.Cond == Use.Cond)
+ return true;
+ } else {
+ // With the relaxed checks, if the definition was predicated, the only
+ // detectable violation is if the use is predicated on the opposing
+ // condition, otherwise, it's ok.
+ if (Def.PredReg != Use.PredReg || Def.Cond == Use.Cond)
+ return true;
+ }
+ }
+ return false;
+}
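+
+// Editorial note (illustrative, not part of the original change): the rules
+// above, by example:
+//   producer "r2 = add(r3, r4)" (unpredicated)  -> any ".new" use is valid;
+//   producer "if (p0) r2 = add(r3, r4)":
+//     strict mode  -> only a use also predicated "if (p0) ..." is valid;
+//     relaxed mode -> only a use predicated on the opposite sense,
+//                     "if (!p0) ...", is rejected;
+//   a new-value jump never consumes a floating-point or predicated producer.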
+
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
new file mode 100644
index 0000000..5fc0bde
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -0,0 +1,218 @@
+//===----- HexagonMCChecker.h - Instruction bundle checking ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the checking of insns inside a bundle according to the
+// packet constraint rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCCHECKER_H
+#define HEXAGONMCCHECKER_H
+
+#include <map>
+#include <set>
+#include <queue>
+#include "MCTargetDesc/HexagonMCShuffler.h"
+
+using namespace llvm;
+
+namespace llvm {
+class MCOperandInfo;
+
+typedef struct {
+ unsigned Error, Warning, ShuffleError;
+ unsigned Register;
+} ErrInfo_T;
+
+class HexagonMCErrInfo {
+public:
+ enum {
+ CHECK_SUCCESS = 0,
+ // Errors.
+ CHECK_ERROR_BRANCHES = 0x00001,
+ CHECK_ERROR_NEWP = 0x00002,
+ CHECK_ERROR_NEWV = 0x00004,
+ CHECK_ERROR_REGISTERS = 0x00008,
+ CHECK_ERROR_READONLY = 0x00010,
+ CHECK_ERROR_LOOP = 0x00020,
+ CHECK_ERROR_ENDLOOP = 0x00040,
+ CHECK_ERROR_SOLO = 0x00080,
+ CHECK_ERROR_SHUFFLE = 0x00100,
+ CHECK_ERROR_NOSLOTS = 0x00200,
+ CHECK_ERROR_UNKNOWN = 0x00400,
+ // Warnings.
+ CHECK_WARN_CURRENT = 0x10000,
+ CHECK_WARN_TEMPORARY = 0x20000
+ };
+ ErrInfo_T s;
+
+ void reset() {
+ s.Error = CHECK_SUCCESS;
+ s.Warning = CHECK_SUCCESS;
+ s.ShuffleError = HexagonShuffler::SHUFFLE_SUCCESS;
+ s.Register = Hexagon::NoRegister;
+ };
+ HexagonMCErrInfo() {
+ reset();
+ };
+
+ void setError(unsigned e, unsigned r = Hexagon::NoRegister)
+ { s.Error = e; s.Register = r; };
+ void setWarning(unsigned w, unsigned r = Hexagon::NoRegister)
+ { s.Warning = w; s.Register = r; };
+ void setShuffleError(unsigned e) { s.ShuffleError = e; };
+};
+
+/// Check for a valid bundle.
+class HexagonMCChecker {
+ /// Insn bundle.
+ MCInst& MCB;
+ MCInst& MCBDX;
+ const MCRegisterInfo& RI;
+ MCInstrInfo const &MCII;
+ MCSubtargetInfo const &STI;
+ bool bLoadErrInfo;
+
+ /// Set of definitions: register #, if predicated, if predicated true.
+ typedef std::pair<unsigned, bool> PredSense;
+ static const PredSense Unconditional;
+ typedef std::multiset<PredSense> PredSet;
+ typedef std::multiset<PredSense>::iterator PredSetIterator;
+
+ typedef llvm::DenseMap<unsigned, PredSet>::iterator DefsIterator;
+ llvm::DenseMap<unsigned, PredSet> Defs;
+
+ /// Information about how a new-value register is defined or used:
+ /// PredReg = predicate register, 0 if use/def not predicated,
+ /// Cond = true/false for if(PredReg)/if(!PredReg) respectively,
+ /// IsFloat = true if definition produces a floating point value
+ /// (not valid for uses),
+ /// IsNVJ = true if the use is a new-value branch (not valid for
+ /// definitions).
+ struct NewSense {
+ unsigned PredReg;
+ bool IsFloat, IsNVJ, Cond;
+ // The special-case "constructors":
+ static NewSense Jmp(bool isNVJ) {
+ NewSense NS = { /*PredReg=*/ 0, /*IsFloat=*/ false, /*IsNVJ=*/ isNVJ,
+ /*Cond=*/ false };
+ return NS;
+ }
+ static NewSense Use(unsigned PR, bool True) {
+ NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ false, /*IsNVJ=*/ false,
+ /*Cond=*/ True };
+ return NS;
+ }
+ static NewSense Def(unsigned PR, bool True, bool Float) {
+ NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ Float, /*IsNVJ=*/ false,
+ /*Cond=*/ True };
+ return NS;
+ }
+ };
+ /// Set of definitions that produce a new-value register:
+ typedef llvm::SmallVector<NewSense,2> NewSenseList;
+ typedef llvm::DenseMap<unsigned, NewSenseList>::iterator NewDefsIterator;
+ llvm::DenseMap<unsigned, NewSenseList> NewDefs;
+
+ /// Set of weak definitions whose clashes should be enforced selectively.
+ typedef std::set<unsigned>::iterator SoftDefsIterator;
+ std::set<unsigned> SoftDefs;
+
+ /// Set of current definitions committed to the register file.
+ typedef std::set<unsigned>::iterator CurDefsIterator;
+ std::set<unsigned> CurDefs;
+
+ /// Set of temporary definitions not committed to the register file.
+ typedef std::set<unsigned>::iterator TmpDefsIterator;
+ std::set<unsigned> TmpDefs;
+
+ /// Set of new predicates used.
+ typedef std::set<unsigned>::iterator NewPredsIterator;
+ std::set<unsigned> NewPreds;
+
+ /// Set of predicates defined late.
+ typedef std::multiset<unsigned>::iterator LatePredsIterator;
+ std::multiset<unsigned> LatePreds;
+
+ /// Set of uses.
+ typedef std::set<unsigned>::iterator UsesIterator;
+ std::set<unsigned> Uses;
+
+ /// Set of new values used: new register, if new-value jump.
+ typedef llvm::DenseMap<unsigned, NewSense>::iterator NewUsesIterator;
+ llvm::DenseMap<unsigned, NewSense> NewUses;
+
+ /// Pre-defined set of read-only registers.
+ typedef std::set<unsigned>::iterator ReadOnlyIterator;
+ std::set<unsigned> ReadOnly;
+
+ std::queue<ErrInfo_T> ErrInfoQ;
+ HexagonMCErrInfo CrntErrInfo;
+
+ void getErrInfo() {
+ if (bLoadErrInfo == true) {
+ if (ErrInfoQ.empty()) {
+ CrntErrInfo.reset();
+ } else {
+ CrntErrInfo.s = ErrInfoQ.front();
+ ErrInfoQ.pop();
+ }
+ }
+ bLoadErrInfo = false;
+ }
+
+ void init();
+ void init(MCInst const&);
+
+ // Checks performed.
+ bool checkBranches();
+ bool checkPredicates();
+ bool checkNewValues();
+ bool checkRegisters();
+ bool checkSolo();
+ bool checkShuffle();
+ bool checkSlots();
+
+ static void compoundRegisterMap(unsigned&);
+
+ bool isPredicateRegister(unsigned R) const {
+ return (Hexagon::P0 == R || Hexagon::P1 == R ||
+ Hexagon::P2 == R || Hexagon::P3 == R);
+ };
+ bool isLoopRegister(unsigned R) const {
+ return (Hexagon::SA0 == R || Hexagon::LC0 == R ||
+ Hexagon::SA1 == R || Hexagon::LC1 == R);
+ };
+
+ bool hasValidNewValueDef(const NewSense &Use,
+ const NewSenseList &Defs) const;
+
+ public:
+ explicit HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst& mcb, MCInst &mcbdx,
+ const MCRegisterInfo& ri);
+
+ bool check();
+
+ /// add a new error/warning
+ void addErrInfo(HexagonMCErrInfo &err) { ErrInfoQ.push(err.s); };
+
+ /// Return the error code for the last operation in the insn bundle.
+ unsigned getError() { getErrInfo(); return CrntErrInfo.s.Error; };
+ unsigned getWarning() { getErrInfo(); return CrntErrInfo.s.Warning; };
+ unsigned getShuffleError() { getErrInfo(); return CrntErrInfo.s.ShuffleError; };
+ unsigned getErrRegister() { getErrInfo(); return CrntErrInfo.s.Register; };
+ bool getNextErrInfo() {
+ bLoadErrInfo = true;
+ return (ErrInfoQ.empty()) ? false : (getErrInfo(), true);
+ }
+};
+
+}
+
+#endif // HEXAGONMCCHECKER_H
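+
+// Editorial usage sketch (illustrative, not part of the original change):
+// a driver would typically run the checker on a finished bundle and then
+// drain the error queue, roughly as follows.
+//
+//   HexagonMCChecker Check(MCII, STI, Bundle, BundleWithDuplexes, MRI);
+//   if (!Check.check()) {
+//     while (Check.getNextErrInfo()) {
+//       unsigned Err = Check.getError();        // CHECK_ERROR_* mask
+//       unsigned Reg = Check.getErrRegister();  // offending register, if any
+//       // report Err/Reg through the assembler's diagnostics
+//     }
+//   }
+//
+// The names Bundle, BundleWithDuplexes and MRI above are placeholders, not
+// identifiers from this patch.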
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
new file mode 100644
index 0000000..c2c6275
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -0,0 +1,741 @@
+//===-- HexagonMCCodeEmitter.cpp - Hexagon Target Descriptions ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonFixupKinds.h"
+#include "MCTargetDesc/HexagonMCCodeEmitter.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "mccodeemitter"
+
+using namespace llvm;
+using namespace Hexagon;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII,
+ MCContext &aMCT)
+ : MCT(aMCT), MCII(aMII), Addend(new unsigned(0)),
+ Extended(new bool(false)), CurrentBundle(new MCInst const *) {}
+
+uint32_t HexagonMCCodeEmitter::parseBits(size_t Instruction, size_t Last,
+ MCInst const &MCB,
+ MCInst const &MCI) const {
+ bool Duplex = HexagonMCInstrInfo::isDuplex(MCII, MCI);
+ if (Instruction == 0) {
+ if (HexagonMCInstrInfo::isInnerLoop(MCB)) {
+ assert(!Duplex);
+ assert(Instruction != Last);
+ return HexagonII::INST_PARSE_LOOP_END;
+ }
+ }
+ if (Instruction == 1) {
+ if (HexagonMCInstrInfo::isOuterLoop(MCB)) {
+ assert(!Duplex);
+ assert(Instruction != Last);
+ return HexagonII::INST_PARSE_LOOP_END;
+ }
+ }
+ if (Duplex) {
+ assert(Instruction == Last);
+ return HexagonII::INST_PARSE_DUPLEX;
+ }
+ if (Instruction == Last)
+ return HexagonII::INST_PARSE_PACKET_END;
+ return HexagonII::INST_PARSE_NOT_END;
+}
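+
+// Editorial note (illustrative, not part of the original change): for a
+// three-instruction packet that ends a hardware inner loop, the parse words
+// would be assigned as
+//   insn 0 -> INST_PARSE_LOOP_END   (inner-loop marker)
+//   insn 1 -> INST_PARSE_NOT_END
+//   insn 2 -> INST_PARSE_PACKET_END (or INST_PARSE_DUPLEX if it is a duplex)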
+
+void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ MCSubtargetInfo const &STI) const {
+ MCInst &HMB = const_cast<MCInst &>(MI);
+
+ assert(HexagonMCInstrInfo::isBundle(HMB));
+ DEBUG(dbgs() << "Encoding bundle\n";);
+ *Addend = 0;
+ *Extended = false;
+ *CurrentBundle = &MI;
+ size_t Instruction = 0;
+ size_t Last = HexagonMCInstrInfo::bundleSize(HMB) - 1;
+ for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) {
+ MCInst &HMI = const_cast<MCInst &>(*I.getInst());
+ EncodeSingleInstruction(HMI, OS, Fixups, STI,
+ parseBits(Instruction, Last, HMB, HMI),
+ Instruction);
+ *Extended = HexagonMCInstrInfo::isImmext(HMI);
+ *Addend += HEXAGON_INSTR_SIZE;
+ ++Instruction;
+ }
+ return;
+}
+
+/// EncodeSingleInstruction - Emit a single instruction.
+void HexagonMCCodeEmitter::EncodeSingleInstruction(
+ const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI, uint32_t Parse, size_t Index) const {
+ MCInst HMB = MI;
+ assert(!HexagonMCInstrInfo::isBundle(HMB));
+ uint64_t Binary;
+
+ // Compound instructions are limited to using registers 0-7 and 16-23,
+ // so here we map 16-23 down to 8-15 so they can be correctly encoded.
+ static unsigned RegMap[8] = {Hexagon::R8, Hexagon::R9, Hexagon::R10,
+ Hexagon::R11, Hexagon::R12, Hexagon::R13,
+ Hexagon::R14, Hexagon::R15};
+
+ // Pseudo instructions don't get encoded and shouldn't be here
+ // in the first place!
+ assert(!HexagonMCInstrInfo::getDesc(MCII, HMB).isPseudo() &&
+ "pseudo-instruction found");
+ DEBUG(dbgs() << "Encoding insn"
+ " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'"
+ "\n");
+
+ if (llvm::HexagonMCInstrInfo::getType(MCII, HMB) == HexagonII::TypeCOMPOUND) {
+ for (unsigned i = 0; i < HMB.getNumOperands(); ++i)
+ if (HMB.getOperand(i).isReg()) {
+ unsigned Reg =
+ MCT.getRegisterInfo()->getEncodingValue(HMB.getOperand(i).getReg());
+ if ((Reg <= 23) && (Reg >= 16))
+ HMB.getOperand(i).setReg(RegMap[Reg - 16]);
+ }
+ }
+
+ if (HexagonMCInstrInfo::isNewValue(MCII, HMB)) {
+ // Calculate the new value distance to the associated producer
+ MCOperand &MCO =
+ HMB.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, HMB));
+ unsigned SOffset = 0;
+ unsigned Register = MCO.getReg();
+ unsigned Register1;
+ auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
+ auto i = Instructions.begin() + Index - 1;
+ for (;; --i) {
+ assert(i != Instructions.begin() - 1 && "Couldn't find producer");
+ MCInst const &Inst = *i->getInst();
+ if (HexagonMCInstrInfo::isImmext(Inst))
+ continue;
+ ++SOffset;
+ Register1 =
+ HexagonMCInstrInfo::hasNewValue(MCII, Inst)
+ ? HexagonMCInstrInfo::getNewValueOperand(MCII, Inst).getReg()
+ : static_cast<unsigned>(Hexagon::NoRegister);
+ if (Register != Register1)
+ // This isn't the register we're looking for
+ continue;
+ if (!HexagonMCInstrInfo::isPredicated(MCII, Inst))
+ // Producer is unpredicated
+ break;
+ assert(HexagonMCInstrInfo::isPredicated(MCII, HMB) &&
+ "Unpredicated consumer depending on predicated producer");
+ if (HexagonMCInstrInfo::isPredicatedTrue(MCII, Inst) ==
+ HexagonMCInstrInfo::isPredicatedTrue(MCII, HMB))
+ // Producer predicate sense matched ours
+ break;
+ }
+ // Hexagon PRM 10.11 Construct Nt from distance
+ unsigned Offset = SOffset;
+ Offset <<= 1;
+ MCO.setReg(Offset + Hexagon::R0);
+ }
+
+ Binary = getBinaryCodeForInstr(HMB, Fixups, STI);
+ // Check for unimplemented instructions. Immediate extenders
+ // are encoded as zero, so they need to be accounted for.
+ if ((!Binary) &&
+ ((HMB.getOpcode() != DuplexIClass0) && (HMB.getOpcode() != A4_ext) &&
+ (HMB.getOpcode() != A4_ext_b) && (HMB.getOpcode() != A4_ext_c) &&
+ (HMB.getOpcode() != A4_ext_g))) {
+ // Any other opcode that encodes to zero is an unimplemented instruction.
+ DEBUG(dbgs() << "Unimplemented inst: "
+ " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'"
+ "\n");
+ llvm_unreachable("Unimplemented Instruction");
+ }
+ Binary |= Parse;
+
+ // if we need to emit a duplexed instruction
+ if (HMB.getOpcode() >= Hexagon::DuplexIClass0 &&
+ HMB.getOpcode() <= Hexagon::DuplexIClassF) {
+ assert(Parse == HexagonII::INST_PARSE_DUPLEX &&
+ "Emitting duplex without duplex parse bits");
+ unsigned dupIClass;
+ switch (HMB.getOpcode()) {
+ case Hexagon::DuplexIClass0:
+ dupIClass = 0;
+ break;
+ case Hexagon::DuplexIClass1:
+ dupIClass = 1;
+ break;
+ case Hexagon::DuplexIClass2:
+ dupIClass = 2;
+ break;
+ case Hexagon::DuplexIClass3:
+ dupIClass = 3;
+ break;
+ case Hexagon::DuplexIClass4:
+ dupIClass = 4;
+ break;
+ case Hexagon::DuplexIClass5:
+ dupIClass = 5;
+ break;
+ case Hexagon::DuplexIClass6:
+ dupIClass = 6;
+ break;
+ case Hexagon::DuplexIClass7:
+ dupIClass = 7;
+ break;
+ case Hexagon::DuplexIClass8:
+ dupIClass = 8;
+ break;
+ case Hexagon::DuplexIClass9:
+ dupIClass = 9;
+ break;
+ case Hexagon::DuplexIClassA:
+ dupIClass = 10;
+ break;
+ case Hexagon::DuplexIClassB:
+ dupIClass = 11;
+ break;
+ case Hexagon::DuplexIClassC:
+ dupIClass = 12;
+ break;
+ case Hexagon::DuplexIClassD:
+ dupIClass = 13;
+ break;
+ case Hexagon::DuplexIClassE:
+ dupIClass = 14;
+ break;
+ case Hexagon::DuplexIClassF:
+ dupIClass = 15;
+ break;
+ default:
+ llvm_unreachable("Unimplemented DuplexIClass");
+ break;
+ }
+ // Split the 4-bit duplex IClass: bits 3:1 (mask 0xE) are placed in
+ // bits 31:29 of the instruction word, and bit 0 is placed in bit 13.
+ Binary = ((dupIClass & 0xE) << (29 - 1)) | ((dupIClass & 0x1) << 13);
+
+ const MCInst *subInst0 = HMB.getOperand(0).getInst();
+ const MCInst *subInst1 = HMB.getOperand(1).getInst();
+
+ // get subinstruction slot 0
+ unsigned subInstSlot0Bits = getBinaryCodeForInstr(*subInst0, Fixups, STI);
+ // get subinstruction slot 1
+ unsigned subInstSlot1Bits = getBinaryCodeForInstr(*subInst1, Fixups, STI);
+
+ Binary |= subInstSlot0Bits | (subInstSlot1Bits << 16);
+ }
+ support::endian::Writer<support::little>(OS).write<uint32_t>(Binary);
+ ++MCNumEmitted;
+}
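+
+// Editorial note (illustrative, not part of the original change): in the
+// duplex path above, for dupIClass = 0xA (0b1010) the IClass packing yields
+//   Binary = ((0xA & 0xE) << 28) | ((0xA & 0x1) << 13) = 0xA0000000,
+// i.e. IClass bits 3:1 in bits 31:29 and bit 0 in bit 13; the two 13-bit
+// sub-instruction encodings are then OR'ed into bits 12:0 (slot 0) and
+// bits 28:16 (slot 1), while bits 15:14 stay 00, the duplex parse bits.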
+
+static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
+ const MCOperand &MO,
+ const MCSymbolRefExpr::VariantKind kind) {
+ const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI);
+ unsigned insnType = llvm::HexagonMCInstrInfo::getType(MCII, MI);
+
+ if (insnType == HexagonII::TypePREFIX) {
+ switch (kind) {
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ return Hexagon::fixup_Hexagon_GOTREL_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ return Hexagon::fixup_Hexagon_GOT_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ return Hexagon::fixup_Hexagon_TPREL_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ return Hexagon::fixup_Hexagon_DTPREL_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ return Hexagon::fixup_Hexagon_GD_GOT_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ return Hexagon::fixup_Hexagon_LD_GOT_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE:
+ return Hexagon::fixup_Hexagon_IE_32_6_X;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ return Hexagon::fixup_Hexagon_IE_GOT_32_6_X;
+ default:
+ if (MCID.isBranch())
+ return Hexagon::fixup_Hexagon_B32_PCREL_X;
+ else
+ return Hexagon::fixup_Hexagon_32_6_X;
+ }
+ } else if (MCID.isBranch())
+ return (Hexagon::fixup_Hexagon_B13_PCREL);
+
+ switch (MCID.getOpcode()) {
+ case Hexagon::HI:
+ case Hexagon::A2_tfrih:
+ switch (kind) {
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ return Hexagon::fixup_Hexagon_GOT_HI16;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ return Hexagon::fixup_Hexagon_GOTREL_HI16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ return Hexagon::fixup_Hexagon_GD_GOT_HI16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ return Hexagon::fixup_Hexagon_LD_GOT_HI16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE:
+ return Hexagon::fixup_Hexagon_IE_HI16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ return Hexagon::fixup_Hexagon_IE_GOT_HI16;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ return Hexagon::fixup_Hexagon_TPREL_HI16;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ return Hexagon::fixup_Hexagon_DTPREL_HI16;
+ default:
+ return Hexagon::fixup_Hexagon_HI16;
+ }
+
+ case Hexagon::LO:
+ case Hexagon::A2_tfril:
+ switch (kind) {
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ return Hexagon::fixup_Hexagon_GOT_LO16;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ return Hexagon::fixup_Hexagon_GOTREL_LO16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ return Hexagon::fixup_Hexagon_GD_GOT_LO16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ return Hexagon::fixup_Hexagon_LD_GOT_LO16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE:
+ return Hexagon::fixup_Hexagon_IE_LO16;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ return Hexagon::fixup_Hexagon_IE_GOT_LO16;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ return Hexagon::fixup_Hexagon_TPREL_LO16;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ return Hexagon::fixup_Hexagon_DTPREL_LO16;
+ default:
+ return Hexagon::fixup_Hexagon_LO16;
+ }
+
+ // The only relocs left should be GP relative:
+ default:
+ if (MCID.mayStore() || MCID.mayLoad()) {
+ for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses;
+ ++ImpUses) {
+ if (*ImpUses == Hexagon::GP) {
+ switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) {
+ case HexagonII::MemAccessSize::ByteAccess:
+ return fixup_Hexagon_GPREL16_0;
+ case HexagonII::MemAccessSize::HalfWordAccess:
+ return fixup_Hexagon_GPREL16_1;
+ case HexagonII::MemAccessSize::WordAccess:
+ return fixup_Hexagon_GPREL16_2;
+ case HexagonII::MemAccessSize::DoubleWordAccess:
+ return fixup_Hexagon_GPREL16_3;
+ default:
+ llvm_unreachable("unhandled fixup");
+ }
+ }
+ }
+ } else
+ llvm_unreachable("unhandled fixup");
+ }
+
+ return LastTargetFixupKind;
+}
+
+namespace llvm {
+extern const MCInstrDesc HexagonInsts[];
+}
+
+namespace {
+ bool isPCRel (unsigned Kind) {
+ switch(Kind){
+ case fixup_Hexagon_B22_PCREL:
+ case fixup_Hexagon_B15_PCREL:
+ case fixup_Hexagon_B7_PCREL:
+ case fixup_Hexagon_B13_PCREL:
+ case fixup_Hexagon_B9_PCREL:
+ case fixup_Hexagon_B32_PCREL_X:
+ case fixup_Hexagon_B22_PCREL_X:
+ case fixup_Hexagon_B15_PCREL_X:
+ case fixup_Hexagon_B13_PCREL_X:
+ case fixup_Hexagon_B9_PCREL_X:
+ case fixup_Hexagon_B7_PCREL_X:
+ case fixup_Hexagon_32_PCREL:
+ case fixup_Hexagon_PLT_B22_PCREL:
+ case fixup_Hexagon_GD_PLT_B22_PCREL:
+ case fixup_Hexagon_LD_PLT_B22_PCREL:
+ case fixup_Hexagon_6_PCREL_X:
+ return true;
+ default:
+ return false;
+ }
+ }
+}
+
+unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ const MCExpr *ME,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const
+
+{
+ int64_t Res;
+
+ if (ME->evaluateAsAbsolute(Res))
+ return Res;
+
+ MCExpr::ExprKind MK = ME->getKind();
+ if (MK == MCExpr::Constant) {
+ return cast<MCConstantExpr>(ME)->getValue();
+ }
+ if (MK == MCExpr::Binary) {
+ getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI);
+ getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI);
+ return 0;
+ }
+
+ assert(MK == MCExpr::SymbolRef);
+
+ Hexagon::Fixups FixupKind =
+ Hexagon::Fixups(Hexagon::fixup_Hexagon_TPREL_LO16);
+ const MCSymbolRefExpr *MCSRE = static_cast<const MCSymbolRefExpr *>(ME);
+ const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI);
+ unsigned bits = HexagonMCInstrInfo::getExtentBits(MCII, MI) -
+ HexagonMCInstrInfo::getExtentAlignment(MCII, MI);
+ const MCSymbolRefExpr::VariantKind kind = MCSRE->getKind();
+
+ DEBUG(dbgs() << "----------------------------------------\n");
+ DEBUG(dbgs() << "Opcode Name: " << HexagonMCInstrInfo::getName(MCII, MI)
+ << "\n");
+ DEBUG(dbgs() << "Opcode: " << MCID.getOpcode() << "\n");
+ DEBUG(dbgs() << "Relocation bits: " << bits << "\n");
+ DEBUG(dbgs() << "Addend: " << *Addend << "\n");
+ DEBUG(dbgs() << "----------------------------------------\n");
+
+ switch (bits) {
+ default:
+ DEBUG(dbgs() << "unrecognized bit count of " << bits << '\n');
+ break;
+
+ case 32:
+ switch (kind) {
+ case llvm::MCSymbolRefExpr::VK_Hexagon_PCREL:
+ FixupKind = Hexagon::fixup_Hexagon_32_PCREL;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOT_32_6_X
+ : Hexagon::fixup_Hexagon_GOT_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOTREL_32_6_X
+ : Hexagon::fixup_Hexagon_GOTREL_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_GD_GOT_32_6_X
+ : Hexagon::fixup_Hexagon_GD_GOT_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_LD_GOT_32_6_X
+ : Hexagon::fixup_Hexagon_LD_GOT_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_32_6_X
+ : Hexagon::fixup_Hexagon_IE_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_GOT_32_6_X
+ : Hexagon::fixup_Hexagon_IE_GOT_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_TPREL_32_6_X
+ : Hexagon::fixup_Hexagon_TPREL_32;
+ break;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_DTPREL_32_6_X
+ : Hexagon::fixup_Hexagon_DTPREL_32;
+ break;
+ default:
+ FixupKind =
+ *Extended ? Hexagon::fixup_Hexagon_32_6_X : Hexagon::fixup_Hexagon_32;
+ break;
+ }
+ break;
+
+ case 22:
+ switch (kind) {
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_PLT:
+ FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_PLT:
+ FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL;
+ break;
+ default:
+ if (MCID.isBranch() || MCID.isCall()) {
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_B22_PCREL_X
+ : Hexagon::fixup_Hexagon_B22_PCREL;
+ } else {
+ errs() << "unrecognized relocation, bits: " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+ }
+ break;
+
+ case 16:
+ if (*Extended) {
+ switch (kind) {
+ default:
+ FixupKind = Hexagon::fixup_Hexagon_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GOT_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE:
+ FixupKind = Hexagon::fixup_Hexagon_IE_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ FixupKind = Hexagon::fixup_Hexagon_TPREL_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ FixupKind = Hexagon::fixup_Hexagon_DTPREL_16_X;
+ break;
+ }
+ } else
+ switch (kind) {
+ default:
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ if ((MCID.getOpcode() == Hexagon::HI) ||
+ (MCID.getOpcode() == Hexagon::LO_H))
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_HI16;
+ else
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_LO16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GPREL:
+ FixupKind = Hexagon::fixup_Hexagon_GPREL16_0;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LO16:
+ FixupKind = Hexagon::fixup_Hexagon_LO16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_HI16:
+ FixupKind = Hexagon::fixup_Hexagon_HI16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ FixupKind = Hexagon::fixup_Hexagon_TPREL_16;
+ break;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ FixupKind = Hexagon::fixup_Hexagon_DTPREL_16;
+ break;
+ }
+ break;
+
+ case 15:
+ if (MCID.isBranch() || MCID.isCall())
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_B15_PCREL_X
+ : Hexagon::fixup_Hexagon_B15_PCREL;
+ break;
+
+ case 13:
+ if (MCID.isBranch())
+ FixupKind = Hexagon::fixup_Hexagon_B13_PCREL;
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 12:
+ if (*Extended)
+ switch (kind) {
+ default:
+ FixupKind = Hexagon::fixup_Hexagon_12_X;
+ break;
+ // There isn't a GOT_12_X, both 11_X and 16_X resolve to 6/26
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GOT_16_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X;
+ break;
+ }
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 11:
+ if (*Extended)
+ switch (kind) {
+ default:
+ FixupKind = Hexagon::fixup_Hexagon_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GOT_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GD_GOT_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_LD_GOT_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_IE_GOT_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_TPREL:
+ FixupKind = Hexagon::fixup_Hexagon_TPREL_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_DTPREL:
+ FixupKind = Hexagon::fixup_Hexagon_DTPREL_11_X;
+ break;
+ }
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 10:
+ if (*Extended)
+ FixupKind = Hexagon::fixup_Hexagon_10_X;
+ break;
+
+ case 9:
+ if (MCID.isBranch() ||
+ (llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR))
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_B9_PCREL_X
+ : Hexagon::fixup_Hexagon_B9_PCREL;
+ else if (*Extended)
+ FixupKind = Hexagon::fixup_Hexagon_9_X;
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 8:
+ if (*Extended)
+ FixupKind = Hexagon::fixup_Hexagon_8_X;
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 7:
+ if (MCID.isBranch() ||
+ (llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR))
+ FixupKind = *Extended ? Hexagon::fixup_Hexagon_B7_PCREL_X
+ : Hexagon::fixup_Hexagon_B7_PCREL;
+ else if (*Extended)
+ FixupKind = Hexagon::fixup_Hexagon_7_X;
+ else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 6:
+ if (*Extended) {
+ switch (kind) {
+ default:
+ FixupKind = Hexagon::fixup_Hexagon_6_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_Hexagon_PCREL:
+ FixupKind = Hexagon::fixup_Hexagon_6_PCREL_X;
+ break;
+ // This is part of an extender; GOT_11 is a
+ // Word32_U6 unsigned/truncated reloc.
+ case llvm::MCSymbolRefExpr::VK_GOT:
+ FixupKind = Hexagon::fixup_Hexagon_GOT_11_X;
+ break;
+ case llvm::MCSymbolRefExpr::VK_GOTOFF:
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X;
+ break;
+ }
+ } else {
+ errs() << "unrecognized relocation, bits " << bits << "\n";
+ errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n";
+ }
+ break;
+
+ case 0:
+ FixupKind = getFixupNoBits(MCII, MI, MO, kind);
+ break;
+ }
+
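+ // For PC-relative fixups with a positive addend, fold the addend into the
+ // fixup expression as well; otherwise the operand's bare symbol expression
+ // is used.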
+ MCExpr const *FixupExpression = (*Addend > 0 && isPCRel(FixupKind)) ?
+ MCBinaryExpr::createAdd(MO.getExpr(),
+ MCConstantExpr::create(*Addend, MCT), MCT) :
+ MO.getExpr();
+
+ MCFixup fixup = MCFixup::create(*Addend, FixupExpression,
+ MCFixupKind(FixupKind), MI.getLoc());
+ Fixups.push_back(fixup);
+ // All of the information is in the fixup.
+ return (0);
+}
+
+unsigned
+HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ MCSubtargetInfo const &STI) const {
+ if (MO.isReg())
+ return MCT.getRegisterInfo()->getEncodingValue(MO.getReg());
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ // MO must be an ME.
+ assert(MO.isExpr());
+ return getExprOpValue(MI, MO, MO.getExpr(), Fixups, STI);
+}
+
+MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII,
+ MCRegisterInfo const &MRI,
+ MCContext &MCT) {
+ return new HexagonMCCodeEmitter(MII, MCT);
+}
+
+#include "HexagonGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
new file mode 100644
index 0000000..2a154da
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -0,0 +1,70 @@
+//===-- HexagonMCCodeEmitter.h - Hexagon Target Descriptions ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Definition for classes that emit Hexagon machine code from MCInsts
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCCODEEMITTER_H
+#define HEXAGONMCCODEEMITTER_H
+
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class HexagonMCCodeEmitter : public MCCodeEmitter {
+ MCContext &MCT;
+ MCInstrInfo const &MCII;
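+ // Per-encoding state (current addend, extender flag, and bundle), held
+ // behind pointers so the const encoding methods can update it while an
+ // instruction is being emitted.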
+ std::unique_ptr<unsigned> Addend;
+ std::unique_ptr<bool> Extended;
+ std::unique_ptr<MCInst const *> CurrentBundle;
+
+ // helper routine for getMachineOpValue()
+ unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO,
+ const MCExpr *ME, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+public:
+ HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT);
+
+ // Return parse bits for instruction `MCI' inside bundle `MCB'
+ uint32_t parseBits(size_t Instruction, size_t Last, MCInst const &MCB,
+ MCInst const &MCI) const;
+
+ void encodeInstruction(MCInst const &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ MCSubtargetInfo const &STI) const override;
+
+ void EncodeSingleInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI,
+ uint32_t Parse, size_t Index) const;
+
+ // \brief TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(MCInst const &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ MCSubtargetInfo const &STI) const;
+
+ /// \brief Return binary encoding of operand.
+ unsigned getMachineOpValue(MCInst const &MI, MCOperand const &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ MCSubtargetInfo const &STI) const;
+}; // class HexagonMCCodeEmitter
+
+} // namespace llvm
+
+#endif /* HEXAGONMCCODEEMITTER_H */
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
new file mode 100644
index 0000000..d194bea
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -0,0 +1,427 @@
+
+//=== HexagonMCCompound.cpp - Hexagon Compound checker -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file looks at a packet and tries to form compound instructions.
+//
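+// For example (illustrative registers and label), the packet
+//   { p0 = cmp.eq(r1, r2); if (p0.new) jump:nt .Ltarget }
+// can have its two instructions fused into one of the compound
+// J4_cmpeq_* opcodes defined below, freeing a packet slot.
+//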
+//===----------------------------------------------------------------------===//
+#include "Hexagon.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCShuffler.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace Hexagon;
+
+#define DEBUG_TYPE "hexagon-mccompound"
+
+enum OpcodeIndex {
+ fp0_jump_nt = 0,
+ fp0_jump_t,
+ fp1_jump_nt,
+ fp1_jump_t,
+ tp0_jump_nt,
+ tp0_jump_t,
+ tp1_jump_nt,
+ tp1_jump_t
+};
+
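+// Each table below is indexed by OpcodeIndex: the predicate register
+// (p0/p1), the branch sense (f/t), and the :nt/:t hint together select
+// the compound opcode to emit.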
+static const unsigned tstBitOpcode[8] = {
+ J4_tstbit0_fp0_jump_nt, J4_tstbit0_fp0_jump_t, J4_tstbit0_fp1_jump_nt,
+ J4_tstbit0_fp1_jump_t, J4_tstbit0_tp0_jump_nt, J4_tstbit0_tp0_jump_t,
+ J4_tstbit0_tp1_jump_nt, J4_tstbit0_tp1_jump_t};
+static const unsigned cmpeqBitOpcode[8] = {
+ J4_cmpeq_fp0_jump_nt, J4_cmpeq_fp0_jump_t, J4_cmpeq_fp1_jump_nt,
+ J4_cmpeq_fp1_jump_t, J4_cmpeq_tp0_jump_nt, J4_cmpeq_tp0_jump_t,
+ J4_cmpeq_tp1_jump_nt, J4_cmpeq_tp1_jump_t};
+static const unsigned cmpgtBitOpcode[8] = {
+ J4_cmpgt_fp0_jump_nt, J4_cmpgt_fp0_jump_t, J4_cmpgt_fp1_jump_nt,
+ J4_cmpgt_fp1_jump_t, J4_cmpgt_tp0_jump_nt, J4_cmpgt_tp0_jump_t,
+ J4_cmpgt_tp1_jump_nt, J4_cmpgt_tp1_jump_t};
+static const unsigned cmpgtuBitOpcode[8] = {
+ J4_cmpgtu_fp0_jump_nt, J4_cmpgtu_fp0_jump_t, J4_cmpgtu_fp1_jump_nt,
+ J4_cmpgtu_fp1_jump_t, J4_cmpgtu_tp0_jump_nt, J4_cmpgtu_tp0_jump_t,
+ J4_cmpgtu_tp1_jump_nt, J4_cmpgtu_tp1_jump_t};
+static const unsigned cmpeqiBitOpcode[8] = {
+ J4_cmpeqi_fp0_jump_nt, J4_cmpeqi_fp0_jump_t, J4_cmpeqi_fp1_jump_nt,
+ J4_cmpeqi_fp1_jump_t, J4_cmpeqi_tp0_jump_nt, J4_cmpeqi_tp0_jump_t,
+ J4_cmpeqi_tp1_jump_nt, J4_cmpeqi_tp1_jump_t};
+static const unsigned cmpgtiBitOpcode[8] = {
+ J4_cmpgti_fp0_jump_nt, J4_cmpgti_fp0_jump_t, J4_cmpgti_fp1_jump_nt,
+ J4_cmpgti_fp1_jump_t, J4_cmpgti_tp0_jump_nt, J4_cmpgti_tp0_jump_t,
+ J4_cmpgti_tp1_jump_nt, J4_cmpgti_tp1_jump_t};
+static const unsigned cmpgtuiBitOpcode[8] = {
+ J4_cmpgtui_fp0_jump_nt, J4_cmpgtui_fp0_jump_t, J4_cmpgtui_fp1_jump_nt,
+ J4_cmpgtui_fp1_jump_t, J4_cmpgtui_tp0_jump_nt, J4_cmpgtui_tp0_jump_t,
+ J4_cmpgtui_tp1_jump_nt, J4_cmpgtui_tp1_jump_t};
+static const unsigned cmpeqn1BitOpcode[8] = {
+ J4_cmpeqn1_fp0_jump_nt, J4_cmpeqn1_fp0_jump_t, J4_cmpeqn1_fp1_jump_nt,
+ J4_cmpeqn1_fp1_jump_t, J4_cmpeqn1_tp0_jump_nt, J4_cmpeqn1_tp0_jump_t,
+ J4_cmpeqn1_tp1_jump_nt, J4_cmpeqn1_tp1_jump_t};
+static const unsigned cmpgtn1BitOpcode[8] = {
+ J4_cmpgtn1_fp0_jump_nt, J4_cmpgtn1_fp0_jump_t, J4_cmpgtn1_fp1_jump_nt,
+ J4_cmpgtn1_fp1_jump_t, J4_cmpgtn1_tp0_jump_nt, J4_cmpgtn1_tp0_jump_t,
+ J4_cmpgtn1_tp1_jump_nt, J4_cmpgtn1_tp1_jump_t,
+};
+
+// enum HexagonII::CompoundGroup
+namespace {
+unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
+ unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
+
+ switch (MI.getOpcode()) {
+ default:
+ return HexagonII::HCG_None;
+ //
+ // Compound pairs.
+ // "p0=cmp.eq(Rs16,Rt16); if (p0.new) jump:nt #r9:2"
+ // "Rd16=#U6 ; jump #r9:2"
+ // "Rd16=Rs16 ; jump #r9:2"
+ //
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtu:
+ if (IsExtended)
+ return false;
+ DstReg = MI.getOperand(0).getReg();
+ Src1Reg = MI.getOperand(1).getReg();
+ Src2Reg = MI.getOperand(2).getReg();
+ if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg))
+ return HexagonII::HCG_A;
+ break;
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgtui:
+ if (IsExtended)
+ return false;
+ // P0 = cmp.eq(Rs,#u2)
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ (HexagonMCInstrInfo::inRange<5>(MI, 2) ||
+ HexagonMCInstrInfo::minConstant(MI, 2) == -1))
+ return HexagonII::HCG_A;
+ break;
+ case Hexagon::A2_tfr:
+ if (IsExtended)
+ return false;
+ // Rd = Rs
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg))
+ return HexagonII::HCG_A;
+ break;
+ case Hexagon::A2_tfrsi:
+ if (IsExtended)
+ return false;
+ // Rd = #u6
+ DstReg = MI.getOperand(0).getReg();
+ if (HexagonMCInstrInfo::minConstant(MI, 1) <= 63 &&
+ HexagonMCInstrInfo::minConstant(MI, 1) >= 0 &&
+ HexagonMCInstrInfo::isIntRegForSubInst(DstReg))
+ return HexagonII::HCG_A;
+ break;
+ case Hexagon::S2_tstbit_i:
+ if (IsExtended)
+ return false;
+ DstReg = MI.getOperand(0).getReg();
+ Src1Reg = MI.getOperand(1).getReg();
+ if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
+ HexagonMCInstrInfo::minConstant(MI, 2) == 0)
+ return HexagonII::HCG_A;
+ break;
+ // The fact that .new form is used pretty much guarantees
+ // that predicate register will match. Nevertheless,
+ // there could be some false positives without additional
+ // checking.
+ case Hexagon::J2_jumptnew:
+ case Hexagon::J2_jumpfnew:
+ case Hexagon::J2_jumptnewpt:
+ case Hexagon::J2_jumpfnewpt:
+ Src1Reg = MI.getOperand(0).getReg();
+ if (Hexagon::P0 == Src1Reg || Hexagon::P1 == Src1Reg)
+ return HexagonII::HCG_B;
+ break;
+ // Transfer and jump:
+ // Rd=#U6 ; jump #r9:2
+ // Rd=Rs ; jump #r9:2
+ // Do not test for jump range here.
+ case Hexagon::J2_jump:
+ case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
+ return HexagonII::HCG_C;
+ break;
+ }
+
+ return HexagonII::HCG_None;
+}
+}
+
+/// getCompoundOp - Return the index from 0-7 into the above opcode lists.
+namespace {
+unsigned getCompoundOp(MCInst const &HMCI) {
+ const MCOperand &Predicate = HMCI.getOperand(0);
+ unsigned PredReg = Predicate.getReg();
+
+ assert((PredReg == Hexagon::P0) || (PredReg == Hexagon::P1) ||
+ (PredReg == Hexagon::P2) || (PredReg == Hexagon::P3));
+
+ switch (HMCI.getOpcode()) {
+ default:
+ llvm_unreachable("Expected match not found.\n");
+ break;
+ case Hexagon::J2_jumpfnew:
+ return (PredReg == Hexagon::P0) ? fp0_jump_nt : fp1_jump_nt;
+ case Hexagon::J2_jumpfnewpt:
+ return (PredReg == Hexagon::P0) ? fp0_jump_t : fp1_jump_t;
+ case Hexagon::J2_jumptnew:
+ return (PredReg == Hexagon::P0) ? tp0_jump_nt : tp1_jump_nt;
+ case Hexagon::J2_jumptnewpt:
+ return (PredReg == Hexagon::P0) ? tp0_jump_t : tp1_jump_t;
+ }
+}
+}
+
+namespace {
+MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
+ MCInst *CompoundInsn = 0;
+ unsigned compoundOpcode;
+ MCOperand Rs, Rt;
+ int64_t Value;
+ bool Success;
+
+ switch (L.getOpcode()) {
+ default:
+ DEBUG(dbgs() << "Possible compound ignored\n");
+ return CompoundInsn;
+
+ case Hexagon::A2_tfrsi:
+ Rt = L.getOperand(0);
+ compoundOpcode = J4_jumpseti;
+ CompoundInsn = new (Context) MCInst;
+ CompoundInsn->setOpcode(compoundOpcode);
+
+ CompoundInsn->addOperand(Rt);
+ CompoundInsn->addOperand(L.getOperand(1)); // Immediate
+ CompoundInsn->addOperand(R.getOperand(0)); // Jump target
+ break;
+
+ case Hexagon::A2_tfr:
+ Rt = L.getOperand(0);
+ Rs = L.getOperand(1);
+
+ compoundOpcode = J4_jumpsetr;
+ CompoundInsn = new (Context) MCInst;
+ CompoundInsn->setOpcode(compoundOpcode);
+ CompoundInsn->addOperand(Rt);
+ CompoundInsn->addOperand(Rs);
+ CompoundInsn->addOperand(R.getOperand(0)); // Jump target.
+
+ break;
+
+ case Hexagon::C2_cmpeq:
+ DEBUG(dbgs() << "CX: C2_cmpeq\n");
+ Rs = L.getOperand(1);
+ Rt = L.getOperand(2);
+
+ compoundOpcode = cmpeqBitOpcode[getCompoundOp(R)];
+ CompoundInsn = new (Context) MCInst;
+ CompoundInsn->setOpcode(compoundOpcode);
+ CompoundInsn->addOperand(Rs);
+ CompoundInsn->addOperand(Rt);
+ CompoundInsn->addOperand(R.getOperand(1));
+ break;
+
+ case Hexagon::C2_cmpgt:
+ DEBUG(dbgs() << "CX: C2_cmpgt\n");
+ Rs = L.getOperand(1);
+ Rt = L.getOperand(2);
+
+ compoundOpcode = cmpgtBitOpcode[getCompoundOp(R)];
+ CompoundInsn = new (Context) MCInst;
+ CompoundInsn->setOpcode(compoundOpcode);
+ CompoundInsn->addOperand(Rs);
+ CompoundInsn->addOperand(Rt);
+ CompoundInsn->addOperand(R.getOperand(1));
+ break;
+
+ case Hexagon::C2_cmpgtu:
+ DEBUG(dbgs() << "CX: C2_cmpgtu\n");
+ Rs = L.getOperand(1);
+ Rt = L.getOperand(2);
+
+ compoundOpcode = cmpgtuBitOpcode[getCompoundOp(R)];
+ CompoundInsn = new (Context) MCInst;
+ CompoundInsn->setOpcode(compoundOpcode);
+ CompoundInsn->addOperand(Rs);
+ CompoundInsn->addOperand(Rt);
+ CompoundInsn->addOperand(R.getOperand(1));
+ break;
+
+ case Hexagon::C2_cmpeqi:
+ DEBUG(dbgs() << "CX: C2_cmpeqi\n");
+ Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+ (void)Success;
+ assert(Success);
+ if (Value == -1)
+ compoundOpcode = cmpeqn1BitOpcode[getCompoundOp(R)];
+ else
+ compoundOpcode = cmpeqiBitOpcode[getCompoundOp(R)];
+
+ Rs = L.getOperand(1);
+ CompoundInsn = new (Context) MCInst;
+ CompoundInsn->setOpcode(compoundOpcode);
+ CompoundInsn->addOperand(Rs);
+ if (Value != -1)
+ CompoundInsn->addOperand(L.getOperand(2));
+ CompoundInsn->addOperand(R.getOperand(1));
+ break;
+
+ case Hexagon::C2_cmpgti:
+ DEBUG(dbgs() << "CX: C2_cmpgti\n");
+ Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+ (void)Success;
+ assert(Success);
+ if (Value == -1)
+ compoundOpcode = cmpgtn1BitOpcode[getCompoundOp(R)];
+ else
+ compoundOpcode = cmpgtiBitOpcode[getCompoundOp(R)];
+
+ Rs = L.getOperand(1);
+ CompoundInsn = new (Context) MCInst;
+ CompoundInsn->setOpcode(compoundOpcode);
+ CompoundInsn->addOperand(Rs);
+ if (Value != -1)
+ CompoundInsn->addOperand(L.getOperand(2));
+ CompoundInsn->addOperand(R.getOperand(1));
+ break;
+
+ case Hexagon::C2_cmpgtui:
+ DEBUG(dbgs() << "CX: C2_cmpgtui\n");
+ Rs = L.getOperand(1);
+ compoundOpcode = cmpgtuiBitOpcode[getCompoundOp(R)];
+ CompoundInsn = new (Context) MCInst;
+ CompoundInsn->setOpcode(compoundOpcode);
+ CompoundInsn->addOperand(Rs);
+ CompoundInsn->addOperand(L.getOperand(2));
+ CompoundInsn->addOperand(R.getOperand(1));
+ break;
+
+ case Hexagon::S2_tstbit_i:
+ DEBUG(dbgs() << "CX: S2_tstbit_i\n");
+ Rs = L.getOperand(1);
+ compoundOpcode = tstBitOpcode[getCompoundOp(R)];
+ CompoundInsn = new (Context) MCInst;
+ CompoundInsn->setOpcode(compoundOpcode);
+ CompoundInsn->addOperand(Rs);
+ CompoundInsn->addOperand(R.getOperand(1));
+ break;
+ }
+
+ return CompoundInsn;
+}
+}
+
+/// Non-symmetrical. See if these two instructions are a fit for a compound pair.
+namespace {
+bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
+ MCInst const &MIb, bool IsExtendedB) {
+ unsigned MIaG = getCompoundCandidateGroup(MIa, IsExtendedA);
+ unsigned MIbG = getCompoundCandidateGroup(MIb, IsExtendedB);
+ // We have two candidates - check that this is the same register
+ // we are talking about.
+ unsigned Opca = MIa.getOpcode();
+ if (MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_C &&
+ (Opca == Hexagon::A2_tfr || Opca == Hexagon::A2_tfrsi))
+ return true;
+ return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) &&
+ (MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg()));
+}
+}
+
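+// Scan a bundle for a class-J jump and try to pair it with another
+// instruction in the same bundle; constant extenders are tracked so that
+// extended candidates can be rejected. Returns true if a pair was fused.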
+namespace {
+bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
+ assert(HexagonMCInstrInfo::isBundle(MCI));
+ bool JExtended = false;
+ for (MCInst::iterator J =
+ MCI.begin() + HexagonMCInstrInfo::bundleInstructionsOffset;
+ J != MCI.end(); ++J) {
+ MCInst const *JumpInst = J->getInst();
+ if (HexagonMCInstrInfo::isImmext(*JumpInst)) {
+ JExtended = true;
+ continue;
+ }
+ if (llvm::HexagonMCInstrInfo::getType(MCII, *JumpInst) ==
+ HexagonII::TypeJ) {
+ // Try to pair with another insn (B)undled with jump.
+ bool BExtended = false;
+ for (MCInst::iterator B =
+ MCI.begin() + HexagonMCInstrInfo::bundleInstructionsOffset;
+ B != MCI.end(); ++B) {
+ MCInst const *Inst = B->getInst();
+ if (JumpInst == Inst)
+ continue;
+ if (HexagonMCInstrInfo::isImmext(*Inst)) {
+ BExtended = true;
+ continue;
+ }
+ DEBUG(dbgs() << "J,B: " << JumpInst->getOpcode() << ","
+ << Inst->getOpcode() << "\n");
+ if (isOrderedCompoundPair(*Inst, BExtended, *JumpInst, JExtended)) {
+ MCInst *CompoundInsn = getCompoundInsn(Context, *Inst, *JumpInst);
+ if (CompoundInsn) {
+ DEBUG(dbgs() << "B: " << Inst->getOpcode() << ","
+ << JumpInst->getOpcode() << " Compounds to "
+ << CompoundInsn->getOpcode() << "\n");
+ J->setInst(CompoundInsn);
+ MCI.erase(B);
+ return true;
+ }
+ }
+ BExtended = false;
+ }
+ }
+ JExtended = false;
+ }
+ return false;
+}
+}
+
+/// tryCompound - Given a bundle, check for compound instructions; when one
+/// is found, update the contents of the bundle with the compound instruction.
+/// If a compound instruction is found then the bundle will have one
+/// additional slot available.
+void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII,
+ MCContext &Context, MCInst &MCI) {
+ assert(HexagonMCInstrInfo::isBundle(MCI) &&
+ "Non-Bundle where Bundle expected");
+
+ // By definition a compound must have 2 insn.
+ if (MCI.size() < 2)
+ return;
+
+ // Look for compounds until none are found, only update the bundle when
+ // a compound is found.
+ while (lookForCompound(MCII, Context, MCI))
+ ;
+
+ return;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
new file mode 100644
index 0000000..e6194f6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -0,0 +1,1087 @@
+//===----- HexagonMCDuplexInfo.cpp - Instruction bundle checking ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements duplexing of instructions to reduce code size
+//
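+// A duplex packs two sub-instructions into a single 32-bit instruction
+// word. For example (illustrative registers), the pair
+//   r0 = r1                  (A2_tfr       -> V4_SA1_tfr)
+//   r2 = memw(r29 + #8)      (L2_loadri_io -> V4_SL2_loadri_sp)
+// can share one word when the group pairing rules below allow it.
+//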
+//===----------------------------------------------------------------------===//
+
+#include "HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <map>
+
+using namespace llvm;
+using namespace Hexagon;
+
+#define DEBUG_TYPE "hexagon-mcduplex-info"
+
+// Table pairing each sub-instruction opcode with its encoding bits
+// (operand fields zeroed).
+static const std::pair<unsigned, unsigned> opcodeData[] = {
+ std::make_pair((unsigned)V4_SA1_addi, 0),
+ std::make_pair((unsigned)V4_SA1_addrx, 6144),
+ std::make_pair((unsigned)V4_SA1_addsp, 3072),
+ std::make_pair((unsigned)V4_SA1_and1, 4608),
+ std::make_pair((unsigned)V4_SA1_clrf, 6768),
+ std::make_pair((unsigned)V4_SA1_clrfnew, 6736),
+ std::make_pair((unsigned)V4_SA1_clrt, 6752),
+ std::make_pair((unsigned)V4_SA1_clrtnew, 6720),
+ std::make_pair((unsigned)V4_SA1_cmpeqi, 6400),
+ std::make_pair((unsigned)V4_SA1_combine0i, 7168),
+ std::make_pair((unsigned)V4_SA1_combine1i, 7176),
+ std::make_pair((unsigned)V4_SA1_combine2i, 7184),
+ std::make_pair((unsigned)V4_SA1_combine3i, 7192),
+ std::make_pair((unsigned)V4_SA1_combinerz, 7432),
+ std::make_pair((unsigned)V4_SA1_combinezr, 7424),
+ std::make_pair((unsigned)V4_SA1_dec, 4864),
+ std::make_pair((unsigned)V4_SA1_inc, 4352),
+ std::make_pair((unsigned)V4_SA1_seti, 2048),
+ std::make_pair((unsigned)V4_SA1_setin1, 6656),
+ std::make_pair((unsigned)V4_SA1_sxtb, 5376),
+ std::make_pair((unsigned)V4_SA1_sxth, 5120),
+ std::make_pair((unsigned)V4_SA1_tfr, 4096),
+ std::make_pair((unsigned)V4_SA1_zxtb, 5888),
+ std::make_pair((unsigned)V4_SA1_zxth, 5632),
+ std::make_pair((unsigned)V4_SL1_loadri_io, 0),
+ std::make_pair((unsigned)V4_SL1_loadrub_io, 4096),
+ std::make_pair((unsigned)V4_SL2_deallocframe, 7936),
+ std::make_pair((unsigned)V4_SL2_jumpr31, 8128),
+ std::make_pair((unsigned)V4_SL2_jumpr31_f, 8133),
+ std::make_pair((unsigned)V4_SL2_jumpr31_fnew, 8135),
+ std::make_pair((unsigned)V4_SL2_jumpr31_t, 8132),
+ std::make_pair((unsigned)V4_SL2_jumpr31_tnew, 8134),
+ std::make_pair((unsigned)V4_SL2_loadrb_io, 4096),
+ std::make_pair((unsigned)V4_SL2_loadrd_sp, 7680),
+ std::make_pair((unsigned)V4_SL2_loadrh_io, 0),
+ std::make_pair((unsigned)V4_SL2_loadri_sp, 7168),
+ std::make_pair((unsigned)V4_SL2_loadruh_io, 2048),
+ std::make_pair((unsigned)V4_SL2_return, 8000),
+ std::make_pair((unsigned)V4_SL2_return_f, 8005),
+ std::make_pair((unsigned)V4_SL2_return_fnew, 8007),
+ std::make_pair((unsigned)V4_SL2_return_t, 8004),
+ std::make_pair((unsigned)V4_SL2_return_tnew, 8006),
+ std::make_pair((unsigned)V4_SS1_storeb_io, 4096),
+ std::make_pair((unsigned)V4_SS1_storew_io, 0),
+ std::make_pair((unsigned)V4_SS2_allocframe, 7168),
+ std::make_pair((unsigned)V4_SS2_storebi0, 4608),
+ std::make_pair((unsigned)V4_SS2_storebi1, 4864),
+ std::make_pair((unsigned)V4_SS2_stored_sp, 2560),
+ std::make_pair((unsigned)V4_SS2_storeh_io, 0),
+ std::make_pair((unsigned)V4_SS2_storew_sp, 2048),
+ std::make_pair((unsigned)V4_SS2_storewi0, 4096),
+ std::make_pair((unsigned)V4_SS2_storewi1, 4352)};
+
+static std::map<unsigned, unsigned>
+ subinstOpcodeMap(std::begin(opcodeData), std::end(opcodeData));
+
+bool HexagonMCInstrInfo::isDuplexPairMatch(unsigned Ga, unsigned Gb) {
+ switch (Ga) {
+ case HexagonII::HSIG_None:
+ default:
+ return false;
+ case HexagonII::HSIG_L1:
+ return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_L2:
+ return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+ Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_S1:
+ return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+ Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_S2:
+ return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+ Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_S2 ||
+ Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_A:
+ return (Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_Compound:
+ return (Gb == HexagonII::HSIG_Compound);
+ }
+ return false;
+}
+
+unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
+ switch (Ga) {
+ case HexagonII::HSIG_None:
+ default:
+ break;
+ case HexagonII::HSIG_L1:
+ switch (Gb) {
+ default:
+ break;
+ case HexagonII::HSIG_L1:
+ return 0;
+ case HexagonII::HSIG_A:
+ return 0x4;
+ }
+ case HexagonII::HSIG_L2:
+ switch (Gb) {
+ default:
+ break;
+ case HexagonII::HSIG_L1:
+ return 0x1;
+ case HexagonII::HSIG_L2:
+ return 0x2;
+ case HexagonII::HSIG_A:
+ return 0x5;
+ }
+ case HexagonII::HSIG_S1:
+ switch (Gb) {
+ default:
+ break;
+ case HexagonII::HSIG_L1:
+ return 0x8;
+ case HexagonII::HSIG_L2:
+ return 0x9;
+ case HexagonII::HSIG_S1:
+ return 0xA;
+ case HexagonII::HSIG_A:
+ return 0x6;
+ }
+ case HexagonII::HSIG_S2:
+ switch (Gb) {
+ default:
+ break;
+ case HexagonII::HSIG_L1:
+ return 0xC;
+ case HexagonII::HSIG_L2:
+ return 0xD;
+ case HexagonII::HSIG_S1:
+ return 0xB;
+ case HexagonII::HSIG_S2:
+ return 0xE;
+ case HexagonII::HSIG_A:
+ return 0x7;
+ }
+ case HexagonII::HSIG_A:
+ switch (Gb) {
+ default:
+ break;
+ case HexagonII::HSIG_A:
+ return 0x3;
+ }
+ case HexagonII::HSIG_Compound:
+ switch (Gb) {
+ case HexagonII::HSIG_Compound:
+ return 0xFFFFFFFF;
+ }
+ }
+ return 0xFFFFFFFF;
+}
+
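+// Classify an instruction into one of the duplex sub-instruction groups
+// (L1, L2, S1, S2, A), or HSIG_None if it has no sub-instruction form.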
+unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
+ unsigned DstReg, PredReg, SrcReg, Src1Reg, Src2Reg;
+
+ switch (MCI.getOpcode()) {
+ default:
+ return HexagonII::HSIG_None;
+ //
+ // Group L1:
+ //
+ // Rd = memw(Rs+#u4:2)
+ // Rd = memub(Rs+#u4:0)
+ case Hexagon::L2_loadri_io:
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ // Special case this one from Group L2.
+ // Rd = memw(r29+#u5:2)
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
+ if (HexagonMCInstrInfo::isIntReg(SrcReg) &&
+ Hexagon::R29 == SrcReg && inRange<5, 2>(MCI, 2)) {
+ return HexagonII::HSIG_L2;
+ }
+ // Rd = memw(Rs+#u4:2)
+ if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ inRange<4, 2>(MCI, 2)) {
+ return HexagonII::HSIG_L1;
+ }
+ }
+ break;
+ case Hexagon::L2_loadrub_io:
+ // Rd = memub(Rs+#u4:0)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ inRange<4>(MCI, 2)) {
+ return HexagonII::HSIG_L1;
+ }
+ break;
+ //
+ // Group L2:
+ //
+ // Rd = memh/memuh(Rs+#u3:1)
+ // Rd = memb(Rs+#u3:0)
+ // Rd = memw(r29+#u5:2) - Handled above.
+ // Rdd = memd(r29+#u5:3)
+ // deallocframe
+ // [if ([!]p0[.new])] dealloc_return
+ // [if ([!]p0[.new])] jumpr r31
+ case Hexagon::L2_loadrh_io:
+ case Hexagon::L2_loadruh_io:
+ // Rd = memh/memuh(Rs+#u3:1)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ inRange<3, 1>(MCI, 2)) {
+ return HexagonII::HSIG_L2;
+ }
+ break;
+ case Hexagon::L2_loadrb_io:
+ // Rd = memb(Rs+#u3:0)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ inRange<3>(MCI, 2)) {
+ return HexagonII::HSIG_L2;
+ }
+ break;
+ case Hexagon::L2_loadrd_io:
+ // Rdd = memd(r29+#u5:3)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg &&
+ inRange<5, 3>(MCI, 2)) {
+ return HexagonII::HSIG_L2;
+ }
+ break;
+
+ case Hexagon::L4_return:
+ case Hexagon::L2_deallocframe:
+ return HexagonII::HSIG_L2;
+ case Hexagon::EH_RETURN_JMPR:
+ case Hexagon::J2_jumpr:
+ case Hexagon::JMPret:
+ // jumpr r31
+ // Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>.
+ DstReg = MCI.getOperand(0).getReg();
+ if (Hexagon::R31 == DstReg) {
+ return HexagonII::HSIG_L2;
+ }
+ break;
+
+ case Hexagon::J2_jumprt:
+ case Hexagon::J2_jumprf:
+ case Hexagon::J2_jumprtnew:
+ case Hexagon::J2_jumprfnew:
+ case Hexagon::JMPrett:
+ case Hexagon::JMPretf:
+ case Hexagon::JMPrettnew:
+ case Hexagon::JMPretfnew:
+ case Hexagon::JMPrettnewpt:
+ case Hexagon::JMPretfnewpt:
+ DstReg = MCI.getOperand(1).getReg();
+ SrcReg = MCI.getOperand(0).getReg();
+ // [if ([!]p0[.new])] jumpr r31
+ if ((HexagonMCInstrInfo::isPredReg(SrcReg) && (Hexagon::P0 == SrcReg)) &&
+ (Hexagon::R31 == DstReg)) {
+ return HexagonII::HSIG_L2;
+ }
+ break;
+ case Hexagon::L4_return_t:
+ case Hexagon::L4_return_f:
+ case Hexagon::L4_return_tnew_pnt:
+ case Hexagon::L4_return_fnew_pnt:
+ case Hexagon::L4_return_tnew_pt:
+ case Hexagon::L4_return_fnew_pt:
+ // [if ([!]p0[.new])] dealloc_return
+ SrcReg = MCI.getOperand(0).getReg();
+ if (Hexagon::P0 == SrcReg) {
+ return HexagonII::HSIG_L2;
+ }
+ break;
+ //
+ // Group S1:
+ //
+ // memw(Rs+#u4:2) = Rt
+ // memb(Rs+#u4:0) = Rt
+ case Hexagon::S2_storeri_io:
+ // Special case this one from Group S2.
+ // memw(r29+#u5:2) = Rt
+ Src1Reg = MCI.getOperand(0).getReg();
+ Src2Reg = MCI.getOperand(2).getReg();
+ if (HexagonMCInstrInfo::isIntReg(Src1Reg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) &&
+ Hexagon::R29 == Src1Reg && inRange<5, 2>(MCI, 1)) {
+ return HexagonII::HSIG_S2;
+ }
+ // memw(Rs+#u4:2) = Rt
+ if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) &&
+ inRange<4, 2>(MCI, 1)) {
+ return HexagonII::HSIG_S1;
+ }
+ break;
+ case Hexagon::S2_storerb_io:
+ // memb(Rs+#u4:0) = Rt
+ Src1Reg = MCI.getOperand(0).getReg();
+ Src2Reg = MCI.getOperand(2).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) &&
+ inRange<4>(MCI, 1)) {
+ return HexagonII::HSIG_S1;
+ }
+ break;
+ //
+ // Group S2:
+ //
+ // memh(Rs+#u3:1) = Rt
+ // memw(r29+#u5:2) = Rt
+ // memd(r29+#s6:3) = Rtt
+ // memw(Rs+#u4:2) = #U1
+ // memb(Rs+#u4) = #U1
+ // allocframe(#u5:3)
+ case Hexagon::S2_storerh_io:
+ // memh(Rs+#u3:1) = Rt
+ Src1Reg = MCI.getOperand(0).getReg();
+ Src2Reg = MCI.getOperand(2).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) &&
+ inRange<3, 1>(MCI, 1)) {
+ return HexagonII::HSIG_S2;
+ }
+ break;
+ case Hexagon::S2_storerd_io:
+ // memd(r29+#s6:3) = Rtt
+ Src1Reg = MCI.getOperand(0).getReg();
+ Src2Reg = MCI.getOperand(2).getReg();
+ if (HexagonMCInstrInfo::isDblRegForSubInst(Src2Reg) &&
+ HexagonMCInstrInfo::isIntReg(Src1Reg) && Hexagon::R29 == Src1Reg &&
+ inSRange<6, 3>(MCI, 1)) {
+ return HexagonII::HSIG_S2;
+ }
+ break;
+ case Hexagon::S4_storeiri_io:
+ // memw(Rs+#u4:2) = #U1
+ Src1Reg = MCI.getOperand(0).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
+ inRange<4, 2>(MCI, 1) && inRange<1>(MCI, 2)) {
+ return HexagonII::HSIG_S2;
+ }
+ break;
+ case Hexagon::S4_storeirb_io:
+ // memb(Rs+#u4) = #U1
+ Src1Reg = MCI.getOperand(0).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
+ inRange<4>(MCI, 1) && inRange<1>(MCI, 2)) {
+ return HexagonII::HSIG_S2;
+ }
+ break;
+ case Hexagon::S2_allocframe:
+ if (inRange<5, 3>(MCI, 0))
+ return HexagonII::HSIG_S2;
+ break;
+ //
+ // Group A:
+ //
+ // Rx = add(Rx,#s7)
+ // Rd = Rs
+ // Rd = #u6
+ // Rd = #-1
+ // if ([!]P0[.new]) Rd = #0
+ // Rd = add(r29,#u6:2)
+ // Rx = add(Rx,Rs)
+ // P0 = cmp.eq(Rs,#u2)
+ // Rdd = combine(#0,Rs)
+ // Rdd = combine(Rs,#0)
+ // Rdd = combine(#u2,#U2)
+ // Rd = add(Rs,#1)
+ // Rd = add(Rs,#-1)
+ // Rd = sxth/sxtb/zxtb/zxth(Rs)
+ // Rd = and(Rs,#1)
+ case Hexagon::A2_addi:
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
+ // Rd = add(r29,#u6:2)
+ if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg &&
+ inRange<6, 2>(MCI, 2)) {
+ return HexagonII::HSIG_A;
+ }
+ // Rx = add(Rx,#s7)
+ if (DstReg == SrcReg) {
+ return HexagonII::HSIG_A;
+ }
+ // Rd = add(Rs,#1)
+ // Rd = add(Rs,#-1)
+ if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ (minConstant(MCI, 2) == 1 || minConstant(MCI, 2) == -1)) {
+ return HexagonII::HSIG_A;
+ }
+ }
+ break;
+ case Hexagon::A2_add:
+ // Rx = add(Rx,Rs)
+ DstReg = MCI.getOperand(0).getReg();
+ Src1Reg = MCI.getOperand(1).getReg();
+ Src2Reg = MCI.getOperand(2).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && (DstReg == Src1Reg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg)) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A2_andir:
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ (minConstant(MCI, 2) == 1 || minConstant(MCI, 2) == 255)) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A2_tfr:
+ // Rd = Rs
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg)) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A2_tfrsi:
+ DstReg = MCI.getOperand(0).getReg();
+
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::C2_cmoveit:
+ case Hexagon::C2_cmovenewit:
+ case Hexagon::C2_cmoveif:
+ case Hexagon::C2_cmovenewif:
+ // if ([!]P0[.new]) Rd = #0
+ // Actual form:
+ // %R16<def> = C2_cmovenewit %P0<internal>, 0, %R16<imp-use,undef>;
+ DstReg = MCI.getOperand(0).getReg(); // Rd
+ PredReg = MCI.getOperand(1).getReg(); // P0
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+ Hexagon::P0 == PredReg && minConstant(MCI, 2) == 0) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::C2_cmpeqi:
+ // P0 = cmp.eq(Rs,#u2)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (Hexagon::P0 == DstReg &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ inRange<2>(MCI, 2)) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineii:
+ // Rdd = combine(#u2,#U2)
+ DstReg = MCI.getOperand(0).getReg();
+ if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) &&
+ inRange<2>(MCI, 1) && inRange<2>(MCI, 2)) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A4_combineri:
+ // Rdd = combine(Rs,#0)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ minConstant(MCI, 2) == 0) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A4_combineir:
+ // Rdd = combine(#0,Rs)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(2).getReg();
+ if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+ minConstant(MCI, 1) == 0) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A2_sxtb:
+ case Hexagon::A2_sxth:
+ case Hexagon::A2_zxtb:
+ case Hexagon::A2_zxth:
+ // Rd = sxth/sxtb/zxtb/zxth(Rs)
+ DstReg = MCI.getOperand(0).getReg();
+ SrcReg = MCI.getOperand(1).getReg();
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+ HexagonMCInstrInfo::isIntRegForSubInst(SrcReg)) {
+ return HexagonII::HSIG_A;
+ }
+ break;
+ }
+
+ return HexagonII::HSIG_None;
+}
+
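+// Return true if the sub-instruction form of this instruction would need a
+// constant extender, i.e. its immediate cannot be evaluated as absolute or
+// does not fit the sub-instruction's field.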
+bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) {
+ unsigned DstReg, SrcReg;
+ switch (potentialDuplex.getOpcode()) {
+ case Hexagon::A2_addi:
+ // testing for case of: Rx = add(Rx,#s7)
+ DstReg = potentialDuplex.getOperand(0).getReg();
+ SrcReg = potentialDuplex.getOperand(1).getReg();
+ if (DstReg == SrcReg && HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
+ int64_t Value;
+ if (!potentialDuplex.getOperand(2).getExpr()->evaluateAsAbsolute(Value))
+ return true;
+ if (!isShiftedInt<7, 0>(Value))
+ return true;
+ }
+ break;
+ case Hexagon::A2_tfrsi:
+ DstReg = potentialDuplex.getOperand(0).getReg();
+
+ if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
+ int64_t Value;
+ if (!potentialDuplex.getOperand(1).getExpr()->evaluateAsAbsolute(Value))
+ return true;
+ // Check for case of Rd = #-1.
+ if (Value == -1)
+ return false;
+ // Check for case of Rd = #u6.
+ if (!isShiftedUInt<6, 0>(Value))
+ return true;
+ }
+ break;
+ default:
+ break;
+ }
+ return false;
+}
+
+/// Non-symmetrical. See if these two instructions are a fit for a duplex pair.
+bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII,
+ MCInst const &MIa, bool ExtendedA,
+ MCInst const &MIb, bool ExtendedB,
+ bool bisReversable) {
+ // Slot 1 cannot be extended in duplexes PRM 10.5
+ if (ExtendedA)
+ return false;
+ // Only A2_addi and A2_tfrsi can be extended in duplex form PRM 10.5
+ if (ExtendedB) {
+ unsigned Opcode = MIb.getOpcode();
+ if ((Opcode != Hexagon::A2_addi) && (Opcode != Hexagon::A2_tfrsi))
+ return false;
+ }
+ unsigned MIaG = HexagonMCInstrInfo::getDuplexCandidateGroup(MIa),
+ MIbG = HexagonMCInstrInfo::getDuplexCandidateGroup(MIb);
+
+ // If a duplex contains 2 insns in the same group, the insns must be
+ // ordered such that the numerically smaller opcode is in slot 1.
+ if ((MIaG != HexagonII::HSIG_None) && (MIaG == MIbG) && bisReversable) {
+ MCInst SubInst0 = HexagonMCInstrInfo::deriveSubInst(MIa);
+ MCInst SubInst1 = HexagonMCInstrInfo::deriveSubInst(MIb);
+
+ unsigned zeroedSubInstS0 =
+ subinstOpcodeMap.find(SubInst0.getOpcode())->second;
+ unsigned zeroedSubInstS1 =
+ subinstOpcodeMap.find(SubInst1.getOpcode())->second;
+
+ if (zeroedSubInstS0 < zeroedSubInstS1)
+ // subinstS0 (maps to slot 0) must be greater than
+ // subinstS1 (maps to slot 1)
+ return false;
+ }
+
+ // allocframe must always be in slot 0
+ if (MIb.getOpcode() == Hexagon::S2_allocframe)
+ return false;
+
+ if ((MIaG != HexagonII::HSIG_None) && (MIbG != HexagonII::HSIG_None)) {
+ // Prevent 2 instructions with extenders from duplexing
+ // Note that MIb (slot1) can be extended and MIa (slot0)
+ // can never be extended
+ if (subInstWouldBeExtended(MIa))
+ return false;
+
+ // If duplexing produces an extender, but the original did not
+ // have an extender, do not duplex.
+ if (subInstWouldBeExtended(MIb) && !ExtendedB)
+ return false;
+ }
+
+ // If jumpr r31 appears, it must be in slot 0, and never slot 1 (MIb).
+ if (MIbG == HexagonII::HSIG_L2) {
+ if ((MIb.getNumOperands() > 1) && MIb.getOperand(1).isReg() &&
+ (MIb.getOperand(1).getReg() == Hexagon::R31))
+ return false;
+ if ((MIb.getNumOperands() > 0) && MIb.getOperand(0).isReg() &&
+ (MIb.getOperand(0).getReg() == Hexagon::R31))
+ return false;
+ }
+
+ // If a store appears, it must be in slot 0 (MIa) 1st, and then slot 1 (MIb);
+ // therefore, not duplexable if slot 1 is a store, and slot 0 is not.
+ if ((MIbG == HexagonII::HSIG_S1) || (MIbG == HexagonII::HSIG_S2)) {
+ if ((MIaG != HexagonII::HSIG_S1) && (MIaG != HexagonII::HSIG_S2))
+ return false;
+ }
+
+ return (isDuplexPairMatch(MIaG, MIbG));
+}
+
+/// Symmetrical. See if these two instructions are a fit for a duplex pair.
+bool HexagonMCInstrInfo::isDuplexPair(MCInst const &MIa, MCInst const &MIb) {
+ unsigned MIaG = getDuplexCandidateGroup(MIa),
+ MIbG = getDuplexCandidateGroup(MIb);
+ return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG));
+}
+
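+// Copy operand opNum of Inst into the sub-instruction, asserting that any
+// register operand is one the sub-instruction encoding can address
+// (r0-r7, r16-r23 and the corresponding double registers).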
+inline static void addOps(MCInst &subInstPtr, MCInst const &Inst,
+ unsigned opNum) {
+ if (Inst.getOperand(opNum).isReg()) {
+ switch (Inst.getOperand(opNum).getReg()) {
+ default:
+ llvm_unreachable("Not Duplexable Register");
+ break;
+ case Hexagon::R0:
+ case Hexagon::R1:
+ case Hexagon::R2:
+ case Hexagon::R3:
+ case Hexagon::R4:
+ case Hexagon::R5:
+ case Hexagon::R6:
+ case Hexagon::R7:
+ case Hexagon::D0:
+ case Hexagon::D1:
+ case Hexagon::D2:
+ case Hexagon::D3:
+ case Hexagon::R16:
+ case Hexagon::R17:
+ case Hexagon::R18:
+ case Hexagon::R19:
+ case Hexagon::R20:
+ case Hexagon::R21:
+ case Hexagon::R22:
+ case Hexagon::R23:
+ case Hexagon::D8:
+ case Hexagon::D9:
+ case Hexagon::D10:
+ case Hexagon::D11:
+ subInstPtr.addOperand(Inst.getOperand(opNum));
+ break;
+ }
+ } else
+ subInstPtr.addOperand(Inst.getOperand(opNum));
+}
+
+MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
+ MCInst Result;
+ bool Absolute;
+ int64_t Value;
+ switch (Inst.getOpcode()) {
+ default:
+ // dbgs() << "opcode: "<< Inst->getOpcode() << "\n";
+ llvm_unreachable("Unimplemented subinstruction \n");
+ break;
+ case Hexagon::A2_addi:
+ Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);(void)Absolute;
+ if (Value == 1) {
+ Result.setOpcode(Hexagon::V4_SA1_inc);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break;
+ } // 1,2 SUBInst $Rd = add($Rs, #1)
+ else if (Value == -1) {
+ Result.setOpcode(Hexagon::V4_SA1_dec);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break;
+ } // 1,2 SUBInst $Rd = add($Rs,#-1)
+ else if (Inst.getOperand(1).getReg() == Hexagon::R29) {
+ Result.setOpcode(Hexagon::V4_SA1_addsp);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break;
+ } // 1,3 SUBInst $Rd = add(r29, #$u6_2)
+ else {
+ Result.setOpcode(Hexagon::V4_SA1_addi);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break;
+ } // 1,2,3 SUBInst $Rx = add($Rx, #$s7)
+ case Hexagon::A2_add:
+ Result.setOpcode(Hexagon::V4_SA1_addrx);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst $Rx = add($_src_, $Rs)
+ case Hexagon::S2_allocframe:
+ Result.setOpcode(Hexagon::V4_SS2_allocframe);
+ addOps(Result, Inst, 0);
+ break; // 1 SUBInst allocframe(#$u5_3)
+ case Hexagon::A2_andir:
+ if (minConstant(Inst, 2) == 255) {
+ Result.setOpcode(Hexagon::V4_SA1_zxtb);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 $Rd = and($Rs, #255)
+ } else {
+ Result.setOpcode(Hexagon::V4_SA1_and1);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 SUBInst $Rd = and($Rs, #1)
+ }
+ case Hexagon::C2_cmpeqi:
+ Result.setOpcode(Hexagon::V4_SA1_cmpeqi);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 2,3 SUBInst p0 = cmp.eq($Rs, #$u2)
+ case Hexagon::A4_combineii:
+ case Hexagon::A2_combineii:
+ Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);(void)Absolute;
+ if (Value == 1) {
+ Result.setOpcode(Hexagon::V4_SA1_combine1i);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break; // 1,3 SUBInst $Rdd = combine(#1, #$u2)
+ }
+ if (Value == 3) {
+ Result.setOpcode(Hexagon::V4_SA1_combine3i);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break; // 1,3 SUBInst $Rdd = combine(#3, #$u2)
+ }
+ if (Value == 0) {
+ Result.setOpcode(Hexagon::V4_SA1_combine0i);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break; // 1,3 SUBInst $Rdd = combine(#0, #$u2)
+ }
+ if (Value == 2) {
+ Result.setOpcode(Hexagon::V4_SA1_combine2i);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break; // 1,3 SUBInst $Rdd = combine(#2, #$u2)
+ }
+ case Hexagon::A4_combineir:
+ Result.setOpcode(Hexagon::V4_SA1_combinezr);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break; // 1,3 SUBInst $Rdd = combine(#0, $Rs)
+
+ case Hexagon::A4_combineri:
+ Result.setOpcode(Hexagon::V4_SA1_combinerz);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 SUBInst $Rdd = combine($Rs, #0)
+ case Hexagon::L4_return_tnew_pnt:
+ case Hexagon::L4_return_tnew_pt:
+ Result.setOpcode(Hexagon::V4_SL2_return_tnew);
+ break; // none SUBInst if (p0.new) dealloc_return:nt
+ case Hexagon::L4_return_fnew_pnt:
+ case Hexagon::L4_return_fnew_pt:
+ Result.setOpcode(Hexagon::V4_SL2_return_fnew);
+ break; // none SUBInst if (!p0.new) dealloc_return:nt
+ case Hexagon::L4_return_f:
+ Result.setOpcode(Hexagon::V4_SL2_return_f);
+ break; // none SUBInst if (!p0) dealloc_return
+ case Hexagon::L4_return_t:
+ Result.setOpcode(Hexagon::V4_SL2_return_t);
+ break; // none SUBInst if (p0) dealloc_return
+ case Hexagon::L4_return:
+ Result.setOpcode(Hexagon::V4_SL2_return);
+ break; // none SUBInst dealloc_return
+ case Hexagon::L2_deallocframe:
+ Result.setOpcode(Hexagon::V4_SL2_deallocframe);
+ break; // none SUBInst deallocframe
+ case Hexagon::EH_RETURN_JMPR:
+ case Hexagon::J2_jumpr:
+ case Hexagon::JMPret:
+ Result.setOpcode(Hexagon::V4_SL2_jumpr31);
+ break; // none SUBInst jumpr r31
+ case Hexagon::J2_jumprf:
+ case Hexagon::JMPretf:
+ Result.setOpcode(Hexagon::V4_SL2_jumpr31_f);
+ break; // none SUBInst if (!p0) jumpr r31
+ case Hexagon::J2_jumprfnew:
+ case Hexagon::JMPretfnewpt:
+ case Hexagon::JMPretfnew:
+ Result.setOpcode(Hexagon::V4_SL2_jumpr31_fnew);
+ break; // none SUBInst if (!p0.new) jumpr:nt r31
+ case Hexagon::J2_jumprt:
+ case Hexagon::JMPrett:
+ Result.setOpcode(Hexagon::V4_SL2_jumpr31_t);
+ break; // none SUBInst if (p0) jumpr r31
+ case Hexagon::J2_jumprtnew:
+ case Hexagon::JMPrettnewpt:
+ case Hexagon::JMPrettnew:
+ Result.setOpcode(Hexagon::V4_SL2_jumpr31_tnew);
+ break; // none SUBInst if (p0.new) jumpr:nt r31
+ case Hexagon::L2_loadrb_io:
+ Result.setOpcode(Hexagon::V4_SL2_loadrb_io);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst $Rd = memb($Rs + #$u3_0)
+ case Hexagon::L2_loadrd_io:
+ Result.setOpcode(Hexagon::V4_SL2_loadrd_sp);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break; // 1,3 SUBInst $Rdd = memd(r29 + #$u5_3)
+ case Hexagon::L2_loadrh_io:
+ Result.setOpcode(Hexagon::V4_SL2_loadrh_io);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst $Rd = memh($Rs + #$u3_1)
+ case Hexagon::L2_loadrub_io:
+ Result.setOpcode(Hexagon::V4_SL1_loadrub_io);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst $Rd = memub($Rs + #$u4_0)
+ case Hexagon::L2_loadruh_io:
+ Result.setOpcode(Hexagon::V4_SL2_loadruh_io);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst $Rd = memuh($Rs + #$u3_1)
+ case Hexagon::L2_loadri_io:
+ if (Inst.getOperand(1).getReg() == Hexagon::R29) {
+ Result.setOpcode(Hexagon::V4_SL2_loadri_sp);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 2);
+ break; // 2 1,3 SUBInst $Rd = memw(r29 + #$u5_2)
+ } else {
+ Result.setOpcode(Hexagon::V4_SL1_loadri_io);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst $Rd = memw($Rs + #$u4_2)
+ }
+ case Hexagon::S4_storeirb_io:
+ Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);(void)Absolute;
+ if (Value == 0) {
+ Result.setOpcode(Hexagon::V4_SS2_storebi0);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 SUBInst memb($Rs + #$u4_0)=#0
+ } else if (Value == 1) {
+ Result.setOpcode(Hexagon::V4_SS2_storebi1);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 2 1,2 SUBInst memb($Rs + #$u4_0)=#1
+ }
+ case Hexagon::S2_storerb_io:
+ Result.setOpcode(Hexagon::V4_SS1_storeb_io);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt
+ case Hexagon::S2_storerd_io:
+ Result.setOpcode(Hexagon::V4_SS2_stored_sp);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 2,3 SUBInst memd(r29 + #$s6_3) = $Rtt
+ case Hexagon::S2_storerh_io:
+ Result.setOpcode(Hexagon::V4_SS2_storeh_io);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt
+ case Hexagon::S4_storeiri_io:
+ Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);(void)Absolute;
+ if (Value == 0) {
+ Result.setOpcode(Hexagon::V4_SS2_storewi0);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#0
+ } else if (Value == 1) {
+ Result.setOpcode(Hexagon::V4_SS2_storewi1);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#1
+ } else if (Inst.getOperand(0).getReg() == Hexagon::R29) {
+ Result.setOpcode(Hexagon::V4_SS2_storew_sp);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
+ break; // 1 2,3 SUBInst memw(r29 + #$u5_2) = $Rt
+ }
+ case Hexagon::S2_storeri_io:
+ if (Inst.getOperand(0).getReg() == Hexagon::R29) {
+ Result.setOpcode(Hexagon::V4_SS2_storew_sp);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2); // 1,2,3 SUBInst memw(sp + #$u5_2) = $Rt
+ } else {
+ Result.setOpcode(Hexagon::V4_SS1_storew_io);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2); // 1,2,3 SUBInst memw($Rs + #$u4_2) = $Rt
+ }
+ break;
+ case Hexagon::A2_sxtb:
+ Result.setOpcode(Hexagon::V4_SA1_sxtb);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 SUBInst $Rd = sxtb($Rs)
+ case Hexagon::A2_sxth:
+ Result.setOpcode(Hexagon::V4_SA1_sxth);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 SUBInst $Rd = sxth($Rs)
+ case Hexagon::A2_tfr:
+ Result.setOpcode(Hexagon::V4_SA1_tfr);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 SUBInst $Rd = $Rs
+ case Hexagon::C2_cmovenewif:
+ Result.setOpcode(Hexagon::V4_SA1_clrfnew);
+ addOps(Result, Inst, 0);
+ break; // 2 SUBInst if (!p0.new) $Rd = #0
+ case Hexagon::C2_cmovenewit:
+ Result.setOpcode(Hexagon::V4_SA1_clrtnew);
+ addOps(Result, Inst, 0);
+ break; // 2 SUBInst if (p0.new) $Rd = #0
+ case Hexagon::C2_cmoveif:
+ Result.setOpcode(Hexagon::V4_SA1_clrf);
+ addOps(Result, Inst, 0);
+ break; // 2 SUBInst if (!p0) $Rd = #0
+ case Hexagon::C2_cmoveit:
+ Result.setOpcode(Hexagon::V4_SA1_clrt);
+ addOps(Result, Inst, 0);
+ break; // 2 SUBInst if (p0) $Rd = #0
+ case Hexagon::A2_tfrsi:
+ Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value);
+ if (Absolute && Value == -1) {
+ Result.setOpcode(Hexagon::V4_SA1_setin1);
+ addOps(Result, Inst, 0);
+ break; // 2 1 SUBInst $Rd = #-1
+ } else {
+ Result.setOpcode(Hexagon::V4_SA1_seti);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 SUBInst $Rd = #$u6
+ }
+ case Hexagon::A2_zxtb:
+ Result.setOpcode(Hexagon::V4_SA1_zxtb);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 $Rd = and($Rs, #255)
+
+ case Hexagon::A2_zxth:
+ Result.setOpcode(Hexagon::V4_SA1_zxth);
+ addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
+ break; // 1,2 SUBInst $Rd = zxth($Rs)
+ }
+ return Result;
+}
+
+static bool isStoreInst(unsigned opCode) {
+ switch (opCode) {
+ case Hexagon::S2_storeri_io:
+ case Hexagon::S2_storerb_io:
+ case Hexagon::S2_storerh_io:
+ case Hexagon::S2_storerd_io:
+ case Hexagon::S4_storeiri_io:
+ case Hexagon::S4_storeirb_io:
+ case Hexagon::S2_allocframe:
+ return true;
+ default:
+ return false;
+ }
+}
+
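+// Enumerate candidate duplex pairs within a bundle, trying pairs at
+// increasing distance. Pairs of stores are never tried in reverse order,
+// and no reverse pairing is attempted for :mem_noshuf packets.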
+SmallVector<DuplexCandidate, 8>
+HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII,
+ MCInst const &MCB) {
+ assert(isBundle(MCB));
+ SmallVector<DuplexCandidate, 8> duplexToTry;
+ // Use an "order matters" version of isDuplexPair.
+ unsigned numInstrInPacket = MCB.getNumOperands();
+
+ for (unsigned distance = 1; distance < numInstrInPacket; ++distance) {
+ for (unsigned j = HexagonMCInstrInfo::bundleInstructionsOffset,
+ k = j + distance;
+ (j < numInstrInPacket) && (k < numInstrInPacket); ++j, ++k) {
+
+ // Check if reversible.
+ bool bisReversable = true;
+ if (isStoreInst(MCB.getOperand(j).getInst()->getOpcode()) &&
+ isStoreInst(MCB.getOperand(k).getInst()->getOpcode())) {
+ DEBUG(dbgs() << "skip out of order write pair: " << k << "," << j
+ << "\n");
+ bisReversable = false;
+ }
+ if (HexagonMCInstrInfo::isMemReorderDisabled(MCB)) // }:mem_noshuf
+ bisReversable = false;
+
+ // Try in order.
+ if (isOrderedDuplexPair(
+ MCII, *MCB.getOperand(k).getInst(),
+ HexagonMCInstrInfo::hasExtenderForIndex(MCB, k - 1),
+ *MCB.getOperand(j).getInst(),
+ HexagonMCInstrInfo::hasExtenderForIndex(MCB, j - 1),
+ bisReversable)) {
+ // Get iClass.
+ unsigned iClass = iClassOfDuplexPair(
+ getDuplexCandidateGroup(*MCB.getOperand(k).getInst()),
+ getDuplexCandidateGroup(*MCB.getOperand(j).getInst()));
+
+ // Save off pairs for duplex checking.
+ duplexToTry.push_back(DuplexCandidate(j, k, iClass));
+ DEBUG(dbgs() << "adding pair: " << j << "," << k << ":"
+ << MCB.getOperand(j).getInst()->getOpcode() << ","
+ << MCB.getOperand(k).getInst()->getOpcode() << "\n");
+ continue;
+ } else {
+ DEBUG(dbgs() << "skipping pair: " << j << "," << k << ":"
+ << MCB.getOperand(j).getInst()->getOpcode() << ","
+ << MCB.getOperand(k).getInst()->getOpcode() << "\n");
+ }
+
+ // Try reverse.
+ if (bisReversable) {
+ if (isOrderedDuplexPair(
+ MCII, *MCB.getOperand(j).getInst(),
+ HexagonMCInstrInfo::hasExtenderForIndex(MCB, j - 1),
+ *MCB.getOperand(k).getInst(),
+ HexagonMCInstrInfo::hasExtenderForIndex(MCB, k - 1),
+ bisReversable)) {
+ // Get iClass.
+ unsigned iClass = iClassOfDuplexPair(
+ getDuplexCandidateGroup(*MCB.getOperand(j).getInst()),
+ getDuplexCandidateGroup(*MCB.getOperand(k).getInst()));
+
+ // Save off pairs for duplex checking.
+ duplexToTry.push_back(DuplexCandidate(k, j, iClass));
+ DEBUG(dbgs() << "adding pair:" << k << "," << j << ":"
+ << MCB.getOperand(j).getInst()->getOpcode() << ","
+ << MCB.getOperand(k).getInst()->getOpcode() << "\n");
+ } else {
+ DEBUG(dbgs() << "skipping pair: " << k << "," << j << ":"
+ << MCB.getOperand(j).getInst()->getOpcode() << ","
+ << MCB.getOperand(k).getInst()->getOpcode() << "\n");
+ }
+ }
+ }
+ }
+ return duplexToTry;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
new file mode 100644
index 0000000..eaa3550
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -0,0 +1,150 @@
+//=== HexagonMCELFStreamer.cpp - Hexagon subclass of MCELFStreamer -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a stub that parses an MCInst bundle and passes the
+// instructions on to the real streamer.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "hexagonmcelfstreamer"
+
+#include "Hexagon.h"
+#include "HexagonMCELFStreamer.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCShuffler.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+ GPSize("gpsize", cl::NotHidden,
+ cl::desc("Global Pointer Addressing Size. The default size is 8."),
+ cl::Prefix, cl::init(8));
+
+void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
+ const MCSubtargetInfo &STI) {
+ MCInst HMI = HexagonMCInstrInfo::createBundle();
+ MCInst *MCB;
+
+ if (MCK.getOpcode() != Hexagon::BUNDLE) {
+ HMI.addOperand(MCOperand::createInst(&MCK));
+ MCB = &HMI;
+ } else
+ MCB = const_cast<MCInst *>(&MCK);
+
+ // Examine the packet and pad it, if needed, when an
+ // end-loop is in the bundle.
+ HexagonMCInstrInfo::padEndloop(getContext(), *MCB);
+ HexagonMCShuffle(*MCII, STI, *MCB);
+
+ assert(HexagonMCInstrInfo::bundleSize(*MCB) <= HEXAGON_PACKET_SIZE);
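+ // A constant extender applies to the instruction that follows it; clamp
+ // the extended operand of that instruction (or of the sub-instruction
+ // inside a duplex) before emission.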
+ bool Extended = false;
+ for (auto &I : HexagonMCInstrInfo::bundleInstructions(*MCB)) {
+ MCInst *MCI = const_cast<MCInst *>(I.getInst());
+ if (Extended) {
+ if (HexagonMCInstrInfo::isDuplex(*MCII, *MCI)) {
+ MCInst *SubInst = const_cast<MCInst *>(MCI->getOperand(1).getInst());
+ HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *SubInst);
+ } else {
+ HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *MCI);
+ }
+ Extended = false;
+ } else {
+ Extended = HexagonMCInstrInfo::isImmext(*MCI);
+ }
+ }
+
+ // At this point, MCB is a bundle. Iterate through the bundle and
+ // register the symbols used by each instruction.
+ for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MCB)) {
+ MCInst *MCI = const_cast<MCInst *>(I.getInst());
+ EmitSymbol(*MCI);
+ }
+ MCObjectStreamer::EmitInstruction(*MCB, STI);
+}
+
+void HexagonMCELFStreamer::EmitSymbol(const MCInst &Inst) {
+ // Scan for values.
+ for (unsigned i = Inst.getNumOperands(); i--;)
+ if (Inst.getOperand(i).isExpr())
+ visitUsedExpr(*Inst.getOperand(i).getExpr());
+}
+
+// EmitCommonSymbol and EmitLocalCommonSymbol are extended versions of the
+// functions found in MCELFStreamer.cpp taking AccessSize as an additional
+// parameter.
+void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol,
+ uint64_t Size,
+ unsigned ByteAlignment,
+ unsigned AccessSize) {
+ getAssembler().registerSymbol(*Symbol);
+ StringRef sbss[4] = {".sbss.1", ".sbss.2", ".sbss.4", ".sbss.8"};
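+ // Symbols with no access size, zero size, or size above -gpsize go into
+ // .bss; otherwise the .sbss.<n> section matching the access size is used
+ // (for local symbols; common symbols instead get an SHN_HEXAGON_SCOMMON
+ // section index).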
+
+ auto ELFSymbol = cast<MCSymbolELF>(Symbol);
+ if (!ELFSymbol->isBindingSet()) {
+ ELFSymbol->setBinding(ELF::STB_GLOBAL);
+ ELFSymbol->setExternal(true);
+ }
+
+ ELFSymbol->setType(ELF::STT_OBJECT);
+
+ if (ELFSymbol->getBinding() == ELF::STB_LOCAL) {
+ StringRef SectionName =
+ ((AccessSize == 0) || (Size == 0) || (Size > GPSize))
+ ? ".bss"
+ : sbss[(Log2_64(AccessSize))];
+
+ MCSection *CrntSection = getCurrentSection().first;
+ MCSection *Section = getAssembler().getContext().getELFSection(
+ SectionName, ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ SwitchSection(Section);
+ AssignFragment(Symbol, getCurrentFragment());
+
+ MCELFStreamer::EmitCommonSymbol(Symbol, Size, ByteAlignment);
+ SwitchSection(CrntSection);
+ } else {
+ if (ELFSymbol->declareCommon(Size, ByteAlignment))
+ report_fatal_error("Symbol: " + Symbol->getName() +
+ " redeclared as different type");
+ if ((AccessSize) && (Size <= GPSize)) {
+ uint64_t SectionIndex =
+ (AccessSize <= GPSize)
+ ? ELF::SHN_HEXAGON_SCOMMON + (Log2_64(AccessSize) + 1)
+ : (unsigned)ELF::SHN_HEXAGON_SCOMMON;
+ ELFSymbol->setIndex(SectionIndex);
+ }
+ }
+
+ ELFSymbol->setSize(MCConstantExpr::create(Size, getContext()));
+}
+
+void HexagonMCELFStreamer::HexagonMCEmitLocalCommonSymbol(
+ MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment,
+ unsigned AccessSize) {
+ getAssembler().registerSymbol(*Symbol);
+ auto ELFSymbol = cast<MCSymbolELF>(Symbol);
+ ELFSymbol->setBinding(ELF::STB_LOCAL);
+ ELFSymbol->setExternal(false);
+ HexagonMCEmitCommonSymbol(Symbol, Size, ByteAlignment, AccessSize);
+}
+
+namespace llvm {
+MCStreamer *createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE) {
+ return new HexagonMCELFStreamer(Context, MAB, OS, CE);
+}
+}
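As a rough standalone sketch (not part of this commit), the small-data placement rule in HexagonMCEmitCommonSymbol above reduces to the following; pickSmallDataSection and GPSizeLimit are illustrative stand-ins for the streamer state and the -gpsize option:

#include <cstdint>
#include <string>

// Choose the section for a common symbol the way the streamer above does:
// a known access size and a size within the GP limit go to .sbss.<N>,
// everything else falls back to .bss.
static std::string pickSmallDataSection(uint64_t Size, unsigned AccessSize,
                                        unsigned GPSizeLimit = 8) {
  static const char *SBSS[4] = {".sbss.1", ".sbss.2", ".sbss.4", ".sbss.8"};
  if (AccessSize == 0 || Size == 0 || Size > GPSizeLimit || AccessSize > 8)
    return ".bss";
  unsigned Log2 = 0;               // equivalent of Log2_64(AccessSize)
  while ((1u << (Log2 + 1)) <= AccessSize)
    ++Log2;
  return SBSS[Log2];               // 1 -> .sbss.1, 2 -> .sbss.2, 4 -> .sbss.4, 8 -> .sbss.8
}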
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
new file mode 100644
index 0000000..d77c0cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
@@ -0,0 +1,45 @@
+//===- HexagonMCELFStreamer.h - Hexagon subclass of MCELFStreamer ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCELFSTREAMER_H
+#define HEXAGONMCELFSTREAMER_H
+
+#include "MCTargetDesc/HexagonMCCodeEmitter.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "HexagonTargetStreamer.h"
+
+namespace llvm {
+
+class HexagonMCELFStreamer : public MCELFStreamer {
+ std::unique_ptr<MCInstrInfo> MCII;
+
+public:
+ HexagonMCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *Emitter)
+ : MCELFStreamer(Context, TAB, OS, Emitter),
+ MCII(createHexagonMCInstrInfo()) {}
+
+ void EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) override;
+ void EmitSymbol(const MCInst &Inst);
+ void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment,
+ unsigned AccessSize);
+ void HexagonMCEmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment, unsigned AccessSize);
+};
+
+MCStreamer *createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE);
+
+} // namespace llvm
+
+#endif
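A brief sketch of what the EmitInstruction override declared above does with a lone instruction (assumed behavior based on the .cpp earlier in this diff; wrapInBundle and BundleOpcode are hypothetical names):

#include "llvm/MC/MCInst.h"

using namespace llvm;

// Anything that is not already a bundle is wrapped into a single-insn bundle.
// BundleOpcode stands for Hexagon::BUNDLE; Insn must outlive the result
// because bundles hold their sub-instructions by pointer.
static MCInst wrapInBundle(const MCInst &Insn, unsigned BundleOpcode) {
  if (Insn.getOpcode() == BundleOpcode)
    return Insn;                                   // already a bundle
  MCInst Bundle;
  Bundle.setOpcode(BundleOpcode);
  Bundle.addOperand(MCOperand::createImm(0));      // bundle flags word (operand 0)
  Bundle.addOperand(MCOperand::createInst(&Insn)); // payload, held by pointer
  return Bundle;
}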
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
new file mode 100644
index 0000000..fc62626
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
@@ -0,0 +1,49 @@
+//===-- HexagonMCExpr.cpp - Hexagon specific MC expression classes -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-mcexpr"
+
+HexagonNoExtendOperand *HexagonNoExtendOperand::Create(MCExpr const *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) HexagonNoExtendOperand(Expr);
+}
+
+bool HexagonNoExtendOperand::evaluateAsRelocatableImpl(
+ MCValue &Res, MCAsmLayout const *Layout, MCFixup const *Fixup) const {
+ return Expr->evaluateAsRelocatable(Res, Layout, Fixup);
+}
+
+void HexagonNoExtendOperand::visitUsedExpr(MCStreamer &Streamer) const {}
+
+MCFragment *llvm::HexagonNoExtendOperand::findAssociatedFragment() const {
+ return Expr->findAssociatedFragment();
+}
+
+void HexagonNoExtendOperand::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+
+MCExpr const *HexagonNoExtendOperand::getExpr() const { return Expr; }
+
+bool HexagonNoExtendOperand::classof(MCExpr const *E) {
+ return E->getKind() == MCExpr::Target;
+}
+
+HexagonNoExtendOperand::HexagonNoExtendOperand(MCExpr const *Expr)
+ : Expr(Expr) {}
+
+void HexagonNoExtendOperand::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ Expr->print(OS, MAI);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
new file mode 100644
index 0000000..60f180f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
@@ -0,0 +1,35 @@
+//==- HexagonMCExpr.h - Hexagon specific MC expression classes --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+class MCInst;
+class HexagonNoExtendOperand : public MCTargetExpr {
+public:
+ static HexagonNoExtendOperand *Create(MCExpr const *Expr, MCContext &Ctx);
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
+ MCFragment *findAssociatedFragment() const override;
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
+ static bool classof(MCExpr const *E);
+ MCExpr const *getExpr() const;
+
+private:
+ HexagonNoExtendOperand(MCExpr const *Expr);
+ MCExpr const *Expr;
+};
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H
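A short sketch of the intended use of the wrapper class above (assumed usage; wrapNoExtend and unwrapIfNoExtend are hypothetical helpers):

#include "HexagonMCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"

using namespace llvm;

// Mark an operand expression so that no constant extender is emitted for it.
static const MCExpr *wrapNoExtend(const MCExpr *E, MCContext &Ctx) {
  return HexagonNoExtendOperand::Create(E, Ctx);
}

// Recover the underlying expression when an operand carries the wrapper.
static const MCExpr *unwrapIfNoExtend(const MCExpr *E) {
  if (HexagonNoExtendOperand::classof(E)) // i.e. E->getKind() == MCExpr::Target
    return static_cast<const HexagonNoExtendOperand *>(E)->getExpr();
  return E;
}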
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
new file mode 100644
index 0000000..e684207
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -0,0 +1,648 @@
+//===- HexagonMCInstrInfo.cpp - Utility functions on Hexagon MCInsts ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility functions for Hexagon-specific MCInst queries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCInstrInfo.h"
+
+#include "Hexagon.h"
+#include "HexagonBaseInfo.h"
+#include "HexagonMCChecker.h"
+
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+#include <limits>
+
+namespace llvm {
+void HexagonMCInstrInfo::addConstant(MCInst &MI, uint64_t Value,
+ MCContext &Context) {
+ MI.addOperand(MCOperand::createExpr(MCConstantExpr::create(Value, Context)));
+}
+
+void HexagonMCInstrInfo::addConstExtender(MCContext &Context,
+ MCInstrInfo const &MCII, MCInst &MCB,
+ MCInst const &MCI) {
+ assert(HexagonMCInstrInfo::isBundle(MCB));
+ MCOperand const &exOp =
+ MCI.getOperand(HexagonMCInstrInfo::getExtendableOp(MCII, MCI));
+
+ // Create the extender.
+ MCInst *XMCI =
+ new (Context) MCInst(HexagonMCInstrInfo::deriveExtender(MCII, MCI, exOp));
+
+ MCB.addOperand(MCOperand::createInst(XMCI));
+}
+
+iterator_range<MCInst::const_iterator>
+HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) {
+ assert(isBundle(MCI));
+ return make_range(MCI.begin() + bundleInstructionsOffset, MCI.end());
+}
+
+size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) {
+ if (HexagonMCInstrInfo::isBundle(MCI))
+ return (MCI.size() - bundleInstructionsOffset);
+ else
+ return (1);
+}
+
+bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI,
+ MCContext &Context, MCInst &MCB,
+ HexagonMCChecker *Check) {
+ // Examine the packet and convert pairs of instructions to compound
+ // instructions when possible.
+ if (!HexagonDisableCompound)
+ HexagonMCInstrInfo::tryCompound(MCII, Context, MCB);
+ // Check the bundle for errors.
+ bool CheckOk = Check ? Check->check() : true;
+ if (!CheckOk)
+ return false;
+ HexagonMCShuffle(MCII, STI, MCB);
+ // Examine the packet and convert pairs of instructions to duplex
+ // instructions when possible.
+ MCInst InstBundlePreDuplex = MCInst(MCB);
+ if (!HexagonDisableDuplex) {
+ SmallVector<DuplexCandidate, 8> possibleDuplexes;
+ possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(MCII, MCB);
+ HexagonMCShuffle(MCII, STI, Context, MCB, possibleDuplexes);
+ }
+ // Examine the packet and pad it, if needed, when an
+ // end-loop is in the bundle.
+ HexagonMCInstrInfo::padEndloop(Context, MCB);
+ // If compounding and duplexing didn't reduce the packet to
+ // HEXAGON_PACKET_SIZE (4) instructions or fewer, it is too big.
+ if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE)
+ return false;
+ HexagonMCShuffle(MCII, STI, MCB);
+ return true;
+}
+
+void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII,
+ MCContext &Context, MCInst &MCI) {
+ assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) ||
+ HexagonMCInstrInfo::isExtended(MCII, MCI));
+ MCOperand &exOp =
+ MCI.getOperand(HexagonMCInstrInfo::getExtendableOp(MCII, MCI));
+ // If the extended value is a constant, keep only its low 6 bits (shifted
+ // by the operand's extent alignment) for the extended instruction; the
+ // extender instruction carries the remaining upper bits.
+ int64_t Value;
+ if (exOp.getExpr()->evaluateAsAbsolute(Value)) {
+ unsigned Shift = HexagonMCInstrInfo::getExtentAlignment(MCII, MCI);
+ exOp.setExpr(MCConstantExpr::create((Value & 0x3f) << Shift, Context));
+ }
+}
+
+MCInst HexagonMCInstrInfo::createBundle() {
+ MCInst Result;
+ Result.setOpcode(Hexagon::BUNDLE);
+ Result.addOperand(MCOperand::createImm(0));
+ return Result;
+}
+
+MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass,
+ MCInst const &inst0,
+ MCInst const &inst1) {
+ assert((iClass <= 0xf) && "iClass must have range of 0 to 0xf");
+ MCInst *duplexInst = new (Context) MCInst;
+ duplexInst->setOpcode(Hexagon::DuplexIClass0 + iClass);
+
+ MCInst *SubInst0 = new (Context) MCInst(deriveSubInst(inst0));
+ MCInst *SubInst1 = new (Context) MCInst(deriveSubInst(inst1));
+ duplexInst->addOperand(MCOperand::createInst(SubInst0));
+ duplexInst->addOperand(MCOperand::createInst(SubInst1));
+ return duplexInst;
+}
+
+MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII,
+ MCInst const &Inst,
+ MCOperand const &MO) {
+ assert(HexagonMCInstrInfo::isExtendable(MCII, Inst) ||
+ HexagonMCInstrInfo::isExtended(MCII, Inst));
+
+ MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, Inst);
+ MCInst XMI;
+ XMI.setOpcode((Desc.isBranch() || Desc.isCall() ||
+ HexagonMCInstrInfo::getType(MCII, Inst) == HexagonII::TypeCR)
+ ? Hexagon::A4_ext_b
+ : Hexagon::A4_ext);
+ if (MO.isImm())
+ XMI.addOperand(MCOperand::createImm(MO.getImm() & (~0x3f)));
+ else if (MO.isExpr())
+ XMI.addOperand(MCOperand::createExpr(MO.getExpr()));
+ else
+ llvm_unreachable("invalid extendable operand");
+ return XMI;
+}
+
+MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB,
+ size_t Index) {
+ assert(Index <= bundleSize(MCB));
+ if (Index == 0)
+ return nullptr;
+ MCInst const *Inst =
+ MCB.getOperand(Index + bundleInstructionsOffset - 1).getInst();
+ if (isImmext(*Inst))
+ return Inst;
+ return nullptr;
+}
+
+void HexagonMCInstrInfo::extendIfNeeded(MCContext &Context,
+ MCInstrInfo const &MCII, MCInst &MCB,
+ MCInst const &MCI, bool MustExtend) {
+ if (isConstExtended(MCII, MCI) || MustExtend)
+ addConstExtender(Context, MCII, MCB, MCI);
+}
+
+HexagonII::MemAccessSize
+HexagonMCInstrInfo::getAccessSize(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+
+ return (HexagonII::MemAccessSize((F >> HexagonII::MemAccessSizePos) &
+ HexagonII::MemAccesSizeMask));
+}
+
+unsigned HexagonMCInstrInfo::getBitCount(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
+}
+
+// Return constant extended operand number.
+unsigned short HexagonMCInstrInfo::getCExtOpNum(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
+}
+
+MCInstrDesc const &HexagonMCInstrInfo::getDesc(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ return (MCII.get(MCI.getOpcode()));
+}
+
+unsigned short HexagonMCInstrInfo::getExtendableOp(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
+}
+
+MCOperand const &
+HexagonMCInstrInfo::getExtendableOperand(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ unsigned O = HexagonMCInstrInfo::getExtendableOp(MCII, MCI);
+ MCOperand const &MO = MCI.getOperand(O);
+
+ assert((HexagonMCInstrInfo::isExtendable(MCII, MCI) ||
+ HexagonMCInstrInfo::isExtended(MCII, MCI)) &&
+ (MO.isImm() || MO.isExpr()));
+ return (MO);
+}
+
+unsigned HexagonMCInstrInfo::getExtentAlignment(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtentAlignPos) & HexagonII::ExtentAlignMask);
+}
+
+unsigned HexagonMCInstrInfo::getExtentBits(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
+}
+
+// Return the max value that a constant extendable operand can have
+// without being extended.
+int HexagonMCInstrInfo::getMaxValue(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ unsigned isSigned =
+ (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return ~(-1U << (bits - 1));
+ else
+ return ~(-1U << bits);
+}
+
+// Return the min value that a constant extendable operand can have
+// without being extended.
+int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ unsigned isSigned =
+ (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return -1U << (bits - 1);
+ else
+ return 0;
+}
+
+char const *HexagonMCInstrInfo::getName(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ return MCII.getName(MCI.getOpcode());
+}
+
+unsigned short HexagonMCInstrInfo::getNewValueOp(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask);
+}
+
+MCOperand const &HexagonMCInstrInfo::getNewValueOperand(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ unsigned const O =
+ (F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask;
+ MCOperand const &MCO = MCI.getOperand(O);
+
+ assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) ||
+ HexagonMCInstrInfo::hasNewValue(MCII, MCI)) &&
+ MCO.isReg());
+ return (MCO);
+}
+
+/// Return the new value or the newly produced value.
+unsigned short HexagonMCInstrInfo::getNewValueOp2(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::NewValueOpPos2) & HexagonII::NewValueOpMask2);
+}
+
+MCOperand const &
+HexagonMCInstrInfo::getNewValueOperand2(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ unsigned O = HexagonMCInstrInfo::getNewValueOp2(MCII, MCI);
+ MCOperand const &MCO = MCI.getOperand(O);
+
+ assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) ||
+ HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) &&
+ MCO.isReg());
+ return (MCO);
+}
+
+int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+
+ HexagonII::SubTarget Target = static_cast<HexagonII::SubTarget>(
+ (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask);
+
+ switch (Target) {
+ default:
+ return Hexagon::ArchV4;
+ case HexagonII::HasV5SubT:
+ return Hexagon::ArchV5;
+ }
+}
+
+// Return the Hexagon ISA class for the insn.
+unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+
+ return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
+}
+
+unsigned HexagonMCInstrInfo::getUnits(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI,
+ MCInst const &MCI) {
+
+ const InstrItinerary *II = STI.getSchedModel().InstrItineraries;
+ int SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass();
+ return ((II[SchedClass].FirstStage + HexagonStages)->getUnits());
+}
+
+bool HexagonMCInstrInfo::hasImmExt(MCInst const &MCI) {
+ if (!HexagonMCInstrInfo::isBundle(MCI))
+ return false;
+
+ for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) {
+ auto MI = I.getInst();
+ if (isImmext(*MI))
+ return true;
+ }
+
+ return false;
+}
+
+bool HexagonMCInstrInfo::hasExtenderForIndex(MCInst const &MCB, size_t Index) {
+ return extenderForIndex(MCB, Index) != nullptr;
+}
+
+// Return whether the instruction is a legal new-value producer.
+bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask);
+}
+
+/// Return whether the insn produces a second value.
+bool HexagonMCInstrInfo::hasNewValue2(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::hasNewValuePos2) & HexagonII::hasNewValueMask2);
+}
+
+MCInst const &HexagonMCInstrInfo::instruction(MCInst const &MCB, size_t Index) {
+ assert(isBundle(MCB));
+ assert(Index < HEXAGON_PACKET_SIZE);
+ return *MCB.getOperand(bundleInstructionsOffset + Index).getInst();
+}
+
+bool HexagonMCInstrInfo::isBundle(MCInst const &MCI) {
+ auto Result = Hexagon::BUNDLE == MCI.getOpcode();
+ assert(!Result || (MCI.size() > 0 && MCI.getOperand(0).isImm()));
+ return Result;
+}
+
+// Return whether the insn is an actual insn.
+bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) {
+ return (!HexagonMCInstrInfo::getDesc(MCII, MCI).isPseudo() &&
+ !HexagonMCInstrInfo::isPrefix(MCII, MCI) &&
+ HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP);
+}
+
+bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ return (getType(MCII, MCI) == HexagonII::TypeCOMPOUND);
+}
+
+bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) {
+ return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) ||
+ (Reg >= Hexagon::D8 && Reg <= Hexagon::D11));
+}
+
+bool HexagonMCInstrInfo::isDuplex(MCInstrInfo const &MCII, MCInst const &MCI) {
+ return HexagonII::TypeDUPLEX == HexagonMCInstrInfo::getType(MCII, MCI);
+}
+
+// Return whether the instruction needs to be constant extended.
+// 1) Always return true if the instruction has 'isExtended' flag set.
+//
+// isExtendable:
+// 2) For immediate extended operands, return true only if the value is
+// out-of-range.
+// 3) For global address, always return true.
+
+bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ if (HexagonMCInstrInfo::isExtended(MCII, MCI))
+ return true;
+ // Branch insns are handled as necessary by relaxation.
+ if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeJ) ||
+ (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCOMPOUND &&
+ HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()) ||
+ (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV &&
+ HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()))
+ return false;
+ // Otherwise loop instructions and other CR insts are handled by relaxation
+ else if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR) &&
+ (MCI.getOpcode() != Hexagon::C4_addipc))
+ return false;
+ else if (!HexagonMCInstrInfo::isExtendable(MCII, MCI))
+ return false;
+
+ MCOperand const &MO = HexagonMCInstrInfo::getExtendableOperand(MCII, MCI);
+
+ // We could be using an instruction with an extendable immediate and shoehorn
+ // a global address into it. If it is a global address it will be constant
+ // extended. We do this for COMBINE.
+ // We currently only handle isGlobal() because it is the only kind of
+ // object we are going to end up with here for now.
+ // In the future we probably should add isSymbol(), etc.
+ assert(!MO.isImm());
+ int64_t Value;
+ if (!MO.getExpr()->evaluateAsAbsolute(Value))
+ return true;
+ int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI);
+ int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI);
+ return (MinValue > Value || Value > MaxValue);
+}
+
+bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
+}
+
+bool HexagonMCInstrInfo::isExtended(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+}
+
+bool HexagonMCInstrInfo::isFloat(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::FPPos) & HexagonII::FPMask);
+}
+
+bool HexagonMCInstrInfo::isImmext(MCInst const &MCI) {
+ auto Op = MCI.getOpcode();
+ return (Op == Hexagon::A4_ext_b || Op == Hexagon::A4_ext_c ||
+ Op == Hexagon::A4_ext_g || Op == Hexagon::A4_ext);
+}
+
+bool HexagonMCInstrInfo::isInnerLoop(MCInst const &MCI) {
+ assert(isBundle(MCI));
+ int64_t Flags = MCI.getOperand(0).getImm();
+ return (Flags & innerLoopMask) != 0;
+}
+
+bool HexagonMCInstrInfo::isIntReg(unsigned Reg) {
+ return (Reg >= Hexagon::R0 && Reg <= Hexagon::R31);
+}
+
+bool HexagonMCInstrInfo::isIntRegForSubInst(unsigned Reg) {
+ return ((Reg >= Hexagon::R0 && Reg <= Hexagon::R7) ||
+ (Reg >= Hexagon::R16 && Reg <= Hexagon::R23));
+}
+
+// Return whether the insn is a new-value consumer.
+bool HexagonMCInstrInfo::isNewValue(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+}
+
+// Return whether the operand can be constant extended.
+bool HexagonMCInstrInfo::isOperandExtended(MCInstrInfo const &MCII,
+ MCInst const &MCI,
+ unsigned short OperandNum) {
+ uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) ==
+ OperandNum;
+}
+
+bool HexagonMCInstrInfo::isOuterLoop(MCInst const &MCI) {
+ assert(isBundle(MCI));
+ int64_t Flags = MCI.getOperand(0).getImm();
+ return (Flags & outerLoopMask) != 0;
+}
+
+bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
+}
+
+bool HexagonMCInstrInfo::isPredicateLate(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return (F >> HexagonII::PredicateLatePos & HexagonII::PredicateLateMask);
+}
+
+/// Return whether the insn is newly predicated.
+bool HexagonMCInstrInfo::isPredicatedNew(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
+}
+
+bool HexagonMCInstrInfo::isPredicatedTrue(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return (
+ !((F >> HexagonII::PredicatedFalsePos) & HexagonII::PredicatedFalseMask));
+}
+
+bool HexagonMCInstrInfo::isPredReg(unsigned Reg) {
+ return (Reg >= Hexagon::P0 && Reg <= Hexagon::P3_0);
+}
+
+bool HexagonMCInstrInfo::isPrefix(MCInstrInfo const &MCII, MCInst const &MCI) {
+ return (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypePREFIX);
+}
+
+bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
+}
+
+bool HexagonMCInstrInfo::isMemReorderDisabled(MCInst const &MCI) {
+ assert(isBundle(MCI));
+ auto Flags = MCI.getOperand(0).getImm();
+ return (Flags & memReorderDisabledMask) != 0;
+}
+
+bool HexagonMCInstrInfo::isMemStoreReorderEnabled(MCInst const &MCI) {
+ assert(isBundle(MCI));
+ auto Flags = MCI.getOperand(0).getImm();
+ return (Flags & memStoreReorderEnabledMask) != 0;
+}
+
+bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask);
+}
+
+bool HexagonMCInstrInfo::isSoloAin1(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::SoloAin1Pos) & HexagonII::SoloAin1Mask);
+}
+
+bool HexagonMCInstrInfo::isVector(MCInstrInfo const &MCII, MCInst const &MCI) {
+ if ((getType(MCII, MCI) <= HexagonII::TypeCVI_LAST) &&
+ (getType(MCII, MCI) >= HexagonII::TypeCVI_FIRST))
+ return true;
+ return false;
+}
+
+int64_t HexagonMCInstrInfo::minConstant(MCInst const &MCI, size_t Index) {
+ auto Sentinel = static_cast<int64_t>(std::numeric_limits<uint32_t>::max())
+ << 8;
+ if (MCI.size() <= Index)
+ return Sentinel;
+ MCOperand const &MCO = MCI.getOperand(Index);
+ if (!MCO.isExpr())
+ return Sentinel;
+ int64_t Value;
+ if (!MCO.getExpr()->evaluateAsAbsolute(Value))
+ return Sentinel;
+ return Value;
+}
+
+void HexagonMCInstrInfo::padEndloop(MCContext &Context, MCInst &MCB) {
+ MCInst Nop;
+ Nop.setOpcode(Hexagon::A2_nop);
+ assert(isBundle(MCB));
+ while ((HexagonMCInstrInfo::isInnerLoop(MCB) &&
+ (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_INNER_SIZE)) ||
+ ((HexagonMCInstrInfo::isOuterLoop(MCB) &&
+ (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_OUTER_SIZE))))
+ MCB.addOperand(MCOperand::createInst(new (Context) MCInst(Nop)));
+}
+
+bool HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ if (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR)
+ return false;
+
+ unsigned SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass();
+ switch (SchedClass) {
+ case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123:
+ case Hexagon::Sched::ALU64_tc_2_SLOT23:
+ case Hexagon::Sched::ALU64_tc_3x_SLOT23:
+ case Hexagon::Sched::M_tc_2_SLOT23:
+ case Hexagon::Sched::M_tc_3x_SLOT23:
+ case Hexagon::Sched::S_2op_tc_2_SLOT23:
+ case Hexagon::Sched::S_3op_tc_2_SLOT23:
+ case Hexagon::Sched::S_3op_tc_3x_SLOT23:
+ return true;
+ }
+ return false;
+}
+
+void HexagonMCInstrInfo::replaceDuplex(MCContext &Context, MCInst &MCB,
+ DuplexCandidate Candidate) {
+ assert(Candidate.packetIndexI < MCB.size());
+ assert(Candidate.packetIndexJ < MCB.size());
+ assert(isBundle(MCB));
+ MCInst *Duplex =
+ deriveDuplex(Context, Candidate.iClass,
+ *MCB.getOperand(Candidate.packetIndexJ).getInst(),
+ *MCB.getOperand(Candidate.packetIndexI).getInst());
+ assert(Duplex != nullptr);
+ MCB.getOperand(Candidate.packetIndexI).setInst(Duplex);
+ MCB.erase(MCB.begin() + Candidate.packetIndexJ);
+}
+
+void HexagonMCInstrInfo::setInnerLoop(MCInst &MCI) {
+ assert(isBundle(MCI));
+ MCOperand &Operand = MCI.getOperand(0);
+ Operand.setImm(Operand.getImm() | innerLoopMask);
+}
+
+void HexagonMCInstrInfo::setMemReorderDisabled(MCInst &MCI) {
+ assert(isBundle(MCI));
+ MCOperand &Operand = MCI.getOperand(0);
+ Operand.setImm(Operand.getImm() | memReorderDisabledMask);
+ assert(isMemReorderDisabled(MCI));
+}
+
+void HexagonMCInstrInfo::setMemStoreReorderEnabled(MCInst &MCI) {
+ assert(isBundle(MCI));
+ MCOperand &Operand = MCI.getOperand(0);
+ Operand.setImm(Operand.getImm() | memStoreReorderEnabledMask);
+ assert(isMemStoreReorderEnabled(MCI));
+}
+
+void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) {
+ assert(isBundle(MCI));
+ MCOperand &Operand = MCI.getOperand(0);
+ Operand.setImm(Operand.getImm() | outerLoopMask);
+}
+}
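A self-contained sketch (not from this commit) of the arithmetic behind getMinValue/getMaxValue above; extentMin/extentMax are illustrative names, and the asserts show the unextended range for a signed 8-bit and an unsigned 6-bit extendable operand:

#include <cassert>

// Range of an extendable operand value that still fits without a constant extender.
static int extentMin(bool Signed, unsigned Bits) {
  return Signed ? -(1 << (Bits - 1)) : 0;
}
static int extentMax(bool Signed, unsigned Bits) {
  return Signed ? (1 << (Bits - 1)) - 1 : (1 << Bits) - 1;
}

int main() {
  assert(extentMin(true, 8) == -128 && extentMax(true, 8) == 127);
  assert(extentMin(false, 6) == 0 && extentMax(false, 6) == 63);
  return 0;
}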
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
new file mode 100644
index 0000000..0237b28
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -0,0 +1,289 @@
+//===- HexagonMCInstrInfo.h - Utility functions on Hexagon MCInsts --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility functions for Hexagon specific MCInst queries
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
+
+#include "HexagonMCExpr.h"
+#include "llvm/MC/MCInst.h"
+
+namespace llvm {
+class HexagonMCChecker;
+class MCContext;
+class MCInstrDesc;
+class MCInstrInfo;
+class MCInst;
+class MCOperand;
+class MCSubtargetInfo;
+namespace HexagonII {
+enum class MemAccessSize;
+}
+class DuplexCandidate {
+public:
+ unsigned packetIndexI, packetIndexJ, iClass;
+ DuplexCandidate(unsigned i, unsigned j, unsigned iClass)
+ : packetIndexI(i), packetIndexJ(j), iClass(iClass) {}
+};
+namespace HexagonMCInstrInfo {
+size_t const innerLoopOffset = 0;
+int64_t const innerLoopMask = 1 << innerLoopOffset;
+
+size_t const outerLoopOffset = 1;
+int64_t const outerLoopMask = 1 << outerLoopOffset;
+
+// Disable reordering of memory loads/stores; by default loads and stores
+// may be re-ordered.
+size_t const memReorderDisabledOffset = 2;
+int64_t const memReorderDisabledMask = 1 << memReorderDisabledOffset;
+
+// Allow re-ordering of memory stores; by default stores cannot be re-ordered.
+size_t const memStoreReorderEnabledOffset = 3;
+int64_t const memStoreReorderEnabledMask = 1 << memStoreReorderEnabledOffset;
+
+size_t const bundleInstructionsOffset = 1;
+
+void addConstant(MCInst &MI, uint64_t Value, MCContext &Context);
+void addConstExtender(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
+ MCInst const &MCI);
+
+// Returns an iterator range of instructions in this bundle
+iterator_range<MCInst::const_iterator> bundleInstructions(MCInst const &MCI);
+
+// Returns the number of instructions in the bundle
+size_t bundleSize(MCInst const &MCI);
+
+// Put the packet into canonical form: compound, duplex, pad, and shuffle
+bool canonicalizePacket(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCContext &Context, MCInst &MCB,
+ HexagonMCChecker *Checker);
+
+// Clamp off upper 26 bits of extendable operand for emission
+void clampExtended(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
+
+MCInst createBundle();
+
+// Return the extender for instruction at Index or nullptr if none
+MCInst const *extenderForIndex(MCInst const &MCB, size_t Index);
+void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
+ MCInst const &MCI, bool MustExtend);
+
+// Create a duplex instruction given the two subinsts
+MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0,
+ MCInst const &inst1);
+MCInst deriveExtender(MCInstrInfo const &MCII, MCInst const &Inst,
+ MCOperand const &MO);
+
+// Convert this instruction into a duplex subinst
+MCInst deriveSubInst(MCInst const &Inst);
+
+// Return memory access size
+HexagonII::MemAccessSize getAccessSize(MCInstrInfo const &MCII,
+ MCInst const &MCI);
+
+// Return number of bits in the constant extended operand.
+unsigned getBitCount(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return constant extended operand number.
+unsigned short getCExtOpNum(MCInstrInfo const &MCII, MCInst const &MCI);
+
+MCInstrDesc const &getDesc(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return which duplex group this instruction belongs to
+unsigned getDuplexCandidateGroup(MCInst const &MI);
+
+// Return a list of all possible instruction duplex combinations
+SmallVector<DuplexCandidate, 8> getDuplexPossibilties(MCInstrInfo const &MCII,
+ MCInst const &MCB);
+
+// Return the index of the extendable operand
+unsigned short getExtendableOp(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return a reference to the extendable operand
+MCOperand const &getExtendableOperand(MCInstrInfo const &MCII,
+ MCInst const &MCI);
+
+// Return the implicit alignment of the extendable operand
+unsigned getExtentAlignment(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the number of logical bits of the extendable operand
+unsigned getExtentBits(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the max value that a constant extendable operand can have
+// without being extended.
+int getMaxValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the min value that a constant extendable operand can have
+// without being extended.
+int getMinValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return instruction name
+char const *getName(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the operand index for the new value.
+unsigned short getNewValueOp(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the operand that consumes or produces a new value.
+MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI);
+unsigned short getNewValueOp2(MCInstrInfo const &MCII, MCInst const &MCI);
+MCOperand const &getNewValueOperand2(MCInstrInfo const &MCII,
+ MCInst const &MCI);
+
+int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the Hexagon ISA class for the insn.
+unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI);
+
+/// Return the slots used by the insn.
+unsigned getUnits(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst const &MCI);
+
+// Does the packet have an extender for the instruction at Index
+bool hasExtenderForIndex(MCInst const &MCB, size_t Index);
+
+bool hasImmExt(MCInst const &MCI);
+
+// Return whether the instruction is a legal new-value producer.
+bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+bool hasNewValue2(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the instruction at Index
+MCInst const &instruction(MCInst const &MCB, size_t Index);
+
+// Returns whether this MCInst is a wellformed bundle
+bool isBundle(MCInst const &MCI);
+
+// Return whether the insn is an actual insn.
+bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the duplex iclass given the two duplex classes
+unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb);
+
+int64_t minConstant(MCInst const &MCI, size_t Index);
+template <unsigned N, unsigned S>
+bool inRange(MCInst const &MCI, size_t Index) {
+ return isShiftedUInt<N, S>(minConstant(MCI, Index));
+}
+template <unsigned N, unsigned S>
+bool inSRange(MCInst const &MCI, size_t Index) {
+ return isShiftedInt<N, S>(minConstant(MCI, Index));
+}
+template <unsigned N> bool inRange(MCInst const &MCI, size_t Index) {
+ return isUInt<N>(minConstant(MCI, Index));
+}
+
+// Return whether the instruction needs to be constant extended.
+bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Is this double register suitable for use in a duplex subinst
+bool isDblRegForSubInst(unsigned Reg);
+
+// Is this a duplex instruction
+bool isDuplex(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Can these instructions be duplexed
+bool isDuplexPair(MCInst const &MIa, MCInst const &MIb);
+
+// Can these duplex classes be combined into a duplex instruction
+bool isDuplexPairMatch(unsigned Ga, unsigned Gb);
+
+// Return true if the insn may be extended based on the operand value.
+bool isExtendable(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the instruction must be always extended.
+bool isExtended(MCInstrInfo const &MCII, MCInst const &MCI);
+
+/// Return whether it is a floating-point insn.
+bool isFloat(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Returns whether this instruction is an immediate extender
+bool isImmext(MCInst const &MCI);
+
+// Returns whether this bundle is an endloop0
+bool isInnerLoop(MCInst const &MCI);
+
+// Is this an integer register
+bool isIntReg(unsigned Reg);
+
+// Is this register suitable for use in a duplex subinst
+bool isIntRegForSubInst(unsigned Reg);
+bool isMemReorderDisabled(MCInst const &MCI);
+bool isMemStoreReorderEnabled(MCInst const &MCI);
+
+// Return whether the insn is a new-value consumer.
+bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return true if the operand can be constant extended.
+bool isOperandExtended(MCInstrInfo const &MCII, MCInst const &MCI,
+ unsigned short OperandNum);
+
+// Can these two instructions be duplexed
+bool isOrderedDuplexPair(MCInstrInfo const &MCII, MCInst const &MIa,
+ bool ExtendedA, MCInst const &MIb, bool ExtendedB,
+ bool bisReversable);
+
+// Returns whether this bundle is an endloop1
+bool isOuterLoop(MCInst const &MCI);
+
+// Return whether this instruction is predicated
+bool isPredicated(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isPredicateLate(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isPredicatedNew(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the predicate sense is true
+bool isPredicatedTrue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Is this a predicate register
+bool isPredReg(unsigned Reg);
+
+// Return whether the insn is a prefix.
+bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the insn is solo, i.e., must be the only insn in its packet.
+bool isSolo(MCInstrInfo const &MCII, MCInst const &MCI);
+
+/// Return whether the insn can be packaged only with A and X-type insns.
+bool isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI);
+
+/// Return whether the insn can be packaged only with an A-type insn in slot #1.
+bool isSoloAin1(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isVector(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Pad the bundle with nops to satisfy endloop requirements
+void padEndloop(MCContext &Context, MCInst &MCI);
+
+bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Replace the pair of instructions in MCB identified by Candidate with a duplex
+void replaceDuplex(MCContext &Context, MCInst &MCB, DuplexCandidate Candidate);
+
+// Marks a bundle as endloop0
+void setInnerLoop(MCInst &MCI);
+void setMemReorderDisabled(MCInst &MCI);
+void setMemStoreReorderEnabled(MCInst &MCI);
+
+// Marks a bundle as endloop1
+void setOuterLoop(MCInst &MCI);
+
+// Would duplexing this instruction create a requirement to extend
+bool subInstWouldBeExtended(MCInst const &potentialDuplex);
+
+// Attempt to find and replace compound pairs
+void tryCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
+}
+}
+
+#endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
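A small sketch (values fabricated for illustration) of how the bundle flag masks declared above combine in the bundle's first immediate operand:

#include <cstdint>
#include <cstdio>

int main() {
  const int64_t innerLoopMask = 1 << 0;             // endloop0
  const int64_t outerLoopMask = 1 << 1;             // endloop1
  const int64_t memReorderDisabledMask = 1 << 2;    // :mem_noshuf
  const int64_t memStoreReorderEnabledMask = 1 << 3;

  int64_t Flags = innerLoopMask | memReorderDisabledMask; // example header word
  std::printf("inner=%d outer=%d noshuf=%d store-reorder=%d\n",
              int((Flags & innerLoopMask) != 0),
              int((Flags & outerLoopMask) != 0),
              int((Flags & memReorderDisabledMask) != 0),
              int((Flags & memStoreReorderEnabledMask) != 0));
  return 0;
}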
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
new file mode 100644
index 0000000..8e70280
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
@@ -0,0 +1,237 @@
+//===----- HexagonMCShuffler.cpp - MC bundle shuffling --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the shuffling of insns inside a bundle according to the
+// packet formation rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-shuffle"
+
+#include "Hexagon.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCShuffler.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ DisableShuffle("disable-hexagon-shuffle", cl::Hidden, cl::init(false),
+ cl::desc("Disable Hexagon instruction shuffling"));
+
+void HexagonMCShuffler::init(MCInst &MCB) {
+ if (HexagonMCInstrInfo::isBundle(MCB)) {
+ MCInst const *Extender = nullptr;
+ // Copy the bundle for the shuffling.
+ for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+ assert(!HexagonMCInstrInfo::getDesc(MCII, *I.getInst()).isPseudo());
+ MCInst *MI = const_cast<MCInst *>(I.getInst());
+
+ if (!HexagonMCInstrInfo::isImmext(*MI)) {
+ append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, *MI),
+ false);
+ Extender = nullptr;
+ } else
+ Extender = MI;
+ }
+ }
+
+ BundleFlags = MCB.getOperand(0).getImm();
+}
+
+void HexagonMCShuffler::init(MCInst &MCB, MCInst const *AddMI,
+ bool bInsertAtFront) {
+ if (HexagonMCInstrInfo::isBundle(MCB)) {
+ if (bInsertAtFront && AddMI)
+ append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, *AddMI),
+ false);
+ MCInst const *Extender = nullptr;
+ // Copy the bundle for the shuffling.
+ for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+ assert(!HexagonMCInstrInfo::getDesc(MCII, *I.getInst()).isPseudo());
+ MCInst *MI = const_cast<MCInst *>(I.getInst());
+ if (!HexagonMCInstrInfo::isImmext(*MI)) {
+ append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, *MI),
+ false);
+ Extender = nullptr;
+ } else
+ Extender = MI;
+ }
+ if (!bInsertAtFront && AddMI)
+ append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, *AddMI),
+ false);
+ }
+
+ BundleFlags = MCB.getOperand(0).getImm();
+}
+
+void HexagonMCShuffler::copyTo(MCInst &MCB) {
+ MCB.clear();
+ MCB.addOperand(MCOperand::createImm(BundleFlags));
+ // Copy the results into the bundle.
+ for (HexagonShuffler::iterator I = begin(); I != end(); ++I) {
+
+ MCInst const *MI = I->getDesc();
+ MCInst const *Extender = I->getExtender();
+ if (Extender)
+ MCB.addOperand(MCOperand::createInst(Extender));
+ MCB.addOperand(MCOperand::createInst(MI));
+ }
+}
+
+bool HexagonMCShuffler::reshuffleTo(MCInst &MCB) {
+ if (shuffle()) {
+ // Copy the results into the bundle.
+ copyTo(MCB);
+ } else
+ DEBUG(MCB.dump());
+
+ return (!getError());
+}
+
+bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst &MCB) {
+ HexagonMCShuffler MCS(MCII, STI, MCB);
+
+ if (DisableShuffle)
+ // Ignore if user chose so.
+ return false;
+
+ if (!HexagonMCInstrInfo::bundleSize(MCB)) {
+ // There once was a bundle:
+ // BUNDLE %D2<imp-def>, %R4<imp-def>, %R5<imp-def>, %D7<imp-def>, ...
+ // * %D2<def> = IMPLICIT_DEF; flags:
+ // * %D7<def> = IMPLICIT_DEF; flags:
+ // After the IMPLICIT_DEFs were removed by the asm printer, the bundle
+ // became empty.
+ DEBUG(dbgs() << "Skipping empty bundle");
+ return false;
+ } else if (!HexagonMCInstrInfo::isBundle(MCB)) {
+ DEBUG(dbgs() << "Skipping stand-alone insn");
+ return false;
+ }
+
+ // Reorder the bundle and copy the result.
+ if (!MCS.reshuffleTo(MCB)) {
+ // The reshuffle failed; examine the error, which should not happen at this point.
+ unsigned shuffleError = MCS.getError();
+ switch (shuffleError) {
+ default:
+ llvm_unreachable("unknown error");
+ case HexagonShuffler::SHUFFLE_ERROR_INVALID:
+ llvm_unreachable("invalid packet");
+ case HexagonShuffler::SHUFFLE_ERROR_STORES:
+ llvm_unreachable("too many stores");
+ case HexagonShuffler::SHUFFLE_ERROR_LOADS:
+ llvm_unreachable("too many loads");
+ case HexagonShuffler::SHUFFLE_ERROR_BRANCHES:
+ llvm_unreachable("too many branches");
+ case HexagonShuffler::SHUFFLE_ERROR_NOSLOTS:
+ llvm_unreachable("no suitable slot");
+ case HexagonShuffler::SHUFFLE_ERROR_SLOTS:
+ llvm_unreachable("over-subscribed slots");
+ case HexagonShuffler::SHUFFLE_SUCCESS: // Single instruction case.
+ return true;
+ }
+ }
+
+ return true;
+}
+
+unsigned
+llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCContext &Context, MCInst &MCB,
+ SmallVector<DuplexCandidate, 8> possibleDuplexes) {
+
+ if (DisableShuffle)
+ return HexagonShuffler::SHUFFLE_SUCCESS;
+
+ if (!HexagonMCInstrInfo::bundleSize(MCB)) {
+ // There once was a bundle:
+ // BUNDLE %D2<imp-def>, %R4<imp-def>, %R5<imp-def>, %D7<imp-def>, ...
+ // * %D2<def> = IMPLICIT_DEF; flags:
+ // * %D7<def> = IMPLICIT_DEF; flags:
+ // After the IMPLICIT_DEFs were removed by the asm printer, the bundle
+ // became empty.
+ DEBUG(dbgs() << "Skipping empty bundle");
+ return HexagonShuffler::SHUFFLE_SUCCESS;
+ } else if (!HexagonMCInstrInfo::isBundle(MCB)) {
+ DEBUG(dbgs() << "Skipping stand-alone insn");
+ return HexagonShuffler::SHUFFLE_SUCCESS;
+ }
+
+ bool doneShuffling = false;
+ unsigned shuffleError;
+ while (possibleDuplexes.size() > 0 && (!doneShuffling)) {
+ // case of Duplex Found
+ DuplexCandidate duplexToTry = possibleDuplexes.pop_back_val();
+ MCInst Attempt(MCB);
+ HexagonMCInstrInfo::replaceDuplex(Context, Attempt, duplexToTry);
+ HexagonMCShuffler MCS(MCII, STI, Attempt); // copy packet to the shuffler
+ if (MCS.size() == 1) { // case of one duplex
+ // copy the created duplex in the shuffler to the bundle
+ MCS.copyTo(MCB);
+ doneShuffling = true;
+ return HexagonShuffler::SHUFFLE_SUCCESS;
+ }
+ // try shuffle with this duplex
+ doneShuffling = MCS.reshuffleTo(MCB);
+ shuffleError = MCS.getError();
+
+ if (doneShuffling)
+ break;
+ }
+
+ if (!doneShuffling) {
+ HexagonMCShuffler MCS(MCII, STI, MCB);
+ doneShuffling = MCS.reshuffleTo(MCB); // shuffle
+ shuffleError = MCS.getError();
+ }
+ if (!doneShuffling)
+ return shuffleError;
+
+ return HexagonShuffler::SHUFFLE_SUCCESS;
+}
+
+bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst &MCB, MCInst const *AddMI, int fixupCount) {
+ if (!HexagonMCInstrInfo::isBundle(MCB) || !AddMI)
+ return false;
+
+ // If fixups are present, make sure we don't insert too many nops that would
+ // later prevent an extender from being inserted.
+ unsigned int bundleSize = HexagonMCInstrInfo::bundleSize(MCB);
+ if (bundleSize >= HEXAGON_PACKET_SIZE)
+ return false;
+ if (fixupCount >= 2) {
+ return false;
+ } else {
+ if (bundleSize == HEXAGON_PACKET_SIZE - 1 && fixupCount)
+ return false;
+ }
+
+ if (DisableShuffle)
+ return false;
+
+ HexagonMCShuffler MCS(MCII, STI, MCB, AddMI);
+ if (!MCS.reshuffleTo(MCB)) {
+ unsigned shuffleError = MCS.getError();
+ switch (shuffleError) {
+ default:
+ return false;
+ case HexagonShuffler::SHUFFLE_SUCCESS: // single instruction case
+ return true;
+ }
+ }
+
+ return true;
+}
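The duplex-aware HexagonMCShuffle above pops candidates off the back of the list and only falls back to a plain reshuffle when none of them works; a generic sketch of that control flow (stand-in callables, not the real API):

#include <functional>
#include <vector>

// TryWithDuplex attempts a shuffle after substituting one duplex candidate;
// PlainShuffle reshuffles the unmodified bundle.
static bool shuffleWithFallback(std::vector<int> Candidates,
                                const std::function<bool(int)> &TryWithDuplex,
                                const std::function<bool()> &PlainShuffle) {
  while (!Candidates.empty()) {
    int Candidate = Candidates.back();
    Candidates.pop_back();
    if (TryWithDuplex(Candidate)) // success: keep this duplexed packet
      return true;
  }
  return PlainShuffle();          // no candidate worked; shuffle as-is
}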
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
new file mode 100644
index 0000000..a21cce1
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
@@ -0,0 +1,65 @@
+//===-- HexagonMCShuffler.h -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This declares the shuffling of insns inside a bundle according to the
+// packet formation rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCSHUFFLER_H
+#define HEXAGONMCSHUFFLER_H
+
+#include "MCTargetDesc/HexagonShuffler.h"
+
+namespace llvm {
+
+class MCInst;
+
+// Insn bundle shuffler.
+class HexagonMCShuffler : public HexagonShuffler {
+ bool immext_present;
+ bool duplex_present;
+
+public:
+ HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst &MCB)
+ : HexagonShuffler(MCII, STI) {
+ init(MCB);
+ };
+ HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst &MCB, const MCInst *AddMI,
+ bool bInsertAtFront = false)
+ : HexagonShuffler(MCII, STI) {
+ init(MCB, AddMI, bInsertAtFront);
+ };
+
+ // Copy reordered bundle to another.
+ void copyTo(MCInst &MCB);
+ // Reorder and copy result to another.
+ bool reshuffleTo(MCInst &MCB);
+
+ bool immextPresent() const { return immext_present; };
+ bool duplexPresent() const { return duplex_present; };
+
+private:
+ void init(MCInst &MCB);
+ void init(MCInst &MCB, const MCInst *AddMI, bool bInsertAtFront = false);
+};
+
+// Invocation of the shuffler.
+bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst &);
+bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst &, const MCInst *, int);
+unsigned HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCContext &Context, MCInst &,
+ SmallVector<DuplexCandidate, 8>);
+}
+
+#endif // HEXAGONMCSHUFFLER_H
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
new file mode 100644
index 0000000..9a29257
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -0,0 +1,244 @@
+//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Hexagon specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCTargetDesc.h"
+#include "Hexagon.h"
+#include "HexagonMCAsmInfo.h"
+#include "HexagonMCELFStreamer.h"
+#include "MCTargetDesc/HexagonInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define GET_INSTRINFO_MC_DESC
+#include "HexagonGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "HexagonGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "HexagonGenRegisterInfo.inc"
+
+cl::opt<bool> llvm::HexagonDisableCompound
+ ("mno-compound",
+ cl::desc("Disable looking for compound instructions for Hexagon"));
+
+cl::opt<bool> llvm::HexagonDisableDuplex
+ ("mno-pairing",
+ cl::desc("Disable looking for duplex instructions for Hexagon"));
+
+StringRef HEXAGON_MC::selectHexagonCPU(const Triple &TT, StringRef CPU) {
+ if (CPU.empty())
+ CPU = "hexagonv60";
+ return CPU;
+}
+
+MCInstrInfo *llvm::createHexagonMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitHexagonMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitHexagonMCRegisterInfo(X, Hexagon::R0);
+ return X;
+}
+
+static MCSubtargetInfo *
+createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
+ CPU = HEXAGON_MC::selectHexagonCPU(TT, CPU);
+ return createHexagonMCSubtargetInfoImpl(TT, CPU, FS);
+}
+
+namespace {
+class HexagonTargetAsmStreamer : public HexagonTargetStreamer {
+public:
+ HexagonTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &, bool,
+ MCInstPrinter &)
+ : HexagonTargetStreamer(S) {}
+ void prettyPrintAsm(MCInstPrinter &InstPrinter, raw_ostream &OS,
+ const MCInst &Inst, const MCSubtargetInfo &STI) override {
+ assert(HexagonMCInstrInfo::isBundle(Inst));
+ assert(HexagonMCInstrInfo::bundleSize(Inst) <= HEXAGON_PACKET_SIZE);
+ std::string Buffer;
+ {
+ raw_string_ostream TempStream(Buffer);
+ InstPrinter.printInst(&Inst, TempStream, "", STI);
+ }
+ StringRef Contents(Buffer);
+ auto PacketBundle = Contents.rsplit('\n');
+ auto HeadTail = PacketBundle.first.split('\n');
+ StringRef Separator = "\n";
+ StringRef Indent = "\t\t";
+ OS << "\t{\n";
+ while (!HeadTail.first.empty()) {
+ StringRef InstTxt;
+ auto Duplex = HeadTail.first.split('\v');
+ if (!Duplex.second.empty()) {
+ OS << Indent << Duplex.first << Separator;
+ InstTxt = Duplex.second;
+ } else if (!HeadTail.first.trim().startswith("immext")) {
+ InstTxt = Duplex.first;
+ }
+ if (!InstTxt.empty())
+ OS << Indent << InstTxt << Separator;
+ HeadTail = HeadTail.second.split('\n');
+ }
+ OS << "\t}" << PacketBundle.second;
+ }
+};
+}
+
+namespace {
+class HexagonTargetELFStreamer : public HexagonTargetStreamer {
+public:
+ MCELFStreamer &getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+ }
+ HexagonTargetELFStreamer(MCStreamer &S, MCSubtargetInfo const &STI)
+ : HexagonTargetStreamer(S) {
+ auto Bits = STI.getFeatureBits();
+ unsigned Flags;
+ if (Bits.to_ullong() & llvm::Hexagon::ArchV5)
+ Flags = ELF::EF_HEXAGON_MACH_V5;
+ else
+ Flags = ELF::EF_HEXAGON_MACH_V4;
+ getStreamer().getAssembler().setELFHeaderEFlags(Flags);
+ }
+ void EmitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment,
+ unsigned AccessSize) override {
+ HexagonMCELFStreamer &HexagonELFStreamer =
+ static_cast<HexagonMCELFStreamer &>(getStreamer());
+ HexagonELFStreamer.HexagonMCEmitCommonSymbol(Symbol, Size, ByteAlignment,
+ AccessSize);
+ }
+ void EmitLocalCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment,
+ unsigned AccessSize) override {
+ HexagonMCELFStreamer &HexagonELFStreamer =
+ static_cast<HexagonMCELFStreamer &>(getStreamer());
+ HexagonELFStreamer.HexagonMCEmitLocalCommonSymbol(
+ Symbol, Size, ByteAlignment, AccessSize);
+ }
+};
+}
+
+static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
+ const Triple &TT) {
+ MCAsmInfo *MAI = new HexagonMCAsmInfo(TT);
+
+ // VirtualFP = (R30 + #0).
+ MCCFIInstruction Inst =
+ MCCFIInstruction::createDefCfa(nullptr, Hexagon::R30, 0);
+ MAI->addInitialFrameState(Inst);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createHexagonMCCodeGenInfo(const Triple &TT,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default)
+ RM = Reloc::Static;
+ X->initMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
+ if (SyntaxVariant == 0)
+ return (new HexagonInstPrinter(MAI, MII, MRI));
+ else
+ return nullptr;
+}
+
+static MCTargetStreamer *createMCAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool IsVerboseAsm) {
+ return new HexagonTargetAsmStreamer(S, OS, IsVerboseAsm, *InstPrint);
+}
+
+static MCStreamer *createMCStreamer(Triple const &T, MCContext &Context,
+ MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
+ return createHexagonELFStreamer(Context, MAB, OS, Emitter);
+}
+
+static MCTargetStreamer *
+createHexagonObjectTargetStreamer(MCStreamer &S, MCSubtargetInfo const &STI) {
+ return new HexagonTargetELFStreamer(S, STI);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeHexagonTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheHexagonTarget, createHexagonMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheHexagonTarget,
+ createHexagonMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget,
+ createHexagonMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheHexagonTarget,
+ createHexagonMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheHexagonTarget,
+ createHexagonMCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheHexagonTarget,
+ createHexagonMCCodeEmitter);
+
+ // Register the asm backend
+ TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget,
+ createHexagonAsmBackend);
+
+ // Register the obj streamer
+ TargetRegistry::RegisterELFStreamer(TheHexagonTarget, createMCStreamer);
+
+ // Register the asm streamer
+ TargetRegistry::RegisterAsmTargetStreamer(TheHexagonTarget,
+ createMCAsmTargetStreamer);
+
+ // Register the MC Inst Printer
+ TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget,
+ createHexagonMCInstPrinter);
+
+ TargetRegistry::RegisterObjectTargetStreamer(
+ TheHexagonTarget, createHexagonObjectTargetStreamer);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
new file mode 100644
index 0000000..a005a01
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -0,0 +1,75 @@
+//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Hexagon specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H
+
+#include <cstdint>
+
+#include "llvm/Support/CommandLine.h"
+
+namespace llvm {
+struct InstrItinerary;
+struct InstrStage;
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+class Triple;
+class StringRef;
+class raw_ostream;
+class raw_pwrite_stream;
+
+extern Target TheHexagonTarget;
+extern cl::opt<bool> HexagonDisableCompound;
+extern cl::opt<bool> HexagonDisableDuplex;
+extern const InstrStage HexagonStages[];
+
+MCInstrInfo *createHexagonMCInstrInfo();
+
+MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &MCT);
+
+MCAsmBackend *createHexagonAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU);
+
+MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI, StringRef CPU);
+
+namespace HEXAGON_MC {
+ StringRef selectHexagonCPU(const Triple &TT, StringRef CPU);
+}
+
+} // End llvm namespace
+
+// Define symbolic names for Hexagon registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "HexagonGenRegisterInfo.inc"
+
+// Defines symbolic names for the Hexagon instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "HexagonGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "HexagonGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
new file mode 100644
index 0000000..6ceb848
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -0,0 +1,470 @@
+//===----- HexagonShuffler.cpp - Instruction bundle shuffling -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the shuffling of insns inside a bundle according to the
+// packet formation rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-shuffle"
+
+#include <algorithm>
+#include <utility>
+#include "Hexagon.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "HexagonShuffler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+// Insn shuffling priority.
+class HexagonBid {
+ // The priority is directly proportional to how restricted the insn is based
+ // on its flexibility to run on the available slots. So, the fewer slots it
+ // may run on, the higher its priority.
+ enum { MAX = 360360 }; // LCD of 1/2, 1/3, 1/4,... 1/15.
+ unsigned Bid;
+
+public:
+ HexagonBid() : Bid(0){};
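+ // A bid is MAX divided by the number of candidate slots in B, so an insn
+ // restricted to a single slot bids the full MAX and sells that slot outright.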
+ HexagonBid(unsigned B) { Bid = B ? MAX / countPopulation(B) : 0; };
+
+ // Check if the insn priority is overflowed.
+ bool isSold() const { return (Bid >= MAX); };
+
+ HexagonBid &operator+=(const HexagonBid &B) {
+ Bid += B.Bid;
+ return *this;
+ };
+};
+
+// Slot shuffling allocation.
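+// Each slot accumulates the bids of the insns that may run in it; once a
+// slot's total bid reaches MAX it is marked sold and excluded from later bids.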
+class HexagonUnitAuction {
+ HexagonBid Scores[HEXAGON_PACKET_SIZE];
+ // Mask indicating which slot is unavailable.
+ unsigned isSold : HEXAGON_PACKET_SIZE;
+
+public:
+ HexagonUnitAuction() : isSold(0){};
+
+ // Allocate slots.
+ bool bid(unsigned B) {
+ // Exclude already auctioned slots from the bid.
+ unsigned b = B & ~isSold;
+ if (b) {
+ for (unsigned i = 0; i < HEXAGON_PACKET_SIZE; ++i)
+ if (b & (1 << i)) {
+ // Request candidate slots.
+ Scores[i] += HexagonBid(b);
+ isSold |= Scores[i].isSold() << i;
+ }
+ return true;
+ } else
+ // Error if the desired slots are already full.
+ return false;
+ };
+};
+} // end anonymous namespace
+
+unsigned HexagonResource::setWeight(unsigned s) {
+ const unsigned SlotWeight = 8;
+ const unsigned MaskWeight = SlotWeight - 1;
+ bool Key = (1 << s) & getUnits();
+
+ // TODO: Improve this API so that we can prevent misuse statically.
+ assert(SlotWeight * s < 32 && "Argument to setWeight too large.");
+
+ // Calculate the relative weight of the insn for the given slot: the more
+ // restrictive the insn and the lower the slots it may execute in, the
+ // heavier the weight.
+ Weight =
+ (Key << (SlotWeight * s)) * ((MaskWeight - countPopulation(getUnits()))
+ << countTrailingZeros(getUnits()));
+ return (Weight);
+}
+
+HexagonCVIResource::TypeUnitsAndLanes *HexagonCVIResource::TUL;
+
+bool HexagonCVIResource::SetUp = HexagonCVIResource::setup();
+
+bool HexagonCVIResource::setup() {
+ assert(!TUL);
+ TUL = new (TypeUnitsAndLanes);
+
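+ // Map each HVX insn type to the CVI execution units it may use and the
+ // number of adjacent lanes it occupies.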
+ (*TUL)[HexagonII::TypeCVI_VA] =
+ UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2);
+ (*TUL)[HexagonII::TypeCVI_VX] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_VX_DV] = UnitsAndLanes(CVI_MPY0, 2);
+ (*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1);
+ (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2);
+ (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1);
+ (*TUL)[HexagonII::TypeCVI_VINLANESAT] = UnitsAndLanes(CVI_SHIFT, 1);
+ (*TUL)[HexagonII::TypeCVI_VM_LD] =
+ UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0);
+ (*TUL)[HexagonII::TypeCVI_VM_CUR_LD] =
+ UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_VM_VP_LDU] = UnitsAndLanes(CVI_XLANE, 1);
+ (*TUL)[HexagonII::TypeCVI_VM_ST] =
+ UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0);
+ (*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1);
+ (*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4);
+
+ return true;
+}
+
+HexagonCVIResource::HexagonCVIResource(MCInstrInfo const &MCII, unsigned s,
+ MCInst const *id)
+ : HexagonResource(s) {
+ unsigned T = HexagonMCInstrInfo::getType(MCII, *id);
+
+ if (TUL->count(T)) {
+ // For an HVX insn.
+ Valid = true;
+ setUnits((*TUL)[T].first);
+ setLanes((*TUL)[T].second);
+ setLoad(HexagonMCInstrInfo::getDesc(MCII, *id).mayLoad());
+ setStore(HexagonMCInstrInfo::getDesc(MCII, *id).mayStore());
+ } else {
+ // For core insns.
+ Valid = false;
+ setUnits(0);
+ setLanes(0);
+ setLoad(false);
+ setStore(false);
+ }
+}
+
+HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI)
+ : MCII(MCII), STI(STI) {
+ reset();
+}
+
+void HexagonShuffler::reset() {
+ Packet.clear();
+ BundleFlags = 0;
+ Error = SHUFFLE_SUCCESS;
+}
+
+void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender,
+ unsigned S, bool X) {
+ HexagonInstr PI(MCII, ID, Extender, S, X);
+
+ Packet.push_back(PI);
+}
+
+/// Check that the packet is legal and enforce relative insn order.
+bool HexagonShuffler::check() {
+ // Descriptive slot masks.
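+ // Bit i of a slot mask is set if the insn may be issued in slot #i.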
+ const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotOne = 0x2,
+ slotThree = 0x8, slotFirstJump = 0x8, slotLastJump = 0x4,
+ slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1;
+ // Highest slots for branches and stores used to keep their original order.
+ unsigned slotJump = slotFirstJump;
+ unsigned slotLoadStore = slotFirstLoadStore;
+ // Number of branches, solo branches, indirect branches.
+ unsigned jumps = 0, jump1 = 0, jumpr = 0;
+ // Number of memory operations, loads, solo loads, stores, solo stores, single
+ // stores.
+ unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0;
+ // Number of HVX loads, HVX stores.
+ unsigned CVIloads = 0, CVIstores = 0;
+ // Number of duplex insns, solo insns.
+ unsigned duplex = 0, solo = 0;
+ // Number of insns restricting other insns in the packet to A and X types,
+ // and number of insns that are neither A nor X type.
+ unsigned onlyAX = 0, neitherAnorX = 0;
+ // Number of insns restricting other insns in slot #1 to A type.
+ unsigned onlyAin1 = 0;
+ // Number of insns restricting any insn in slot #1, except A2_nop.
+ unsigned onlyNo1 = 0;
+ unsigned xtypeFloat = 0;
+ unsigned pSlot3Cnt = 0;
+ iterator slot3ISJ = end();
+
+ // Collect information from the insns in the packet.
+ for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
+ MCInst const *ID = ISJ->getDesc();
+
+ if (HexagonMCInstrInfo::isSolo(MCII, *ID))
+ solo += !ISJ->isSoloException();
+ else if (HexagonMCInstrInfo::isSoloAX(MCII, *ID))
+ onlyAX += !ISJ->isSoloException();
+ else if (HexagonMCInstrInfo::isSoloAin1(MCII, *ID))
+ onlyAin1 += !ISJ->isSoloException();
+ if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32 &&
+ HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeXTYPE)
+ ++neitherAnorX;
+ if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID)) {
+ ++pSlot3Cnt;
+ slot3ISJ = ISJ;
+ }
+
+ switch (HexagonMCInstrInfo::getType(MCII, *ID)) {
+ case HexagonII::TypeXTYPE:
+ if (HexagonMCInstrInfo::isFloat(MCII, *ID))
+ ++xtypeFloat;
+ break;
+ case HexagonII::TypeJR:
+ ++jumpr;
+ // Fall-through.
+ case HexagonII::TypeJ:
+ ++jumps;
+ break;
+ case HexagonII::TypeCVI_VM_VP_LDU:
+ ++onlyNo1;
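+ // Fall-through.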
+ case HexagonII::TypeCVI_VM_LD:
+ case HexagonII::TypeCVI_VM_TMP_LD:
+ case HexagonII::TypeCVI_VM_CUR_LD:
+ ++CVIloads;
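+ // Fall-through.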
+ case HexagonII::TypeLD:
+ ++loads;
+ ++memory;
+ if (ISJ->Core.getUnits() == slotSingleLoad)
+ ++load0;
+ if (HexagonMCInstrInfo::getDesc(MCII, *ID).isReturn())
+ ++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
+ break;
+ case HexagonII::TypeCVI_VM_STU:
+ ++onlyNo1;
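+ // Fall-through.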
+ case HexagonII::TypeCVI_VM_ST:
+ case HexagonII::TypeCVI_VM_NEW_ST:
+ ++CVIstores;
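+ // Fall-through.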
+ case HexagonII::TypeST:
+ ++stores;
+ ++memory;
+ if (ISJ->Core.getUnits() == slotSingleStore)
+ ++store0;
+ break;
+ case HexagonII::TypeMEMOP:
+ ++loads;
+ ++stores;
+ ++store1;
+ ++memory;
+ break;
+ case HexagonII::TypeNV:
+ ++memory; // NV insns are memory-like.
+ if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch())
+ ++jumps, ++jump1;
+ break;
+ case HexagonII::TypeCR:
+ // Legacy conditional branch predicated on a register.
+ case HexagonII::TypeSYSTEM:
+ if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad())
+ ++loads;
+ break;
+ }
+ }
+
+ // Check if the packet is legal.
+ if ((load0 > 1 || store0 > 1 || CVIloads > 1 || CVIstores > 1) ||
+ (duplex > 1 || (duplex && memory)) || (solo && size() > 1) ||
+ (onlyAX && neitherAnorX > 1) || (onlyAX && xtypeFloat)) {
+ Error = SHUFFLE_ERROR_INVALID;
+ return false;
+ }
+
+ if (jump1 && jumps > 1) {
+ // Error if single branch with another branch.
+ Error = SHUFFLE_ERROR_BRANCHES;
+ return false;
+ }
+
+ // Modify packet accordingly.
+ // TODO: need to reserve slots #0 and #1 for duplex insns.
+ bool bOnlySlot3 = false;
+ for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
+ MCInst const *ID = ISJ->getDesc();
+
+ if (!ISJ->Core.getUnits()) {
+ // Error if insn may not be executed in any slot.
+ Error = SHUFFLE_ERROR_UNKNOWN;
+ return false;
+ }
+
+ // Exclude from slot #1 any insn but A2_nop.
+ if (HexagonMCInstrInfo::getDesc(MCII, *ID).getOpcode() != Hexagon::A2_nop)
+ if (onlyNo1)
+ ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne);
+
+ // Exclude from slot #1 any insn but A-type.
+ if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32)
+ if (onlyAin1)
+ ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne);
+
+ // Branches must keep the original order.
+ if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch() ||
+ HexagonMCInstrInfo::getDesc(MCII, *ID).isCall())
+ if (jumps > 1) {
+ if (jumpr || slotJump < slotLastJump) {
+ // Error if indirect branch with another branch or
+ // no more slots available for branches.
+ Error = SHUFFLE_ERROR_BRANCHES;
+ return false;
+ }
+ // Pin the branch to the highest slot available to it.
+ ISJ->Core.setUnits(ISJ->Core.getUnits() & slotJump);
+ // Update next highest slot available to branches.
+ slotJump >>= 1;
+ }
+
+ // A single load must use slot #0.
+ if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad()) {
+ if (loads == 1 && loads == memory)
+ // Pin the load to slot #0.
+ ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleLoad);
+ }
+
+ // A single store must use slot #0.
+ if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayStore()) {
+ if (!store0) {
+ if (stores == 1)
+ ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleStore);
+ else if (stores > 1) {
+ if (slotLoadStore < slotLastLoadStore) {
+ // Error if no more slots available for stores.
+ Error = SHUFFLE_ERROR_STORES;
+ return false;
+ }
+ // Pin the store to the highest slot available to it.
+ ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore);
+ // Update the next highest slot available to stores.
+ slotLoadStore >>= 1;
+ }
+ }
+ if (store1 && stores > 1) {
+ // Error if a single store with another store.
+ Error = SHUFFLE_ERROR_STORES;
+ return false;
+ }
+ }
+
+ // Flag if an instruction can only be executed in slot #3.
+ if (ISJ->Core.getUnits() == slotThree)
+ bOnlySlot3 = true;
+
+ if (!ISJ->Core.getUnits()) {
+ // Error if insn may not be executed in any slot.
+ Error = SHUFFLE_ERROR_NOSLOTS;
+ return false;
+ }
+ }
+
+ bool validateSlots = true;
+ if (bOnlySlot3 == false && pSlot3Cnt == 1 && slot3ISJ != end()) {
+ // Save the slot mask of the instruction marked with A_PREFER_SLOT3,
+ // then pin it to slot #3.
+ unsigned saveUnits = slot3ISJ->Core.getUnits();
+ slot3ISJ->Core.setUnits(saveUnits & slotThree);
+
+ HexagonUnitAuction AuctionCore;
+ std::sort(begin(), end(), HexagonInstr::lessCore);
+
+ // See if things are OK with that instruction pinned to slot #3.
+ bool bFail = false;
+ for (iterator I = begin(); I != end() && bFail != true; ++I)
+ if (!AuctionCore.bid(I->Core.getUnits()))
+ bFail = true;
+
+ // If so, keep the pinning; if not, restore the original slot mask.
+ if (!bFail)
+ validateSlots = false; // all good, no need to re-do auction
+ else
+ for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
+ MCInst const *ID = ISJ->getDesc();
+ if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID))
+ ISJ->Core.setUnits(saveUnits);
+ }
+ }
+
+ // Check if any slot, core, is over-subscribed.
+ // Verify the core slot subscriptions.
+ if (validateSlots) {
+ HexagonUnitAuction AuctionCore;
+
+ std::sort(begin(), end(), HexagonInstr::lessCore);
+
+ for (iterator I = begin(); I != end(); ++I)
+ if (!AuctionCore.bid(I->Core.getUnits())) {
+ Error = SHUFFLE_ERROR_SLOTS;
+ return false;
+ }
+ }
+ // Verify the CVI slot subscriptions.
+ {
+ HexagonUnitAuction AuctionCVI;
+
+ std::sort(begin(), end(), HexagonInstr::lessCVI);
+
+ for (iterator I = begin(); I != end(); ++I)
+ for (unsigned i = 0; i < I->CVI.getLanes(); ++i) // TODO: I->CVI.isValid?
+ if (!AuctionCVI.bid(I->CVI.getUnits() << i)) {
+ Error = SHUFFLE_ERROR_SLOTS;
+ return false;
+ }
+ }
+
+ Error = SHUFFLE_SUCCESS;
+ return true;
+}
+
+bool HexagonShuffler::shuffle() {
+ if (size() > HEXAGON_PACKET_SIZE) {
+ // Reject a packet with more insns than a packet can hold,
+ // or with compound or duplex insns, for now.
+ Error = SHUFFLE_ERROR_INVALID;
+ return false;
+ }
+
+ // Check and prepare packet.
+ if (size() > 1 && check())
+ // Reorder the handles for each slot.
+ for (unsigned nSlot = 0, emptySlots = 0; nSlot < HEXAGON_PACKET_SIZE;
+ ++nSlot) {
+ iterator ISJ, ISK;
+ unsigned slotSkip, slotWeight;
+
+ // Prioritize the handles considering their restrictions.
+ for (ISJ = ISK = Packet.begin(), slotSkip = slotWeight = 0;
+ ISK != Packet.end(); ++ISK, ++slotSkip)
+ if (slotSkip < nSlot - emptySlots)
+ // Note which handle to begin at.
+ ++ISJ;
+ else
+ // Calculate the weight of the slot.
+ slotWeight += ISK->Core.setWeight(HEXAGON_PACKET_SIZE - nSlot - 1);
+
+ if (slotWeight)
+ // Sort the packet, favoring source order,
+ // beginning after the previous slot.
+ std::sort(ISJ, Packet.end());
+ else
+ // Skip unused slot.
+ ++emptySlots;
+ }
+
+ for (iterator ISJ = begin(); ISJ != end(); ++ISJ)
+ DEBUG(dbgs().write_hex(ISJ->Core.getUnits());
+ dbgs() << ':'
+ << HexagonMCInstrInfo::getDesc(MCII, *ISJ->getDesc())
+ .getOpcode();
+ dbgs() << '\n');
+ DEBUG(dbgs() << '\n');
+
+ return (!getError());
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
new file mode 100644
index 0000000..174f10f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -0,0 +1,184 @@
+//===----- HexagonShuffler.h - Instruction bundle shuffling ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the shuffling of insns inside a bundle according to the
+// packet formation rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONSHUFFLER_H
+#define HEXAGONSHUFFLER_H
+
+#include "Hexagon.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstrInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+// Insn resources.
+class HexagonResource {
+ // Mask of the slots or units that may execute the insn and
+ // the weight or priority that the insn requires to be assigned a slot.
+ unsigned Slots, Weight;
+
+public:
+ HexagonResource(unsigned s) { setUnits(s); };
+
+ void setUnits(unsigned s) {
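+ // Keep only the low HEXAGON_PACKET_SIZE bits of the slot mask.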
+ Slots = s & ~(~0U << HEXAGON_PACKET_SIZE);
+ };
+ unsigned setWeight(unsigned s);
+
+ unsigned getUnits() const { return (Slots); };
+ unsigned getWeight() const { return (Weight); };
+
+ // Check if the resources are in ascending slot order.
+ static bool lessUnits(const HexagonResource &A, const HexagonResource &B) {
+ return (countPopulation(A.getUnits()) < countPopulation(B.getUnits()));
+ };
+ // Check if the resources are in ascending weight order.
+ static bool lessWeight(const HexagonResource &A, const HexagonResource &B) {
+ return (A.getWeight() < B.getWeight());
+ };
+};
+
+// HVX insn resources.
+class HexagonCVIResource : public HexagonResource {
+ typedef std::pair<unsigned, unsigned> UnitsAndLanes;
+ typedef llvm::DenseMap<unsigned, UnitsAndLanes> TypeUnitsAndLanes;
+
+ // Available HVX slots.
+ enum {
+ CVI_NONE = 0,
+ CVI_XLANE = 1 << 0,
+ CVI_SHIFT = 1 << 1,
+ CVI_MPY0 = 1 << 2,
+ CVI_MPY1 = 1 << 3
+ };
+
+ static bool SetUp;
+ static bool setup();
+ static TypeUnitsAndLanes *TUL;
+
+ // Count of adjacent slots that the insn requires to be executed.
+ unsigned Lanes;
+ // Flag whether the insn is a load or a store.
+ bool Load, Store;
+ // Flag whether the HVX resources are valid.
+ bool Valid;
+
+ void setLanes(unsigned l) { Lanes = l; };
+ void setLoad(bool f = true) { Load = f; };
+ void setStore(bool f = true) { Store = f; };
+
+public:
+ HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, MCInst const *id);
+
+ bool isValid() const { return (Valid); };
+ unsigned getLanes() const { return (Lanes); };
+ bool mayLoad() const { return (Load); };
+ bool mayStore() const { return (Store); };
+};
+
+// Handle to an insn used by the shuffling algorithm.
+class HexagonInstr {
+ friend class HexagonShuffler;
+
+ MCInst const *ID;
+ MCInst const *Extender;
+ HexagonResource Core;
+ HexagonCVIResource CVI;
+ bool SoloException;
+
+public:
+ HexagonInstr(MCInstrInfo const &MCII, MCInst const *id,
+ MCInst const *Extender, unsigned s, bool x = false)
+ : ID(id), Extender(Extender), Core(s), CVI(MCII, s, id),
+ SoloException(x){};
+
+ MCInst const *getDesc() const { return (ID); };
+
+ MCInst const *getExtender() const { return Extender; }
+
+ unsigned isSoloException() const { return (SoloException); };
+
+ // Order the handles by decreasing weight for shuffling purposes.
+ bool operator<(const HexagonInstr &B) const {
+ return (HexagonResource::lessWeight(B.Core, Core));
+ };
+ // Check if the handles are in ascending order by core slots.
+ static bool lessCore(const HexagonInstr &A, const HexagonInstr &B) {
+ return (HexagonResource::lessUnits(A.Core, B.Core));
+ };
+ // Check if the handles are in ascending order by HVX slots.
+ static bool lessCVI(const HexagonInstr &A, const HexagonInstr &B) {
+ return (HexagonResource::lessUnits(A.CVI, B.CVI));
+ };
+};
+
+// Bundle shuffler.
+class HexagonShuffler {
+ typedef SmallVector<HexagonInstr, HEXAGON_PRESHUFFLE_PACKET_SIZE>
+ HexagonPacket;
+
+ // Insn handles in a bundle.
+ HexagonPacket Packet;
+
+ // Shuffling error code.
+ unsigned Error;
+
+protected:
+ int64_t BundleFlags;
+ MCInstrInfo const &MCII;
+ MCSubtargetInfo const &STI;
+
+public:
+ typedef HexagonPacket::iterator iterator;
+
+ enum {
+ SHUFFLE_SUCCESS = 0, ///< Successful operation.
+ SHUFFLE_ERROR_INVALID, ///< Invalid bundle.
+ SHUFFLE_ERROR_STORES, ///< No free slots for store insns.
+ SHUFFLE_ERROR_LOADS, ///< No free slots for load insns.
+ SHUFFLE_ERROR_BRANCHES, ///< No free slots for branch insns.
+ SHUFFLE_ERROR_NOSLOTS, ///< No free slots for other insns.
+ SHUFFLE_ERROR_SLOTS, ///< Over-subscribed slots.
+ SHUFFLE_ERROR_ERRATA2, ///< Errata violation (v60).
+ SHUFFLE_ERROR_STORE_LOAD_CONFLICT, ///< store/load conflict
+ SHUFFLE_ERROR_UNKNOWN ///< Unknown error.
+ };
+
+ explicit HexagonShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI);
+
+ // Reset to initial state.
+ void reset();
+ // Check if the bundle may be validly shuffled.
+ bool check();
+ // Reorder the insn handles in the bundle.
+ bool shuffle();
+
+ unsigned size() const { return (Packet.size()); };
+
+ iterator begin() { return (Packet.begin()); };
+ iterator end() { return (Packet.end()); };
+
+ // Add an insn handle to the bundle.
+ void append(MCInst const *ID, MCInst const *Extender, unsigned S,
+ bool X = false);
+
+ // Set or return the error code for the last check or shuffle of the bundle.
+ void setError(unsigned Err) { Error = Err; };
+ unsigned getError() const { return (Error); };
+};
+}
+
+#endif // HEXAGONSHUFFLER_H
diff --git a/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
new file mode 100644
index 0000000..40f6c8d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- HexagonTargetInfo.cpp - Hexagon Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheHexagonTarget;
+
+extern "C" void LLVMInitializeHexagonTargetInfo() {
+ RegisterTarget<Triple::hexagon, /*HasJIT=*/false> X(TheHexagonTarget, "hexagon", "Hexagon");
+}