Diffstat (limited to 'contrib/llvm/lib/Target/Hexagon')
109 files changed, 66628 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp new file mode 100644 index 0000000..a8622a9 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -0,0 +1,2152 @@ +//===-- HexagonAsmParser.cpp - Parse Hexagon asm to MCInst instructions----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mcasmparser" + +#include "Hexagon.h" +#include "HexagonRegisterInfo.h" +#include "HexagonTargetStreamer.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCELFStreamer.h" +#include "MCTargetDesc/HexagonMCChecker.h" +#include "MCTargetDesc/HexagonMCExpr.h" +#include "MCTargetDesc/HexagonMCShuffler.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "MCTargetDesc/HexagonMCAsmInfo.h" +#include "MCTargetDesc/HexagonShuffler.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include <sstream> + +using namespace llvm; + +static cl::opt<bool> EnableFutureRegs("mfuture-regs", + cl::desc("Enable future registers")); + +static cl::opt<bool> WarnMissingParenthesis("mwarn-missing-parenthesis", +cl::desc("Warn for missing parenthesis around predicate registers"), +cl::init(true)); +static cl::opt<bool> ErrorMissingParenthesis("merror-missing-parenthesis", +cl::desc("Error for missing parenthesis around predicate registers"), +cl::init(false)); +static cl::opt<bool> WarnSignedMismatch("mwarn-sign-mismatch", +cl::desc("Warn for mismatching a signed and unsigned value"), +cl::init(true)); +static cl::opt<bool> WarnNoncontigiousRegister("mwarn-noncontigious-register", +cl::desc("Warn for register names that arent contigious"), +cl::init(true)); +static cl::opt<bool> ErrorNoncontigiousRegister("merror-noncontigious-register", +cl::desc("Error for register names that aren't contigious"), +cl::init(false)); + + +namespace { +struct HexagonOperand; + +class HexagonAsmParser : public MCTargetAsmParser { + + HexagonTargetStreamer &getTargetStreamer() { + MCTargetStreamer &TS = *Parser.getStreamer().getTargetStreamer(); + return static_cast<HexagonTargetStreamer &>(TS); + } + + MCAsmParser &Parser; + MCAssembler *Assembler; + MCInstrInfo const &MCII; + MCInst MCB; + bool InBrackets; + + MCAsmParser &getParser() const { return Parser; } + MCAssembler *getAssembler() const { return Assembler; } + MCAsmLexer &getLexer() const { return Parser.getLexer(); } + + bool equalIsAsmAssignment() override { return false; } + bool isLabel(AsmToken &Token) override; + + void Warning(SMLoc L, 
const Twine &Msg) { Parser.Warning(L, Msg); } + bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } + bool ParseDirectiveFalign(unsigned Size, SMLoc L); + + virtual bool ParseRegister(unsigned &RegNo, + SMLoc &StartLoc, + SMLoc &EndLoc) override; + bool ParseDirectiveSubsection(SMLoc L); + bool ParseDirectiveValue(unsigned Size, SMLoc L); + bool ParseDirectiveComm(bool IsLocal, SMLoc L); + bool RegisterMatchesArch(unsigned MatchNum) const; + + bool matchBundleOptions(); + bool handleNoncontigiousRegister(bool Contigious, SMLoc &Loc); + bool finishBundle(SMLoc IDLoc, MCStreamer &Out); + void canonicalizeImmediates(MCInst &MCI); + bool matchOneInstruction(MCInst &MCB, SMLoc IDLoc, + OperandVector &InstOperands, uint64_t &ErrorInfo, + bool MatchingInlineAsm, bool &MustExtend); + + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, bool MatchingInlineAsm) override; + + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override; + void OutOfRange(SMLoc IDLoc, long long Val, long long Max); + int processInstruction(MCInst &Inst, OperandVector const &Operands, + SMLoc IDLoc, bool &MustExtend); + + // Check if we have an assembler and, if so, set the ELF e_header flags. + void chksetELFHeaderEFlags(unsigned flags) { + if (getAssembler()) + getAssembler()->setELFHeaderEFlags(flags); + } + +/// @name Auto-generated Match Functions +/// { + +#define GET_ASSEMBLER_HEADER +#include "HexagonGenAsmMatcher.inc" + + /// } + +public: + HexagonAsmParser(const MCSubtargetInfo &_STI, MCAsmParser &_Parser, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(Options, _STI), Parser(_Parser), + MCII (MII), MCB(HexagonMCInstrInfo::createBundle()), InBrackets(false) { + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); + + MCAsmParserExtension::Initialize(_Parser); + + Assembler = nullptr; + // FIXME: need better way to detect AsmStreamer (upstream removed getKind()) + if (!Parser.getStreamer().hasRawTextSupport()) { + MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer()); + Assembler = &MES->getAssembler(); + } + } + + bool mustExtend(OperandVector &Operands); + bool splitIdentifier(OperandVector &Operands); + bool parseOperand(OperandVector &Operands); + bool parseInstruction(OperandVector &Operands); + bool implicitExpressionLocation(OperandVector &Operands); + bool parseExpressionOrOperand(OperandVector &Operands); + bool parseExpression(MCExpr const *& Expr); + virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override + { + llvm_unreachable("Unimplemented"); + } + virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + AsmToken ID, OperandVector &Operands) override; + + virtual bool ParseDirective(AsmToken DirectiveID) override; +}; + +/// HexagonOperand - Instances of this class represent a parsed Hexagon machine +/// instruction. 
+struct HexagonOperand : public MCParsedAsmOperand { + enum KindTy { Token, Immediate, Register } Kind; + + SMLoc StartLoc, EndLoc; + + struct TokTy { + const char *Data; + unsigned Length; + }; + + struct RegTy { + unsigned RegNum; + }; + + struct ImmTy { + const MCExpr *Val; + bool MustExtend; + }; + + struct InstTy { + OperandVector *SubInsts; + }; + + union { + struct TokTy Tok; + struct RegTy Reg; + struct ImmTy Imm; + }; + + HexagonOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + +public: + HexagonOperand(const HexagonOperand &o) : MCParsedAsmOperand() { + Kind = o.Kind; + StartLoc = o.StartLoc; + EndLoc = o.EndLoc; + switch (Kind) { + case Register: + Reg = o.Reg; + break; + case Immediate: + Imm = o.Imm; + break; + case Token: + Tok = o.Tok; + break; + } + } + + /// getStartLoc - Get the location of the first token of this operand. + SMLoc getStartLoc() const { return StartLoc; } + + /// getEndLoc - Get the location of the last token of this operand. + SMLoc getEndLoc() const { return EndLoc; } + + unsigned getReg() const { + assert(Kind == Register && "Invalid access!"); + return Reg.RegNum; + } + + const MCExpr *getImm() const { + assert(Kind == Immediate && "Invalid access!"); + return Imm.Val; + } + + bool isToken() const { return Kind == Token; } + bool isImm() const { return Kind == Immediate; } + bool isMem() const { llvm_unreachable("No isMem"); } + bool isReg() const { return Kind == Register; } + + bool CheckImmRange(int immBits, int zeroBits, bool isSigned, + bool isRelocatable, bool Extendable) const { + if (Kind == Immediate) { + const MCExpr *myMCExpr = getImm(); + if (Imm.MustExtend && !Extendable) + return false; + int64_t Res; + if (myMCExpr->evaluateAsAbsolute(Res)) { + int bits = immBits + zeroBits; + // Field bit range is zerobits + bits + // zeroBits must be 0 + if (Res & ((1 << zeroBits) - 1)) + return false; + if (isSigned) { + if (Res < (1LL << (bits - 1)) && Res >= -(1LL << (bits - 1))) + return true; + } else { + if (bits == 64) + return true; + if (Res >= 0) + return ((uint64_t)Res < (uint64_t)(1ULL << bits)) ? true : false; + else { + const int64_t high_bit_set = 1ULL << 63; + const uint64_t mask = (high_bit_set >> (63 - bits)); + return (((uint64_t)Res & mask) == mask) ? 
true : false; + } + } + } else if (myMCExpr->getKind() == MCExpr::SymbolRef && isRelocatable) + return true; + else if (myMCExpr->getKind() == MCExpr::Binary || + myMCExpr->getKind() == MCExpr::Unary) + return true; + } + return false; + } + + bool isf32Ext() const { return false; } + bool iss32Imm() const { return CheckImmRange(32, 0, true, true, false); } + bool iss8Imm() const { return CheckImmRange(8, 0, true, false, false); } + bool iss8Imm64() const { return CheckImmRange(8, 0, true, true, false); } + bool iss7Imm() const { return CheckImmRange(7, 0, true, false, false); } + bool iss6Imm() const { return CheckImmRange(6, 0, true, false, false); } + bool iss4Imm() const { return CheckImmRange(4, 0, true, false, false); } + bool iss4_0Imm() const { return CheckImmRange(4, 0, true, false, false); } + bool iss4_1Imm() const { return CheckImmRange(4, 1, true, false, false); } + bool iss4_2Imm() const { return CheckImmRange(4, 2, true, false, false); } + bool iss4_3Imm() const { return CheckImmRange(4, 3, true, false, false); } + bool iss4_6Imm() const { return CheckImmRange(4, 0, true, false, false); } + bool iss3_6Imm() const { return CheckImmRange(3, 0, true, false, false); } + bool iss3Imm() const { return CheckImmRange(3, 0, true, false, false); } + + bool isu64Imm() const { return CheckImmRange(64, 0, false, true, true); } + bool isu32Imm() const { return CheckImmRange(32, 0, false, true, false); } + bool isu26_6Imm() const { return CheckImmRange(26, 6, false, true, false); } + bool isu16Imm() const { return CheckImmRange(16, 0, false, true, false); } + bool isu16_0Imm() const { return CheckImmRange(16, 0, false, true, false); } + bool isu16_1Imm() const { return CheckImmRange(16, 1, false, true, false); } + bool isu16_2Imm() const { return CheckImmRange(16, 2, false, true, false); } + bool isu16_3Imm() const { return CheckImmRange(16, 3, false, true, false); } + bool isu11_3Imm() const { return CheckImmRange(11, 3, false, false, false); } + bool isu6_0Imm() const { return CheckImmRange(6, 0, false, false, false); } + bool isu6_1Imm() const { return CheckImmRange(6, 1, false, false, false); } + bool isu6_2Imm() const { return CheckImmRange(6, 2, false, false, false); } + bool isu6_3Imm() const { return CheckImmRange(6, 3, false, false, false); } + bool isu10Imm() const { return CheckImmRange(10, 0, false, false, false); } + bool isu9Imm() const { return CheckImmRange(9, 0, false, false, false); } + bool isu8Imm() const { return CheckImmRange(8, 0, false, false, false); } + bool isu7Imm() const { return CheckImmRange(7, 0, false, false, false); } + bool isu6Imm() const { return CheckImmRange(6, 0, false, false, false); } + bool isu5Imm() const { return CheckImmRange(5, 0, false, false, false); } + bool isu4Imm() const { return CheckImmRange(4, 0, false, false, false); } + bool isu3Imm() const { return CheckImmRange(3, 0, false, false, false); } + bool isu2Imm() const { return CheckImmRange(2, 0, false, false, false); } + bool isu1Imm() const { return CheckImmRange(1, 0, false, false, false); } + + bool ism6Imm() const { return CheckImmRange(6, 0, false, false, false); } + bool isn8Imm() const { return CheckImmRange(8, 0, false, false, false); } + + bool iss16Ext() const { return CheckImmRange(16 + 26, 0, true, true, true); } + bool iss12Ext() const { return CheckImmRange(12 + 26, 0, true, true, true); } + bool iss10Ext() const { return CheckImmRange(10 + 26, 0, true, true, true); } + bool iss9Ext() const { return CheckImmRange(9 + 26, 0, true, true, true); } + bool iss8Ext() const { 
return CheckImmRange(8 + 26, 0, true, true, true); } + bool iss7Ext() const { return CheckImmRange(7 + 26, 0, true, true, true); } + bool iss6Ext() const { return CheckImmRange(6 + 26, 0, true, true, true); } + bool iss11_0Ext() const { + return CheckImmRange(11 + 26, 0, true, true, true); + } + bool iss11_1Ext() const { + return CheckImmRange(11 + 26, 1, true, true, true); + } + bool iss11_2Ext() const { + return CheckImmRange(11 + 26, 2, true, true, true); + } + bool iss11_3Ext() const { + return CheckImmRange(11 + 26, 3, true, true, true); + } + + bool isu6Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); } + bool isu7Ext() const { return CheckImmRange(7 + 26, 0, false, true, true); } + bool isu8Ext() const { return CheckImmRange(8 + 26, 0, false, true, true); } + bool isu9Ext() const { return CheckImmRange(9 + 26, 0, false, true, true); } + bool isu10Ext() const { return CheckImmRange(10 + 26, 0, false, true, true); } + bool isu6_0Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); } + bool isu6_1Ext() const { return CheckImmRange(6 + 26, 1, false, true, true); } + bool isu6_2Ext() const { return CheckImmRange(6 + 26, 2, false, true, true); } + bool isu6_3Ext() const { return CheckImmRange(6 + 26, 3, false, true, true); } + bool isu32MustExt() const { return isImm() && Imm.MustExtend; } + + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getReg())); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createExpr(getImm())); + } + + void addSignedImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + MCExpr const *Expr = getImm(); + int64_t Value; + if (!Expr->evaluateAsAbsolute(Value)) { + Inst.addOperand(MCOperand::createExpr(Expr)); + return; + } + int64_t Extended = SignExtend64 (Value, 32); + if ((Extended < 0) == (Value < 0)) { + Inst.addOperand(MCOperand::createExpr(Expr)); + return; + } + // Flip bit 33 to signal signed unsigned mismatch + Extended ^= 0x100000000; + Inst.addOperand(MCOperand::createImm(Extended)); + } + + void addf32ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void adds32ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds8ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds8Imm64Operands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds6ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4_0ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4_1ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4_2ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4_3ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds3ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + + void addu64ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu32ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu26_6ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + 
void addu16ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu16_0ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu16_1ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu16_2ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu16_3ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu11_3ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu10ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu9ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu8ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu7ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_0ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_1ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_2ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_3ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu5ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu4ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu3ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu2ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu1ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void addm6ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addn8ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void adds16ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds12ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds10ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds9ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds8ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds6ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds11_0ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds11_1ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds11_2ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds11_3ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + + void addu6ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu7ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu8ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu9ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu10ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_0ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_1ExtOperands(MCInst &Inst, unsigned N) const { + 
addImmOperands(Inst, N); + } + void addu6_2ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_3ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu32MustExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void adds4_6ImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::createImm(CE->getValue() * 64)); + } + + void adds3_6ImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::createImm(CE->getValue() * 64)); + } + + StringRef getToken() const { + assert(Kind == Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + virtual void print(raw_ostream &OS) const; + + static std::unique_ptr<HexagonOperand> CreateToken(StringRef Str, SMLoc S) { + HexagonOperand *Op = new HexagonOperand(Token); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + return std::unique_ptr<HexagonOperand>(Op); + } + + static std::unique_ptr<HexagonOperand> CreateReg(unsigned RegNum, SMLoc S, + SMLoc E) { + HexagonOperand *Op = new HexagonOperand(Register); + Op->Reg.RegNum = RegNum; + Op->StartLoc = S; + Op->EndLoc = E; + return std::unique_ptr<HexagonOperand>(Op); + } + + static std::unique_ptr<HexagonOperand> CreateImm(const MCExpr *Val, SMLoc S, + SMLoc E) { + HexagonOperand *Op = new HexagonOperand(Immediate); + Op->Imm.Val = Val; + Op->Imm.MustExtend = false; + Op->StartLoc = S; + Op->EndLoc = E; + return std::unique_ptr<HexagonOperand>(Op); + } +}; + +} // end anonymous namespace. + +void HexagonOperand::print(raw_ostream &OS) const { + switch (Kind) { + case Immediate: + getImm()->print(OS, nullptr); + break; + case Register: + OS << "<register R"; + OS << getReg() << ">"; + break; + case Token: + OS << "'" << getToken() << "'"; + break; + } +} + +/// @name Auto-generated Match Functions +static unsigned MatchRegisterName(StringRef Name); + +bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) { + DEBUG(dbgs() << "Bundle:"); + DEBUG(MCB.dump_pretty(dbgs())); + DEBUG(dbgs() << "--\n"); + + // Check the bundle for errors. 
+ const MCRegisterInfo *RI = getContext().getRegisterInfo(); + HexagonMCChecker Check(MCII, getSTI(), MCB, MCB, *RI); + + bool CheckOk = HexagonMCInstrInfo::canonicalizePacket(MCII, getSTI(), + getContext(), MCB, + &Check); + + while (Check.getNextErrInfo() == true) { + unsigned Reg = Check.getErrRegister(); + Twine R(RI->getName(Reg)); + + uint64_t Err = Check.getError(); + if (Err != HexagonMCErrInfo::CHECK_SUCCESS) { + if (HexagonMCErrInfo::CHECK_ERROR_BRANCHES & Err) + Error(IDLoc, + "unconditional branch cannot precede another branch in packet"); + + if (HexagonMCErrInfo::CHECK_ERROR_NEWP & Err || + HexagonMCErrInfo::CHECK_ERROR_NEWV & Err) + Error(IDLoc, "register `" + R + + "' used with `.new' " + "but not validly modified in the same packet"); + + if (HexagonMCErrInfo::CHECK_ERROR_REGISTERS & Err) + Error(IDLoc, "register `" + R + "' modified more than once"); + + if (HexagonMCErrInfo::CHECK_ERROR_READONLY & Err) + Error(IDLoc, "cannot write to read-only register `" + R + "'"); + + if (HexagonMCErrInfo::CHECK_ERROR_LOOP & Err) + Error(IDLoc, "loop-setup and some branch instructions " + "cannot be in the same packet"); + + if (HexagonMCErrInfo::CHECK_ERROR_ENDLOOP & Err) { + Twine N(HexagonMCInstrInfo::isInnerLoop(MCB) ? '0' : '1'); + Error(IDLoc, "packet marked with `:endloop" + N + "' " + + "cannot contain instructions that modify register " + + "`" + R + "'"); + } + + if (HexagonMCErrInfo::CHECK_ERROR_SOLO & Err) + Error(IDLoc, + "instruction cannot appear in packet with other instructions"); + + if (HexagonMCErrInfo::CHECK_ERROR_NOSLOTS & Err) + Error(IDLoc, "too many slots used in packet"); + + if (Err & HexagonMCErrInfo::CHECK_ERROR_SHUFFLE) { + uint64_t Erm = Check.getShuffleError(); + + if (HexagonShuffler::SHUFFLE_ERROR_INVALID == Erm) + Error(IDLoc, "invalid instruction packet"); + else if (HexagonShuffler::SHUFFLE_ERROR_STORES == Erm) + Error(IDLoc, "invalid instruction packet: too many stores"); + else if (HexagonShuffler::SHUFFLE_ERROR_LOADS == Erm) + Error(IDLoc, "invalid instruction packet: too many loads"); + else if (HexagonShuffler::SHUFFLE_ERROR_BRANCHES == Erm) + Error(IDLoc, "too many branches in packet"); + else if (HexagonShuffler::SHUFFLE_ERROR_NOSLOTS == Erm) + Error(IDLoc, "invalid instruction packet: out of slots"); + else if (HexagonShuffler::SHUFFLE_ERROR_SLOTS == Erm) + Error(IDLoc, "invalid instruction packet: slot error"); + else if (HexagonShuffler::SHUFFLE_ERROR_ERRATA2 == Erm) + Error(IDLoc, "v60 packet violation"); + else if (HexagonShuffler::SHUFFLE_ERROR_STORE_LOAD_CONFLICT == Erm) + Error(IDLoc, "slot 0 instruction does not allow slot 1 store"); + else + Error(IDLoc, "unknown error in instruction packet"); + } + } + + unsigned Warn = Check.getWarning(); + if (Warn != HexagonMCErrInfo::CHECK_SUCCESS) { + if (HexagonMCErrInfo::CHECK_WARN_CURRENT & Warn) + Warning(IDLoc, "register `" + R + "' used with `.cur' " + "but not used in the same packet"); + else if (HexagonMCErrInfo::CHECK_WARN_TEMPORARY & Warn) + Warning(IDLoc, "register `" + R + "' used with `.tmp' " + "but not used in the same packet"); + } + } + + if (CheckOk) { + MCB.setLoc(IDLoc); + if (HexagonMCInstrInfo::bundleSize(MCB) == 0) { + assert(!HexagonMCInstrInfo::isInnerLoop(MCB)); + assert(!HexagonMCInstrInfo::isOuterLoop(MCB)); + // Empty packets are valid yet aren't emitted + return false; + } + Out.EmitInstruction(MCB, getSTI()); + } else { + // If compounding and duplexing didn't reduce the size below + // 4 or less we have a packet that is too big. 
+ if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) { + Error(IDLoc, "invalid instruction packet: out of slots"); + return true; // Error + } + } + + return false; // No error +} + +bool HexagonAsmParser::matchBundleOptions() { + MCAsmParser &Parser = getParser(); + MCAsmLexer &Lexer = getLexer(); + while (true) { + if (!Parser.getTok().is(AsmToken::Colon)) + return false; + Lexer.Lex(); + StringRef Option = Parser.getTok().getString(); + if (Option.compare_lower("endloop0") == 0) + HexagonMCInstrInfo::setInnerLoop(MCB); + else if (Option.compare_lower("endloop1") == 0) + HexagonMCInstrInfo::setOuterLoop(MCB); + else if (Option.compare_lower("mem_noshuf") == 0) + HexagonMCInstrInfo::setMemReorderDisabled(MCB); + else if (Option.compare_lower("mem_shuf") == 0) + HexagonMCInstrInfo::setMemStoreReorderEnabled(MCB); + else + return true; + Lexer.Lex(); + } +} + +// For instruction aliases, immediates are generated rather than +// MCConstantExpr. Convert them for uniform MCExpr. +// Also check for signed/unsigned mismatches and warn +void HexagonAsmParser::canonicalizeImmediates(MCInst &MCI) { + MCInst NewInst; + NewInst.setOpcode(MCI.getOpcode()); + for (MCOperand &I : MCI) + if (I.isImm()) { + int64_t Value (I.getImm()); + if ((Value & 0x100000000) != (Value & 0x80000000)) { + // Detect flipped bit 33 wrt bit 32 and signal warning + Value ^= 0x100000000; + if (WarnSignedMismatch) + Warning (MCI.getLoc(), "Signed/Unsigned mismatch"); + } + NewInst.addOperand(MCOperand::createExpr( + MCConstantExpr::create(Value, getContext()))); + } + else + NewInst.addOperand(I); + MCI = NewInst; +} + +bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc, + OperandVector &InstOperands, + uint64_t &ErrorInfo, + bool MatchingInlineAsm, + bool &MustExtend) { + // Perform matching with tablegen asmmatcher generated function + int result = + MatchInstructionImpl(InstOperands, MCI, ErrorInfo, MatchingInlineAsm); + if (result == Match_Success) { + MCI.setLoc(IDLoc); + MustExtend = mustExtend(InstOperands); + canonicalizeImmediates(MCI); + result = processInstruction(MCI, InstOperands, IDLoc, MustExtend); + + DEBUG(dbgs() << "Insn:"); + DEBUG(MCI.dump_pretty(dbgs())); + DEBUG(dbgs() << "\n\n"); + + MCI.setLoc(IDLoc); + } + + // Create instruction operand for bundle instruction + // Break this into a separate function Code here is less readable + // Think about how to get an instruction error to report correctly. + // SMLoc will return the "{" + switch (result) { + default: + break; + case Match_Success: + return false; + case Match_MissingFeature: + return Error(IDLoc, "invalid instruction"); + case Match_MnemonicFail: + return Error(IDLoc, "unrecognized instruction"); + case Match_InvalidOperand: + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + if (ErrorInfo >= InstOperands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = (static_cast<HexagonOperand *>(InstOperands[ErrorInfo].get())) + ->getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + return Error(ErrorLoc, "invalid operand for instruction"); + } + llvm_unreachable("Implement any new match types added!"); +} + +bool HexagonAsmParser::mustExtend(OperandVector &Operands) { + unsigned Count = 0; + for (std::unique_ptr<MCParsedAsmOperand> &i : Operands) + if (i->isImm()) + if (static_cast<HexagonOperand *>(i.get())->Imm.MustExtend) + ++Count; + // Multiple extenders should have been filtered by iss9Ext et. al. 
+ assert(Count < 2 && "Multiple extenders"); + return Count == 1; +} + +bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) { + if (!InBrackets) { + MCB.clear(); + MCB.addOperand(MCOperand::createImm(0)); + } + HexagonOperand &FirstOperand = static_cast<HexagonOperand &>(*Operands[0]); + if (FirstOperand.isToken() && FirstOperand.getToken() == "{") { + assert(Operands.size() == 1 && "Brackets should be by themselves"); + if (InBrackets) { + getParser().Error(IDLoc, "Already in a packet"); + return true; + } + InBrackets = true; + return false; + } + if (FirstOperand.isToken() && FirstOperand.getToken() == "}") { + assert(Operands.size() == 1 && "Brackets should be by themselves"); + if (!InBrackets) { + getParser().Error(IDLoc, "Not in a packet"); + return true; + } + InBrackets = false; + if (matchBundleOptions()) + return true; + return finishBundle(IDLoc, Out); + } + MCInst *SubInst = new (getParser().getContext()) MCInst; + bool MustExtend = false; + if (matchOneInstruction(*SubInst, IDLoc, Operands, ErrorInfo, + MatchingInlineAsm, MustExtend)) + return true; + HexagonMCInstrInfo::extendIfNeeded( + getParser().getContext(), MCII, MCB, *SubInst, + HexagonMCInstrInfo::isExtended(MCII, *SubInst) || MustExtend); + MCB.addOperand(MCOperand::createInst(SubInst)); + if (!InBrackets) + return finishBundle(IDLoc, Out); + return false; +} + +/// ParseDirective parses the Hexagon specific directives +bool HexagonAsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getIdentifier(); + if ((IDVal.lower() == ".word") || (IDVal.lower() == ".4byte")) + return ParseDirectiveValue(4, DirectiveID.getLoc()); + if (IDVal.lower() == ".short" || IDVal.lower() == ".hword" || + IDVal.lower() == ".half") + return ParseDirectiveValue(2, DirectiveID.getLoc()); + if (IDVal.lower() == ".falign") + return ParseDirectiveFalign(256, DirectiveID.getLoc()); + if ((IDVal.lower() == ".lcomm") || (IDVal.lower() == ".lcommon")) + return ParseDirectiveComm(true, DirectiveID.getLoc()); + if ((IDVal.lower() == ".comm") || (IDVal.lower() == ".common")) + return ParseDirectiveComm(false, DirectiveID.getLoc()); + if (IDVal.lower() == ".subsection") + return ParseDirectiveSubsection(DirectiveID.getLoc()); + + return true; +} +bool HexagonAsmParser::ParseDirectiveSubsection(SMLoc L) { + const MCExpr *Subsection = 0; + int64_t Res; + + assert((getLexer().isNot(AsmToken::EndOfStatement)) && + "Invalid subsection directive"); + getParser().parseExpression(Subsection); + + if (!Subsection->evaluateAsAbsolute(Res)) + return Error(L, "Cannot evaluate subsection number"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + // 0-8192 is the hard-coded range in MCObjectStreamper.cpp, this keeps the + // negative subsections together and in the same order but at the opposite + // end of the section. Only legacy hexagon-gcc created assembly code + // used negative subsections. 
+ if ((Res < 0) && (Res > -8193)) + Subsection = MCConstantExpr::create(8192 + Res, this->getContext()); + + getStreamer().SubSection(Subsection); + return false; +} + +/// ::= .falign [expression] +bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) { + + int64_t MaxBytesToFill = 15; + + // if there is an arguement + if (getLexer().isNot(AsmToken::EndOfStatement)) { + const MCExpr *Value; + SMLoc ExprLoc = L; + + // Make sure we have a number (false is returned if expression is a number) + if (getParser().parseExpression(Value) == false) { + // Make sure this is a number that is in range + const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value); + uint64_t IntValue = MCE->getValue(); + if (!isUIntN(Size, IntValue) && !isIntN(Size, IntValue)) + return Error(ExprLoc, "literal value out of range (256) for falign"); + MaxBytesToFill = IntValue; + Lex(); + } else { + return Error(ExprLoc, "not a valid expression for falign directive"); + } + } + + getTargetStreamer().emitFAlign(16, MaxBytesToFill); + Lex(); + + return false; +} + +/// ::= .word [ expression (, expression)* ] +bool HexagonAsmParser::ParseDirectiveValue(unsigned Size, SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + + for (;;) { + const MCExpr *Value; + SMLoc ExprLoc = L; + if (getParser().parseExpression(Value)) + return true; + + // Special case constant expressions to match code generator. + if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) { + assert(Size <= 8 && "Invalid size"); + uint64_t IntValue = MCE->getValue(); + if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue)) + return Error(ExprLoc, "literal value out of range for directive"); + getStreamer().EmitIntValue(IntValue, Size); + } else + getStreamer().EmitValue(Value, Size); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + // FIXME: Improve diagnostic. + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + } + } + + Lex(); + return false; +} + +// This is largely a copy of AsmParser's ParseDirectiveComm extended to +// accept a 3rd argument, AccessAlignment which indicates the smallest +// memory access made to the symbol, expressed in bytes. If no +// AccessAlignment is specified it defaults to the Alignment Value. +// Hexagon's .lcomm: +// .lcomm Symbol, Length, Alignment, AccessAlignment +bool HexagonAsmParser::ParseDirectiveComm(bool IsLocal, SMLoc Loc) { + // FIXME: need better way to detect if AsmStreamer (upstream removed + // getKind()) + if (getStreamer().hasRawTextSupport()) + return true; // Only object file output requires special treatment. + + StringRef Name; + if (getParser().parseIdentifier(Name)) + return TokError("expected identifier in directive"); + // Handle the identifier as the key symbol. 
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Size; + SMLoc SizeLoc = getLexer().getLoc(); + if (getParser().parseAbsoluteExpression(Size)) + return true; + + int64_t ByteAlignment = 1; + SMLoc ByteAlignmentLoc; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + ByteAlignmentLoc = getLexer().getLoc(); + if (getParser().parseAbsoluteExpression(ByteAlignment)) + return true; + if (!isPowerOf2_64(ByteAlignment)) + return Error(ByteAlignmentLoc, "alignment must be a power of 2"); + } + + int64_t AccessAlignment = 0; + if (getLexer().is(AsmToken::Comma)) { + // The optional access argument specifies the size of the smallest memory + // access to be made to the symbol, expressed in bytes. + SMLoc AccessAlignmentLoc; + Lex(); + AccessAlignmentLoc = getLexer().getLoc(); + if (getParser().parseAbsoluteExpression(AccessAlignment)) + return true; + + if (!isPowerOf2_64(AccessAlignment)) + return Error(AccessAlignmentLoc, "access alignment must be a power of 2"); + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.comm' or '.lcomm' directive"); + + Lex(); + + // NOTE: a size of zero for a .comm should create a undefined symbol + // but a size of .lcomm creates a bss symbol of size zero. + if (Size < 0) + return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't " + "be less than zero"); + + // NOTE: The alignment in the directive is a power of 2 value, the assembler + // may internally end up wanting an alignment in bytes. + // FIXME: Diagnose overflow. + if (ByteAlignment < 0) + return Error(ByteAlignmentLoc, "invalid '.comm' or '.lcomm' directive " + "alignment, can't be less than zero"); + + if (!Sym->isUndefined()) + return Error(Loc, "invalid symbol redefinition"); + + HexagonMCELFStreamer &HexagonELFStreamer = + static_cast<HexagonMCELFStreamer &>(getStreamer()); + if (IsLocal) { + HexagonELFStreamer.HexagonMCEmitLocalCommonSymbol(Sym, Size, ByteAlignment, + AccessAlignment); + return false; + } + + HexagonELFStreamer.HexagonMCEmitCommonSymbol(Sym, Size, ByteAlignment, + AccessAlignment); + return false; +} + +// validate register against architecture +bool HexagonAsmParser::RegisterMatchesArch(unsigned MatchNum) const { + return true; +} + +// extern "C" void LLVMInitializeHexagonAsmLexer(); + +/// Force static initialization. 
+extern "C" void LLVMInitializeHexagonAsmParser() { + RegisterMCAsmParser<HexagonAsmParser> X(TheHexagonTarget); +} + +#define GET_MATCHER_IMPLEMENTATION +#define GET_REGISTER_MATCHER +#include "HexagonGenAsmMatcher.inc" + +namespace { +bool previousEqual(OperandVector &Operands, size_t Index, StringRef String) { + if (Index >= Operands.size()) + return false; + MCParsedAsmOperand &Operand = *Operands[Operands.size() - Index - 1]; + if (!Operand.isToken()) + return false; + return static_cast<HexagonOperand &>(Operand).getToken().equals_lower(String); +} +bool previousIsLoop(OperandVector &Operands, size_t Index) { + return previousEqual(Operands, Index, "loop0") || + previousEqual(Operands, Index, "loop1") || + previousEqual(Operands, Index, "sp1loop0") || + previousEqual(Operands, Index, "sp2loop0") || + previousEqual(Operands, Index, "sp3loop0"); +} +} + +bool HexagonAsmParser::splitIdentifier(OperandVector &Operands) { + AsmToken const &Token = getParser().getTok(); + StringRef String = Token.getString(); + SMLoc Loc = Token.getLoc(); + getLexer().Lex(); + do { + std::pair<StringRef, StringRef> HeadTail = String.split('.'); + if (!HeadTail.first.empty()) + Operands.push_back(HexagonOperand::CreateToken(HeadTail.first, Loc)); + if (!HeadTail.second.empty()) + Operands.push_back(HexagonOperand::CreateToken( + String.substr(HeadTail.first.size(), 1), Loc)); + String = HeadTail.second; + } while (!String.empty()); + return false; +} + +bool HexagonAsmParser::parseOperand(OperandVector &Operands) { + unsigned Register; + SMLoc Begin; + SMLoc End; + MCAsmLexer &Lexer = getLexer(); + if (!ParseRegister(Register, Begin, End)) { + if (!ErrorMissingParenthesis) + switch (Register) { + default: + break; + case Hexagon::P0: + case Hexagon::P1: + case Hexagon::P2: + case Hexagon::P3: + if (previousEqual(Operands, 0, "if")) { + if (WarnMissingParenthesis) + Warning (Begin, "Missing parenthesis around predicate register"); + static char const *LParen = "("; + static char const *RParen = ")"; + Operands.push_back(HexagonOperand::CreateToken(LParen, Begin)); + Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End)); + AsmToken MaybeDotNew = Lexer.getTok(); + if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) && + MaybeDotNew.getString().equals_lower(".new")) + splitIdentifier(Operands); + Operands.push_back(HexagonOperand::CreateToken(RParen, Begin)); + return false; + } + if (previousEqual(Operands, 0, "!") && + previousEqual(Operands, 1, "if")) { + if (WarnMissingParenthesis) + Warning (Begin, "Missing parenthesis around predicate register"); + static char const *LParen = "("; + static char const *RParen = ")"; + Operands.insert(Operands.end () - 1, + HexagonOperand::CreateToken(LParen, Begin)); + Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End)); + AsmToken MaybeDotNew = Lexer.getTok(); + if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) && + MaybeDotNew.getString().equals_lower(".new")) + splitIdentifier(Operands); + Operands.push_back(HexagonOperand::CreateToken(RParen, Begin)); + return false; + } + break; + } + Operands.push_back(HexagonOperand::CreateReg( + Register, Begin, End)); + return false; + } + return splitIdentifier(Operands); +} + +bool HexagonAsmParser::isLabel(AsmToken &Token) { + MCAsmLexer &Lexer = getLexer(); + AsmToken const &Second = Lexer.getTok(); + AsmToken Third = Lexer.peekTok(); + StringRef String = Token.getString(); + if (Token.is(AsmToken::TokenKind::LCurly) || + Token.is(AsmToken::TokenKind::RCurly)) + return false; + if 
(!Token.is(AsmToken::TokenKind::Identifier)) + return true; + if (!MatchRegisterName(String.lower())) + return true; + (void)Second; + assert(Second.is(AsmToken::Colon)); + StringRef Raw (String.data(), Third.getString().data() - String.data() + + Third.getString().size()); + std::string Collapsed = Raw; + Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace), + Collapsed.end()); + StringRef Whole = Collapsed; + std::pair<StringRef, StringRef> DotSplit = Whole.split('.'); + if (!MatchRegisterName(DotSplit.first.lower())) + return true; + return false; +} + +bool HexagonAsmParser::handleNoncontigiousRegister(bool Contigious, SMLoc &Loc) { + if (!Contigious && ErrorNoncontigiousRegister) { + Error(Loc, "Register name is not contigious"); + return true; + } + if (!Contigious && WarnNoncontigiousRegister) + Warning(Loc, "Register name is not contigious"); + return false; +} + +bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + MCAsmLexer &Lexer = getLexer(); + StartLoc = getLexer().getLoc(); + SmallVector<AsmToken, 5> Lookahead; + StringRef RawString(Lexer.getTok().getString().data(), 0); + bool Again = Lexer.is(AsmToken::Identifier); + bool NeededWorkaround = false; + while (Again) { + AsmToken const &Token = Lexer.getTok(); + RawString = StringRef(RawString.data(), + Token.getString().data() - RawString.data () + + Token.getString().size()); + Lookahead.push_back(Token); + Lexer.Lex(); + bool Contigious = Lexer.getTok().getString().data() == + Lookahead.back().getString().data() + + Lookahead.back().getString().size(); + bool Type = Lexer.is(AsmToken::Identifier) || Lexer.is(AsmToken::Dot) || + Lexer.is(AsmToken::Integer) || Lexer.is(AsmToken::Real) || + Lexer.is(AsmToken::Colon); + bool Workaround = Lexer.is(AsmToken::Colon) || + Lookahead.back().is(AsmToken::Colon); + Again = (Contigious && Type) || (Workaround && Type); + NeededWorkaround = NeededWorkaround || (Again && !(Contigious && Type)); + } + std::string Collapsed = RawString; + Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace), + Collapsed.end()); + StringRef FullString = Collapsed; + std::pair<StringRef, StringRef> DotSplit = FullString.split('.'); + unsigned DotReg = MatchRegisterName(DotSplit.first.lower()); + if (DotReg != Hexagon::NoRegister && RegisterMatchesArch(DotReg)) { + if (DotSplit.second.empty()) { + RegNo = DotReg; + EndLoc = Lexer.getLoc(); + if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc)) + return true; + return false; + } else { + RegNo = DotReg; + size_t First = RawString.find('.'); + StringRef DotString (RawString.data() + First, RawString.size() - First); + Lexer.UnLex(AsmToken(AsmToken::Identifier, DotString)); + EndLoc = Lexer.getLoc(); + if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc)) + return true; + return false; + } + } + std::pair<StringRef, StringRef> ColonSplit = StringRef(FullString).split(':'); + unsigned ColonReg = MatchRegisterName(ColonSplit.first.lower()); + if (ColonReg != Hexagon::NoRegister && RegisterMatchesArch(DotReg)) { + Lexer.UnLex(Lookahead.back()); + Lookahead.pop_back(); + Lexer.UnLex(Lookahead.back()); + Lookahead.pop_back(); + RegNo = ColonReg; + EndLoc = Lexer.getLoc(); + if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc)) + return true; + return false; + } + while (!Lookahead.empty()) { + Lexer.UnLex(Lookahead.back()); + Lookahead.pop_back(); + } + return true; +} + +bool HexagonAsmParser::implicitExpressionLocation(OperandVector &Operands) { + if 
(previousEqual(Operands, 0, "call")) + return true; + if (previousEqual(Operands, 0, "jump")) + if (!getLexer().getTok().is(AsmToken::Colon)) + return true; + if (previousEqual(Operands, 0, "(") && previousIsLoop(Operands, 1)) + return true; + if (previousEqual(Operands, 1, ":") && previousEqual(Operands, 2, "jump") && + (previousEqual(Operands, 0, "nt") || previousEqual(Operands, 0, "t"))) + return true; + return false; +} + +bool HexagonAsmParser::parseExpression(MCExpr const *& Expr) { + llvm::SmallVector<AsmToken, 4> Tokens; + MCAsmLexer &Lexer = getLexer(); + bool Done = false; + static char const * Comma = ","; + do { + Tokens.emplace_back (Lexer.getTok()); + Lexer.Lex(); + switch (Tokens.back().getKind()) + { + case AsmToken::TokenKind::Hash: + if (Tokens.size () > 1) + if ((Tokens.end () - 2)->getKind() == AsmToken::TokenKind::Plus) { + Tokens.insert(Tokens.end() - 2, + AsmToken(AsmToken::TokenKind::Comma, Comma)); + Done = true; + } + break; + case AsmToken::TokenKind::RCurly: + case AsmToken::TokenKind::EndOfStatement: + case AsmToken::TokenKind::Eof: + Done = true; + break; + default: + break; + } + } while (!Done); + while (!Tokens.empty()) { + Lexer.UnLex(Tokens.back()); + Tokens.pop_back(); + } + return getParser().parseExpression(Expr); +} + +bool HexagonAsmParser::parseExpressionOrOperand(OperandVector &Operands) { + if (implicitExpressionLocation(Operands)) { + MCAsmParser &Parser = getParser(); + SMLoc Loc = Parser.getLexer().getLoc(); + std::unique_ptr<HexagonOperand> Expr = + HexagonOperand::CreateImm(nullptr, Loc, Loc); + MCExpr const *& Val = Expr->Imm.Val; + Operands.push_back(std::move(Expr)); + return parseExpression(Val); + } + return parseOperand(Operands); +} + +/// Parse an instruction. +bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + MCAsmLexer &Lexer = getLexer(); + while (true) { + AsmToken const &Token = Parser.getTok(); + switch (Token.getKind()) { + case AsmToken::EndOfStatement: { + Lexer.Lex(); + return false; + } + case AsmToken::LCurly: { + if (!Operands.empty()) + return true; + Operands.push_back( + HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Lexer.Lex(); + return false; + } + case AsmToken::RCurly: { + if (Operands.empty()) { + Operands.push_back( + HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Lexer.Lex(); + } + return false; + } + case AsmToken::Comma: { + Lexer.Lex(); + continue; + } + case AsmToken::EqualEqual: + case AsmToken::ExclaimEqual: + case AsmToken::GreaterEqual: + case AsmToken::GreaterGreater: + case AsmToken::LessEqual: + case AsmToken::LessLess: { + Operands.push_back(HexagonOperand::CreateToken( + Token.getString().substr(0, 1), Token.getLoc())); + Operands.push_back(HexagonOperand::CreateToken( + Token.getString().substr(1, 1), Token.getLoc())); + Lexer.Lex(); + continue; + } + case AsmToken::Hash: { + bool MustNotExtend = false; + bool ImplicitExpression = implicitExpressionLocation(Operands); + std::unique_ptr<HexagonOperand> Expr = HexagonOperand::CreateImm( + nullptr, Lexer.getLoc(), Lexer.getLoc()); + if (!ImplicitExpression) + Operands.push_back( + HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Lexer.Lex(); + bool MustExtend = false; + bool HiOnly = false; + bool LoOnly = false; + if (Lexer.is(AsmToken::Hash)) { + Lexer.Lex(); + MustExtend = true; + } else if (ImplicitExpression) + MustNotExtend = true; + AsmToken const &Token = Parser.getTok(); + if (Token.is(AsmToken::Identifier)) { + StringRef String 
= Token.getString(); + AsmToken IDToken = Token; + if (String.lower() == "hi") { + HiOnly = true; + } else if (String.lower() == "lo") { + LoOnly = true; + } + if (HiOnly || LoOnly) { + AsmToken LParen = Lexer.peekTok(); + if (!LParen.is(AsmToken::LParen)) { + HiOnly = false; + LoOnly = false; + } else { + Lexer.Lex(); + } + } + } + if (parseExpression(Expr->Imm.Val)) + return true; + int64_t Value; + MCContext &Context = Parser.getContext(); + assert(Expr->Imm.Val != nullptr); + if (Expr->Imm.Val->evaluateAsAbsolute(Value)) { + if (HiOnly) + Expr->Imm.Val = MCBinaryExpr::createLShr( + Expr->Imm.Val, MCConstantExpr::create(16, Context), Context); + if (HiOnly || LoOnly) + Expr->Imm.Val = MCBinaryExpr::createAnd( + Expr->Imm.Val, MCConstantExpr::create(0xffff, Context), Context); + } + if (MustNotExtend) + Expr->Imm.Val = HexagonNoExtendOperand::Create(Expr->Imm.Val, Context); + Expr->Imm.MustExtend = MustExtend; + Operands.push_back(std::move(Expr)); + continue; + } + default: + break; + } + if (parseExpressionOrOperand(Operands)) + return true; + } +} + +bool HexagonAsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, + AsmToken ID, + OperandVector &Operands) { + getLexer().UnLex(ID); + return parseInstruction(Operands); +} + +namespace { +MCInst makeCombineInst(int opCode, MCOperand &Rdd, + MCOperand &MO1, MCOperand &MO2) { + MCInst TmpInst; + TmpInst.setOpcode(opCode); + TmpInst.addOperand(Rdd); + TmpInst.addOperand(MO1); + TmpInst.addOperand(MO2); + + return TmpInst; +} +} + +// Define this matcher function after the auto-generated include so we +// have the match class enum definitions. +unsigned HexagonAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, + unsigned Kind) { + HexagonOperand *Op = static_cast<HexagonOperand *>(&AsmOp); + + switch (Kind) { + case MCK_0: { + int64_t Value; + return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == 0 + ? Match_Success + : Match_InvalidOperand; + } + case MCK_1: { + int64_t Value; + return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == 1 + ? Match_Success + : Match_InvalidOperand; + } + case MCK__MINUS_1: { + int64_t Value; + return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == -1 + ? 
Match_Success + : Match_InvalidOperand; + } + } + if (Op->Kind == HexagonOperand::Token && Kind != InvalidMatchClass) { + StringRef myStringRef = StringRef(Op->Tok.Data, Op->Tok.Length); + if (matchTokenString(myStringRef.lower()) == (MatchClassKind)Kind) + return Match_Success; + if (matchTokenString(myStringRef.upper()) == (MatchClassKind)Kind) + return Match_Success; + } + + DEBUG(dbgs() << "Unmatched Operand:"); + DEBUG(Op->dump()); + DEBUG(dbgs() << "\n"); + + return Match_InvalidOperand; +} + +void HexagonAsmParser::OutOfRange(SMLoc IDLoc, long long Val, long long Max) { + std::string errStr; + raw_string_ostream ES(errStr); + ES << "value " << Val << "(" << format_hex(Val, 0) << ") out of range: "; + if (Max >= 0) + ES << "0-" << Max; + else + ES << Max << "-" << (-Max - 1); + Error(IDLoc, ES.str().c_str()); +} + +int HexagonAsmParser::processInstruction(MCInst &Inst, + OperandVector const &Operands, + SMLoc IDLoc, bool &MustExtend) { + MCContext &Context = getParser().getContext(); + const MCRegisterInfo *RI = getContext().getRegisterInfo(); + std::string r = "r"; + std::string v = "v"; + std::string Colon = ":"; + + bool is32bit = false; // used to distinguish between CONST32 and CONST64 + switch (Inst.getOpcode()) { + default: + break; + + case Hexagon::M4_mpyrr_addr: + case Hexagon::S4_addi_asl_ri: + case Hexagon::S4_addi_lsr_ri: + case Hexagon::S4_andi_asl_ri: + case Hexagon::S4_andi_lsr_ri: + case Hexagon::S4_ori_asl_ri: + case Hexagon::S4_ori_lsr_ri: + case Hexagon::S4_or_andix: + case Hexagon::S4_subi_asl_ri: + case Hexagon::S4_subi_lsr_ri: { + MCOperand &Ry = Inst.getOperand(0); + MCOperand &src = Inst.getOperand(2); + if (RI->getEncodingValue(Ry.getReg()) != RI->getEncodingValue(src.getReg())) + return Match_InvalidOperand; + break; + } + + case Hexagon::C2_cmpgei: { + MCOperand &MO = Inst.getOperand(2); + MO.setExpr(MCBinaryExpr::createSub( + MO.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::C2_cmpgti); + break; + } + + case Hexagon::C2_cmpgeui: { + MCOperand &MO = Inst.getOperand(2); + int64_t Value; + bool Success = MO.getExpr()->evaluateAsAbsolute(Value); + (void)Success; + assert(Success && "Assured by matcher"); + if (Value == 0) { + MCInst TmpInst; + MCOperand &Pd = Inst.getOperand(0); + MCOperand &Rt = Inst.getOperand(1); + TmpInst.setOpcode(Hexagon::C2_cmpeq); + TmpInst.addOperand(Pd); + TmpInst.addOperand(Rt); + TmpInst.addOperand(Rt); + Inst = TmpInst; + } else { + MO.setExpr(MCBinaryExpr::createSub( + MO.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::C2_cmpgtui); + } + break; + } + case Hexagon::J2_loop1r: + case Hexagon::J2_loop1i: + case Hexagon::J2_loop0r: + case Hexagon::J2_loop0i: { + MCOperand &MO = Inst.getOperand(0); + // Loop has different opcodes for extended vs not extended, but we should + // not use the other opcode as it is a legacy artifact of TD files. + int64_t Value; + if (MO.getExpr()->evaluateAsAbsolute(Value)) { + // if the operand can fit within a 7:2 field + if (Value < (1 << 8) && Value >= -(1 << 8)) { + SMLoc myLoc = Operands[2]->getStartLoc(); + // # is left in startLoc in the case of ## + // If '##' found then force extension. + if (*myLoc.getPointer() == '#') { + MustExtend = true; + break; + } + } else { + // If immediate and out of 7:2 range. 
+ MustExtend = true; + } + } + break; + } + + // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)" + case Hexagon::A2_tfrp: { + MCOperand &MO = Inst.getOperand(1); + unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + MO.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.setOpcode(Hexagon::A2_combinew); + break; + } + + case Hexagon::A2_tfrpt: + case Hexagon::A2_tfrpf: { + MCOperand &MO = Inst.getOperand(2); + unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + MO.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt) + ? Hexagon::C2_ccombinewt + : Hexagon::C2_ccombinewf); + break; + } + case Hexagon::A2_tfrptnew: + case Hexagon::A2_tfrpfnew: { + MCOperand &MO = Inst.getOperand(2); + unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + MO.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew) + ? Hexagon::C2_ccombinewnewt + : Hexagon::C2_ccombinewnewf); + break; + } + + // Translate a "$Rx = CONST32(#imm)" to "$Rx = memw(gp+#LABEL) " + case Hexagon::CONST32: + case Hexagon::CONST32_Float_Real: + case Hexagon::CONST32_Int_Real: + case Hexagon::FCONST32_nsdata: + is32bit = true; + // Translate a "$Rx:y = CONST64(#imm)" to "$Rx:y = memd(gp+#LABEL) " + case Hexagon::CONST64_Float_Real: + case Hexagon::CONST64_Int_Real: + + // FIXME: need better way to detect AsmStreamer (upstream removed getKind()) + if (!Parser.getStreamer().hasRawTextSupport()) { + MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer()); + MCOperand &MO_1 = Inst.getOperand(1); + MCOperand &MO_0 = Inst.getOperand(0); + + // push section onto section stack + MES->PushSection(); + + std::string myCharStr; + MCSectionELF *mySection; + + // check if this as an immediate or a symbol + int64_t Value; + bool Absolute = MO_1.getExpr()->evaluateAsAbsolute(Value); + if (Absolute) { + // Create a new section - one for each constant + // Some or all of the zeros are replaced with the given immediate. 
+ if (is32bit) { + std::string myImmStr = utohexstr(static_cast<uint32_t>(Value)); + myCharStr = StringRef(".gnu.linkonce.l4.CONST_00000000") + .drop_back(myImmStr.size()) + .str() + + myImmStr; + } else { + std::string myImmStr = utohexstr(Value); + myCharStr = StringRef(".gnu.linkonce.l8.CONST_0000000000000000") + .drop_back(myImmStr.size()) + .str() + + myImmStr; + } + + mySection = getContext().getELFSection(myCharStr, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE); + } else if (MO_1.isExpr()) { + // .lita - for expressions + myCharStr = ".lita"; + mySection = getContext().getELFSection(myCharStr, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE); + } else + llvm_unreachable("unexpected type of machine operand!"); + + MES->SwitchSection(mySection); + unsigned byteSize = is32bit ? 4 : 8; + getStreamer().EmitCodeAlignment(byteSize, byteSize); + + MCSymbol *Sym; + + // for symbols, get rid of prepended ".gnu.linkonce.lx." + + // emit symbol if needed + if (Absolute) { + Sym = getContext().getOrCreateSymbol(StringRef(myCharStr.c_str() + 16)); + if (Sym->isUndefined()) { + getStreamer().EmitLabel(Sym); + getStreamer().EmitSymbolAttribute(Sym, MCSA_Global); + getStreamer().EmitIntValue(Value, byteSize); + } + } else if (MO_1.isExpr()) { + const char *StringStart = 0; + const char *StringEnd = 0; + if (*Operands[4]->getStartLoc().getPointer() == '#') { + StringStart = Operands[5]->getStartLoc().getPointer(); + StringEnd = Operands[6]->getStartLoc().getPointer(); + } else { // no pound + StringStart = Operands[4]->getStartLoc().getPointer(); + StringEnd = Operands[5]->getStartLoc().getPointer(); + } + + unsigned size = StringEnd - StringStart; + std::string DotConst = ".CONST_"; + Sym = getContext().getOrCreateSymbol(DotConst + + StringRef(StringStart, size)); + + if (Sym->isUndefined()) { + // case where symbol is not yet defined: emit symbol + getStreamer().EmitLabel(Sym); + getStreamer().EmitSymbolAttribute(Sym, MCSA_Local); + getStreamer().EmitValue(MO_1.getExpr(), 4); + } + } else + llvm_unreachable("unexpected type of machine operand!"); + + MES->PopSection(); + + if (Sym) { + MCInst TmpInst; + if (is32bit) // 32 bit + TmpInst.setOpcode(Hexagon::L2_loadrigp); + else // 64 bit + TmpInst.setOpcode(Hexagon::L2_loadrdgp); + + TmpInst.addOperand(MO_0); + TmpInst.addOperand( + MCOperand::createExpr(MCSymbolRefExpr::create(Sym, getContext()))); + Inst = TmpInst; + } + } + break; + + // Translate a "$Rdd = #-imm" to "$Rdd = combine(#[-1,0], #-imm)" + case Hexagon::A2_tfrpi: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO = Inst.getOperand(1); + int64_t Value; + int sVal = (MO.getExpr()->evaluateAsAbsolute(Value) && Value < 0) ? 
-1 : 0; + MCOperand imm(MCOperand::createExpr(MCConstantExpr::create(sVal, Context))); + Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, imm, MO); + break; + } + + // Translate a "$Rdd = [#]#imm" to "$Rdd = combine(#, [#]#imm)" + case Hexagon::TFRI64_V4: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO = Inst.getOperand(1); + int64_t Value; + if (MO.getExpr()->evaluateAsAbsolute(Value)) { + unsigned long long u64 = Value; + signed int s8 = (u64 >> 32) & 0xFFFFFFFF; + if (s8 < -128 || s8 > 127) + OutOfRange(IDLoc, s8, -128); + MCOperand imm(MCOperand::createExpr( + MCConstantExpr::create(s8, Context))); // upper 32 + MCOperand imm2(MCOperand::createExpr( + MCConstantExpr::create(u64 & 0xFFFFFFFF, Context))); // lower 32 + Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, imm2); + } else { + MCOperand imm(MCOperand::createExpr( + MCConstantExpr::create(0, Context))); // upper 32 + Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, MO); + } + break; + } + + // Handle $Rdd = combine(##imm, #imm)" + case Hexagon::TFRI64_V2_ext: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO1 = Inst.getOperand(1); + MCOperand &MO2 = Inst.getOperand(2); + int64_t Value; + if (MO2.getExpr()->evaluateAsAbsolute(Value)) { + int s8 = Value; + if (s8 < -128 || s8 > 127) + OutOfRange(IDLoc, s8, -128); + } + Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, MO1, MO2); + break; + } + + // Handle $Rdd = combine(#imm, ##imm)" + case Hexagon::A4_combineii: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO1 = Inst.getOperand(1); + int64_t Value; + if (MO1.getExpr()->evaluateAsAbsolute(Value)) { + int s8 = Value; + if (s8 < -128 || s8 > 127) + OutOfRange(IDLoc, s8, -128); + } + MCOperand &MO2 = Inst.getOperand(2); + Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, MO1, MO2); + break; + } + + case Hexagon::S2_tableidxb_goodsyntax: { + Inst.setOpcode(Hexagon::S2_tableidxb); + break; + } + + case Hexagon::S2_tableidxh_goodsyntax: { + MCInst TmpInst; + MCOperand &Rx = Inst.getOperand(0); + MCOperand &_dst_ = Inst.getOperand(1); + MCOperand &Rs = Inst.getOperand(2); + MCOperand &Imm4 = Inst.getOperand(3); + MCOperand &Imm6 = Inst.getOperand(4); + Imm6.setExpr(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(1, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_tableidxh); + TmpInst.addOperand(Rx); + TmpInst.addOperand(_dst_); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm4); + TmpInst.addOperand(Imm6); + Inst = TmpInst; + break; + } + + case Hexagon::S2_tableidxw_goodsyntax: { + MCInst TmpInst; + MCOperand &Rx = Inst.getOperand(0); + MCOperand &_dst_ = Inst.getOperand(1); + MCOperand &Rs = Inst.getOperand(2); + MCOperand &Imm4 = Inst.getOperand(3); + MCOperand &Imm6 = Inst.getOperand(4); + Imm6.setExpr(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(2, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_tableidxw); + TmpInst.addOperand(Rx); + TmpInst.addOperand(_dst_); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm4); + TmpInst.addOperand(Imm6); + Inst = TmpInst; + break; + } + + case Hexagon::S2_tableidxd_goodsyntax: { + MCInst TmpInst; + MCOperand &Rx = Inst.getOperand(0); + MCOperand &_dst_ = Inst.getOperand(1); + MCOperand &Rs = Inst.getOperand(2); + MCOperand &Imm4 = Inst.getOperand(3); + MCOperand &Imm6 = Inst.getOperand(4); + Imm6.setExpr(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(3, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_tableidxd); + TmpInst.addOperand(Rx); + TmpInst.addOperand(_dst_); + 
TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm4); + TmpInst.addOperand(Imm6); + Inst = TmpInst; + break; + } + + case Hexagon::M2_mpyui: { + Inst.setOpcode(Hexagon::M2_mpyi); + break; + } + case Hexagon::M2_mpysmi: { + MCInst TmpInst; + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (!MustExtend) { + if (Value < 0 && Value > -256) { + Imm.setExpr(MCConstantExpr::create(Value * -1, Context)); + TmpInst.setOpcode(Hexagon::M2_mpysin); + } else if (Value < 256 && Value >= 0) + TmpInst.setOpcode(Hexagon::M2_mpysip); + else + return Match_InvalidOperand; + } else { + if (Value >= 0) + TmpInst.setOpcode(Hexagon::M2_mpysip); + else + return Match_InvalidOperand; + } + TmpInst.addOperand(Rd); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm); + Inst = TmpInst; + break; + } + + case Hexagon::S2_asr_i_r_rnd_goodsyntax: { + MCOperand &Imm = Inst.getOperand(2); + MCInst TmpInst; + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) { // convert to $Rd = $Rs + TmpInst.setOpcode(Hexagon::A2_tfr); + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + TmpInst.addOperand(Rd); + TmpInst.addOperand(Rs); + } else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd); + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + TmpInst.addOperand(Rd); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm); + } + Inst = TmpInst; + break; + } + + case Hexagon::S2_asr_i_p_rnd_goodsyntax: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &Rss = Inst.getOperand(1); + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) { // convert to $Rdd = combine ($Rs[0], $Rs[1]) + MCInst TmpInst; + unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + Rss.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. 
+ std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + TmpInst.setOpcode(Hexagon::A2_combinew); + TmpInst.addOperand(Rdd); + TmpInst.addOperand(Rss); + TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst = TmpInst; + } else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::S2_asr_i_p_rnd); + } + break; + } + + case Hexagon::A4_boundscheck: { + MCOperand &Rs = Inst.getOperand(1); + unsigned int RegNum = RI->getEncodingValue(Rs.getReg()); + if (RegNum & 1) { // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2 + Inst.setOpcode(Hexagon::A4_boundscheck_hi); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } else { // raw:lo + Inst.setOpcode(Hexagon::A4_boundscheck_lo); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::A2_addsp: { + MCOperand &Rs = Inst.getOperand(1); + unsigned int RegNum = RI->getEncodingValue(Rs.getReg()); + if (RegNum & 1) { // Odd mapped to raw:hi + Inst.setOpcode(Hexagon::A2_addsph); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped raw:lo + Inst.setOpcode(Hexagon::A2_addspl); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::M2_vrcmpys_s1: { + MCOperand &Rt = Inst.getOperand(2); + unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); + if (RegNum & 1) { // Odd mapped to sat:raw:hi + Inst.setOpcode(Hexagon::M2_vrcmpys_s1_h); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped sat:raw:lo + Inst.setOpcode(Hexagon::M2_vrcmpys_s1_l); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::M2_vrcmpys_acc_s1: { + MCInst TmpInst; + MCOperand &Rxx = Inst.getOperand(0); + MCOperand &Rss = Inst.getOperand(2); + MCOperand &Rt = Inst.getOperand(3); + unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); + if (RegNum & 1) { // Odd mapped to sat:raw:hi + TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped sat:raw:lo + TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } + // Registers are in different positions + TmpInst.addOperand(Rxx); + TmpInst.addOperand(Rxx); + TmpInst.addOperand(Rss); + TmpInst.addOperand(Rt); + Inst = TmpInst; + break; + } + + case Hexagon::M2_vrcmpys_s1rp: { + MCOperand &Rt = Inst.getOperand(2); + unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); + if (RegNum & 1) { // Odd mapped to rnd:sat:raw:hi + Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h); + std::string Name = + r + llvm::utostr_32(RegNum) + 
Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped rnd:sat:raw:lo + Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::S5_asrhub_rnd_sat_goodsyntax: { + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) + Inst.setOpcode(Hexagon::S2_vsathub); + else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::S5_asrhub_rnd_sat); + } + break; + } + + case Hexagon::S5_vasrhrnd_goodsyntax: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &Rss = Inst.getOperand(1); + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) { + MCInst TmpInst; + unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + Rss.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + TmpInst.setOpcode(Hexagon::A2_combinew); + TmpInst.addOperand(Rdd); + TmpInst.addOperand(Rss); + TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst = TmpInst; + } else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::S5_vasrhrnd); + } + break; + } + + case Hexagon::A2_not: { + MCInst TmpInst; + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + TmpInst.setOpcode(Hexagon::A2_subri); + TmpInst.addOperand(Rd); + TmpInst.addOperand( + MCOperand::createExpr(MCConstantExpr::create(-1, Context))); + TmpInst.addOperand(Rs); + Inst = TmpInst; + break; + } + } // switch + + return Match_Success; +} diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp new file mode 100644 index 0000000..ea96eb0 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp @@ -0,0 +1,1127 @@ +//===--- BitTracker.cpp ---------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// SSA-based bit propagation. +// +// The purpose of this code is, for a given virtual register, to provide +// information about the value of each bit in the register. The values +// of bits are represented by the class BitValue, and take one of four +// cases: 0, 1, "ref" and "bottom". The 0 and 1 are rather clear, the +// "ref" value means that the bit is a copy of another bit (which itself +// cannot be a copy of yet another bit---such chains are not allowed). +// A "ref" value is associated with a BitRef structure, which indicates +// which virtual register, and which bit in that register is the origin +// of the value. For example, given an instruction +// vreg2 = ASL vreg1, 1 +// assuming that nothing is known about bits of vreg1, bit 1 of vreg2 +// will be a "ref" to (vreg1, 0). 
If there is a subsequent instruction +// vreg3 = ASL vreg2, 2 +// then bit 3 of vreg3 will be a "ref" to (vreg1, 0) as well. +// The "bottom" case means that the bit's value cannot be determined, +// and that this virtual register actually defines it. The "bottom" case +// is discussed in detail in BitTracker.h. In fact, "bottom" is a "ref +// to self", so for the vreg1 above, the bit 0 of it will be a "ref" to +// (vreg1, 0), bit 1 will be a "ref" to (vreg1, 1), etc. +// +// The tracker implements the Wegman-Zadeck algorithm, originally developed +// for SSA-based constant propagation. Each register is represented as +// a sequence of bits, with the convention that bit 0 is the least signi- +// ficant bit. Each bit is propagated individually. The class RegisterCell +// implements the register's representation, and is also the subject of +// the lattice operations in the tracker. +// +// The intended usage of the bit tracker is to create a target-specific +// machine instruction evaluator, pass the evaluator to the BitTracker +// object, and run the tracker. The tracker will then collect the bit +// value information for a given machine function. After that, it can be +// queried for the cells for each virtual register. +// Sample code: +// const TargetSpecificEvaluator TSE(TRI, MRI); +// BitTracker BT(TSE, MF); +// BT.run(); +// ... +// unsigned Reg = interestingRegister(); +// RegisterCell RC = BT.get(Reg); +// if (RC[3].is(1)) +// Reg0bit3 = 1; +// +// The code below is intended to be fully target-independent. + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include "BitTracker.h" + +using namespace llvm; + +typedef BitTracker BT; + +namespace { + // Local trickery to pretty print a register (without the whole "%vreg" + // business). + struct printv { + printv(unsigned r) : R(r) {} + unsigned R; + }; + raw_ostream &operator<< (raw_ostream &OS, const printv &PV) { + if (PV.R) + OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R); + else + OS << 's'; + return OS; + } +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const BT::BitValue &BV) { + switch (BV.Type) { + case BT::BitValue::Top: + OS << 'T'; + break; + case BT::BitValue::Zero: + OS << '0'; + break; + case BT::BitValue::One: + OS << '1'; + break; + case BT::BitValue::Ref: + OS << printv(BV.RefI.Reg) << '[' << BV.RefI.Pos << ']'; + break; + } + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const BT::RegisterCell &RC) { + unsigned n = RC.Bits.size(); + OS << "{ w:" << n; + // Instead of printing each bit value individually, try to group them + // into logical segments, such as sequences of 0 or 1 bits or references + // to consecutive bits (e.g. "bits 3-5 are same as bits 7-9 of reg xyz"). + // "Start" will be the index of the beginning of the most recent segment. + unsigned Start = 0; + bool SeqRef = false; // A sequence of refs to consecutive bits. + bool ConstRef = false; // A sequence of refs to the same bit. + + for (unsigned i = 1, n = RC.Bits.size(); i < n; ++i) { + const BT::BitValue &V = RC[i]; + const BT::BitValue &SV = RC[Start]; + bool IsRef = (V.Type == BT::BitValue::Ref); + // If the current value is the same as Start, skip to the next one. 
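+    // (Grouped this way, a 32-bit cell of all zeros prints roughly as
+    // "{ w:32 [0-31]:0 }", and a cell that mirrors bits 0-31 of virtual
+    // register v5 prints roughly as "{ w:32 [0-31]:v5[0-31] }"; both
+    // outputs are illustrative.)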
+      if (!IsRef && V == SV)
+        continue;
+      if (IsRef && SV.Type == BT::BitValue::Ref && V.RefI.Reg == SV.RefI.Reg) {
+        if (Start+1 == i) {
+          SeqRef = (V.RefI.Pos == SV.RefI.Pos+1);
+          ConstRef = (V.RefI.Pos == SV.RefI.Pos);
+        }
+        if (SeqRef && V.RefI.Pos == SV.RefI.Pos+(i-Start))
+          continue;
+        if (ConstRef && V.RefI.Pos == SV.RefI.Pos)
+          continue;
+      }
+
+      // The current value is different. Print the previous one and reset
+      // the Start.
+      OS << " [" << Start;
+      unsigned Count = i - Start;
+      if (Count == 1) {
+        OS << "]:" << SV;
+      } else {
+        OS << '-' << i-1 << "]:";
+        if (SV.Type == BT::BitValue::Ref && SeqRef)
+          OS << printv(SV.RefI.Reg) << '[' << SV.RefI.Pos << '-'
+             << SV.RefI.Pos+(Count-1) << ']';
+        else
+          OS << SV;
+      }
+      Start = i;
+      SeqRef = ConstRef = false;
+    }
+
+    OS << " [" << Start;
+    unsigned Count = n - Start;
+    if (n-Start == 1) {
+      OS << "]:" << RC[Start];
+    } else {
+      OS << '-' << n-1 << "]:";
+      const BT::BitValue &SV = RC[Start];
+      if (SV.Type == BT::BitValue::Ref && SeqRef)
+        OS << printv(SV.RefI.Reg) << '[' << SV.RefI.Pos << '-'
+           << SV.RefI.Pos+(Count-1) << ']';
+      else
+        OS << SV;
+    }
+    OS << " }";
+
+  return OS;
+}
+
+BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F)
+    : Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {}
+
+BitTracker::~BitTracker() {
+  delete &Map;
+}
+
+
+// If we were allowed to update a cell for a part of a register, the meet
+// operation would need to be parametrized by the register number and the
+// exact part of the register, so that the computed BitRefs correspond to
+// the actual bits of the "self" register.
+// While this cannot happen in the current implementation, I'm not sure
+// if this should be ruled out in the future.
+bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) {
+  // An example when "meet" can be invoked with SelfR == 0 is a phi node
+  // with a physical register as an operand.
+  assert(SelfR == 0 || TargetRegisterInfo::isVirtualRegister(SelfR));
+  bool Changed = false;
+  for (uint16_t i = 0, n = Bits.size(); i < n; ++i) {
+    const BitValue &RCV = RC[i];
+    Changed |= Bits[i].meet(RCV, BitRef(SelfR, i));
+  }
+  return Changed;
+}
+
+
+// Insert the entire cell RC into the current cell at position given by M.
+BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC,
+      const BitMask &M) {
+  uint16_t B = M.first(), E = M.last(), W = width();
+  // Sanity: M must be a valid mask for *this.
+  assert(B < W && E < W);
+  // Sanity: the masked part of *this must have the same number of bits
+  // as the source.
+  assert(B > E || E-B+1 == RC.width());      // B <= E  =>  E-B+1 = |RC|.
+  assert(B <= E || E+(W-B)+1 == RC.width()); // E < B   =>  E+(W-B)+1 = |RC|.
+  if (B <= E) {
+    for (uint16_t i = 0; i <= E-B; ++i)
+      Bits[i+B] = RC[i];
+  } else {
+    for (uint16_t i = 0; i < W-B; ++i)
+      Bits[i+B] = RC[i];
+    for (uint16_t i = 0; i <= E; ++i)
+      Bits[i] = RC[i+(W-B)];
+  }
+  return *this;
+}
+
+
+BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const {
+  uint16_t B = M.first(), E = M.last(), W = width();
+  assert(B < W && E < W);
+  if (B <= E) {
+    RegisterCell RC(E-B+1);
+    for (uint16_t i = B; i <= E; ++i)
+      RC.Bits[i-B] = Bits[i];
+    return RC;
+  }
+
+  RegisterCell RC(E+(W-B)+1);
+  for (uint16_t i = 0; i < W-B; ++i)
+    RC.Bits[i] = Bits[i+B];
+  for (uint16_t i = 0; i <= E; ++i)
+    RC.Bits[i+(W-B)] = Bits[i];
+  return RC;
+}
+
+
+BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) {
+  // Rotate left (i.e. towards increasing bit indices).
+ // Swap the two parts: [0..W-Sh-1] [W-Sh..W-1] + uint16_t W = width(); + Sh = Sh % W; + if (Sh == 0) + return *this; + + RegisterCell Tmp(W-Sh); + // Tmp = [0..W-Sh-1]. + for (uint16_t i = 0; i < W-Sh; ++i) + Tmp[i] = Bits[i]; + // Shift [W-Sh..W-1] to [0..Sh-1]. + for (uint16_t i = 0; i < Sh; ++i) + Bits[i] = Bits[W-Sh+i]; + // Copy Tmp to [Sh..W-1]. + for (uint16_t i = 0; i < W-Sh; ++i) + Bits[i+Sh] = Tmp.Bits[i]; + return *this; +} + + +BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E, + const BitValue &V) { + assert(B <= E); + while (B < E) + Bits[B++] = V; + return *this; +} + + +BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) { + // Append the cell given as the argument to the "this" cell. + // Bit 0 of RC becomes bit W of the result, where W is this->width(). + uint16_t W = width(), WRC = RC.width(); + Bits.resize(W+WRC); + for (uint16_t i = 0; i < WRC; ++i) + Bits[i+W] = RC.Bits[i]; + return *this; +} + + +uint16_t BT::RegisterCell::ct(bool B) const { + uint16_t W = width(); + uint16_t C = 0; + BitValue V = B; + while (C < W && Bits[C] == V) + C++; + return C; +} + + +uint16_t BT::RegisterCell::cl(bool B) const { + uint16_t W = width(); + uint16_t C = 0; + BitValue V = B; + while (C < W && Bits[W-(C+1)] == V) + C++; + return C; +} + + +bool BT::RegisterCell::operator== (const RegisterCell &RC) const { + uint16_t W = Bits.size(); + if (RC.Bits.size() != W) + return false; + for (uint16_t i = 0; i < W; ++i) + if (Bits[i] != RC[i]) + return false; + return true; +} + + +uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { + // The general problem is with finding a register class that corresponds + // to a given reference reg:sub. There can be several such classes, and + // since we only care about the register size, it does not matter which + // such class we would find. + // The easiest way to accomplish what we want is to + // 1. find a physical register PhysR from the same class as RR.Reg, + // 2. find a physical register PhysS that corresponds to PhysR:RR.Sub, + // 3. find a register class that contains PhysS. + unsigned PhysR; + if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) { + const TargetRegisterClass *VC = MRI.getRegClass(RR.Reg); + assert(VC->begin() != VC->end() && "Empty register class"); + PhysR = *VC->begin(); + } else { + assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg)); + PhysR = RR.Reg; + } + + unsigned PhysS = (RR.Sub == 0) ? PhysR : TRI.getSubReg(PhysR, RR.Sub); + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PhysS); + uint16_t BW = RC->getSize()*8; + return BW; +} + + +BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR, + const CellMapType &M) const { + uint16_t BW = getRegBitWidth(RR); + + // Physical registers are assumed to be present in the map with an unknown + // value. Don't actually insert anything in the map, just return the cell. + if (TargetRegisterInfo::isPhysicalRegister(RR.Reg)) + return RegisterCell::self(0, BW); + + assert(TargetRegisterInfo::isVirtualRegister(RR.Reg)); + // For virtual registers that belong to a class that is not tracked, + // generate an "unknown" value as well. + const TargetRegisterClass *C = MRI.getRegClass(RR.Reg); + if (!track(C)) + return RegisterCell::self(0, BW); + + CellMapType::const_iterator F = M.find(RR.Reg); + if (F != M.end()) { + if (!RR.Sub) + return F->second; + BitMask M = mask(RR.Reg, RR.Sub); + return F->second.extract(M); + } + // If not found, create a "top" entry, but do not insert it in the map. 
+ return RegisterCell::top(BW); +} + + +void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC, + CellMapType &M) const { + // While updating the cell map can be done in a meaningful way for + // a part of a register, it makes little sense to implement it as the + // SSA representation would never contain such "partial definitions". + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return; + assert(RR.Sub == 0 && "Unexpected sub-register in definition"); + // Eliminate all ref-to-reg-0 bit values: replace them with "self". + for (unsigned i = 0, n = RC.width(); i < n; ++i) { + const BitValue &V = RC[i]; + if (V.Type == BitValue::Ref && V.RefI.Reg == 0) + RC[i].RefI = BitRef(RR.Reg, i); + } + M[RR.Reg] = RC; +} + + +// Check if the cell represents a compile-time integer value. +bool BT::MachineEvaluator::isInt(const RegisterCell &A) const { + uint16_t W = A.width(); + for (uint16_t i = 0; i < W; ++i) + if (!A[i].is(0) && !A[i].is(1)) + return false; + return true; +} + + +// Convert a cell to the integer value. The result must fit in uint64_t. +uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const { + assert(isInt(A)); + uint64_t Val = 0; + uint16_t W = A.width(); + for (uint16_t i = 0; i < W; ++i) { + Val <<= 1; + Val |= A[i].is(1); + } + return Val; +} + + +// Evaluator helper functions. These implement some common operation on +// register cells that can be used to implement target-specific instructions +// in a target-specific evaluator. + +BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const { + RegisterCell Res(W); + // For bits beyond the 63rd, this will generate the sign bit of V. + for (uint16_t i = 0; i < W; ++i) { + Res[i] = BitValue(V & 1); + V >>= 1; + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const { + APInt A = CI->getValue(); + uint16_t BW = A.getBitWidth(); + assert((unsigned)BW == A.getBitWidth() && "BitWidth overflow"); + RegisterCell Res(BW); + for (uint16_t i = 0; i < BW; ++i) + Res[i] = A[i]; + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + bool Carry = false; + uint16_t I; + for (I = 0; I < W; ++I) { + const BitValue &V1 = A1[I]; + const BitValue &V2 = A2[I]; + if (!V1.num() || !V2.num()) + break; + unsigned S = bool(V1) + bool(V2) + Carry; + Res[I] = BitValue(S & 1); + Carry = (S > 1); + } + for (; I < W; ++I) { + const BitValue &V1 = A1[I]; + const BitValue &V2 = A2[I]; + // If the next bit is same as Carry, the result will be 0 plus the + // other bit. The Carry bit will remain unchanged. 
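+    // For instance (illustrative): adding a known 0 bit to an unknown bit x
+    // with no pending carry gives x and leaves the carry at 0; likewise
+    // 1 + x with a pending carry gives x (mod 2) and keeps the carry at 1.
+    // Both cases are the "V1.is(Carry)" test below, which makes the result
+    // bit a ref to the other operand's bit.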
+ if (V1.is(Carry)) + Res[I] = BitValue::ref(V2); + else if (V2.is(Carry)) + Res[I] = BitValue::ref(V1); + else + break; + } + for (; I < W; ++I) + Res[I] = BitValue::self(); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + bool Borrow = false; + uint16_t I; + for (I = 0; I < W; ++I) { + const BitValue &V1 = A1[I]; + const BitValue &V2 = A2[I]; + if (!V1.num() || !V2.num()) + break; + unsigned S = bool(V1) - bool(V2) - Borrow; + Res[I] = BitValue(S & 1); + Borrow = (S > 1); + } + for (; I < W; ++I) { + const BitValue &V1 = A1[I]; + const BitValue &V2 = A2[I]; + if (V1.is(Borrow)) { + Res[I] = BitValue::ref(V2); + break; + } + if (V2.is(Borrow)) + Res[I] = BitValue::ref(V1); + else + break; + } + for (; I < W; ++I) + Res[I] = BitValue::self(); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eMLS(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width() + A2.width(); + uint16_t Z = A1.ct(0) + A2.ct(0); + RegisterCell Res(W); + Res.fill(0, Z, BitValue::Zero); + Res.fill(Z, W, BitValue::self()); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eMLU(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width() + A2.width(); + uint16_t Z = A1.ct(0) + A2.ct(0); + RegisterCell Res(W); + Res.fill(0, Z, BitValue::Zero); + Res.fill(Z, W, BitValue::self()); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1, + uint16_t Sh) const { + assert(Sh <= A1.width()); + RegisterCell Res = RegisterCell::ref(A1); + Res.rol(Sh); + Res.fill(0, Sh, BitValue::Zero); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1, + uint16_t Sh) const { + uint16_t W = A1.width(); + assert(Sh <= W); + RegisterCell Res = RegisterCell::ref(A1); + Res.rol(W-Sh); + Res.fill(W-Sh, W, BitValue::Zero); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1, + uint16_t Sh) const { + uint16_t W = A1.width(); + assert(Sh <= W); + RegisterCell Res = RegisterCell::ref(A1); + BitValue Sign = Res[W-1]; + Res.rol(W-Sh); + Res.fill(W-Sh, W, Sign); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + for (uint16_t i = 0; i < W; ++i) { + const BitValue &V1 = A1[i]; + const BitValue &V2 = A2[i]; + if (V1.is(1)) + Res[i] = BitValue::ref(V2); + else if (V2.is(1)) + Res[i] = BitValue::ref(V1); + else if (V1.is(0) || V2.is(0)) + Res[i] = BitValue::Zero; + else if (V1 == V2) + Res[i] = V1; + else + Res[i] = BitValue::self(); + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + for (uint16_t i = 0; i < W; ++i) { + const BitValue &V1 = A1[i]; + const BitValue &V2 = A2[i]; + if (V1.is(1) || V2.is(1)) + Res[i] = BitValue::One; + else if (V1.is(0)) + Res[i] = BitValue::ref(V2); + else if (V2.is(0)) + Res[i] = BitValue::ref(V1); + else if (V1 == V2) + Res[i] = V1; + else + Res[i] = BitValue::self(); + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + for (uint16_t i = 0; i < W; 
++i) { + const BitValue &V1 = A1[i]; + const BitValue &V2 = A2[i]; + if (V1.is(0)) + Res[i] = BitValue::ref(V2); + else if (V2.is(0)) + Res[i] = BitValue::ref(V1); + else if (V1 == V2) + Res[i] = BitValue::Zero; + else + Res[i] = BitValue::self(); + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const { + uint16_t W = A1.width(); + RegisterCell Res(W); + for (uint16_t i = 0; i < W; ++i) { + const BitValue &V = A1[i]; + if (V.is(0)) + Res[i] = BitValue::One; + else if (V.is(1)) + Res[i] = BitValue::Zero; + else + Res[i] = BitValue::self(); + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1, + uint16_t BitN) const { + assert(BitN < A1.width()); + RegisterCell Res = RegisterCell::ref(A1); + Res[BitN] = BitValue::One; + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1, + uint16_t BitN) const { + assert(BitN < A1.width()); + RegisterCell Res = RegisterCell::ref(A1); + Res[BitN] = BitValue::Zero; + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B, + uint16_t W) const { + uint16_t C = A1.cl(B), AW = A1.width(); + // If the last leading non-B bit is not a constant, then we don't know + // the real count. + if ((C < AW && A1[AW-1-C].num()) || C == AW) + return eIMM(C, W); + return RegisterCell::self(0, W); +} + + +BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B, + uint16_t W) const { + uint16_t C = A1.ct(B), AW = A1.width(); + // If the last trailing non-B bit is not a constant, then we don't know + // the real count. + if ((C < AW && A1[C].num()) || C == AW) + return eIMM(C, W); + return RegisterCell::self(0, W); +} + + +BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1, + uint16_t FromN) const { + uint16_t W = A1.width(); + assert(FromN <= W); + RegisterCell Res = RegisterCell::ref(A1); + BitValue Sign = Res[FromN-1]; + // Sign-extend "inreg". + Res.fill(FromN, W, Sign); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1, + uint16_t FromN) const { + uint16_t W = A1.width(); + assert(FromN <= W); + RegisterCell Res = RegisterCell::ref(A1); + Res.fill(FromN, W, BitValue::Zero); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1, + uint16_t B, uint16_t E) const { + uint16_t W = A1.width(); + assert(B < W && E <= W); + if (B == E) + return RegisterCell(0); + uint16_t Last = (E > 0) ? E-1 : W-1; + RegisterCell Res = RegisterCell::ref(A1).extract(BT::BitMask(B, Last)); + // Return shorter cell. + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1, + const RegisterCell &A2, uint16_t AtN) const { + uint16_t W1 = A1.width(), W2 = A2.width(); + (void)W1; + assert(AtN < W1 && AtN+W2 <= W1); + // Copy bits from A1, insert A2 at position AtN. 
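+  // For example (illustrative widths): with A1 32 bits wide and A2 8 bits
+  // wide, AtN == 16 produces a copy of A1 whose bits 16-23 are refs to
+  // A2[0]..A2[7].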
+ RegisterCell Res = RegisterCell::ref(A1); + if (W2 > 0) + Res.insert(RegisterCell::ref(A2), BT::BitMask(AtN, AtN+W2-1)); + return Res; +} + + +BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const { + assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0"); + uint16_t W = getRegBitWidth(Reg); + assert(W > 0 && "Cannot generate mask for empty register"); + return BitMask(0, W-1); +} + + +bool BT::MachineEvaluator::evaluate(const MachineInstr *MI, + const CellMapType &Inputs, CellMapType &Outputs) const { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case TargetOpcode::REG_SEQUENCE: { + RegisterRef RD = MI->getOperand(0); + assert(RD.Sub == 0); + RegisterRef RS = MI->getOperand(1); + unsigned SS = MI->getOperand(2).getImm(); + RegisterRef RT = MI->getOperand(3); + unsigned ST = MI->getOperand(4).getImm(); + assert(SS != ST); + + uint16_t W = getRegBitWidth(RD); + RegisterCell Res(W); + Res.insert(RegisterCell::ref(getCell(RS, Inputs)), mask(RD.Reg, SS)); + Res.insert(RegisterCell::ref(getCell(RT, Inputs)), mask(RD.Reg, ST)); + putCell(RD, Res, Outputs); + break; + } + + case TargetOpcode::COPY: { + // COPY can transfer a smaller register into a wider one. + // If that is the case, fill the remaining high bits with 0. + RegisterRef RD = MI->getOperand(0); + RegisterRef RS = MI->getOperand(1); + assert(RD.Sub == 0); + uint16_t WD = getRegBitWidth(RD); + uint16_t WS = getRegBitWidth(RS); + assert(WD >= WS); + RegisterCell Src = getCell(RS, Inputs); + RegisterCell Res(WD); + Res.insert(Src, BitMask(0, WS-1)); + Res.fill(WS, WD, BitValue::Zero); + putCell(RD, Res, Outputs); + break; + } + + default: + return false; + } + + return true; +} + + +// Main W-Z implementation. + +void BT::visitPHI(const MachineInstr *PI) { + int ThisN = PI->getParent()->getNumber(); + if (Trace) + dbgs() << "Visit FI(BB#" << ThisN << "): " << *PI; + + const MachineOperand &MD = PI->getOperand(0); + assert(MD.getSubReg() == 0 && "Unexpected sub-register in definition"); + RegisterRef DefRR(MD); + uint16_t DefBW = ME.getRegBitWidth(DefRR); + + RegisterCell DefC = ME.getCell(DefRR, Map); + if (DefC == RegisterCell::self(DefRR.Reg, DefBW)) // XXX slow + return; + + bool Changed = false; + + for (unsigned i = 1, n = PI->getNumOperands(); i < n; i += 2) { + const MachineBasicBlock *PB = PI->getOperand(i+1).getMBB(); + int PredN = PB->getNumber(); + if (Trace) + dbgs() << " edge BB#" << PredN << "->BB#" << ThisN; + if (!EdgeExec.count(CFGEdge(PredN, ThisN))) { + if (Trace) + dbgs() << " not executable\n"; + continue; + } + + RegisterRef RU = PI->getOperand(i); + RegisterCell ResC = ME.getCell(RU, Map); + if (Trace) + dbgs() << " input reg: " << PrintReg(RU.Reg, &ME.TRI, RU.Sub) + << " cell: " << ResC << "\n"; + Changed |= DefC.meet(ResC, DefRR.Reg); + } + + if (Changed) { + if (Trace) + dbgs() << "Output: " << PrintReg(DefRR.Reg, &ME.TRI, DefRR.Sub) + << " cell: " << DefC << "\n"; + ME.putCell(DefRR, DefC, Map); + visitUsesOf(DefRR.Reg); + } +} + + +void BT::visitNonBranch(const MachineInstr *MI) { + if (Trace) { + int ThisN = MI->getParent()->getNumber(); + dbgs() << "Visit MI(BB#" << ThisN << "): " << *MI; + } + if (MI->isDebugValue()) + return; + assert(!MI->isBranch() && "Unexpected branch instruction"); + + CellMapType ResMap; + bool Eval = ME.evaluate(MI, Map, ResMap); + + if (Trace && Eval) { + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + RegisterRef RU(MO); + dbgs() << 
" input reg: " << PrintReg(RU.Reg, &ME.TRI, RU.Sub) + << " cell: " << ME.getCell(RU, Map) << "\n"; + } + dbgs() << "Outputs:\n"; + for (CellMapType::iterator I = ResMap.begin(), E = ResMap.end(); + I != E; ++I) { + RegisterRef RD(I->first); + dbgs() << " " << PrintReg(I->first, &ME.TRI) << " cell: " + << ME.getCell(RD, ResMap) << "\n"; + } + } + + // Iterate over all definitions of the instruction, and update the + // cells accordingly. + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + // Visit register defs only. + if (!MO.isReg() || !MO.isDef()) + continue; + RegisterRef RD(MO); + assert(RD.Sub == 0 && "Unexpected sub-register in definition"); + if (!TargetRegisterInfo::isVirtualRegister(RD.Reg)) + continue; + + bool Changed = false; + if (!Eval || ResMap.count(RD.Reg) == 0) { + // Set to "ref" (aka "bottom"). + uint16_t DefBW = ME.getRegBitWidth(RD); + RegisterCell RefC = RegisterCell::self(RD.Reg, DefBW); + if (RefC != ME.getCell(RD, Map)) { + ME.putCell(RD, RefC, Map); + Changed = true; + } + } else { + RegisterCell DefC = ME.getCell(RD, Map); + RegisterCell ResC = ME.getCell(RD, ResMap); + // This is a non-phi instruction, so the values of the inputs come + // from the same registers each time this instruction is evaluated. + // During the propagation, the values of the inputs can become lowered + // in the sense of the lattice operation, which may cause different + // results to be calculated in subsequent evaluations. This should + // not cause the bottoming of the result in the map, since the new + // result is already reflecting the lowered inputs. + for (uint16_t i = 0, w = DefC.width(); i < w; ++i) { + BitValue &V = DefC[i]; + // Bits that are already "bottom" should not be updated. + if (V.Type == BitValue::Ref && V.RefI.Reg == RD.Reg) + continue; + // Same for those that are identical in DefC and ResC. + if (V == ResC[i]) + continue; + V = ResC[i]; + Changed = true; + } + if (Changed) + ME.putCell(RD, DefC, Map); + } + if (Changed) + visitUsesOf(RD.Reg); + } +} + + +void BT::visitBranchesFrom(const MachineInstr *BI) { + const MachineBasicBlock &B = *BI->getParent(); + MachineBasicBlock::const_iterator It = BI, End = B.end(); + BranchTargetList Targets, BTs; + bool FallsThrough = true, DefaultToAll = false; + int ThisN = B.getNumber(); + + do { + BTs.clear(); + const MachineInstr *MI = &*It; + if (Trace) + dbgs() << "Visit BR(BB#" << ThisN << "): " << *MI; + assert(MI->isBranch() && "Expecting branch instruction"); + InstrExec.insert(MI); + bool Eval = ME.evaluate(MI, Map, BTs, FallsThrough); + if (!Eval) { + // If the evaluation failed, we will add all targets. Keep going in + // the loop to mark all executable branches as such. + DefaultToAll = true; + FallsThrough = true; + if (Trace) + dbgs() << " failed to evaluate: will add all CFG successors\n"; + } else if (!DefaultToAll) { + // If evaluated successfully add the targets to the cumulative list. + if (Trace) { + dbgs() << " adding targets:"; + for (unsigned i = 0, n = BTs.size(); i < n; ++i) + dbgs() << " BB#" << BTs[i]->getNumber(); + if (FallsThrough) + dbgs() << "\n falls through\n"; + else + dbgs() << "\n does not fall through\n"; + } + Targets.insert(BTs.begin(), BTs.end()); + } + ++It; + } while (FallsThrough && It != End); + + typedef MachineBasicBlock::const_succ_iterator succ_iterator; + if (!DefaultToAll) { + // Need to add all CFG successors that lead to EH landing pads. 
+ // There won't be explicit branches to these blocks, but they must + // be processed. + for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) { + const MachineBasicBlock *SB = *I; + if (SB->isEHPad()) + Targets.insert(SB); + } + if (FallsThrough) { + MachineFunction::const_iterator BIt = B.getIterator(); + MachineFunction::const_iterator Next = std::next(BIt); + if (Next != MF.end()) + Targets.insert(&*Next); + } + } else { + for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) + Targets.insert(*I); + } + + for (unsigned i = 0, n = Targets.size(); i < n; ++i) { + int TargetN = Targets[i]->getNumber(); + FlowQ.push(CFGEdge(ThisN, TargetN)); + } +} + + +void BT::visitUsesOf(unsigned Reg) { + if (Trace) + dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n"; + + typedef MachineRegisterInfo::use_nodbg_iterator use_iterator; + use_iterator End = MRI.use_nodbg_end(); + for (use_iterator I = MRI.use_nodbg_begin(Reg); I != End; ++I) { + MachineInstr *UseI = I->getParent(); + if (!InstrExec.count(UseI)) + continue; + if (UseI->isPHI()) + visitPHI(UseI); + else if (!UseI->isBranch()) + visitNonBranch(UseI); + else + visitBranchesFrom(UseI); + } +} + + +BT::RegisterCell BT::get(RegisterRef RR) const { + return ME.getCell(RR, Map); +} + + +void BT::put(RegisterRef RR, const RegisterCell &RC) { + ME.putCell(RR, RC, Map); +} + + +// Replace all references to bits from OldRR with the corresponding bits +// in NewRR. +void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { + assert(Map.count(OldRR.Reg) > 0 && "OldRR not present in map"); + BitMask OM = ME.mask(OldRR.Reg, OldRR.Sub); + BitMask NM = ME.mask(NewRR.Reg, NewRR.Sub); + uint16_t OMB = OM.first(), OME = OM.last(); + uint16_t NMB = NM.first(), NME = NM.last(); + (void)NME; + assert((OME-OMB == NME-NMB) && + "Substituting registers of different lengths"); + for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) { + RegisterCell &RC = I->second; + for (uint16_t i = 0, w = RC.width(); i < w; ++i) { + BitValue &V = RC[i]; + if (V.Type != BitValue::Ref || V.RefI.Reg != OldRR.Reg) + continue; + if (V.RefI.Pos < OMB || V.RefI.Pos > OME) + continue; + V.RefI.Reg = NewRR.Reg; + V.RefI.Pos += NMB-OMB; + } + } +} + + +// Check if the block has been "executed" during propagation. (If not, the +// block is dead, but it may still appear to be reachable.) +bool BT::reached(const MachineBasicBlock *B) const { + int BN = B->getNumber(); + assert(BN >= 0); + for (EdgeSetType::iterator I = EdgeExec.begin(), E = EdgeExec.end(); + I != E; ++I) { + if (I->second == BN) + return true; + } + return false; +} + + +void BT::reset() { + EdgeExec.clear(); + InstrExec.clear(); + Map.clear(); +} + + +void BT::run() { + reset(); + assert(FlowQ.empty()); + + typedef GraphTraits<const MachineFunction*> MachineFlowGraphTraits; + const MachineBasicBlock *Entry = MachineFlowGraphTraits::getEntryNode(&MF); + + unsigned MaxBN = 0; + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + assert(I->getNumber() >= 0 && "Disconnected block"); + unsigned BN = I->getNumber(); + if (BN > MaxBN) + MaxBN = BN; + } + + // Keep track of visited blocks. + BitVector BlockScanned(MaxBN+1); + + int EntryN = Entry->getNumber(); + // Generate a fake edge to get something to start with. 
+ FlowQ.push(CFGEdge(-1, EntryN)); + + while (!FlowQ.empty()) { + CFGEdge Edge = FlowQ.front(); + FlowQ.pop(); + + if (EdgeExec.count(Edge)) + continue; + EdgeExec.insert(Edge); + + const MachineBasicBlock &B = *MF.getBlockNumbered(Edge.second); + MachineBasicBlock::const_iterator It = B.begin(), End = B.end(); + // Visit PHI nodes first. + while (It != End && It->isPHI()) { + const MachineInstr *PI = &*It++; + InstrExec.insert(PI); + visitPHI(PI); + } + + // If this block has already been visited through a flow graph edge, + // then the instructions have already been processed. Any updates to + // the cells would now only happen through visitUsesOf... + if (BlockScanned[Edge.second]) + continue; + BlockScanned[Edge.second] = true; + + // Visit non-branch instructions. + while (It != End && !It->isBranch()) { + const MachineInstr *MI = &*It++; + InstrExec.insert(MI); + visitNonBranch(MI); + } + // If block end has been reached, add the fall-through edge to the queue. + if (It == End) { + MachineFunction::const_iterator BIt = B.getIterator(); + MachineFunction::const_iterator Next = std::next(BIt); + if (Next != MF.end() && B.isSuccessor(&*Next)) { + int ThisN = B.getNumber(); + int NextN = Next->getNumber(); + FlowQ.push(CFGEdge(ThisN, NextN)); + } + } else { + // Handle the remaining sequence of branches. This function will update + // the work queue. + visitBranchesFrom(It); + } + } // while (!FlowQ->empty()) + + if (Trace) { + dbgs() << "Cells after propagation:\n"; + for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) + dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n"; + } +} + diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.h b/contrib/llvm/lib/Target/Hexagon/BitTracker.h new file mode 100644 index 0000000..959c831 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.h @@ -0,0 +1,435 @@ +//===--- BitTracker.h -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITTRACKER_H
+#define BITTRACKER_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+#include <map>
+#include <queue>
+#include <set>
+
+namespace llvm {
+  class ConstantInt;
+  class MachineRegisterInfo;
+  class MachineBasicBlock;
+  class MachineInstr;
+  class MachineOperand;
+  class raw_ostream;
+
+struct BitTracker {
+  struct BitRef;
+  struct RegisterRef;
+  struct BitValue;
+  struct BitMask;
+  struct RegisterCell;
+  struct MachineEvaluator;
+
+  typedef SetVector<const MachineBasicBlock *> BranchTargetList;
+
+  typedef std::map<unsigned, RegisterCell> CellMapType;
+
+  BitTracker(const MachineEvaluator &E, MachineFunction &F);
+  ~BitTracker();
+
+  void run();
+  void trace(bool On = false) { Trace = On; }
+  bool has(unsigned Reg) const;
+  const RegisterCell &lookup(unsigned Reg) const;
+  RegisterCell get(RegisterRef RR) const;
+  void put(RegisterRef RR, const RegisterCell &RC);
+  void subst(RegisterRef OldRR, RegisterRef NewRR);
+  bool reached(const MachineBasicBlock *B) const;
+
+private:
+  void visitPHI(const MachineInstr *PI);
+  void visitNonBranch(const MachineInstr *MI);
+  void visitBranchesFrom(const MachineInstr *BI);
+  void visitUsesOf(unsigned Reg);
+  void reset();
+
+  typedef std::pair<int,int> CFGEdge;
+  typedef std::set<CFGEdge> EdgeSetType;
+  typedef std::set<const MachineInstr *> InstrSetType;
+  typedef std::queue<CFGEdge> EdgeQueueType;
+
+  EdgeSetType EdgeExec;    // Executable flow graph edges.
+  InstrSetType InstrExec;  // Executable instructions.
+  EdgeQueueType FlowQ;     // Work queue of CFG edges.
+  bool Trace;              // Enable tracing for debugging.
+
+  const MachineEvaluator &ME;
+  MachineFunction &MF;
+  MachineRegisterInfo &MRI;
+  CellMapType &Map;
+};
+
+
+// Abstraction of a reference to bit at position Pos from a register Reg.
+struct BitTracker::BitRef {
+  BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {}
+  bool operator== (const BitRef &BR) const {
+    // If Reg is 0, disregard Pos.
+    return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos);
+  }
+  unsigned Reg;
+  uint16_t Pos;
+};
+
+
+// Abstraction of a register reference in MachineOperand. It contains the
+// register number and the subregister index.
+struct BitTracker::RegisterRef {
+  RegisterRef(unsigned R = 0, unsigned S = 0)
+    : Reg(R), Sub(S) {}
+  RegisterRef(const MachineOperand &MO)
+    : Reg(MO.getReg()), Sub(MO.getSubReg()) {}
+  unsigned Reg, Sub;
+};
+
+
+// Value that a single bit can take. This is outside of the context of
+// any register, it is more of an abstraction of the two-element set of
+// possible bit values. One extension here is the "Ref" type, which
+// indicates that this bit takes the same value as the bit described by
+// RefInfo.
+struct BitTracker::BitValue {
+  enum ValueType {
+    Top,    // Bit not yet defined.
+    Zero,   // Bit = 0.
+    One,    // Bit = 1.
+    Ref     // Bit value same as the one described in RefI.
+    // Conceptually, there is no explicit "bottom" value: the lattice's
+    // bottom will be expressed as a "ref to itself", which, in the context
+    // of registers, could be read as "this value of this bit is defined by
+    // this bit".
+    // The ordering is:
+    //   x <= Top,
+    //   Self <= x, where "Self" is "ref to itself".
+    // This makes the value lattice different for each virtual register
+    // (even for each bit in the same virtual register), since the "bottom"
+    // for one register will be a simple "ref" for another register.
+ // Since we do not store the "Self" bit and register number, the meet + // operation will need to take it as a parameter. + // + // In practice there is a special case for values that are not associa- + // ted with any specific virtual register. An example would be a value + // corresponding to a bit of a physical register, or an intermediate + // value obtained in some computation (such as instruction evaluation). + // Such cases are identical to the usual Ref type, but the register + // number is 0. In such case the Pos field of the reference is ignored. + // + // What is worthy of notice is that in value V (that is a "ref"), as long + // as the RefI.Reg is not 0, it may actually be the same register as the + // one in which V will be contained. If the RefI.Pos refers to the posi- + // tion of V, then V is assumed to be "bottom" (as a "ref to itself"), + // otherwise V is taken to be identical to the referenced bit of the + // same register. + // If RefI.Reg is 0, however, such a reference to the same register is + // not possible. Any value V that is a "ref", and whose RefI.Reg is 0 + // is treated as "bottom". + }; + ValueType Type; + BitRef RefI; + + BitValue(ValueType T = Top) : Type(T) {} + BitValue(bool B) : Type(B ? One : Zero) {} + BitValue(unsigned Reg, uint16_t Pos) : Type(Ref), RefI(Reg, Pos) {} + + bool operator== (const BitValue &V) const { + if (Type != V.Type) + return false; + if (Type == Ref && !(RefI == V.RefI)) + return false; + return true; + } + bool operator!= (const BitValue &V) const { + return !operator==(V); + } + bool is(unsigned T) const { + assert(T == 0 || T == 1); + return T == 0 ? Type == Zero + : (T == 1 ? Type == One : false); + } + + // The "meet" operation is the "." operation in a semilattice (L, ., T, B): + // (1) x.x = x + // (2) x.y = y.x + // (3) x.(y.z) = (x.y).z + // (4) x.T = x (i.e. T = "top") + // (5) x.B = B (i.e. B = "bottom") + // + // This "meet" function will update the value of the "*this" object with + // the newly calculated one, and return "true" if the value of *this has + // changed, and "false" otherwise. + // To prove that it satisfies the conditions (1)-(5), it is sufficient + // to show that a relation + // x <= y <=> x.y = x + // defines a partial order (i.e. that "meet" is same as "infimum"). + bool meet(const BitValue &V, const BitRef &Self) { + // First, check the cases where there is nothing to be done. + if (Type == Ref && RefI == Self) // Bottom.meet(V) = Bottom (i.e. This) + return false; + if (V.Type == Top) // This.meet(Top) = This + return false; + if (*this == V) // This.meet(This) = This + return false; + + // At this point, we know that the value of "this" will change. + // If it is Top, it will become the same as V, otherwise it will + // become "bottom" (i.e. Self). + if (Type == Top) { + Type = V.Type; + RefI = V.RefI; // This may be irrelevant, but copy anyway. + return true; + } + // Become "bottom". + Type = Ref; + RefI = Self; + return true; + } + + // Create a reference to the bit value V. + static BitValue ref(const BitValue &V); + // Create a "self". + static BitValue self(const BitRef &Self = BitRef()); + + bool num() const { + return Type == Zero || Type == One; + } + operator bool() const { + assert(Type == Zero || Type == One); + return Type == One; + } + + friend raw_ostream &operator<<(raw_ostream &OS, const BitValue &BV); +}; + + +// This operation must be idempotent, i.e. ref(ref(V)) == ref(V). 
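+// A short worked example (illustrative register numbers): if bit 3 of vreg7
+// is currently Top and an incoming value is a ref to (vreg5, 0), meet()
+// lowers it to that ref.  If a later round of propagation delivers a
+// conflicting value (say, the constant 1), meet() lowers the bit to
+// "bottom", i.e. a ref to (vreg7, 3) itself.  Since chains of refs are not
+// allowed, ref() below collapses a reference to a "ref" bit into a
+// reference to the bit it already points at, which is what the idempotency
+// requirement above expresses.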
+inline BitTracker::BitValue +BitTracker::BitValue::ref(const BitValue &V) { + if (V.Type != Ref) + return BitValue(V.Type); + if (V.RefI.Reg != 0) + return BitValue(V.RefI.Reg, V.RefI.Pos); + return self(); +} + + +inline BitTracker::BitValue +BitTracker::BitValue::self(const BitRef &Self) { + return BitValue(Self.Reg, Self.Pos); +} + + +// A sequence of bits starting from index B up to and including index E. +// If E < B, the mask represents two sections: [0..E] and [B..W) where +// W is the width of the register. +struct BitTracker::BitMask { + BitMask() : B(0), E(0) {} + BitMask(uint16_t b, uint16_t e) : B(b), E(e) {} + uint16_t first() const { return B; } + uint16_t last() const { return E; } +private: + uint16_t B, E; +}; + + +// Representation of a register: a list of BitValues. +struct BitTracker::RegisterCell { + RegisterCell(uint16_t Width = DefaultBitN) : Bits(Width) {} + + uint16_t width() const { + return Bits.size(); + } + const BitValue &operator[](uint16_t BitN) const { + assert(BitN < Bits.size()); + return Bits[BitN]; + } + BitValue &operator[](uint16_t BitN) { + assert(BitN < Bits.size()); + return Bits[BitN]; + } + + bool meet(const RegisterCell &RC, unsigned SelfR); + RegisterCell &insert(const RegisterCell &RC, const BitMask &M); + RegisterCell extract(const BitMask &M) const; // Returns a new cell. + RegisterCell &rol(uint16_t Sh); // Rotate left. + RegisterCell &fill(uint16_t B, uint16_t E, const BitValue &V); + RegisterCell &cat(const RegisterCell &RC); // Concatenate. + uint16_t cl(bool B) const; + uint16_t ct(bool B) const; + + bool operator== (const RegisterCell &RC) const; + bool operator!= (const RegisterCell &RC) const { + return !operator==(RC); + } + + // Generate a "ref" cell for the corresponding register. In the resulting + // cell each bit will be described as being the same as the corresponding + // bit in register Reg (i.e. the cell is "defined" by register Reg). + static RegisterCell self(unsigned Reg, uint16_t Width); + // Generate a "top" cell of given size. + static RegisterCell top(uint16_t Width); + // Generate a cell that is a "ref" to another cell. + static RegisterCell ref(const RegisterCell &C); + +private: + // The DefaultBitN is here only to avoid frequent reallocation of the + // memory in the vector. + static const unsigned DefaultBitN = 32; + typedef SmallVector<BitValue, DefaultBitN> BitValueList; + BitValueList Bits; + + friend raw_ostream &operator<<(raw_ostream &OS, const RegisterCell &RC); +}; + + +inline bool BitTracker::has(unsigned Reg) const { + return Map.find(Reg) != Map.end(); +} + + +inline const BitTracker::RegisterCell& +BitTracker::lookup(unsigned Reg) const { + CellMapType::const_iterator F = Map.find(Reg); + assert(F != Map.end()); + return F->second; +} + + +inline BitTracker::RegisterCell +BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) { + RegisterCell RC(Width); + for (uint16_t i = 0; i < Width; ++i) + RC.Bits[i] = BitValue::self(BitRef(Reg, i)); + return RC; +} + + +inline BitTracker::RegisterCell +BitTracker::RegisterCell::top(uint16_t Width) { + RegisterCell RC(Width); + for (uint16_t i = 0; i < Width; ++i) + RC.Bits[i] = BitValue(BitValue::Top); + return RC; +} + + +inline BitTracker::RegisterCell +BitTracker::RegisterCell::ref(const RegisterCell &C) { + uint16_t W = C.width(); + RegisterCell RC(W); + for (unsigned i = 0; i < W; ++i) + RC[i] = BitValue::ref(C[i]); + return RC; +} + +// A class to evaluate target's instructions and update the cell maps. 
+// This is used internally by the bit tracker. A target that wants to +// utilize this should implement the evaluation functions (noted below) +// in a subclass of this class. +struct BitTracker::MachineEvaluator { + MachineEvaluator(const TargetRegisterInfo &T, MachineRegisterInfo &M) + : TRI(T), MRI(M) {} + virtual ~MachineEvaluator() {} + + uint16_t getRegBitWidth(const RegisterRef &RR) const; + + RegisterCell getCell(const RegisterRef &RR, const CellMapType &M) const; + void putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const; + // A result of any operation should use refs to the source cells, not + // the cells directly. This function is a convenience wrapper to quickly + // generate a ref for a cell corresponding to a register reference. + RegisterCell getRef(const RegisterRef &RR, const CellMapType &M) const { + RegisterCell RC = getCell(RR, M); + return RegisterCell::ref(RC); + } + + // Helper functions. + // Check if a cell is an immediate value (i.e. all bits are either 0 or 1). + bool isInt(const RegisterCell &A) const; + // Convert cell to an immediate value. + uint64_t toInt(const RegisterCell &A) const; + + // Generate cell from an immediate value. + RegisterCell eIMM(int64_t V, uint16_t W) const; + RegisterCell eIMM(const ConstantInt *CI) const; + + // Arithmetic. + RegisterCell eADD(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eSUB(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eMLS(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eMLU(const RegisterCell &A1, const RegisterCell &A2) const; + + // Shifts. + RegisterCell eASL(const RegisterCell &A1, uint16_t Sh) const; + RegisterCell eLSR(const RegisterCell &A1, uint16_t Sh) const; + RegisterCell eASR(const RegisterCell &A1, uint16_t Sh) const; + + // Logical. + RegisterCell eAND(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eORL(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eXOR(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eNOT(const RegisterCell &A1) const; + + // Set bit, clear bit. + RegisterCell eSET(const RegisterCell &A1, uint16_t BitN) const; + RegisterCell eCLR(const RegisterCell &A1, uint16_t BitN) const; + + // Count leading/trailing bits (zeros/ones). + RegisterCell eCLB(const RegisterCell &A1, bool B, uint16_t W) const; + RegisterCell eCTB(const RegisterCell &A1, bool B, uint16_t W) const; + + // Sign/zero extension. + RegisterCell eSXT(const RegisterCell &A1, uint16_t FromN) const; + RegisterCell eZXT(const RegisterCell &A1, uint16_t FromN) const; + + // Extract/insert + // XTR R,b,e: extract bits from A1 starting at bit b, ending at e-1. + // INS R,S,b: take R and replace bits starting from b with S. + RegisterCell eXTR(const RegisterCell &A1, uint16_t B, uint16_t E) const; + RegisterCell eINS(const RegisterCell &A1, const RegisterCell &A2, + uint16_t AtN) const; + + // User-provided functions for individual targets: + + // Return a sub-register mask that indicates which bits in Reg belong + // to the subregister Sub. These bits are assumed to be contiguous in + // the super-register, and have the same ordering in the sub-register + // as in the super-register. It is valid to call this function with + // Sub == 0, in this case, the function should return a mask that spans + // the entire register Reg (which is what the default implementation + // does). 
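+  // As an illustration only (a hypothetical target, not part of this
+  // interface): a 64-bit register pair with 32-bit "lo"/"hi" subregisters
+  // could override it as
+  //   BitMask MyEval::mask(unsigned Reg, unsigned Sub) const {
+  //     if (Sub == LoSubReg)  return BitMask(0, 31);
+  //     if (Sub == HiSubReg)  return BitMask(32, 63);
+  //     return MachineEvaluator::mask(Reg, Sub);  // Sub == 0: whole register.
+  //   }
+  // where MyEval, LoSubReg and HiSubReg stand in for the target's own
+  // evaluator class and subregister indices.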
+ virtual BitMask mask(unsigned Reg, unsigned Sub) const; + // Indicate whether a given register class should be tracked. + virtual bool track(const TargetRegisterClass *RC) const { return true; } + // Evaluate a non-branching machine instruction, given the cell map with + // the input values. Place the results in the Outputs map. Return "true" + // if evaluation succeeded, "false" otherwise. + virtual bool evaluate(const MachineInstr *MI, const CellMapType &Inputs, + CellMapType &Outputs) const; + // Evaluate a branch, given the cell map with the input values. Fill out + // a list of all possible branch targets and indicate (through a flag) + // whether the branch could fall-through. Return "true" if this information + // has been successfully computed, "false" otherwise. + virtual bool evaluate(const MachineInstr *BI, const CellMapType &Inputs, + BranchTargetList &Targets, bool &FallsThru) const = 0; + + const TargetRegisterInfo &TRI; + MachineRegisterInfo &MRI; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp new file mode 100644 index 0000000..4a9c341 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -0,0 +1,1605 @@ +//===-- HexagonDisassembler.cpp - Disassembler for Hexagon ISA ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-disassembler" + +#include "Hexagon.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCChecker.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonInstPrinter.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetRegistry.h" +#include <vector> + +using namespace llvm; +using namespace Hexagon; + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { +/// \brief Hexagon disassembler for all Hexagon platforms. 
+class HexagonDisassembler : public MCDisassembler { +public: + std::unique_ptr<MCInstrInfo const> const MCII; + std::unique_ptr<MCInst *> CurrentBundle; + HexagonDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, + MCInstrInfo const *MCII) + : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *) {} + + DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, raw_ostream &CStream, + bool &Complete) const; + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const override; + + void adjustExtendedInstructions(MCInst &MCI, MCInst const &MCB) const; + void addSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) const; +}; +} + +// Forward declare these because the auto-generated code will reference them. +// Definitions are further down. + +static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn); +static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn, + void const *Decoder); + +static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op, + raw_ostream &os); + +static unsigned getRegFromSubinstEncoding(unsigned encoded_reg); + +static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, + uint64_t Address, const void *Decoder); +static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); 
+static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); + +#include "HexagonGenDisassemblerTables.inc" + +static MCDisassembler *createHexagonDisassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new HexagonDisassembler(STI, Ctx, T.createMCInstrInfo()); +} + +extern "C" void LLVMInitializeHexagonDisassembler() { + TargetRegistry::RegisterMCDisassembler(TheHexagonTarget, + createHexagonDisassembler); +} + +DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + ArrayRef<uint8_t> Bytes, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + DecodeStatus Result = DecodeStatus::Success; + bool Complete = false; + Size = 0; + + *CurrentBundle = &MI; + MI = HexagonMCInstrInfo::createBundle(); + while (Result == Success && Complete == false) { + if (Bytes.size() < HEXAGON_INSTR_SIZE) + return MCDisassembler::Fail; + MCInst *Inst = new (getContext()) MCInst; + Result = getSingleInstruction(*Inst, MI, Bytes, Address, os, cs, Complete); + MI.addOperand(MCOperand::createInst(Inst)); + Size += HEXAGON_INSTR_SIZE; + Bytes = Bytes.slice(HEXAGON_INSTR_SIZE); + } + if(Result == MCDisassembler::Fail) + return Result; + HexagonMCChecker Checker (*MCII, STI, MI, MI, *getContext().getRegisterInfo()); + if(!Checker.check()) + return MCDisassembler::Fail; + return MCDisassembler::Success; +} + +namespace { +HexagonDisassembler const &disassembler(void const *Decoder) { + return *static_cast<HexagonDisassembler const *>(Decoder); +} +MCContext &contextFromDecoder(void const *Decoder) { + return disassembler(Decoder).getContext(); +} +} + +DecodeStatus HexagonDisassembler::getSingleInstruction( + MCInst &MI, MCInst &MCB, ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &os, raw_ostream &cs, bool &Complete) const { + assert(Bytes.size() >= HEXAGON_INSTR_SIZE); + + uint32_t Instruction = + (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0); + + auto BundleSize = HexagonMCInstrInfo::bundleSize(MCB); + if ((Instruction & HexagonII::INST_PARSE_MASK) == + HexagonII::INST_PARSE_LOOP_END) { + if (BundleSize == 0) + HexagonMCInstrInfo::setInnerLoop(MCB); + else if (BundleSize == 1) + HexagonMCInstrInfo::setOuterLoop(MCB); + else + return DecodeStatus::Fail; + } + + DecodeStatus Result = DecodeStatus::Success; + if ((Instruction & HexagonII::INST_PARSE_MASK) == + HexagonII::INST_PARSE_DUPLEX) { + // Determine the instruction class of each instruction in the duplex. 
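getSingleInstruction above assembles each word from little-endian bytes and routes it by its parse field before reaching the duplex class table that follows. As background, a hedged standalone sketch of that routing: the parse field sits in bits 15:14, mirroring the HexagonII::INST_PARSE_* masks used above (the authoritative values live in HexagonBaseInfo.h), and the enum and helper here are illustrative, not part of the disassembler.

  #include <cstdint>

  enum class ParseKind { Duplex, NotEnd, LoopEnd, PacketEnd };

  // Assemble a packet word from little-endian bytes and classify its
  // parse bits (bits 15:14 of the word).
  static ParseKind classifyWord(const uint8_t Bytes[4], uint32_t &Word) {
    Word = (uint32_t(Bytes[3]) << 24) | (uint32_t(Bytes[2]) << 16) |
           (uint32_t(Bytes[1]) << 8)  |  uint32_t(Bytes[0]);
    switch ((Word >> 14) & 0x3) {
    case 0:  return ParseKind::Duplex;     // two 13-bit sub-instructions
    case 2:  return ParseKind::LoopEnd;    // flags an inner/outer hardware loop
    case 3:  return ParseKind::PacketEnd;  // last word of the packet
    default: return ParseKind::NotEnd;     // packet continues
    }
  }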
+ unsigned duplexIClass, IClassLow, IClassHigh; + + duplexIClass = ((Instruction >> 28) & 0xe) | ((Instruction >> 13) & 0x1); + switch (duplexIClass) { + default: + return MCDisassembler::Fail; + case 0: + IClassLow = HexagonII::HSIG_L1; + IClassHigh = HexagonII::HSIG_L1; + break; + case 1: + IClassLow = HexagonII::HSIG_L2; + IClassHigh = HexagonII::HSIG_L1; + break; + case 2: + IClassLow = HexagonII::HSIG_L2; + IClassHigh = HexagonII::HSIG_L2; + break; + case 3: + IClassLow = HexagonII::HSIG_A; + IClassHigh = HexagonII::HSIG_A; + break; + case 4: + IClassLow = HexagonII::HSIG_L1; + IClassHigh = HexagonII::HSIG_A; + break; + case 5: + IClassLow = HexagonII::HSIG_L2; + IClassHigh = HexagonII::HSIG_A; + break; + case 6: + IClassLow = HexagonII::HSIG_S1; + IClassHigh = HexagonII::HSIG_A; + break; + case 7: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_A; + break; + case 8: + IClassLow = HexagonII::HSIG_S1; + IClassHigh = HexagonII::HSIG_L1; + break; + case 9: + IClassLow = HexagonII::HSIG_S1; + IClassHigh = HexagonII::HSIG_L2; + break; + case 10: + IClassLow = HexagonII::HSIG_S1; + IClassHigh = HexagonII::HSIG_S1; + break; + case 11: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_S1; + break; + case 12: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_L1; + break; + case 13: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_L2; + break; + case 14: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_S2; + break; + } + + // Set the MCInst to be a duplex instruction. Which one doesn't matter. + MI.setOpcode(Hexagon::DuplexIClass0); + + // Decode each instruction in the duplex. + // Create an MCInst for each instruction. + unsigned instLow = Instruction & 0x1fff; + unsigned instHigh = (Instruction >> 16) & 0x1fff; + unsigned opLow; + if (GetSubinstOpcode(IClassLow, instLow, opLow, os) != + MCDisassembler::Success) + return MCDisassembler::Fail; + unsigned opHigh; + if (GetSubinstOpcode(IClassHigh, instHigh, opHigh, os) != + MCDisassembler::Success) + return MCDisassembler::Fail; + MCInst *MILow = new (getContext()) MCInst; + MILow->setOpcode(opLow); + MCInst *MIHigh = new (getContext()) MCInst; + MIHigh->setOpcode(opHigh); + addSubinstOperands(MILow, opLow, instLow); + addSubinstOperands(MIHigh, opHigh, instHigh); + // see ConvertToSubInst() in + // lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp + + // Add the duplex instruction MCInsts as operands to the passed in MCInst. + MCOperand OPLow = MCOperand::createInst(MILow); + MCOperand OPHigh = MCOperand::createInst(MIHigh); + MI.addOperand(OPLow); + MI.addOperand(OPHigh); + Complete = true; + } else { + if ((Instruction & HexagonII::INST_PARSE_MASK) == + HexagonII::INST_PARSE_PACKET_END) + Complete = true; + // Calling the auto-generated decoder function. + Result = + decodeInstruction(DecoderTable32, MI, Instruction, Address, this, STI); + + // If a, "standard" insn isn't found check special cases. + if (MCDisassembler::Success != Result || + MI.getOpcode() == Hexagon::A4_ext) { + Result = decodeImmext(MI, Instruction, this); + if (MCDisassembler::Success != Result) { + Result = decodeSpecial(MI, Instruction); + } + } else { + // If the instruction is a compound instruction, register values will + // follow the duplex model, so the register values in the MCInst are + // incorrect. If the instruction is a compound, loop through the + // operands and change registers appropriately. 
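The remapping loop that follows, like the duplex operand decoding later in this file, relies on the sub-instruction GPR encoding, which is only spelled out much further down in getRegFromSubinstEncoding. A compressed restatement of it, as a sketch:

  // Mirrors getRegFromSubinstEncoding() defined later in this file: the
  // 4-bit register field of a sub-instruction names r0-r7 directly and maps
  // encodings 8-15 onto r16-r23; anything else is invalid.
  static unsigned subinstGPR(unsigned Enc) {
    if (Enc < 8)  return Hexagon::R0 + Enc;        // 0..7  -> r0..r7
    if (Enc < 16) return Hexagon::R0 + Enc + 8;    // 8..15 -> r16..r23
    return Hexagon::NoRegister;                    // invalid field value
  }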
+ if (llvm::HexagonMCInstrInfo::getType(*MCII, MI) == + HexagonII::TypeCOMPOUND) { + for (MCInst::iterator i = MI.begin(), last = MI.end(); i < last; ++i) { + if (i->isReg()) { + unsigned reg = i->getReg() - Hexagon::R0; + i->setReg(getRegFromSubinstEncoding(reg)); + } + } + } + } + } + + if (HexagonMCInstrInfo::isNewValue(*MCII, MI)) { + unsigned OpIndex = HexagonMCInstrInfo::getNewValueOp(*MCII, MI); + MCOperand &MCO = MI.getOperand(OpIndex); + assert(MCO.isReg() && "New value consumers must be registers"); + unsigned Register = + getContext().getRegisterInfo()->getEncodingValue(MCO.getReg()); + if ((Register & 0x6) == 0) + // HexagonPRM 10.11 Bit 1-2 == 0 is reserved + return MCDisassembler::Fail; + unsigned Lookback = (Register & 0x6) >> 1; + unsigned Offset = 1; + bool Vector = HexagonMCInstrInfo::isVector(*MCII, MI); + auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); + auto i = Instructions.end() - 1; + for (auto n = Instructions.begin() - 1;; --i, ++Offset) { + if (i == n) + // Couldn't find producer + return MCDisassembler::Fail; + if (Vector && !HexagonMCInstrInfo::isVector(*MCII, *i->getInst())) + // Skip scalars when calculating distances for vectors + ++Lookback; + if (HexagonMCInstrInfo::isImmext(*i->getInst())) + ++Lookback; + if (Offset == Lookback) + break; + } + auto const &Inst = *i->getInst(); + bool SubregBit = (Register & 0x1) != 0; + if (SubregBit && HexagonMCInstrInfo::hasNewValue2(*MCII, Inst)) { + // If subreg bit is set we're selecting the second produced newvalue + unsigned Producer = + HexagonMCInstrInfo::getNewValueOperand2(*MCII, Inst).getReg(); + assert(Producer != Hexagon::NoRegister); + MCO.setReg(Producer); + } else if (HexagonMCInstrInfo::hasNewValue(*MCII, Inst)) { + unsigned Producer = + HexagonMCInstrInfo::getNewValueOperand(*MCII, Inst).getReg(); + if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15) + Producer = ((Producer - Hexagon::W0) << 1) + SubregBit + Hexagon::V0; + else if (SubregBit) + // Subreg bit should not be set for non-doublevector newvalue producers + return MCDisassembler::Fail; + assert(Producer != Hexagon::NoRegister); + MCO.setReg(Producer); + } else + return MCDisassembler::Fail; + } + + adjustExtendedInstructions(MI, MCB); + MCInst const *Extender = + HexagonMCInstrInfo::extenderForIndex(MCB, + HexagonMCInstrInfo::bundleSize(MCB)); + if(Extender != nullptr) { + MCInst const & Inst = HexagonMCInstrInfo::isDuplex(*MCII, MI) ? + *MI.getOperand(1).getInst() : MI; + if (!HexagonMCInstrInfo::isExtendable(*MCII, Inst) && + !HexagonMCInstrInfo::isExtended(*MCII, Inst)) + return MCDisassembler::Fail; + } + return Result; +} + +void HexagonDisassembler::adjustExtendedInstructions(MCInst &MCI, + MCInst const &MCB) const { + if (!HexagonMCInstrInfo::hasExtenderForIndex( + MCB, HexagonMCInstrInfo::bundleSize(MCB))) { + unsigned opcode; + // This code is used by the disassembler to disambiguate between GP + // relative and absolute addressing instructions since they both have + // same encoding bits. However, an absolute addressing instruction must + // follow an immediate extender. Disassembler alwaus select absolute + // addressing instructions first and uses this code to change them into + // GP relative instruction in the absence of the corresponding immediate + // extender. 
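The switch that follows implements the rule just described: absolute and GP-relative forms share encoding bits, so only the presence of an immediate extender tells them apart. A compressed sketch of that decision, using two opcode pairs taken from the table below (not a drop-in replacement for the switch):

  // Sketch only: choose between an absolute opcode and its GP-relative twin
  // based on whether the slot is covered by an immediate extender.
  static unsigned disambiguateAbsVsGP(unsigned AbsOpcode, bool HasExtender) {
    if (HasExtender)
      return AbsOpcode;                             // memw(##imm32) style form
    switch (AbsOpcode) {                            // no extender: GP-relative
    case Hexagon::S2_storeriabs: return Hexagon::S2_storerigp;
    case Hexagon::L4_loadri_abs: return Hexagon::L2_loadrigp;
    default:                     return AbsOpcode;  // not an abs/GP pair
    }
  }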
+ switch (MCI.getOpcode()) { + case Hexagon::S2_storerbabs: + opcode = Hexagon::S2_storerbgp; + break; + case Hexagon::S2_storerhabs: + opcode = Hexagon::S2_storerhgp; + break; + case Hexagon::S2_storerfabs: + opcode = Hexagon::S2_storerfgp; + break; + case Hexagon::S2_storeriabs: + opcode = Hexagon::S2_storerigp; + break; + case Hexagon::S2_storerbnewabs: + opcode = Hexagon::S2_storerbnewgp; + break; + case Hexagon::S2_storerhnewabs: + opcode = Hexagon::S2_storerhnewgp; + break; + case Hexagon::S2_storerinewabs: + opcode = Hexagon::S2_storerinewgp; + break; + case Hexagon::S2_storerdabs: + opcode = Hexagon::S2_storerdgp; + break; + case Hexagon::L4_loadrb_abs: + opcode = Hexagon::L2_loadrbgp; + break; + case Hexagon::L4_loadrub_abs: + opcode = Hexagon::L2_loadrubgp; + break; + case Hexagon::L4_loadrh_abs: + opcode = Hexagon::L2_loadrhgp; + break; + case Hexagon::L4_loadruh_abs: + opcode = Hexagon::L2_loadruhgp; + break; + case Hexagon::L4_loadri_abs: + opcode = Hexagon::L2_loadrigp; + break; + case Hexagon::L4_loadrd_abs: + opcode = Hexagon::L2_loadrdgp; + break; + default: + opcode = MCI.getOpcode(); + } + MCI.setOpcode(opcode); + } +} + +namespace llvm { +extern const MCInstrDesc HexagonInsts[]; +} + +static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, + ArrayRef<MCPhysReg> Table) { + if (RegNo < Table.size()) { + Inst.addOperand(MCOperand::createReg(Table[RegNo])); + return MCDisassembler::Success; + } + + return MCDisassembler::Fail; +} + +static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeIntRegsRegisterClass(Inst, RegNo, Address, Decoder); +} + +static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + static const MCPhysReg IntRegDecoderTable[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, + Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, + Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, + Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24, + Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29, + Hexagon::R30, Hexagon::R31}; + + return DecodeRegisterClass(Inst, RegNo, IntRegDecoderTable); +} + +static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg VecRegDecoderTable[] = { + Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4, + Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9, + Hexagon::V10, Hexagon::V11, Hexagon::V12, Hexagon::V13, Hexagon::V14, + Hexagon::V15, Hexagon::V16, Hexagon::V17, Hexagon::V18, Hexagon::V19, + Hexagon::V20, Hexagon::V21, Hexagon::V22, Hexagon::V23, Hexagon::V24, + Hexagon::V25, Hexagon::V26, Hexagon::V27, Hexagon::V28, Hexagon::V29, + Hexagon::V30, Hexagon::V31}; + + return DecodeRegisterClass(Inst, RegNo, VecRegDecoderTable); +} + +static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg DoubleRegDecoderTable[] = { + Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, + Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7, + Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11, + Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15}; + + return DecodeRegisterClass(Inst, RegNo >> 1, 
DoubleRegDecoderTable); +} + +static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg VecDblRegDecoderTable[] = { + Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3, + Hexagon::W4, Hexagon::W5, Hexagon::W6, Hexagon::W7, + Hexagon::W8, Hexagon::W9, Hexagon::W10, Hexagon::W11, + Hexagon::W12, Hexagon::W13, Hexagon::W14, Hexagon::W15}; + + return (DecodeRegisterClass(Inst, RegNo >> 1, VecDblRegDecoderTable)); +} + +static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1, + Hexagon::P2, Hexagon::P3}; + + return DecodeRegisterClass(Inst, RegNo, PredRegDecoderTable); +} + +static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg VecPredRegDecoderTable[] = {Hexagon::Q0, Hexagon::Q1, + Hexagon::Q2, Hexagon::Q3}; + + return DecodeRegisterClass(Inst, RegNo, VecPredRegDecoderTable); +} + +static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg CtrlRegDecoderTable[] = { + Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1, + Hexagon::P3_0, Hexagon::C5, Hexagon::C6, Hexagon::C7, + Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP, + Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPC + }; + + if (RegNo >= array_lengthof(CtrlRegDecoderTable)) + return MCDisassembler::Fail; + + if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister) + return MCDisassembler::Fail; + + unsigned Register = CtrlRegDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg CtrlReg64DecoderTable[] = { + Hexagon::C1_0, Hexagon::NoRegister, + Hexagon::C3_2, Hexagon::NoRegister, + Hexagon::C7_6, Hexagon::NoRegister, + Hexagon::C9_8, Hexagon::NoRegister, + Hexagon::C11_10, Hexagon::NoRegister, + Hexagon::CS, Hexagon::NoRegister, + Hexagon::UPC, Hexagon::NoRegister + }; + + if (RegNo >= array_lengthof(CtrlReg64DecoderTable)) + return MCDisassembler::Fail; + + if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister) + return MCDisassembler::Fail; + + unsigned Register = CtrlReg64DecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + unsigned Register = 0; + switch (RegNo) { + case 0: + Register = Hexagon::M0; + break; + case 1: + Register = Hexagon::M1; + break; + default: + return MCDisassembler::Fail; + } + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +namespace { +uint32_t fullValue(MCInstrInfo const &MCII, + MCInst &MCB, + MCInst &MI, + int64_t Value) { + MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex( + MCB, HexagonMCInstrInfo::bundleSize(MCB)); + if(!Extender || MI.size() != HexagonMCInstrInfo::getExtendableOp(MCII, MI)) + return Value; + unsigned Alignment = HexagonMCInstrInfo::getExtentAlignment(MCII, MI); + uint32_t Lower6 = static_cast<uint32_t>(Value >> Alignment) & 0x3f; + int64_t Bits; + bool Success = 
Extender->getOperand(0).getExpr()->evaluateAsAbsolute(Bits); + assert(Success);(void)Success; + uint32_t Upper26 = static_cast<uint32_t>(Bits); + uint32_t Operand = Upper26 | Lower6; + return Operand; +} +template <size_t T> +void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) { + HexagonDisassembler const &Disassembler = disassembler(Decoder); + int64_t FullValue = fullValue(*Disassembler.MCII, + **Disassembler.CurrentBundle, + MI, SignExtend64<T>(tmp)); + int64_t Extended = SignExtend64<32>(FullValue); + HexagonMCInstrInfo::addConstant(MI, Extended, + Disassembler.getContext()); +} +} + +static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, + const void *Decoder) { + HexagonDisassembler const &Disassembler = disassembler(Decoder); + int64_t FullValue = fullValue(*Disassembler.MCII, + **Disassembler.CurrentBundle, + MI, tmp); + assert(FullValue >= 0 && "Negative in unsigned decoder"); + HexagonMCInstrInfo::addConstant(MI, FullValue, Disassembler.getContext()); + return MCDisassembler::Success; +} + +static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<16>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<12>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<11>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + HexagonMCInstrInfo::addConstant(MI, SignExtend64<12>(tmp), contextFromDecoder(Decoder)); + return MCDisassembler::Success; +} + +static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<13>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<14>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<10>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, + const void *Decoder) { + signedDecoder<8>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<6>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<4>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<5>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<6>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<7>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static 
DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<10>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<19>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +// custom decoder for various jump/call immediates +static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder) { + HexagonDisassembler const &Disassembler = disassembler(Decoder); + unsigned Bits = HexagonMCInstrInfo::getExtentBits(*Disassembler.MCII, MI); + // r13_2 is not extendable, so if there are no extent bits, it's r13_2 + if (Bits == 0) + Bits = 15; + uint32_t FullValue = fullValue(*Disassembler.MCII, + **Disassembler.CurrentBundle, + MI, SignExtend64(tmp, Bits)); + int64_t Extended = SignExtend64<32>(FullValue) + Address; + if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, + 0, 4)) + HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext()); + return MCDisassembler::Success; +} + +// Addressing mode dependent load store opcode map. +// - If an insn is preceded by an extender the address is absolute. +// - memw(##symbol) = r0 +// - If an insn is not preceded by an extender the address is GP relative. +// - memw(gp + #symbol) = r0 +// Please note that the instructions must be ordered in the descending order +// of their opcode. +// HexagonII::INST_ICLASS_ST +static const unsigned int StoreConditionalOpcodeData[][2] = { + {S4_pstorerdfnew_abs, 0xafc02084}, + {S4_pstorerdtnew_abs, 0xafc02080}, + {S4_pstorerdf_abs, 0xafc00084}, + {S4_pstorerdt_abs, 0xafc00080}, + {S4_pstorerinewfnew_abs, 0xafa03084}, + {S4_pstorerinewtnew_abs, 0xafa03080}, + {S4_pstorerhnewfnew_abs, 0xafa02884}, + {S4_pstorerhnewtnew_abs, 0xafa02880}, + {S4_pstorerbnewfnew_abs, 0xafa02084}, + {S4_pstorerbnewtnew_abs, 0xafa02080}, + {S4_pstorerinewf_abs, 0xafa01084}, + {S4_pstorerinewt_abs, 0xafa01080}, + {S4_pstorerhnewf_abs, 0xafa00884}, + {S4_pstorerhnewt_abs, 0xafa00880}, + {S4_pstorerbnewf_abs, 0xafa00084}, + {S4_pstorerbnewt_abs, 0xafa00080}, + {S4_pstorerifnew_abs, 0xaf802084}, + {S4_pstoreritnew_abs, 0xaf802080}, + {S4_pstorerif_abs, 0xaf800084}, + {S4_pstorerit_abs, 0xaf800080}, + {S4_pstorerhfnew_abs, 0xaf402084}, + {S4_pstorerhtnew_abs, 0xaf402080}, + {S4_pstorerhf_abs, 0xaf400084}, + {S4_pstorerht_abs, 0xaf400080}, + {S4_pstorerbfnew_abs, 0xaf002084}, + {S4_pstorerbtnew_abs, 0xaf002080}, + {S4_pstorerbf_abs, 0xaf000084}, + {S4_pstorerbt_abs, 0xaf000080}}; +// HexagonII::INST_ICLASS_LD + +// HexagonII::INST_ICLASS_LD_ST_2 +static unsigned int LoadStoreOpcodeData[][2] = {{L4_loadrd_abs, 0x49c00000}, + {L4_loadri_abs, 0x49800000}, + {L4_loadruh_abs, 0x49600000}, + {L4_loadrh_abs, 0x49400000}, + {L4_loadrub_abs, 0x49200000}, + {L4_loadrb_abs, 0x49000000}, + {S2_storerdabs, 0x48c00000}, + {S2_storerinewabs, 0x48a01000}, + {S2_storerhnewabs, 0x48a00800}, + {S2_storerbnewabs, 0x48a00000}, + {S2_storeriabs, 0x48800000}, + {S2_storerfabs, 0x48600000}, + {S2_storerhabs, 0x48400000}, + {S2_storerbabs, 0x48000000}}; +static const size_t NumCondS = array_lengthof(StoreConditionalOpcodeData); +static const size_t NumLS = array_lengthof(LoadStoreOpcodeData); + +static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) { + + unsigned MachineOpcode = 0; + unsigned LLVMOpcode = 0; + + if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_ST) { + for (size_t i = 0; i < 
NumCondS; ++i) { + if ((insn & StoreConditionalOpcodeData[i][1]) == + StoreConditionalOpcodeData[i][1]) { + MachineOpcode = StoreConditionalOpcodeData[i][1]; + LLVMOpcode = StoreConditionalOpcodeData[i][0]; + break; + } + } + } + if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_LD_ST_2) { + for (size_t i = 0; i < NumLS; ++i) { + if ((insn & LoadStoreOpcodeData[i][1]) == LoadStoreOpcodeData[i][1]) { + MachineOpcode = LoadStoreOpcodeData[i][1]; + LLVMOpcode = LoadStoreOpcodeData[i][0]; + break; + } + } + } + + if (MachineOpcode) { + unsigned Value = 0; + unsigned shift = 0; + MI.setOpcode(LLVMOpcode); + // Remove the parse bits from the insn. + insn &= ~HexagonII::INST_PARSE_MASK; + + switch (LLVMOpcode) { + default: + return MCDisassembler::Fail; + break; + + case Hexagon::S4_pstorerdf_abs: + case Hexagon::S4_pstorerdt_abs: + case Hexagon::S4_pstorerdfnew_abs: + case Hexagon::S4_pstorerdtnew_abs: { + // op: Pv + Value = insn & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 12) & UINT64_C(48); + Value |= (insn >> 3) & UINT64_C(15); + MI.addOperand(MCOperand::createImm(Value)); + // op: Rtt + Value = (insn >> 8) & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + break; + } + + case Hexagon::S4_pstorerbnewf_abs: + case Hexagon::S4_pstorerbnewt_abs: + case Hexagon::S4_pstorerbnewfnew_abs: + case Hexagon::S4_pstorerbnewtnew_abs: + case Hexagon::S4_pstorerhnewf_abs: + case Hexagon::S4_pstorerhnewt_abs: + case Hexagon::S4_pstorerhnewfnew_abs: + case Hexagon::S4_pstorerhnewtnew_abs: + case Hexagon::S4_pstorerinewf_abs: + case Hexagon::S4_pstorerinewt_abs: + case Hexagon::S4_pstorerinewfnew_abs: + case Hexagon::S4_pstorerinewtnew_abs: { + // op: Pv + Value = insn & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 12) & UINT64_C(48); + Value |= (insn >> 3) & UINT64_C(15); + MI.addOperand(MCOperand::createImm(Value)); + // op: Nt + Value = (insn >> 8) & UINT64_C(7); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + + case Hexagon::S4_pstorerbf_abs: + case Hexagon::S4_pstorerbt_abs: + case Hexagon::S4_pstorerbfnew_abs: + case Hexagon::S4_pstorerbtnew_abs: + case Hexagon::S4_pstorerhf_abs: + case Hexagon::S4_pstorerht_abs: + case Hexagon::S4_pstorerhfnew_abs: + case Hexagon::S4_pstorerhtnew_abs: + case Hexagon::S4_pstorerif_abs: + case Hexagon::S4_pstorerit_abs: + case Hexagon::S4_pstorerifnew_abs: + case Hexagon::S4_pstoreritnew_abs: { + // op: Pv + Value = insn & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 12) & UINT64_C(48); + Value |= (insn >> 3) & UINT64_C(15); + MI.addOperand(MCOperand::createImm(Value)); + // op: Rt + Value = (insn >> 8) & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + + case Hexagon::L4_ploadrdf_abs: + case Hexagon::L4_ploadrdt_abs: + case Hexagon::L4_ploadrdfnew_abs: + case Hexagon::L4_ploadrdtnew_abs: { + // op: Rdd + Value = insn & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + // op: Pt + Value = ((insn >> 9) & UINT64_C(3)); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = ((insn >> 15) & UINT64_C(62)); + Value |= ((insn >> 8) & UINT64_C(1)); + MI.addOperand(MCOperand::createImm(Value)); + break; + } + + case Hexagon::L4_ploadrbf_abs: + case Hexagon::L4_ploadrbt_abs: + case Hexagon::L4_ploadrbfnew_abs: + case Hexagon::L4_ploadrbtnew_abs: + case Hexagon::L4_ploadrhf_abs: + case Hexagon::L4_ploadrht_abs: + case 
Hexagon::L4_ploadrhfnew_abs: + case Hexagon::L4_ploadrhtnew_abs: + case Hexagon::L4_ploadrubf_abs: + case Hexagon::L4_ploadrubt_abs: + case Hexagon::L4_ploadrubfnew_abs: + case Hexagon::L4_ploadrubtnew_abs: + case Hexagon::L4_ploadruhf_abs: + case Hexagon::L4_ploadruht_abs: + case Hexagon::L4_ploadruhfnew_abs: + case Hexagon::L4_ploadruhtnew_abs: + case Hexagon::L4_ploadrif_abs: + case Hexagon::L4_ploadrit_abs: + case Hexagon::L4_ploadrifnew_abs: + case Hexagon::L4_ploadritnew_abs: + // op: Rd + Value = insn & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + // op: Pt + Value = (insn >> 9) & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 15) & UINT64_C(62); + Value |= (insn >> 8) & UINT64_C(1); + MI.addOperand(MCOperand::createImm(Value)); + break; + + // op: g16_2 + case (Hexagon::L4_loadri_abs): + ++shift; + // op: g16_1 + case Hexagon::L4_loadrh_abs: + case Hexagon::L4_loadruh_abs: + ++shift; + // op: g16_0 + case Hexagon::L4_loadrb_abs: + case Hexagon::L4_loadrub_abs: { + // op: Rd + Value |= insn & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(511); + MI.addOperand(MCOperand::createImm(Value << shift)); + break; + } + + case Hexagon::L4_loadrd_abs: { + Value = insn & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(511); + MI.addOperand(MCOperand::createImm(Value << 3)); + break; + } + + case Hexagon::S2_storerdabs: { + // op: g16_3 + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(256); + Value |= insn & UINT64_C(255); + MI.addOperand(MCOperand::createImm(Value << 3)); + // op: Rtt + Value = (insn >> 8) & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + break; + } + + // op: g16_2 + case Hexagon::S2_storerinewabs: + ++shift; + // op: g16_1 + case Hexagon::S2_storerhnewabs: + ++shift; + // op: g16_0 + case Hexagon::S2_storerbnewabs: { + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(256); + Value |= insn & UINT64_C(255); + MI.addOperand(MCOperand::createImm(Value << shift)); + // op: Nt + Value = (insn >> 8) & UINT64_C(7); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + + // op: g16_2 + case Hexagon::S2_storeriabs: + ++shift; + // op: g16_1 + case Hexagon::S2_storerhabs: + case Hexagon::S2_storerfabs: + ++shift; + // op: g16_0 + case Hexagon::S2_storerbabs: { + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(256); + Value |= insn & UINT64_C(255); + MI.addOperand(MCOperand::createImm(Value << shift)); + // op: Rt + Value = (insn >> 8) & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + } + return MCDisassembler::Success; + } + return MCDisassembler::Fail; +} + +static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn, + void const *Decoder) { + + // Instruction Class for a constant a extender: bits 31:28 = 0x0000 + if ((~insn & 0xf0000000) == 0xf0000000) { + unsigned Value; + // 27:16 High 12 bits of 26-bit extender. + Value = (insn & 0x0fff0000) << 4; + // 13:0 Low 14 bits of 26-bit extender. 
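  // Illustrative aside, not part of the original source: after the two
  // statements around this note run, Value holds the 26 extender bits in
  // positions [31:6] with [5:0] zero.  fullValue() above then ORs in the
  // low 6 bits taken from the extended instruction, so the final immediate
  // is (extender_26 << 6) | low6.  For example, an extender payload of
  // 0x12345 combined with a low field of 0x2A yields 0x48D16A.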
+ Value |= ((insn & 0x3fff) << 6); + MI.setOpcode(Hexagon::A4_ext); + HexagonMCInstrInfo::addConstant(MI, Value, contextFromDecoder(Decoder)); + return MCDisassembler::Success; + } + return MCDisassembler::Fail; +} + +// These values are from HexagonGenMCCodeEmitter.inc and HexagonIsetDx.td +enum subInstBinaryValues { + V4_SA1_addi_BITS = 0x0000, + V4_SA1_addi_MASK = 0x1800, + V4_SA1_addrx_BITS = 0x1800, + V4_SA1_addrx_MASK = 0x1f00, + V4_SA1_addsp_BITS = 0x0c00, + V4_SA1_addsp_MASK = 0x1c00, + V4_SA1_and1_BITS = 0x1200, + V4_SA1_and1_MASK = 0x1f00, + V4_SA1_clrf_BITS = 0x1a70, + V4_SA1_clrf_MASK = 0x1e70, + V4_SA1_clrfnew_BITS = 0x1a50, + V4_SA1_clrfnew_MASK = 0x1e70, + V4_SA1_clrt_BITS = 0x1a60, + V4_SA1_clrt_MASK = 0x1e70, + V4_SA1_clrtnew_BITS = 0x1a40, + V4_SA1_clrtnew_MASK = 0x1e70, + V4_SA1_cmpeqi_BITS = 0x1900, + V4_SA1_cmpeqi_MASK = 0x1f00, + V4_SA1_combine0i_BITS = 0x1c00, + V4_SA1_combine0i_MASK = 0x1d18, + V4_SA1_combine1i_BITS = 0x1c08, + V4_SA1_combine1i_MASK = 0x1d18, + V4_SA1_combine2i_BITS = 0x1c10, + V4_SA1_combine2i_MASK = 0x1d18, + V4_SA1_combine3i_BITS = 0x1c18, + V4_SA1_combine3i_MASK = 0x1d18, + V4_SA1_combinerz_BITS = 0x1d08, + V4_SA1_combinerz_MASK = 0x1d08, + V4_SA1_combinezr_BITS = 0x1d00, + V4_SA1_combinezr_MASK = 0x1d08, + V4_SA1_dec_BITS = 0x1300, + V4_SA1_dec_MASK = 0x1f00, + V4_SA1_inc_BITS = 0x1100, + V4_SA1_inc_MASK = 0x1f00, + V4_SA1_seti_BITS = 0x0800, + V4_SA1_seti_MASK = 0x1c00, + V4_SA1_setin1_BITS = 0x1a00, + V4_SA1_setin1_MASK = 0x1e40, + V4_SA1_sxtb_BITS = 0x1500, + V4_SA1_sxtb_MASK = 0x1f00, + V4_SA1_sxth_BITS = 0x1400, + V4_SA1_sxth_MASK = 0x1f00, + V4_SA1_tfr_BITS = 0x1000, + V4_SA1_tfr_MASK = 0x1f00, + V4_SA1_zxtb_BITS = 0x1700, + V4_SA1_zxtb_MASK = 0x1f00, + V4_SA1_zxth_BITS = 0x1600, + V4_SA1_zxth_MASK = 0x1f00, + V4_SL1_loadri_io_BITS = 0x0000, + V4_SL1_loadri_io_MASK = 0x1000, + V4_SL1_loadrub_io_BITS = 0x1000, + V4_SL1_loadrub_io_MASK = 0x1000, + V4_SL2_deallocframe_BITS = 0x1f00, + V4_SL2_deallocframe_MASK = 0x1fc0, + V4_SL2_jumpr31_BITS = 0x1fc0, + V4_SL2_jumpr31_MASK = 0x1fc4, + V4_SL2_jumpr31_f_BITS = 0x1fc5, + V4_SL2_jumpr31_f_MASK = 0x1fc7, + V4_SL2_jumpr31_fnew_BITS = 0x1fc7, + V4_SL2_jumpr31_fnew_MASK = 0x1fc7, + V4_SL2_jumpr31_t_BITS = 0x1fc4, + V4_SL2_jumpr31_t_MASK = 0x1fc7, + V4_SL2_jumpr31_tnew_BITS = 0x1fc6, + V4_SL2_jumpr31_tnew_MASK = 0x1fc7, + V4_SL2_loadrb_io_BITS = 0x1000, + V4_SL2_loadrb_io_MASK = 0x1800, + V4_SL2_loadrd_sp_BITS = 0x1e00, + V4_SL2_loadrd_sp_MASK = 0x1f00, + V4_SL2_loadrh_io_BITS = 0x0000, + V4_SL2_loadrh_io_MASK = 0x1800, + V4_SL2_loadri_sp_BITS = 0x1c00, + V4_SL2_loadri_sp_MASK = 0x1e00, + V4_SL2_loadruh_io_BITS = 0x0800, + V4_SL2_loadruh_io_MASK = 0x1800, + V4_SL2_return_BITS = 0x1f40, + V4_SL2_return_MASK = 0x1fc4, + V4_SL2_return_f_BITS = 0x1f45, + V4_SL2_return_f_MASK = 0x1fc7, + V4_SL2_return_fnew_BITS = 0x1f47, + V4_SL2_return_fnew_MASK = 0x1fc7, + V4_SL2_return_t_BITS = 0x1f44, + V4_SL2_return_t_MASK = 0x1fc7, + V4_SL2_return_tnew_BITS = 0x1f46, + V4_SL2_return_tnew_MASK = 0x1fc7, + V4_SS1_storeb_io_BITS = 0x1000, + V4_SS1_storeb_io_MASK = 0x1000, + V4_SS1_storew_io_BITS = 0x0000, + V4_SS1_storew_io_MASK = 0x1000, + V4_SS2_allocframe_BITS = 0x1c00, + V4_SS2_allocframe_MASK = 0x1e00, + V4_SS2_storebi0_BITS = 0x1200, + V4_SS2_storebi0_MASK = 0x1f00, + V4_SS2_storebi1_BITS = 0x1300, + V4_SS2_storebi1_MASK = 0x1f00, + V4_SS2_stored_sp_BITS = 0x0a00, + V4_SS2_stored_sp_MASK = 0x1e00, + V4_SS2_storeh_io_BITS = 0x0000, + V4_SS2_storeh_io_MASK = 0x1800, + V4_SS2_storew_sp_BITS = 0x0800, + 
V4_SS2_storew_sp_MASK = 0x1e00, + V4_SS2_storewi0_BITS = 0x1000, + V4_SS2_storewi0_MASK = 0x1f00, + V4_SS2_storewi1_BITS = 0x1100, + V4_SS2_storewi1_MASK = 0x1f00 +}; + +static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op, + raw_ostream &os) { + switch (IClass) { + case HexagonII::HSIG_L1: + if ((inst & V4_SL1_loadri_io_MASK) == V4_SL1_loadri_io_BITS) + op = Hexagon::V4_SL1_loadri_io; + else if ((inst & V4_SL1_loadrub_io_MASK) == V4_SL1_loadrub_io_BITS) + op = Hexagon::V4_SL1_loadrub_io; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + case HexagonII::HSIG_L2: + if ((inst & V4_SL2_deallocframe_MASK) == V4_SL2_deallocframe_BITS) + op = Hexagon::V4_SL2_deallocframe; + else if ((inst & V4_SL2_jumpr31_MASK) == V4_SL2_jumpr31_BITS) + op = Hexagon::V4_SL2_jumpr31; + else if ((inst & V4_SL2_jumpr31_f_MASK) == V4_SL2_jumpr31_f_BITS) + op = Hexagon::V4_SL2_jumpr31_f; + else if ((inst & V4_SL2_jumpr31_fnew_MASK) == V4_SL2_jumpr31_fnew_BITS) + op = Hexagon::V4_SL2_jumpr31_fnew; + else if ((inst & V4_SL2_jumpr31_t_MASK) == V4_SL2_jumpr31_t_BITS) + op = Hexagon::V4_SL2_jumpr31_t; + else if ((inst & V4_SL2_jumpr31_tnew_MASK) == V4_SL2_jumpr31_tnew_BITS) + op = Hexagon::V4_SL2_jumpr31_tnew; + else if ((inst & V4_SL2_loadrb_io_MASK) == V4_SL2_loadrb_io_BITS) + op = Hexagon::V4_SL2_loadrb_io; + else if ((inst & V4_SL2_loadrd_sp_MASK) == V4_SL2_loadrd_sp_BITS) + op = Hexagon::V4_SL2_loadrd_sp; + else if ((inst & V4_SL2_loadrh_io_MASK) == V4_SL2_loadrh_io_BITS) + op = Hexagon::V4_SL2_loadrh_io; + else if ((inst & V4_SL2_loadri_sp_MASK) == V4_SL2_loadri_sp_BITS) + op = Hexagon::V4_SL2_loadri_sp; + else if ((inst & V4_SL2_loadruh_io_MASK) == V4_SL2_loadruh_io_BITS) + op = Hexagon::V4_SL2_loadruh_io; + else if ((inst & V4_SL2_return_MASK) == V4_SL2_return_BITS) + op = Hexagon::V4_SL2_return; + else if ((inst & V4_SL2_return_f_MASK) == V4_SL2_return_f_BITS) + op = Hexagon::V4_SL2_return_f; + else if ((inst & V4_SL2_return_fnew_MASK) == V4_SL2_return_fnew_BITS) + op = Hexagon::V4_SL2_return_fnew; + else if ((inst & V4_SL2_return_t_MASK) == V4_SL2_return_t_BITS) + op = Hexagon::V4_SL2_return_t; + else if ((inst & V4_SL2_return_tnew_MASK) == V4_SL2_return_tnew_BITS) + op = Hexagon::V4_SL2_return_tnew; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + case HexagonII::HSIG_A: + if ((inst & V4_SA1_addi_MASK) == V4_SA1_addi_BITS) + op = Hexagon::V4_SA1_addi; + else if ((inst & V4_SA1_addrx_MASK) == V4_SA1_addrx_BITS) + op = Hexagon::V4_SA1_addrx; + else if ((inst & V4_SA1_addsp_MASK) == V4_SA1_addsp_BITS) + op = Hexagon::V4_SA1_addsp; + else if ((inst & V4_SA1_and1_MASK) == V4_SA1_and1_BITS) + op = Hexagon::V4_SA1_and1; + else if ((inst & V4_SA1_clrf_MASK) == V4_SA1_clrf_BITS) + op = Hexagon::V4_SA1_clrf; + else if ((inst & V4_SA1_clrfnew_MASK) == V4_SA1_clrfnew_BITS) + op = Hexagon::V4_SA1_clrfnew; + else if ((inst & V4_SA1_clrt_MASK) == V4_SA1_clrt_BITS) + op = Hexagon::V4_SA1_clrt; + else if ((inst & V4_SA1_clrtnew_MASK) == V4_SA1_clrtnew_BITS) + op = Hexagon::V4_SA1_clrtnew; + else if ((inst & V4_SA1_cmpeqi_MASK) == V4_SA1_cmpeqi_BITS) + op = Hexagon::V4_SA1_cmpeqi; + else if ((inst & V4_SA1_combine0i_MASK) == V4_SA1_combine0i_BITS) + op = Hexagon::V4_SA1_combine0i; + else if ((inst & V4_SA1_combine1i_MASK) == V4_SA1_combine1i_BITS) + op = Hexagon::V4_SA1_combine1i; + else if ((inst & V4_SA1_combine2i_MASK) == V4_SA1_combine2i_BITS) + op = Hexagon::V4_SA1_combine2i; + else if ((inst & V4_SA1_combine3i_MASK) 
== V4_SA1_combine3i_BITS) + op = Hexagon::V4_SA1_combine3i; + else if ((inst & V4_SA1_combinerz_MASK) == V4_SA1_combinerz_BITS) + op = Hexagon::V4_SA1_combinerz; + else if ((inst & V4_SA1_combinezr_MASK) == V4_SA1_combinezr_BITS) + op = Hexagon::V4_SA1_combinezr; + else if ((inst & V4_SA1_dec_MASK) == V4_SA1_dec_BITS) + op = Hexagon::V4_SA1_dec; + else if ((inst & V4_SA1_inc_MASK) == V4_SA1_inc_BITS) + op = Hexagon::V4_SA1_inc; + else if ((inst & V4_SA1_seti_MASK) == V4_SA1_seti_BITS) + op = Hexagon::V4_SA1_seti; + else if ((inst & V4_SA1_setin1_MASK) == V4_SA1_setin1_BITS) + op = Hexagon::V4_SA1_setin1; + else if ((inst & V4_SA1_sxtb_MASK) == V4_SA1_sxtb_BITS) + op = Hexagon::V4_SA1_sxtb; + else if ((inst & V4_SA1_sxth_MASK) == V4_SA1_sxth_BITS) + op = Hexagon::V4_SA1_sxth; + else if ((inst & V4_SA1_tfr_MASK) == V4_SA1_tfr_BITS) + op = Hexagon::V4_SA1_tfr; + else if ((inst & V4_SA1_zxtb_MASK) == V4_SA1_zxtb_BITS) + op = Hexagon::V4_SA1_zxtb; + else if ((inst & V4_SA1_zxth_MASK) == V4_SA1_zxth_BITS) + op = Hexagon::V4_SA1_zxth; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + case HexagonII::HSIG_S1: + if ((inst & V4_SS1_storeb_io_MASK) == V4_SS1_storeb_io_BITS) + op = Hexagon::V4_SS1_storeb_io; + else if ((inst & V4_SS1_storew_io_MASK) == V4_SS1_storew_io_BITS) + op = Hexagon::V4_SS1_storew_io; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + case HexagonII::HSIG_S2: + if ((inst & V4_SS2_allocframe_MASK) == V4_SS2_allocframe_BITS) + op = Hexagon::V4_SS2_allocframe; + else if ((inst & V4_SS2_storebi0_MASK) == V4_SS2_storebi0_BITS) + op = Hexagon::V4_SS2_storebi0; + else if ((inst & V4_SS2_storebi1_MASK) == V4_SS2_storebi1_BITS) + op = Hexagon::V4_SS2_storebi1; + else if ((inst & V4_SS2_stored_sp_MASK) == V4_SS2_stored_sp_BITS) + op = Hexagon::V4_SS2_stored_sp; + else if ((inst & V4_SS2_storeh_io_MASK) == V4_SS2_storeh_io_BITS) + op = Hexagon::V4_SS2_storeh_io; + else if ((inst & V4_SS2_storew_sp_MASK) == V4_SS2_storew_sp_BITS) + op = Hexagon::V4_SS2_storew_sp; + else if ((inst & V4_SS2_storewi0_MASK) == V4_SS2_storewi0_BITS) + op = Hexagon::V4_SS2_storewi0; + else if ((inst & V4_SS2_storewi1_MASK) == V4_SS2_storewi1_BITS) + op = Hexagon::V4_SS2_storewi1; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + default: + os << "<unknown>"; + return MCDisassembler::Fail; + } + return MCDisassembler::Success; +} + +static unsigned getRegFromSubinstEncoding(unsigned encoded_reg) { + if (encoded_reg < 8) + return Hexagon::R0 + encoded_reg; + else if (encoded_reg < 16) + return Hexagon::R0 + encoded_reg + 8; + + // patently false value + return Hexagon::NoRegister; +} + +static unsigned getDRegFromSubinstEncoding(unsigned encoded_dreg) { + if (encoded_dreg < 4) + return Hexagon::D0 + encoded_dreg; + else if (encoded_dreg < 8) + return Hexagon::D0 + encoded_dreg + 4; + + // patently false value + return Hexagon::NoRegister; +} + +void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, + unsigned inst) const { + int64_t operand; + MCOperand Op; + switch (opcode) { + case Hexagon::V4_SL2_deallocframe: + case Hexagon::V4_SL2_jumpr31: + case Hexagon::V4_SL2_jumpr31_f: + case Hexagon::V4_SL2_jumpr31_fnew: + case Hexagon::V4_SL2_jumpr31_t: + case Hexagon::V4_SL2_jumpr31_tnew: + case Hexagon::V4_SL2_return: + case Hexagon::V4_SL2_return_f: + case Hexagon::V4_SL2_return_fnew: + case Hexagon::V4_SL2_return_t: + case Hexagon::V4_SL2_return_tnew: + // no operands for these 
instructions + break; + case Hexagon::V4_SS2_allocframe: + // u 8-4{5_3} + operand = ((inst & 0x1f0) >> 4) << 3; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SL1_loadri_io: + // Rd 3-0, Rs 7-4, u 11-8{4_2} + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0xf00) >> 6; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SL1_loadrub_io: + // Rd 3-0, Rs 7-4, u 11-8 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0xf00) >> 8; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SL2_loadrb_io: + // Rd 3-0, Rs 7-4, u 10-8 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0x700) >> 8; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SL2_loadrh_io: + case Hexagon::V4_SL2_loadruh_io: + // Rd 3-0, Rs 7-4, u 10-8{3_1} + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x700) >> 8) << 1; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SL2_loadrd_sp: + // Rdd 2-0, u 7-3{5_3} + operand = getDRegFromSubinstEncoding(inst & 0x7); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x0f8) >> 3) << 3; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SL2_loadri_sp: + // Rd 3-0, u 8-4{5_2} + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x1f0) >> 4) << 2; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SA1_addi: + // Rx 3-0 (x2), s7 10-4 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + MI->addOperand(Op); + operand = SignExtend64<7>((inst & 0x7f0) >> 4); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SA1_addrx: + // Rx 3-0 (x2), Rs 7-4 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + case Hexagon::V4_SA1_and1: + case Hexagon::V4_SA1_dec: + case Hexagon::V4_SA1_inc: + case Hexagon::V4_SA1_sxtb: + case Hexagon::V4_SA1_sxth: + case Hexagon::V4_SA1_tfr: + case Hexagon::V4_SA1_zxtb: + case Hexagon::V4_SA1_zxth: + // Rd 3-0, Rs 7-4 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SA1_addsp: + // Rd 3-0, u 9-4{6_2} + 
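  // Illustrative aside, not part of the original source: the shorthand in
  // these operand comments is "<field> <hi>-<lo>{<width>_<shift>}", meaning
  // the field lives in instruction bits hi..lo and the decoded operand is
  // that <width>-bit value shifted left by <shift>.  For the "u 9-4{6_2}"
  // case handled next this is ((inst & 0x3f0) >> 4) << 2, i.e. a 6-bit
  // unsigned count of words giving byte offsets 0..252.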
operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x3f0) >> 4) << 2; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SA1_seti: + // Rd 3-0, u 9-4 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0x3f0) >> 4; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SA1_clrf: + case Hexagon::V4_SA1_clrfnew: + case Hexagon::V4_SA1_clrt: + case Hexagon::V4_SA1_clrtnew: + case Hexagon::V4_SA1_setin1: + // Rd 3-0 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SA1_cmpeqi: + // Rs 7-4, u 1-0 + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = inst & 0x3; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SA1_combine0i: + case Hexagon::V4_SA1_combine1i: + case Hexagon::V4_SA1_combine2i: + case Hexagon::V4_SA1_combine3i: + // Rdd 2-0, u 6-5 + operand = getDRegFromSubinstEncoding(inst & 0x7); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0x060) >> 5; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SA1_combinerz: + case Hexagon::V4_SA1_combinezr: + // Rdd 2-0, Rs 7-4 + operand = getDRegFromSubinstEncoding(inst & 0x7); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SS1_storeb_io: + // Rs 7-4, u 11-8, Rt 3-0 + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0xf00) >> 8; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SS1_storew_io: + // Rs 7-4, u 11-8{4_2}, Rt 3-0 + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0xf00) >> 8) << 2; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SS2_storebi0: + case Hexagon::V4_SS2_storebi1: + // Rs 7-4, u 3-0 + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = inst & 0xf; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SS2_storewi0: + case Hexagon::V4_SS2_storewi1: + // Rs 7-4, u 3-0{4_2} + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0xf) << 2; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + break; + case Hexagon::V4_SS2_stored_sp: + // s 8-3{6_3}, Rtt 2-0 + operand = SignExtend64<9>(((inst & 0x1f8) >> 3) << 3); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + operand = getDRegFromSubinstEncoding(inst & 0x7); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SS2_storeh_io: + // Rs 7-4, u 10-8{3_1}, Rt 3-0 + operand = 
getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x700) >> 8) << 1; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SS2_storew_sp: + // u 8-4{5_2}, Rd 3-0 + operand = ((inst & 0x1f0) >> 4) << 2; + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + default: + // don't crash with an invalid subinstruction + // llvm_unreachable("Invalid subinstruction in duplex instruction"); + break; + } +} diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm/lib/Target/Hexagon/Hexagon.h new file mode 100644 index 0000000..ed7d957 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.h @@ -0,0 +1,56 @@ +//=-- Hexagon.h - Top-level interface for Hexagon representation --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// Hexagon back-end. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGON_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGON_H + +#define Hexagon_POINTER_SIZE 4 + +#define Hexagon_PointerSize (Hexagon_POINTER_SIZE) +#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8) +#define Hexagon_WordSize Hexagon_PointerSize +#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits + +// allocframe saves LR and FP on stack before allocating +// a new stack frame. This takes 8 bytes. +#define HEXAGON_LRFP_SIZE 8 + +// Normal instruction size (in bytes). +#define HEXAGON_INSTR_SIZE 4 + +// Maximum number of words and instructions in a packet. +#define HEXAGON_PACKET_SIZE 4 +#define HEXAGON_MAX_PACKET_SIZE (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE) +// Minimum number of instructions in an end-loop packet. +#define HEXAGON_PACKET_INNER_SIZE 2 +#define HEXAGON_PACKET_OUTER_SIZE 3 +// Maximum number of instructions in a packet before shuffling, +// including a compound one or a duplex or an extender. +#define HEXAGON_PRESHUFFLE_PACKET_SIZE (HEXAGON_PACKET_SIZE + 3) + +// Name of the global offset table as defined by the Hexagon ABI +#define HEXAGON_GOT_SYM_NAME "_GLOBAL_OFFSET_TABLE_" + +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class HexagonTargetMachine; + + /// \brief Creates a Hexagon-specific Target Transformation Info pass. + ImmutablePass *createHexagonTargetTransformInfoPass(const HexagonTargetMachine *TM); +} // end namespace llvm; + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td new file mode 100644 index 0000000..1189cfd --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td @@ -0,0 +1,263 @@ +//===-- Hexagon.td - Describe the Hexagon Target Machine --*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This is the top level entry point for the Hexagon target. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Hexagon Subtarget features. +//===----------------------------------------------------------------------===// + +// Hexagon Architectures +def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Hexagon V4">; +def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">; +def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "V55", "Hexagon V55">; +def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "V60", "Hexagon V60">; + +// Hexagon ISA Extensions +def ExtensionHVX: SubtargetFeature<"hvx", "UseHVXOps", + "true", "Hexagon HVX instructions">; +def ExtensionHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps", + "true", "Hexagon HVX Double instructions">; + +//===----------------------------------------------------------------------===// +// Hexagon Instruction Predicate Definitions. +//===----------------------------------------------------------------------===// +def HasV5T : Predicate<"HST->hasV5TOps()">; +def NoV5T : Predicate<"!HST->hasV5TOps()">; +def HasV55T : Predicate<"HST->hasV55TOps()">, + AssemblerPredicate<"ArchV55">; +def HasV60T : Predicate<"HST->hasV60TOps()">, + AssemblerPredicate<"ArchV60">; +def UseMEMOP : Predicate<"HST->useMemOps()">; +def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">; +def UseHVXDbl : Predicate<"HST->useHVXDblOps()">, + AssemblerPredicate<"ExtensionHVXDbl">; +def UseHVXSgl : Predicate<"HST->useHVXSglOps()">; + +def UseHVX : Predicate<"HST->useHVXSglOps() ||HST->useHVXDblOps()">, + AssemblerPredicate<"ExtensionHVX">; + +//===----------------------------------------------------------------------===// +// Classes used for relation maps. +//===----------------------------------------------------------------------===// + +class ImmRegShl; +// PredRel - Filter class used to relate non-predicated instructions with their +// predicated forms. +class PredRel; +// PredNewRel - Filter class used to relate predicated instructions with their +// predicate-new forms. +class PredNewRel: PredRel; +// ImmRegRel - Filter class used to relate instructions having reg-reg form +// with their reg-imm counterparts. +class ImmRegRel; +// NewValueRel - Filter class used to relate regular store instructions with +// their new-value store form. +class NewValueRel: PredNewRel; +// NewValueRel - Filter class used to relate load/store instructions having +// different addressing modes with each other. +class AddrModeRel: NewValueRel; +class IntrinsicsRel; + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate non-predicate instructions with their +// predicated formats - true and false. +// + +def getPredOpcode : InstrMapping { + let FilterClass = "PredRel"; + // Instructions with the same BaseOpcode and isNVStore values form a row. + let RowFields = ["BaseOpcode", "isNVStore", "PNewValue", "isNT"]; + // Instructions with the same predicate sense form a column. 
+ let ColFields = ["PredSense"]; + // The key column is the unpredicated instructions. + let KeyCol = [""]; + // Value columns are PredSense=true and PredSense=false + let ValueCols = [["true"], ["false"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate predicate-true instructions with their +// predicate-false forms +// +def getFalsePredOpcode : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken", "isNT"]; + let ColFields = ["PredSense"]; + let KeyCol = ["true"]; + let ValueCols = [["false"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate predicate-false instructions with their +// predicate-true forms +// +def getTruePredOpcode : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken", "isNT"]; + let ColFields = ["PredSense"]; + let KeyCol = ["false"]; + let ValueCols = [["true"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate predicated instructions with their .new +// format. +// +def getPredNewOpcode : InstrMapping { + let FilterClass = "PredNewRel"; + let RowFields = ["BaseOpcode", "PredSense", "isNVStore", "isBrTaken"]; + let ColFields = ["PNewValue"]; + let KeyCol = [""]; + let ValueCols = [["new"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate .new predicated instructions with their old +// format. +// +def getPredOldOpcode : InstrMapping { + let FilterClass = "PredNewRel"; + let RowFields = ["BaseOpcode", "PredSense", "isNVStore"]; + let ColFields = ["PNewValue"]; + let KeyCol = ["new"]; + let ValueCols = [[""]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate store instructions with their new-value +// format. +// +def getNewValueOpcode : InstrMapping { + let FilterClass = "NewValueRel"; + let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode", "isNT"]; + let ColFields = ["NValueST"]; + let KeyCol = ["false"]; + let ValueCols = [["true"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate new-value store instructions with their old +// format. 
+// +def getNonNVStore : InstrMapping { + let FilterClass = "NewValueRel"; + let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode", "isNT"]; + let ColFields = ["NValueST"]; + let KeyCol = ["true"]; + let ValueCols = [["false"]]; +} + +def getBaseWithImmOffset : InstrMapping { + let FilterClass = "AddrModeRel"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore", + "isFloat"]; + let ColFields = ["addrMode"]; + let KeyCol = ["Absolute"]; + let ValueCols = [["BaseImmOffset"]]; +} + +def getBaseWithRegOffset : InstrMapping { + let FilterClass = "AddrModeRel"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; + let ColFields = ["addrMode"]; + let KeyCol = ["BaseImmOffset"]; + let ValueCols = [["BaseRegOffset"]]; +} + +def getRegForm : InstrMapping { + let FilterClass = "ImmRegRel"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue"]; + let ColFields = ["InputType"]; + let KeyCol = ["imm"]; + let ValueCols = [["reg"]]; +} + +def getRegShlForm : InstrMapping { + let FilterClass = "ImmRegShl"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; + let ColFields = ["InputType"]; + let KeyCol = ["imm"]; + let ValueCols = [["reg"]]; +} + +def notTakenBranchPrediction : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"]; + let ColFields = ["isBrTaken"]; + let KeyCol = ["true"]; + let ValueCols = [["false"]]; +} + +def takenBranchPrediction : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"]; + let ColFields = ["isBrTaken"]; + let KeyCol = ["false"]; + let ValueCols = [["true"]]; +} + +def getRealHWInstr : InstrMapping { + let FilterClass = "IntrinsicsRel"; + let RowFields = ["BaseOpcode"]; + let ColFields = ["InstrType"]; + let KeyCol = ["Pseudo"]; + let ValueCols = [["Pseudo"], ["Real"]]; +} +//===----------------------------------------------------------------------===// +// Register File, Calling Conv, Instruction Descriptions +//===----------------------------------------------------------------------===// +include "HexagonSchedule.td" +include "HexagonRegisterInfo.td" +include "HexagonCallingConv.td" +include "HexagonInstrInfo.td" +include "HexagonIntrinsics.td" +include "HexagonIntrinsicsDerived.td" + +def HexagonInstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// Hexagon processors supported. 
+//===----------------------------------------------------------------------===// + +class Proc<string Name, SchedMachineModel Model, + list<SubtargetFeature> Features> + : ProcessorModel<Name, Model, Features>; + +def : Proc<"hexagonv4", HexagonModelV4, + [ArchV4]>; +def : Proc<"hexagonv5", HexagonModelV4, + [ArchV4, ArchV5]>; +def : Proc<"hexagonv55", HexagonModelV55, + [ArchV4, ArchV5, ArchV55]>; +def : Proc<"hexagonv60", HexagonModelV60, + [ArchV4, ArchV5, ArchV55, ArchV60, ExtensionHVX]>; + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def HexagonAsmParserVariant : AsmParserVariant { + int Variant = 0; + string TokenizingCharacters = "#()=:.<>!+*"; +} + +def Hexagon : Target { + // Pull in Instruction Info: + let InstructionSet = HexagonInstrInfo; + let AssemblyParserVariants = [HexagonAsmParserVariant]; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp new file mode 100644 index 0000000..e213089 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -0,0 +1,598 @@ +//===-- HexagonAsmPrinter.cpp - Print machine instrs to Hexagon assembly --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to Hexagon assembly language. This printer is +// the output mechanism used by `llc'. 
+// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonAsmPrinter.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "MCTargetDesc/HexagonInstPrinter.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCShuffler.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +namespace llvm { + void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, + MCInst &MCB, HexagonAsmPrinter &AP); +} + +#define DEBUG_TYPE "asm-printer" + +static cl::opt<bool> AlignCalls( + "hexagon-align-calls", cl::Hidden, cl::init(true), + cl::desc("Insert falign after call instruction for Hexagon target")); + +// Given a scalar register return its pair. +inline static unsigned getHexagonRegisterPair(unsigned Reg, + const MCRegisterInfo *RI) { + assert(Hexagon::IntRegsRegClass.contains(Reg)); + MCSuperRegIterator SR(Reg, RI, false); + unsigned Pair = *SR; + assert(Hexagon::DoubleRegsRegClass.contains(Pair)); + return Pair; +} + +HexagonAsmPrinter::HexagonAsmPrinter(TargetMachine &TM, + std::unique_ptr<MCStreamer> Streamer) + : AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr) {} + +void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + + switch (MO.getType()) { + default: llvm_unreachable ("<unknown operand type>"); + case MachineOperand::MO_Register: + O << HexagonInstPrinter::getRegisterName(MO.getReg()); + return; + case MachineOperand::MO_Immediate: + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + MO.getMBB()->getSymbol()->print(O, MAI); + return; + case MachineOperand::MO_ConstantPoolIndex: + GetCPISymbol(MO.getIndex())->print(O, MAI); + return; + case MachineOperand::MO_GlobalAddress: + // Computing the address of a global symbol, not calling it. + getSymbol(MO.getGlobal())->print(O, MAI); + printOffset(MO.getOffset(), O); + return; + } +} + +// +// isBlockOnlyReachableByFallthrough - We need to override this since the +// default AsmPrinter does not print labels for any basic block that +// is only reachable by a fall through. 
That works for all cases except +// for the case in which the basic block is reachable by a fall through but +// through an indirect from a jump table. In this case, the jump table +// will contain a label not defined by AsmPrinter. +// +bool HexagonAsmPrinter:: +isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { + if (MBB->hasAddressTaken()) + return false; + return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB); +} + + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) + return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS); + case 'c': // Don't print "$" before a global var name or constant. + // Hexagon never has a prefix. + printOperand(MI, OpNo, OS); + return false; + case 'L': // Write second word of DImode reference. + // Verify that this operand has two consecutive registers. + if (!MI->getOperand(OpNo).isReg() || + OpNo+1 == MI->getNumOperands() || + !MI->getOperand(OpNo+1).isReg()) + return true; + ++OpNo; // Return the high-part. + break; + case 'I': + // Write 'i' if an integer constant, otherwise nothing. Used to print + // addi vs add, etc. + if (MI->getOperand(OpNo).isImm()) + OS << "i"; + return false; + } + } + + printOperand(MI, OpNo, OS); + return false; +} + +bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + + const MachineOperand &Base = MI->getOperand(OpNo); + const MachineOperand &Offset = MI->getOperand(OpNo+1); + + if (Base.isReg()) + printOperand(MI, OpNo, O); + else + llvm_unreachable("Unimplemented"); + + if (Offset.isImm()) { + if (Offset.getImm()) + O << " + #" << Offset.getImm(); + } + else + llvm_unreachable("Unimplemented"); + + return false; +} + +MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI, + MCStreamer &OutStreamer, + const MCOperand &Imm, int AlignSize) { + MCSymbol *Sym; + int64_t Value; + if (Imm.getExpr()->evaluateAsAbsolute(Value)) { + StringRef sectionPrefix; + std::string ImmString; + StringRef Name; + if (AlignSize == 8) { + Name = ".CONST_0000000000000000"; + sectionPrefix = ".gnu.linkonce.l8"; + ImmString = utohexstr(Value); + } else { + Name = ".CONST_00000000"; + sectionPrefix = ".gnu.linkonce.l4"; + ImmString = utohexstr(static_cast<uint32_t>(Value)); + } + + std::string symbolName = // Yes, leading zeros are kept. 
+ Name.drop_back(ImmString.size()).str() + ImmString; + std::string sectionName = sectionPrefix.str() + symbolName; + + MCSectionELF *Section = OutStreamer.getContext().getELFSection( + sectionName, ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); + OutStreamer.SwitchSection(Section); + + Sym = AP.OutContext.getOrCreateSymbol(Twine(symbolName)); + if (Sym->isUndefined()) { + OutStreamer.EmitLabel(Sym); + OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global); + OutStreamer.EmitIntValue(Value, AlignSize); + OutStreamer.EmitCodeAlignment(AlignSize); + } + } else { + assert(Imm.isExpr() && "Expected expression and found none"); + const MachineOperand &MO = MI.getOperand(1); + assert(MO.isGlobal() || MO.isCPI() || MO.isJTI()); + MCSymbol *MOSymbol = nullptr; + if (MO.isGlobal()) + MOSymbol = AP.getSymbol(MO.getGlobal()); + else if (MO.isCPI()) + MOSymbol = AP.GetCPISymbol(MO.getIndex()); + else if (MO.isJTI()) + MOSymbol = AP.GetJTISymbol(MO.getIndex()); + else + llvm_unreachable("Unknown operand type!"); + + StringRef SymbolName = MOSymbol->getName(); + std::string LitaName = ".CONST_" + SymbolName.str(); + + MCSectionELF *Section = OutStreamer.getContext().getELFSection( + ".lita", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); + + OutStreamer.SwitchSection(Section); + Sym = AP.OutContext.getOrCreateSymbol(Twine(LitaName)); + if (Sym->isUndefined()) { + OutStreamer.EmitLabel(Sym); + OutStreamer.EmitSymbolAttribute(Sym, MCSA_Local); + OutStreamer.EmitValue(Imm.getExpr(), AlignSize); + OutStreamer.EmitCodeAlignment(AlignSize); + } + } + return Sym; +} + +void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, + const MachineInstr &MI) { + MCInst &MappedInst = static_cast <MCInst &>(Inst); + const MCRegisterInfo *RI = OutStreamer->getContext().getRegisterInfo(); + + switch (Inst.getOpcode()) { + default: return; + + // "$dst = CONST64(#$src1)", + case Hexagon::CONST64_Float_Real: + case Hexagon::CONST64_Int_Real: + if (!OutStreamer->hasRawTextSupport()) { + const MCOperand &Imm = MappedInst.getOperand(1); + MCSectionSubPair Current = OutStreamer->getCurrentSection(); + + MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 8); + + OutStreamer->SwitchSection(Current.first, Current.second); + MCInst TmpInst; + MCOperand &Reg = MappedInst.getOperand(0); + TmpInst.setOpcode(Hexagon::L2_loadrdgp); + TmpInst.addOperand(Reg); + TmpInst.addOperand(MCOperand::createExpr( + MCSymbolRefExpr::create(Sym, OutContext))); + MappedInst = TmpInst; + + } + break; + case Hexagon::CONST32: + case Hexagon::CONST32_Float_Real: + case Hexagon::CONST32_Int_Real: + case Hexagon::FCONST32_nsdata: + if (!OutStreamer->hasRawTextSupport()) { + MCOperand &Imm = MappedInst.getOperand(1); + MCSectionSubPair Current = OutStreamer->getCurrentSection(); + MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 4); + OutStreamer->SwitchSection(Current.first, Current.second); + MCInst TmpInst; + MCOperand &Reg = MappedInst.getOperand(0); + TmpInst.setOpcode(Hexagon::L2_loadrigp); + TmpInst.addOperand(Reg); + TmpInst.addOperand(MCOperand::createExpr( + MCSymbolRefExpr::create(Sym, OutContext))); + MappedInst = TmpInst; + } + break; + + // C2_pxfer_map maps to C2_or instruction. Though, it's possible to use + // C2_or during instruction selection itself but it results + // into suboptimal code. 
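+  // In effect "$Pd = $Ps" is emitted here as "$Pd = or($Ps, $Ps)": the
+  // incoming MCInst already carries Pd and Ps, so only Ps is appended again
+  // before the opcode is rewritten (a sketch of the mapping, inferred from
+  // the code below).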
+ case Hexagon::C2_pxfer_map: { + MCOperand &Ps = Inst.getOperand(1); + MappedInst.setOpcode(Hexagon::C2_or); + MappedInst.addOperand(Ps); + return; + } + + // Vector reduce complex multiply by scalar, Rt & 1 map to :hi else :lo + // The insn is mapped from the 4 operand to the 3 operand raw form taking + // 3 register pairs. + case Hexagon::M2_vrcmpys_acc_s1: { + MCOperand &Rt = Inst.getOperand(3); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h); + else + MappedInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + case Hexagon::M2_vrcmpys_s1: { + MCOperand &Rt = Inst.getOperand(2); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1_h); + else + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1_l); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + + case Hexagon::M2_vrcmpys_s1rp: { + MCOperand &Rt = Inst.getOperand(2); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h); + else + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + + case Hexagon::A4_boundscheck: { + MCOperand &Rs = Inst.getOperand(1); + assert (Rs.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rs.getReg()); + if (Reg & 1) // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2 + MappedInst.setOpcode(Hexagon::A4_boundscheck_hi); + else // raw:lo + MappedInst.setOpcode(Hexagon::A4_boundscheck_lo); + Rs.setReg(getHexagonRegisterPair(Rs.getReg(), RI)); + return; + } + case Hexagon::S5_asrhub_rnd_sat_goodsyntax: { + MCOperand &MO = MappedInst.getOperand(2); + int64_t Imm; + MCExpr const *Expr = MO.getExpr(); + bool Success = Expr->evaluateAsAbsolute(Imm); + assert (Success && "Expected immediate and none was found");(void)Success; + MCInst TmpInst; + if (Imm == 0) { + TmpInst.setOpcode(Hexagon::S2_vsathub); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + MappedInst = TmpInst; + return; + } + TmpInst.setOpcode(Hexagon::S5_asrhub_rnd_sat); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + const MCExpr *One = MCConstantExpr::create(1, OutContext); + const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Sub)); + MappedInst = TmpInst; + return; + } + case Hexagon::S5_vasrhrnd_goodsyntax: + case Hexagon::S2_asr_i_p_rnd_goodsyntax: { + MCOperand &MO2 = MappedInst.getOperand(2); + MCExpr const *Expr = MO2.getExpr(); + int64_t Imm; + bool Success = Expr->evaluateAsAbsolute(Imm); + assert (Success && "Expected immediate and none was found");(void)Success; + MCInst TmpInst; + if (Imm == 0) { + TmpInst.setOpcode(Hexagon::A2_combinew); + TmpInst.addOperand(MappedInst.getOperand(0)); + MCOperand &MO1 = MappedInst.getOperand(1); + unsigned High = RI->getSubReg(MO1.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO1.getReg(), Hexagon::subreg_loreg); + // Add a new operand for the second register in the pair. 
+ TmpInst.addOperand(MCOperand::createReg(High)); + TmpInst.addOperand(MCOperand::createReg(Low)); + MappedInst = TmpInst; + return; + } + + if (Inst.getOpcode() == Hexagon::S2_asr_i_p_rnd_goodsyntax) + TmpInst.setOpcode(Hexagon::S2_asr_i_p_rnd); + else + TmpInst.setOpcode(Hexagon::S5_vasrhrnd); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + const MCExpr *One = MCConstantExpr::create(1, OutContext); + const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Sub)); + MappedInst = TmpInst; + return; + } + // if ("#u5==0") Assembler mapped to: "Rd=Rs"; else Rd=asr(Rs,#u5-1):rnd + case Hexagon::S2_asr_i_r_rnd_goodsyntax: { + MCOperand &MO = Inst.getOperand(2); + MCExpr const *Expr = MO.getExpr(); + int64_t Imm; + bool Success = Expr->evaluateAsAbsolute(Imm); + assert (Success && "Expected immediate and none was found");(void)Success; + MCInst TmpInst; + if (Imm == 0) { + TmpInst.setOpcode(Hexagon::A2_tfr); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + MappedInst = TmpInst; + return; + } + TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + const MCExpr *One = MCConstantExpr::create(1, OutContext); + const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Sub)); + MappedInst = TmpInst; + return; + } + case Hexagon::TFRI_f: + MappedInst.setOpcode(Hexagon::A2_tfrsi); + return; + case Hexagon::TFRI_cPt_f: + MappedInst.setOpcode(Hexagon::C2_cmoveit); + return; + case Hexagon::TFRI_cNotPt_f: + MappedInst.setOpcode(Hexagon::C2_cmoveif); + return; + case Hexagon::MUX_ri_f: + MappedInst.setOpcode(Hexagon::C2_muxri); + return; + case Hexagon::MUX_ir_f: + MappedInst.setOpcode(Hexagon::C2_muxir); + return; + + // Translate a "$Rdd = #imm" to "$Rdd = combine(#[-1,0], #imm)" + case Hexagon::A2_tfrpi: { + MCInst TmpInst; + MCOperand &Rdd = MappedInst.getOperand(0); + MCOperand &MO = MappedInst.getOperand(1); + + TmpInst.setOpcode(Hexagon::A2_combineii); + TmpInst.addOperand(Rdd); + int64_t Imm; + bool Success = MO.getExpr()->evaluateAsAbsolute(Imm); + if (Success && Imm < 0) { + const MCExpr *MOne = MCConstantExpr::create(-1, OutContext); + TmpInst.addOperand(MCOperand::createExpr(MOne)); + } else { + const MCExpr *Zero = MCConstantExpr::create(0, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Zero)); + } + TmpInst.addOperand(MO); + MappedInst = TmpInst; + return; + } + // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)" + case Hexagon::A2_tfrp: { + MCOperand &MO = MappedInst.getOperand(1); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + MO.setReg(High); + // Add a new operand for the second register in the pair. + MappedInst.addOperand(MCOperand::createReg(Low)); + MappedInst.setOpcode(Hexagon::A2_combinew); + return; + } + + case Hexagon::A2_tfrpt: + case Hexagon::A2_tfrpf: { + MCOperand &MO = MappedInst.getOperand(2); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + MO.setReg(High); + // Add a new operand for the second register in the pair. + MappedInst.addOperand(MCOperand::createReg(Low)); + MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt) + ? 
Hexagon::C2_ccombinewt + : Hexagon::C2_ccombinewf); + return; + } + case Hexagon::A2_tfrptnew: + case Hexagon::A2_tfrpfnew: { + MCOperand &MO = MappedInst.getOperand(2); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + MO.setReg(High); + // Add a new operand for the second register in the pair. + MappedInst.addOperand(MCOperand::createReg(Low)); + MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew) + ? Hexagon::C2_ccombinewnewt + : Hexagon::C2_ccombinewnewf); + return; + } + + case Hexagon::M2_mpysmi: { + MCOperand &Imm = MappedInst.getOperand(2); + MCExpr const *Expr = Imm.getExpr(); + int64_t Value; + bool Success = Expr->evaluateAsAbsolute(Value); + assert(Success);(void)Success; + if (Value < 0 && Value > -256) { + MappedInst.setOpcode(Hexagon::M2_mpysin); + Imm.setExpr(MCUnaryExpr::createMinus(Expr, OutContext)); + } + else + MappedInst.setOpcode(Hexagon::M2_mpysip); + return; + } + + case Hexagon::A2_addsp: { + MCOperand &Rt = Inst.getOperand(1); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::A2_addsph); + else + MappedInst.setOpcode(Hexagon::A2_addspl); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + case Hexagon::HEXAGON_V6_vd0_pseudo: + case Hexagon::HEXAGON_V6_vd0_pseudo_128B: { + MCInst TmpInst; + assert (Inst.getOperand(0).isReg() && + "Expected register and none was found"); + + TmpInst.setOpcode(Hexagon::V6_vxor); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + MappedInst = TmpInst; + return; + } + + } +} + + +/// printMachineInstruction -- Print out a single Hexagon MI in Darwin syntax to +/// the current output stream. +/// +void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { + MCInst MCB = HexagonMCInstrInfo::createBundle(); + const MCInstrInfo &MCII = *Subtarget->getInstrInfo(); + + if (MI->isBundle()) { + const MachineBasicBlock* MBB = MI->getParent(); + MachineBasicBlock::const_instr_iterator MII = MI->getIterator(); + unsigned IgnoreCount = 0; + + for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII) + if (MII->getOpcode() == TargetOpcode::DBG_VALUE || + MII->getOpcode() == TargetOpcode::IMPLICIT_DEF) + ++IgnoreCount; + else + HexagonLowerToMC(MCII, &*MII, MCB, *this); + } + else + HexagonLowerToMC(MCII, MI, MCB, *this); + + bool Ok = HexagonMCInstrInfo::canonicalizePacket( + MCII, *Subtarget, OutStreamer->getContext(), MCB, nullptr); + assert(Ok); + (void)Ok; + if(HexagonMCInstrInfo::bundleSize(MCB) == 0) + return; + OutStreamer->EmitInstruction(MCB, getSubtargetInfo()); +} + +extern "C" void LLVMInitializeHexagonAsmPrinter() { + RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h new file mode 100644 index 0000000..a78d97e --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h @@ -0,0 +1,62 @@ +//===-- HexagonAsmPrinter.h - Print machine code to an Hexagon .s file ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Hexagon Assembly printer class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONASMPRINTER_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONASMPRINTER_H + +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + class HexagonAsmPrinter : public AsmPrinter { + const HexagonSubtarget *Subtarget; + + public: + explicit HexagonAsmPrinter(TargetMachine &TM, + std::unique_ptr<MCStreamer> Streamer); + + bool runOnMachineFunction(MachineFunction &Fn) override { + Subtarget = &Fn.getSubtarget<HexagonSubtarget>(); + return AsmPrinter::runOnMachineFunction(Fn); + } + + const char *getPassName() const override { + return "Hexagon Assembly Printer"; + } + + bool isBlockOnlyReachableByFallthrough( + const MachineBasicBlock *MBB) const override; + + void EmitInstruction(const MachineInstr *MI) override; + + void HexagonProcessInstruction(MCInst &Inst, + const MachineInstr &MBB); + + + void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) override; + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) override; + + static const char *getRegisterName(unsigned RegNo); + }; + +} // end of llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp new file mode 100644 index 0000000..77907b0 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -0,0 +1,2778 @@ +//===--- HexagonBitSimplify.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexbit" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "HexagonTargetMachine.h" +#include "HexagonBitTracker.h" + +using namespace llvm; + +namespace llvm { + void initializeHexagonBitSimplifyPass(PassRegistry& Registry); + FunctionPass *createHexagonBitSimplify(); +} + +namespace { + // Set of virtual registers, based on BitVector. 
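+  // Virtual register numbers are mapped to bit positions via virtReg2Index,
+  // so insert/remove/has are plain bit operations. find_first/find_next
+  // return 0 when the set is exhausted; 0 is never a virtual register
+  // number, so it is safe as an end-of-iteration sentinel.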
+ struct RegisterSet : private BitVector { + RegisterSet() : BitVector() {} + explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} + RegisterSet(const RegisterSet &RS) : BitVector(RS) {} + + using BitVector::clear; + using BitVector::count; + + unsigned find_first() const { + int First = BitVector::find_first(); + if (First < 0) + return 0; + return x2v(First); + } + + unsigned find_next(unsigned Prev) const { + int Next = BitVector::find_next(v2x(Prev)); + if (Next < 0) + return 0; + return x2v(Next); + } + + RegisterSet &insert(unsigned R) { + unsigned Idx = v2x(R); + ensure(Idx); + return static_cast<RegisterSet&>(BitVector::set(Idx)); + } + RegisterSet &remove(unsigned R) { + unsigned Idx = v2x(R); + if (Idx >= size()) + return *this; + return static_cast<RegisterSet&>(BitVector::reset(Idx)); + } + + RegisterSet &insert(const RegisterSet &Rs) { + return static_cast<RegisterSet&>(BitVector::operator|=(Rs)); + } + RegisterSet &remove(const RegisterSet &Rs) { + return static_cast<RegisterSet&>(BitVector::reset(Rs)); + } + + reference operator[](unsigned R) { + unsigned Idx = v2x(R); + ensure(Idx); + return BitVector::operator[](Idx); + } + bool operator[](unsigned R) const { + unsigned Idx = v2x(R); + assert(Idx < size()); + return BitVector::operator[](Idx); + } + bool has(unsigned R) const { + unsigned Idx = v2x(R); + if (Idx >= size()) + return false; + return BitVector::test(Idx); + } + + bool empty() const { + return !BitVector::any(); + } + bool includes(const RegisterSet &Rs) const { + // A.BitVector::test(B) <=> A-B != {} + return !Rs.BitVector::test(*this); + } + bool intersects(const RegisterSet &Rs) const { + return BitVector::anyCommon(Rs); + } + + private: + void ensure(unsigned Idx) { + if (size() <= Idx) + resize(std::max(Idx+1, 32U)); + } + static inline unsigned v2x(unsigned v) { + return TargetRegisterInfo::virtReg2Index(v); + } + static inline unsigned x2v(unsigned x) { + return TargetRegisterInfo::index2VirtReg(x); + } + }; + + + struct PrintRegSet { + PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) + : RS(S), TRI(RI) {} + friend raw_ostream &operator<< (raw_ostream &OS, + const PrintRegSet &P); + private: + const RegisterSet &RS; + const TargetRegisterInfo *TRI; + }; + + raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) + LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { + OS << '{'; + for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R)) + OS << ' ' << PrintReg(R, P.TRI); + OS << " }"; + return OS; + } +} + + +namespace { + class Transformation; + + class HexagonBitSimplify : public MachineFunctionPass { + public: + static char ID; + HexagonBitSimplify() : MachineFunctionPass(ID), MDT(0) { + initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const { + return "Hexagon bit simplification"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs); + static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses); + static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1, + const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W); + static bool isConst(const BitTracker::RegisterCell &RC, uint16_t B, + uint16_t W); + static bool 
isZero(const BitTracker::RegisterCell &RC, uint16_t B, + uint16_t W); + static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B, + uint16_t W, uint64_t &U); + static bool replaceReg(unsigned OldR, unsigned NewR, + MachineRegisterInfo &MRI); + static bool getSubregMask(const BitTracker::RegisterRef &RR, + unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI); + static bool replaceRegWithSub(unsigned OldR, unsigned NewR, + unsigned NewSR, MachineRegisterInfo &MRI); + static bool replaceSubWithSub(unsigned OldR, unsigned OldSR, + unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI); + static bool parseRegSequence(const MachineInstr &I, + BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH); + + static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits, + uint16_t Begin); + static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits, + uint16_t Begin, const HexagonInstrInfo &HII); + + static const TargetRegisterClass *getFinalVRegClass( + const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI); + static bool isTransparentCopy(const BitTracker::RegisterRef &RD, + const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI); + + private: + MachineDominatorTree *MDT; + + bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs); + }; + + char HexagonBitSimplify::ID = 0; + typedef HexagonBitSimplify HBS; + + + // The purpose of this class is to provide a common facility to traverse + // the function top-down or bottom-up via the dominator tree, and keep + // track of the available registers. + class Transformation { + public: + bool TopDown; + Transformation(bool TD) : TopDown(TD) {} + virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0; + virtual ~Transformation() {} + }; +} + +INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit", + "Hexagon bit simplification", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit", + "Hexagon bit simplification", false, false) + + +bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T, + RegisterSet &AVs) { + MachineDomTreeNode *N = MDT->getNode(&B); + typedef GraphTraits<MachineDomTreeNode*> GTN; + bool Changed = false; + + if (T.TopDown) + Changed = T.processBlock(B, AVs); + + RegisterSet Defs; + for (auto &I : B) + getInstrDefs(I, Defs); + RegisterSet NewAVs = AVs; + NewAVs.insert(Defs); + + for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) { + MachineBasicBlock *SB = (*I)->getBlock(); + Changed |= visitBlock(*SB, T, NewAVs); + } + if (!T.TopDown) + Changed |= T.processBlock(B, AVs); + + return Changed; +} + +// +// Utility functions: +// +void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI, + RegisterSet &Defs) { + for (auto &Op : MI.operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Defs.insert(R); + } +} + +void HexagonBitSimplify::getInstrUses(const MachineInstr &MI, + RegisterSet &Uses) { + for (auto &Op : MI.operands()) { + if (!Op.isReg() || !Op.isUse()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Uses.insert(R); + } +} + +// Check if all the bits in range [B, E) in both cells are equal. 
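+// "Equal" here means provably equal: W bits of RC1 starting at B1 are
+// compared with W bits of RC2 starting at B2, and any bit whose value is
+// unknown (a "bottom" value, i.e. a self-referencing bit with Reg == 0)
+// makes the whole comparison fail.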
+bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1, + uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2, + uint16_t W) { + for (uint16_t i = 0; i < W; ++i) { + // If RC1[i] is "bottom", it cannot be proven equal to RC2[i]. + if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0) + return false; + // Same for RC2[i]. + if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0) + return false; + if (RC1[B1+i] != RC2[B2+i]) + return false; + } + return true; +} + + +bool HexagonBitSimplify::isConst(const BitTracker::RegisterCell &RC, + uint16_t B, uint16_t W) { + assert(B < RC.width() && B+W <= RC.width()); + for (uint16_t i = B; i < B+W; ++i) + if (!RC[i].num()) + return false; + return true; +} + + +bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC, + uint16_t B, uint16_t W) { + assert(B < RC.width() && B+W <= RC.width()); + for (uint16_t i = B; i < B+W; ++i) + if (!RC[i].is(0)) + return false; + return true; +} + + +bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC, + uint16_t B, uint16_t W, uint64_t &U) { + assert(B < RC.width() && B+W <= RC.width()); + int64_t T = 0; + for (uint16_t i = B+W; i > B; --i) { + const BitTracker::BitValue &BV = RC[i-1]; + T <<= 1; + if (BV.is(1)) + T |= 1; + else if (!BV.is(0)) + return false; + } + U = T; + return true; +} + + +bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR, + MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(OldR) || + !TargetRegisterInfo::isVirtualRegister(NewR)) + return false; + auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + I->setReg(NewR); + } + return Begin != End; +} + + +bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR, + unsigned NewSR, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(OldR) || + !TargetRegisterInfo::isVirtualRegister(NewR)) + return false; + auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + I->setReg(NewR); + I->setSubReg(NewSR); + } + return Begin != End; +} + + +bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR, + unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(OldR) || + !TargetRegisterInfo::isVirtualRegister(NewR)) + return false; + auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + if (I->getSubReg() != OldSR) + continue; + I->setReg(NewR); + I->setSubReg(NewSR); + } + return Begin != End; +} + + +// For a register ref (pair Reg:Sub), set Begin to the position of the LSB +// of Sub in Reg, and set Width to the size of Sub in bits. Return true, +// if this succeeded, otherwise return false. +bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, + unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) { + const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg); + if (RC == &Hexagon::IntRegsRegClass) { + assert(RR.Sub == 0); + Begin = 0; + Width = 32; + return true; + } + if (RC == &Hexagon::DoubleRegsRegClass) { + if (RR.Sub == 0) { + Begin = 0; + Width = 64; + return true; + } + assert(RR.Sub == Hexagon::subreg_loreg || RR.Sub == Hexagon::subreg_hireg); + Width = 32; + Begin = (RR.Sub == Hexagon::subreg_loreg ? 
0 : 32); + return true; + } + return false; +} + + +// For a REG_SEQUENCE, set SL to the low subregister and SH to the high +// subregister. +bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I, + BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH) { + assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE); + unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm(); + assert(Sub1 != Sub2); + if (Sub1 == Hexagon::subreg_loreg && Sub2 == Hexagon::subreg_hireg) { + SL = I.getOperand(1); + SH = I.getOperand(3); + return true; + } + if (Sub1 == Hexagon::subreg_hireg && Sub2 == Hexagon::subreg_loreg) { + SH = I.getOperand(1); + SL = I.getOperand(3); + return true; + } + return false; +} + + +// All stores (except 64-bit stores) take a 32-bit register as the source +// of the value to be stored. If the instruction stores into a location +// that is shorter than 32 bits, some bits of the source register are not +// used. For each store instruction, calculate the set of used bits in +// the source register, and set appropriate bits in Bits. Return true if +// the bits are calculated, false otherwise. +bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits, + uint16_t Begin) { + using namespace Hexagon; + + switch (Opc) { + // Store byte + case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32 + case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new + case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32 + case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32 + case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32 + case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32 + case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new + case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new + case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new + case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new + case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32 + case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new + case S4_storerb_ap: // memb(Re32=#U6)=Rt32 + case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new + case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32 + case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new + case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32 + case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new + case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32 + case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new + case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32 + case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new + case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32 + case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new + case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32 + case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbfnew_rr: // if (!Pv4.new) 
memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbnewf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new + case S2_storerbgp: // memb(gp+#u16:0)=Rt32 + case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new + case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32 + case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32 + case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32 + case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32 + case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new + case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new + case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new + case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new + Bits.set(Begin, Begin+8); + return true; + + // Store low half + case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32 + case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new + case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32 + case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32 + case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32 + case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32 + case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new + case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new + case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new + case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new + case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32 + case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new + case S4_storerh_ap: // memh(Re32=#U6)=Rt32 + case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new + case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32 + case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new + case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32 + case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new + case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32 + case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new + case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32 + case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new + case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32 + case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new + case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new + case S2_storerhgp: // memh(gp+#u16:1)=Rt32 
+ case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new + case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32 + case S4_pstorerhf_abs: // if (!Pv4) memh(#u6)=Rt32 + case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32 + case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32 + case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new + case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new + case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new + case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new + Bits.set(Begin, Begin+16); + return true; + + // Store high half + case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32 + case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32 + case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32 + case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32 + case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32 + case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32 + case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32 + case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32 + case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32 + case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32 + case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32 + case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32 + case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32 + case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32 + case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32 + case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32 + case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32 + Bits.set(Begin+16, Begin+32); + return true; + } + + return false; +} + + +// For an instruction with opcode Opc, calculate the set of bits that it +// uses in a register in operand OpN. This only calculates the set of used +// bits for cases where it does not depend on any operands (as is the case +// in shifts, for example). For concrete instructions from a program, the +// operand may be a subregister of a larger register, while Bits would +// correspond to the larger register in its entirety. Because of that, +// the parameter Begin can be used to indicate which bit of Bits should be +// considered the LSB of of the operand. +bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN, + BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) { + using namespace Hexagon; + + const MCInstrDesc &D = HII.get(Opc); + if (D.mayStore()) { + if (OpN == D.getNumOperands()-1) + return getUsedBitsInStore(Opc, Bits, Begin); + return false; + } + + switch (Opc) { + // One register source. Used bits: R1[0-7]. + case A2_sxtb: + case A2_zxtb: + case A4_cmpbeqi: + case A4_cmpbgti: + case A4_cmpbgtui: + if (OpN == 1) { + Bits.set(Begin, Begin+8); + return true; + } + break; + + // One register source. Used bits: R1[0-15]. 
+ case A2_aslh: + case A2_sxth: + case A2_zxth: + case A4_cmpheqi: + case A4_cmphgti: + case A4_cmphgtui: + if (OpN == 1) { + Bits.set(Begin, Begin+16); + return true; + } + break; + + // One register source. Used bits: R1[16-31]. + case A2_asrh: + if (OpN == 1) { + Bits.set(Begin+16, Begin+32); + return true; + } + break; + + // Two register sources. Used bits: R1[0-7], R2[0-7]. + case A4_cmpbeq: + case A4_cmpbgt: + case A4_cmpbgtu: + if (OpN == 1) { + Bits.set(Begin, Begin+8); + return true; + } + break; + + // Two register sources. Used bits: R1[0-15], R2[0-15]. + case A4_cmpheq: + case A4_cmphgt: + case A4_cmphgtu: + case A2_addh_h16_ll: + case A2_addh_h16_sat_ll: + case A2_addh_l16_ll: + case A2_addh_l16_sat_ll: + case A2_combine_ll: + case A2_subh_h16_ll: + case A2_subh_h16_sat_ll: + case A2_subh_l16_ll: + case A2_subh_l16_sat_ll: + case M2_mpy_acc_ll_s0: + case M2_mpy_acc_ll_s1: + case M2_mpy_acc_sat_ll_s0: + case M2_mpy_acc_sat_ll_s1: + case M2_mpy_ll_s0: + case M2_mpy_ll_s1: + case M2_mpy_nac_ll_s0: + case M2_mpy_nac_ll_s1: + case M2_mpy_nac_sat_ll_s0: + case M2_mpy_nac_sat_ll_s1: + case M2_mpy_rnd_ll_s0: + case M2_mpy_rnd_ll_s1: + case M2_mpy_sat_ll_s0: + case M2_mpy_sat_ll_s1: + case M2_mpy_sat_rnd_ll_s0: + case M2_mpy_sat_rnd_ll_s1: + case M2_mpyd_acc_ll_s0: + case M2_mpyd_acc_ll_s1: + case M2_mpyd_ll_s0: + case M2_mpyd_ll_s1: + case M2_mpyd_nac_ll_s0: + case M2_mpyd_nac_ll_s1: + case M2_mpyd_rnd_ll_s0: + case M2_mpyd_rnd_ll_s1: + case M2_mpyu_acc_ll_s0: + case M2_mpyu_acc_ll_s1: + case M2_mpyu_ll_s0: + case M2_mpyu_ll_s1: + case M2_mpyu_nac_ll_s0: + case M2_mpyu_nac_ll_s1: + case M2_mpyud_acc_ll_s0: + case M2_mpyud_acc_ll_s1: + case M2_mpyud_ll_s0: + case M2_mpyud_ll_s1: + case M2_mpyud_nac_ll_s0: + case M2_mpyud_nac_ll_s1: + if (OpN == 1 || OpN == 2) { + Bits.set(Begin, Begin+16); + return true; + } + break; + + // Two register sources. Used bits: R1[0-15], R2[16-31]. + case A2_addh_h16_lh: + case A2_addh_h16_sat_lh: + case A2_combine_lh: + case A2_subh_h16_lh: + case A2_subh_h16_sat_lh: + case M2_mpy_acc_lh_s0: + case M2_mpy_acc_lh_s1: + case M2_mpy_acc_sat_lh_s0: + case M2_mpy_acc_sat_lh_s1: + case M2_mpy_lh_s0: + case M2_mpy_lh_s1: + case M2_mpy_nac_lh_s0: + case M2_mpy_nac_lh_s1: + case M2_mpy_nac_sat_lh_s0: + case M2_mpy_nac_sat_lh_s1: + case M2_mpy_rnd_lh_s0: + case M2_mpy_rnd_lh_s1: + case M2_mpy_sat_lh_s0: + case M2_mpy_sat_lh_s1: + case M2_mpy_sat_rnd_lh_s0: + case M2_mpy_sat_rnd_lh_s1: + case M2_mpyd_acc_lh_s0: + case M2_mpyd_acc_lh_s1: + case M2_mpyd_lh_s0: + case M2_mpyd_lh_s1: + case M2_mpyd_nac_lh_s0: + case M2_mpyd_nac_lh_s1: + case M2_mpyd_rnd_lh_s0: + case M2_mpyd_rnd_lh_s1: + case M2_mpyu_acc_lh_s0: + case M2_mpyu_acc_lh_s1: + case M2_mpyu_lh_s0: + case M2_mpyu_lh_s1: + case M2_mpyu_nac_lh_s0: + case M2_mpyu_nac_lh_s1: + case M2_mpyud_acc_lh_s0: + case M2_mpyud_acc_lh_s1: + case M2_mpyud_lh_s0: + case M2_mpyud_lh_s1: + case M2_mpyud_nac_lh_s0: + case M2_mpyud_nac_lh_s1: + // These four are actually LH. + case A2_addh_l16_hl: + case A2_addh_l16_sat_hl: + case A2_subh_l16_hl: + case A2_subh_l16_sat_hl: + if (OpN == 1) { + Bits.set(Begin, Begin+16); + return true; + } + if (OpN == 2) { + Bits.set(Begin+16, Begin+32); + return true; + } + break; + + // Two register sources, used bits: R1[16-31], R2[0-15]. 
+ case A2_addh_h16_hl: + case A2_addh_h16_sat_hl: + case A2_combine_hl: + case A2_subh_h16_hl: + case A2_subh_h16_sat_hl: + case M2_mpy_acc_hl_s0: + case M2_mpy_acc_hl_s1: + case M2_mpy_acc_sat_hl_s0: + case M2_mpy_acc_sat_hl_s1: + case M2_mpy_hl_s0: + case M2_mpy_hl_s1: + case M2_mpy_nac_hl_s0: + case M2_mpy_nac_hl_s1: + case M2_mpy_nac_sat_hl_s0: + case M2_mpy_nac_sat_hl_s1: + case M2_mpy_rnd_hl_s0: + case M2_mpy_rnd_hl_s1: + case M2_mpy_sat_hl_s0: + case M2_mpy_sat_hl_s1: + case M2_mpy_sat_rnd_hl_s0: + case M2_mpy_sat_rnd_hl_s1: + case M2_mpyd_acc_hl_s0: + case M2_mpyd_acc_hl_s1: + case M2_mpyd_hl_s0: + case M2_mpyd_hl_s1: + case M2_mpyd_nac_hl_s0: + case M2_mpyd_nac_hl_s1: + case M2_mpyd_rnd_hl_s0: + case M2_mpyd_rnd_hl_s1: + case M2_mpyu_acc_hl_s0: + case M2_mpyu_acc_hl_s1: + case M2_mpyu_hl_s0: + case M2_mpyu_hl_s1: + case M2_mpyu_nac_hl_s0: + case M2_mpyu_nac_hl_s1: + case M2_mpyud_acc_hl_s0: + case M2_mpyud_acc_hl_s1: + case M2_mpyud_hl_s0: + case M2_mpyud_hl_s1: + case M2_mpyud_nac_hl_s0: + case M2_mpyud_nac_hl_s1: + if (OpN == 1) { + Bits.set(Begin+16, Begin+32); + return true; + } + if (OpN == 2) { + Bits.set(Begin, Begin+16); + return true; + } + break; + + // Two register sources, used bits: R1[16-31], R2[16-31]. + case A2_addh_h16_hh: + case A2_addh_h16_sat_hh: + case A2_combine_hh: + case A2_subh_h16_hh: + case A2_subh_h16_sat_hh: + case M2_mpy_acc_hh_s0: + case M2_mpy_acc_hh_s1: + case M2_mpy_acc_sat_hh_s0: + case M2_mpy_acc_sat_hh_s1: + case M2_mpy_hh_s0: + case M2_mpy_hh_s1: + case M2_mpy_nac_hh_s0: + case M2_mpy_nac_hh_s1: + case M2_mpy_nac_sat_hh_s0: + case M2_mpy_nac_sat_hh_s1: + case M2_mpy_rnd_hh_s0: + case M2_mpy_rnd_hh_s1: + case M2_mpy_sat_hh_s0: + case M2_mpy_sat_hh_s1: + case M2_mpy_sat_rnd_hh_s0: + case M2_mpy_sat_rnd_hh_s1: + case M2_mpyd_acc_hh_s0: + case M2_mpyd_acc_hh_s1: + case M2_mpyd_hh_s0: + case M2_mpyd_hh_s1: + case M2_mpyd_nac_hh_s0: + case M2_mpyd_nac_hh_s1: + case M2_mpyd_rnd_hh_s0: + case M2_mpyd_rnd_hh_s1: + case M2_mpyu_acc_hh_s0: + case M2_mpyu_acc_hh_s1: + case M2_mpyu_hh_s0: + case M2_mpyu_hh_s1: + case M2_mpyu_nac_hh_s0: + case M2_mpyu_nac_hh_s1: + case M2_mpyud_acc_hh_s0: + case M2_mpyud_acc_hh_s1: + case M2_mpyud_hh_s0: + case M2_mpyud_hh_s1: + case M2_mpyud_nac_hh_s0: + case M2_mpyud_nac_hh_s1: + if (OpN == 1 || OpN == 2) { + Bits.set(Begin+16, Begin+32); + return true; + } + break; + } + + return false; +} + + +// Calculate the register class that matches Reg:Sub. For example, if +// vreg1 is a double register, then vreg1:subreg_hireg would match "int" +// register class. +const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( + const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return nullptr; + auto *RC = MRI.getRegClass(RR.Reg); + if (RR.Sub == 0) + return RC; + + auto VerifySR = [] (unsigned Sub) -> void { + assert(Sub == Hexagon::subreg_hireg || Sub == Hexagon::subreg_loreg); + }; + + switch (RC->getID()) { + case Hexagon::DoubleRegsRegClassID: + VerifySR(RR.Sub); + return &Hexagon::IntRegsRegClass; + } + return nullptr; +} + + +// Check if RD could be replaced with RS at any possible use of RD. +// For example a predicate register cannot be replaced with a integer +// register, but a 64-bit register with a subregister can be replaced +// with a 32-bit register. 
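+// A short sketch with made-up virtual registers: if vreg1 is a DoubleRegs
+// register, then vreg1:subreg_loreg and a plain IntRegs register vreg0 both
+// map to IntRegs in getFinalVRegClass, so the copy is transparent; replacing
+// an IntRegs value with a PredRegs value is rejected because the final
+// classes differ.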
+bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD, + const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(RD.Reg) || + !TargetRegisterInfo::isVirtualRegister(RS.Reg)) + return false; + // Return false if one (or both) classes are nullptr. + auto *DRC = getFinalVRegClass(RD, MRI); + if (!DRC) + return false; + + return DRC == getFinalVRegClass(RS, MRI); +} + + +// +// Dead code elimination +// +namespace { + class DeadCodeElimination { + public: + DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt) + : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()), + MDT(mdt), MRI(mf.getRegInfo()) {} + + bool run() { + return runOnNode(MDT.getRootNode()); + } + + private: + bool isDead(unsigned R) const; + bool runOnNode(MachineDomTreeNode *N); + + MachineFunction &MF; + const HexagonInstrInfo &HII; + MachineDominatorTree &MDT; + MachineRegisterInfo &MRI; + }; +} + + +bool DeadCodeElimination::isDead(unsigned R) const { + for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) { + MachineInstr *UseI = I->getParent(); + if (UseI->isDebugValue()) + continue; + if (UseI->isPHI()) { + assert(!UseI->getOperand(0).getSubReg()); + unsigned DR = UseI->getOperand(0).getReg(); + if (DR == R) + continue; + } + return false; + } + return true; +} + + +bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) { + bool Changed = false; + typedef GraphTraits<MachineDomTreeNode*> GTN; + for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) + Changed |= runOnNode(*I); + + MachineBasicBlock *B = N->getBlock(); + std::vector<MachineInstr*> Instrs; + for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) + Instrs.push_back(&*I); + + for (auto MI : Instrs) { + unsigned Opc = MI->getOpcode(); + // Do not touch lifetime markers. This is why the target-independent DCE + // cannot be used. + if (Opc == TargetOpcode::LIFETIME_START || + Opc == TargetOpcode::LIFETIME_END) + continue; + bool Store = false; + if (MI->isInlineAsm()) + continue; + // Delete PHIs if possible. + if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store)) + continue; + + bool AllDead = true; + SmallVector<unsigned,2> Regs; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R) || !isDead(R)) { + AllDead = false; + break; + } + Regs.push_back(R); + } + if (!AllDead) + continue; + + B->erase(MI); + for (unsigned i = 0, n = Regs.size(); i != n; ++i) + MRI.markUsesInDebugValueAsUndef(Regs[i]); + Changed = true; + } + + return Changed; +} + + +// +// Eliminate redundant instructions +// +// This transformation will identify instructions where the output register +// is the same as one of its input registers. This only works on instructions +// that define a single register (unlike post-increment loads, for example). +// The equality check is actually more detailed: the code calculates which +// bits of the output are used, and only compares these bits with the input +// registers. +// If the output matches an input, the instruction is replaced with COPY. +// The copies will be removed by another transformation. 
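+// A made-up example of the kind of instruction this catches: if the bit
+// tracker proves that vreg2 is zero in
+//   vreg1 = A2_or vreg0, vreg2
+// then every used bit of vreg1 equals the corresponding bit of vreg0, and
+// the instruction can be replaced with a COPY of vreg0.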
+namespace { + class RedundantInstrElimination : public Transformation { + public: + RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii, + MachineRegisterInfo &mri) + : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: + bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN, + unsigned &LostB, unsigned &LostE); + bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN, + unsigned &LostB, unsigned &LostE); + bool computeUsedBits(unsigned Reg, BitVector &Bits); + bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits, + uint16_t Begin); + bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; +} + + +// Check if the instruction is a lossy shift left, where the input being +// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range +// of bit indices that are lost. +bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI, + unsigned OpN, unsigned &LostB, unsigned &LostE) { + using namespace Hexagon; + unsigned Opc = MI.getOpcode(); + unsigned ImN, RegN, Width; + switch (Opc) { + case S2_asl_i_p: + ImN = 2; + RegN = 1; + Width = 64; + break; + case S2_asl_i_p_acc: + case S2_asl_i_p_and: + case S2_asl_i_p_nac: + case S2_asl_i_p_or: + case S2_asl_i_p_xacc: + ImN = 3; + RegN = 2; + Width = 64; + break; + case S2_asl_i_r: + ImN = 2; + RegN = 1; + Width = 32; + break; + case S2_addasl_rrri: + case S4_andi_asl_ri: + case S4_ori_asl_ri: + case S4_addi_asl_ri: + case S4_subi_asl_ri: + case S2_asl_i_r_acc: + case S2_asl_i_r_and: + case S2_asl_i_r_nac: + case S2_asl_i_r_or: + case S2_asl_i_r_sat: + case S2_asl_i_r_xacc: + ImN = 3; + RegN = 2; + Width = 32; + break; + default: + return false; + } + + if (RegN != OpN) + return false; + + assert(MI.getOperand(ImN).isImm()); + unsigned S = MI.getOperand(ImN).getImm(); + if (S == 0) + return false; + LostB = Width-S; + LostE = Width; + return true; +} + + +// Check if the instruction is a lossy shift right, where the input being +// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range +// of bit indices that are lost. +bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI, + unsigned OpN, unsigned &LostB, unsigned &LostE) { + using namespace Hexagon; + unsigned Opc = MI.getOpcode(); + unsigned ImN, RegN; + switch (Opc) { + case S2_asr_i_p: + case S2_lsr_i_p: + ImN = 2; + RegN = 1; + break; + case S2_asr_i_p_acc: + case S2_asr_i_p_and: + case S2_asr_i_p_nac: + case S2_asr_i_p_or: + case S2_lsr_i_p_acc: + case S2_lsr_i_p_and: + case S2_lsr_i_p_nac: + case S2_lsr_i_p_or: + case S2_lsr_i_p_xacc: + ImN = 3; + RegN = 2; + break; + case S2_asr_i_r: + case S2_lsr_i_r: + ImN = 2; + RegN = 1; + break; + case S4_andi_lsr_ri: + case S4_ori_lsr_ri: + case S4_addi_lsr_ri: + case S4_subi_lsr_ri: + case S2_asr_i_r_acc: + case S2_asr_i_r_and: + case S2_asr_i_r_nac: + case S2_asr_i_r_or: + case S2_lsr_i_r_acc: + case S2_lsr_i_r_and: + case S2_lsr_i_r_nac: + case S2_lsr_i_r_or: + case S2_lsr_i_r_xacc: + ImN = 3; + RegN = 2; + break; + + default: + return false; + } + + if (RegN != OpN) + return false; + + assert(MI.getOperand(ImN).isImm()); + unsigned S = MI.getOperand(ImN).getImm(); + LostB = 0; + LostE = S; + return true; +} + + +// Calculate the bit vector that corresponds to the used bits of register Reg. 
+// The vector Bits has the same size as Reg, in bits. If the cal-
+// culation fails (i.e. the used bits are unknown), it returns false. Other-
+// wise, it returns true and sets the corresponding bits in Bits.
+bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
+  BitVector Used(Bits.size());
+  RegisterSet Visited;
+  std::vector<unsigned> Pending;
+  Pending.push_back(Reg);
+
+  for (unsigned i = 0; i < Pending.size(); ++i) {
+    unsigned R = Pending[i];
+    if (Visited.has(R))
+      continue;
+    Visited.insert(R);
+    for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
+      BitTracker::RegisterRef UR = *I;
+      unsigned B, W;
+      if (!HBS::getSubregMask(UR, B, W, MRI))
+        return false;
+      MachineInstr &UseI = *I->getParent();
+      if (UseI.isPHI() || UseI.isCopy()) {
+        unsigned DefR = UseI.getOperand(0).getReg();
+        if (!TargetRegisterInfo::isVirtualRegister(DefR))
+          return false;
+        Pending.push_back(DefR);
+      } else {
+        if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
+          return false;
+      }
+    }
+  }
+  Bits |= Used;
+  return true;
+}
+
+
+// Calculate the bits used by instruction MI in a register in operand OpN.
+// Return true/false if the calculation succeeds/fails. If it succeeds, set
+// used bits in Bits. This function does not reset any bits in Bits, so
+// subsequent calls over different instructions will result in the union
+// of the used bits in all these instructions.
+// The register in question may be used with a sub-register, whereas Bits
+// holds the bits for the entire register. To keep track of that, the
+// argument Begin indicates where in Bits is the least-significant bit
+// of the register used in operand OpN. For example, in instruction:
+//   vreg1 = S2_lsr_i_r vreg2:subreg_hireg, 10
+// operand 1 is a 32-bit register, which happens to be a subregister
+// of the 64-bit register vreg2, and that subregister starts at position 32.
+// In this case Begin=32, since Bits[32] would be the least-significant bit
+// of vreg2:subreg_hireg.
+bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
+      unsigned OpN, BitVector &Bits, uint16_t Begin) {
+  unsigned Opc = MI.getOpcode();
+  BitVector T(Bits.size());
+  bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
+  // Even if we don't have bits yet, we could still provide some information
+  // if the instruction is a lossy shift: the lost bits will be marked as
+  // not used.
+  unsigned LB, LE;
+  if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
+    assert(MI.getOperand(OpN).isReg());
+    BitTracker::RegisterRef RR = MI.getOperand(OpN);
+    const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
+    uint16_t Width = RC->getSize()*8;
+
+    if (!GotBits)
+      T.set(Begin, Begin+Width);
+    assert(LB <= LE && LB < Width && LE <= Width);
+    T.reset(Begin+LB, Begin+LE);
+    GotBits = true;
+  }
+  if (GotBits)
+    Bits |= T;
+  return GotBits;
+}
+
+
+// Calculates the used bits in RD ("defined register"), and checks if these
+// bits in RS ("used register") and RD are identical.
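+// For instance (hypothetical), if only bits [0..15] of RD are ever read by
+// its users, it is enough that RD and RS agree on those 16 bits; the upper
+// bits are allowed to differ.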
+bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD, + BitTracker::RegisterRef RS) { + const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg); + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + + unsigned DB, DW; + if (!HBS::getSubregMask(RD, DB, DW, MRI)) + return false; + unsigned SB, SW; + if (!HBS::getSubregMask(RS, SB, SW, MRI)) + return false; + if (SW != DW) + return false; + + BitVector Used(DC.width()); + if (!computeUsedBits(RD.Reg, Used)) + return false; + + for (unsigned i = 0; i != DW; ++i) + if (Used[i+DB] && DC[DB+i] != SC[SB+i]) + return false; + return true; +} + + +bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, + const RegisterSet&) { + bool Changed = false; + + for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) { + NextI = std::next(I); + MachineInstr *MI = &*I; + + if (MI->getOpcode() == TargetOpcode::COPY) + continue; + if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) + continue; + unsigned NumD = MI->getDesc().getNumDefs(); + if (NumD != 1) + continue; + + BitTracker::RegisterRef RD = MI->getOperand(0); + if (!BT.has(RD.Reg)) + continue; + const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg); + + // Find a source operand that is equal to the result. + for (auto &Op : MI->uses()) { + if (!Op.isReg()) + continue; + BitTracker::RegisterRef RS = Op; + if (!BT.has(RS.Reg)) + continue; + if (!HBS::isTransparentCopy(RD, RS, MRI)) + continue; + + unsigned BN, BW; + if (!HBS::getSubregMask(RS, BN, BW, MRI)) + continue; + + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW)) + continue; + + // If found, replace the instruction with a COPY. + DebugLoc DL = MI->getDebugLoc(); + const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); + unsigned NewR = MRI.createVirtualRegister(FRC); + BuildMI(B, I, DL, HII.get(TargetOpcode::COPY), NewR) + .addReg(RS.Reg, 0, RS.Sub); + HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); + BT.put(BitTracker::RegisterRef(NewR), SC); + Changed = true; + break; + } + } + + return Changed; +} + + +// +// Const generation +// +// Recognize instructions that produce constant values known at compile-time. +// Replace them with register definitions that load these constants directly. 
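+// A made-up illustration: if the bit tracker proves that
+//   vreg3 = A2_and vreg1, vreg2
+// always produces the value 1, a transfer-immediate
+//   vreg4 = A2_tfrsi #1
+// is inserted and the uses of vreg3 are redirected to vreg4; the original
+// definition is then left for dead-code elimination.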
+namespace { + class ConstGeneration : public Transformation { + public: + ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii, + MachineRegisterInfo &mri) + : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: + bool isTfrConst(const MachineInstr *MI) const; + bool isConst(unsigned R, int64_t &V) const; + unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C, + MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; +} + +bool ConstGeneration::isConst(unsigned R, int64_t &C) const { + if (!BT.has(R)) + return false; + const BitTracker::RegisterCell &RC = BT.lookup(R); + int64_t T = 0; + for (unsigned i = RC.width(); i > 0; --i) { + const BitTracker::BitValue &V = RC[i-1]; + T <<= 1; + if (V.is(1)) + T |= 1; + else if (!V.is(0)) + return false; + } + C = T; + return true; +} + + +bool ConstGeneration::isTfrConst(const MachineInstr *MI) const { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + case Hexagon::A2_tfrsi: + case Hexagon::A2_tfrpi: + case Hexagon::TFR_PdTrue: + case Hexagon::TFR_PdFalse: + case Hexagon::CONST32_Int_Real: + case Hexagon::CONST64_Int_Real: + return true; + } + return false; +} + + +// Generate a transfer-immediate instruction that is appropriate for the +// register class and the actual value being transferred. +unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, + MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) { + unsigned Reg = MRI.createVirtualRegister(RC); + if (RC == &Hexagon::IntRegsRegClass) { + BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg) + .addImm(int32_t(C)); + return Reg; + } + + if (RC == &Hexagon::DoubleRegsRegClass) { + if (isInt<8>(C)) { + BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg) + .addImm(C); + return Reg; + } + + unsigned Lo = Lo_32(C), Hi = Hi_32(C); + if (isInt<8>(Lo) || isInt<8>(Hi)) { + unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii + : Hexagon::A4_combineii; + BuildMI(B, At, DL, HII.get(Opc), Reg) + .addImm(int32_t(Hi)) + .addImm(int32_t(Lo)); + return Reg; + } + + BuildMI(B, At, DL, HII.get(Hexagon::CONST64_Int_Real), Reg) + .addImm(C); + return Reg; + } + + if (RC == &Hexagon::PredRegsRegClass) { + unsigned Opc; + if (C == 0) + Opc = Hexagon::TFR_PdFalse; + else if ((C & 0xFF) == 0xFF) + Opc = Hexagon::TFR_PdTrue; + else + return 0; + BuildMI(B, At, DL, HII.get(Opc), Reg); + return Reg; + } + + return 0; +} + + +bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { + bool Changed = false; + RegisterSet Defs; + + for (auto I = B.begin(), E = B.end(); I != E; ++I) { + if (isTfrConst(I)) + continue; + Defs.clear(); + HBS::getInstrDefs(*I, Defs); + if (Defs.count() != 1) + continue; + unsigned DR = Defs.find_first(); + if (!TargetRegisterInfo::isVirtualRegister(DR)) + continue; + int64_t C; + if (isConst(DR, C)) { + DebugLoc DL = I->getDebugLoc(); + auto At = I->isPHI() ? B.getFirstNonPHI() : I; + unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL); + if (ImmReg) { + HBS::replaceReg(DR, ImmReg, MRI); + BT.put(ImmReg, BT.lookup(DR)); + Changed = true; + } + } + } + return Changed; +} + + +// +// Copy generation +// +// Identify pairs of available registers which hold identical values. 
+// In such cases, only one of them needs to be calculated, the other one +// will be defined as a copy of the first. +// +// Copy propagation +// +// Eliminate register copies RD = RS, by replacing the uses of RD with +// with uses of RS. +namespace { + class CopyGeneration : public Transformation { + public: + CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii, + MachineRegisterInfo &mri) + : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: + bool findMatch(const BitTracker::RegisterRef &Inp, + BitTracker::RegisterRef &Out, const RegisterSet &AVs); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; + + class CopyPropagation : public Transformation { + public: + CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) + : Transformation(false), MRI(mri) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + static bool isCopyReg(unsigned Opc); + private: + bool propagateRegCopy(MachineInstr &MI); + + MachineRegisterInfo &MRI; + }; + +} + + +/// Check if there is a register in AVs that is identical to Inp. If so, +/// set Out to the found register. The output may be a pair Reg:Sub. +bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp, + BitTracker::RegisterRef &Out, const RegisterSet &AVs) { + if (!BT.has(Inp.Reg)) + return false; + const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg); + unsigned B, W; + if (!HBS::getSubregMask(Inp, B, W, MRI)) + return false; + + for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) { + if (!BT.has(R) || !HBS::isTransparentCopy(R, Inp, MRI)) + continue; + const BitTracker::RegisterCell &RC = BT.lookup(R); + unsigned RW = RC.width(); + if (W == RW) { + if (MRI.getRegClass(Inp.Reg) != MRI.getRegClass(R)) + continue; + if (!HBS::isEqual(InpRC, B, RC, 0, W)) + continue; + Out.Reg = R; + Out.Sub = 0; + return true; + } + // Check if there is a super-register, whose part (with a subregister) + // is equal to the input. + // Only do double registers for now. + if (W*2 != RW) + continue; + if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass) + continue; + + if (HBS::isEqual(InpRC, B, RC, 0, W)) + Out.Sub = Hexagon::subreg_loreg; + else if (HBS::isEqual(InpRC, B, RC, W, W)) + Out.Sub = Hexagon::subreg_hireg; + else + continue; + Out.Reg = R; + return true; + } + return false; +} + + +bool CopyGeneration::processBlock(MachineBasicBlock &B, + const RegisterSet &AVs) { + RegisterSet AVB(AVs); + bool Changed = false; + RegisterSet Defs; + + for (auto I = B.begin(), E = B.end(), NextI = I; I != E; + ++I, AVB.insert(Defs)) { + NextI = std::next(I); + Defs.clear(); + HBS::getInstrDefs(*I, Defs); + + unsigned Opc = I->getOpcode(); + if (CopyPropagation::isCopyReg(Opc)) + continue; + + for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) { + BitTracker::RegisterRef MR; + if (!findMatch(R, MR, AVB)) + continue; + DebugLoc DL = I->getDebugLoc(); + auto *FRC = HBS::getFinalVRegClass(MR, MRI); + unsigned NewR = MRI.createVirtualRegister(FRC); + auto At = I->isPHI() ? 
B.getFirstNonPHI() : I; + BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) + .addReg(MR.Reg, 0, MR.Sub); + BT.put(BitTracker::RegisterRef(NewR), BT.get(MR)); + } + } + + return Changed; +} + + +bool CopyPropagation::isCopyReg(unsigned Opc) { + switch (Opc) { + case TargetOpcode::COPY: + case TargetOpcode::REG_SEQUENCE: + case Hexagon::A2_tfr: + case Hexagon::A2_tfrp: + case Hexagon::A2_combinew: + case Hexagon::A4_combineir: + case Hexagon::A4_combineri: + return true; + default: + break; + } + return false; +} + + +bool CopyPropagation::propagateRegCopy(MachineInstr &MI) { + bool Changed = false; + unsigned Opc = MI.getOpcode(); + BitTracker::RegisterRef RD = MI.getOperand(0); + assert(MI.getOperand(0).getSubReg() == 0); + + switch (Opc) { + case TargetOpcode::COPY: + case Hexagon::A2_tfr: + case Hexagon::A2_tfrp: { + BitTracker::RegisterRef RS = MI.getOperand(1); + if (!HBS::isTransparentCopy(RD, RS, MRI)) + break; + if (RS.Sub != 0) + Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI); + else + Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI); + break; + } + case TargetOpcode::REG_SEQUENCE: { + BitTracker::RegisterRef SL, SH; + if (HBS::parseRegSequence(MI, SL, SH)) { + Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg, + SL.Reg, SL.Sub, MRI); + Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg, + SH.Reg, SH.Sub, MRI); + } + break; + } + case Hexagon::A2_combinew: { + BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2); + Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg, + RL.Reg, RL.Sub, MRI); + Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg, + RH.Reg, RH.Sub, MRI); + break; + } + case Hexagon::A4_combineir: + case Hexagon::A4_combineri: { + unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1; + unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::subreg_loreg + : Hexagon::subreg_hireg; + BitTracker::RegisterRef RS = MI.getOperand(SrcX); + Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI); + break; + } + } + return Changed; +} + + +bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) { + std::vector<MachineInstr*> Instrs; + for (auto I = B.rbegin(), E = B.rend(); I != E; ++I) + Instrs.push_back(&*I); + + bool Changed = false; + for (auto I : Instrs) { + unsigned Opc = I->getOpcode(); + if (!CopyPropagation::isCopyReg(Opc)) + continue; + Changed |= propagateRegCopy(*I); + } + + return Changed; +} + + +// +// Bit simplification +// +// Recognize patterns that can be simplified and replace them with the +// simpler forms. +// This is by no means complete +namespace { + class BitSimplification : public Transformation { + public: + BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii, + MachineRegisterInfo &mri) + : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: + struct RegHalf : public BitTracker::RegisterRef { + bool Low; // Low/High halfword. 
+ }; + + bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC, + unsigned B, RegHalf &RH); + + bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC, + BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt); + unsigned getCombineOpcode(bool HLow, bool LLow); + + bool genStoreUpperHalf(MachineInstr *MI); + bool genStoreImmediate(MachineInstr *MI); + bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; +} + + +// Check if the bits [B..B+16) in register cell RC form a valid halfword, +// i.e. [0..16), [16..32), etc. of some register. If so, return true and +// set the information about the found register in RH. +bool BitSimplification::matchHalf(unsigned SelfR, + const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) { + // XXX This could be searching in the set of available registers, in case + // the match is not exact. + + // Match 16-bit chunks, where the RC[B..B+15] references exactly one + // register and all the bits B..B+15 match between RC and the register. + // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... }, + // and RC = { [0]:0 [1-15]:v1[1-15]... }. + bool Low = false; + unsigned I = B; + while (I < B+16 && RC[I].num()) + I++; + if (I == B+16) + return false; + + unsigned Reg = RC[I].RefI.Reg; + unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B. + if (P < I-B) + return false; + unsigned Pos = P - (I-B); + + if (Reg == 0 || Reg == SelfR) // Don't match "self". + return false; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + if (!BT.has(Reg)) + return false; + + const BitTracker::RegisterCell &SC = BT.lookup(Reg); + if (Pos+16 > SC.width()) + return false; + + for (unsigned i = 0; i < 16; ++i) { + const BitTracker::BitValue &RV = RC[i+B]; + if (RV.Type == BitTracker::BitValue::Ref) { + if (RV.RefI.Reg != Reg) + return false; + if (RV.RefI.Pos != i+Pos) + return false; + continue; + } + if (RC[i+B] != SC[i+Pos]) + return false; + } + + unsigned Sub = 0; + switch (Pos) { + case 0: + Sub = Hexagon::subreg_loreg; + Low = true; + break; + case 16: + Sub = Hexagon::subreg_loreg; + Low = false; + break; + case 32: + Sub = Hexagon::subreg_hireg; + Low = true; + break; + case 48: + Sub = Hexagon::subreg_hireg; + Low = false; + break; + default: + return false; + } + + RH.Reg = Reg; + RH.Sub = Sub; + RH.Low = Low; + // If the subregister is not valid with the register, set it to 0. + if (!HBS::getFinalVRegClass(RH, MRI)) + RH.Sub = 0; + + return true; +} + + +// Check if RC matches the pattern of a S2_packhl. If so, return true and +// set the inputs Rs and Rt. 
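+// For reference (as implied by the matching code below), S2_packhl Rdd, Rs,
+// Rt produces, from most to least significant halfword, Rs.H : Rt.H : Rs.L :
+// Rt.L, so the matching expects one register's halves at bit positions 48
+// and 16 and the other's at positions 32 and 0.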
+bool BitSimplification::matchPackhl(unsigned SelfR, + const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs, + BitTracker::RegisterRef &Rt) { + RegHalf L1, H1, L2, H2; + + if (!matchHalf(SelfR, RC, 0, L2) || !matchHalf(SelfR, RC, 16, L1)) + return false; + if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1)) + return false; + + // Rs = H1.L1, Rt = H2.L2 + if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low) + return false; + if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low) + return false; + + Rs = H1; + Rt = H2; + return true; +} + + +unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) { + return HLow ? LLow ? Hexagon::A2_combine_ll + : Hexagon::A2_combine_lh + : LLow ? Hexagon::A2_combine_hl + : Hexagon::A2_combine_hh; +} + + +// If MI stores the upper halfword of a register (potentially obtained via +// shifts or extracts), replace it with a storerf instruction. This could +// cause the "extraction" code to become dead. +bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + if (Opc != Hexagon::S2_storerh_io) + return false; + + MachineOperand &ValOp = MI->getOperand(2); + BitTracker::RegisterRef RS = ValOp; + if (!BT.has(RS.Reg)) + return false; + const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg); + RegHalf H; + if (!matchHalf(0, RC, 0, H)) + return false; + if (H.Low) + return false; + MI->setDesc(HII.get(Hexagon::S2_storerf_io)); + ValOp.setReg(H.Reg); + ValOp.setSubReg(H.Sub); + return true; +} + + +// If MI stores a value known at compile-time, and the value is within a range +// that avoids using constant-extenders, replace it with a store-immediate. +bool BitSimplification::genStoreImmediate(MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + unsigned Align = 0; + switch (Opc) { + case Hexagon::S2_storeri_io: + Align++; + case Hexagon::S2_storerh_io: + Align++; + case Hexagon::S2_storerb_io: + break; + default: + return false; + } + + // Avoid stores to frame-indices (due to an unknown offset). + if (!MI->getOperand(0).isReg()) + return false; + MachineOperand &OffOp = MI->getOperand(1); + if (!OffOp.isImm()) + return false; + + int64_t Off = OffOp.getImm(); + // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x). + if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1))) + return false; + // Source register: + BitTracker::RegisterRef RS = MI->getOperand(2); + if (!BT.has(RS.Reg)) + return false; + const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg); + uint64_t U; + if (!HBS::getConst(RC, 0, RC.width(), U)) + return false; + + // Only consider 8-bit values to avoid constant-extenders. + int V; + switch (Opc) { + case Hexagon::S2_storerb_io: + V = int8_t(U); + break; + case Hexagon::S2_storerh_io: + V = int16_t(U); + break; + case Hexagon::S2_storeri_io: + V = int32_t(U); + break; + } + if (!isInt<8>(V)) + return false; + + MI->RemoveOperand(2); + switch (Opc) { + case Hexagon::S2_storerb_io: + MI->setDesc(HII.get(Hexagon::S4_storeirb_io)); + break; + case Hexagon::S2_storerh_io: + MI->setDesc(HII.get(Hexagon::S4_storeirh_io)); + break; + case Hexagon::S2_storeri_io: + MI->setDesc(HII.get(Hexagon::S4_storeiri_io)); + break; + } + MI->addOperand(MachineOperand::CreateImm(V)); + return true; +} + + +// If MI is equivalent o S2_packhl, generate the S2_packhl. MI could be the +// last instruction in a sequence that results in something equivalent to +// the pack-halfwords. The intent is to cause the entire sequence to become +// dead. 
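+// A made-up instance of such a sequence:
+//   vreg3 = A2_combine_ll vreg1, vreg2
+//   vreg4 = A2_combine_hh vreg1, vreg2
+//   vreg5 = A2_combinew vreg4, vreg3
+// The cell computed for vreg5 is vreg1.H : vreg2.H : vreg1.L : vreg2.L, so
+// the last instruction can be replaced with S2_packhl of vreg1 and vreg2,
+// leaving the two combines with no users.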
+bool BitSimplification::genPackhl(MachineInstr *MI,
+      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+  unsigned Opc = MI->getOpcode();
+  if (Opc == Hexagon::S2_packhl)
+    return false;
+  BitTracker::RegisterRef Rs, Rt;
+  if (!matchPackhl(RD.Reg, RC, Rs, Rt))
+    return false;
+
+  MachineBasicBlock &B = *MI->getParent();
+  unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+  DebugLoc DL = MI->getDebugLoc();
+  BuildMI(B, MI, DL, HII.get(Hexagon::S2_packhl), NewR)
+      .addReg(Rs.Reg, 0, Rs.Sub)
+      .addReg(Rt.Reg, 0, Rt.Sub);
+  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+  BT.put(BitTracker::RegisterRef(NewR), RC);
+  return true;
+}
+
+
+// If MI produces a halfword of the input in the low half of the output,
+// replace it with zero-extend or extractu.
+bool BitSimplification::genExtractHalf(MachineInstr *MI,
+      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+  RegHalf L;
+  // Check for halfword in low 16 bits, zeros elsewhere.
+  if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
+    return false;
+
+  unsigned Opc = MI->getOpcode();
+  MachineBasicBlock &B = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+
+  // Prefer zxth, since zxth can go in any slot, while extractu only in
+  // slots 2 and 3.
+  unsigned NewR = 0;
+  if (L.Low && Opc != Hexagon::A2_zxth) {
+    NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+    BuildMI(B, MI, DL, HII.get(Hexagon::A2_zxth), NewR)
+        .addReg(L.Reg, 0, L.Sub);
+  } else if (!L.Low && Opc != Hexagon::S2_extractu) {
+    NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+    BuildMI(B, MI, DL, HII.get(Hexagon::S2_extractu), NewR)
+        .addReg(L.Reg, 0, L.Sub)
+        .addImm(16)
+        .addImm(16);
+  }
+  if (NewR == 0)
+    return false;
+  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+  BT.put(BitTracker::RegisterRef(NewR), RC);
+  return true;
+}
+
+
+// If MI is equivalent to a combine(.L/.H, .L/.H), replace it with the
+// combine.
+bool BitSimplification::genCombineHalf(MachineInstr *MI,
+      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+  RegHalf L, H;
+  // Check for combine h/l
+  if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
+    return false;
+  // Do nothing if this is just a reg copy.
+  if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
+    return false;
+
+  unsigned Opc = MI->getOpcode();
+  unsigned COpc = getCombineOpcode(H.Low, L.Low);
+  if (COpc == Opc)
+    return false;
+
+  MachineBasicBlock &B = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+  unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+  BuildMI(B, MI, DL, HII.get(COpc), NewR)
+      .addReg(H.Reg, 0, H.Sub)
+      .addReg(L.Reg, 0, L.Sub);
+  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+  BT.put(BitTracker::RegisterRef(NewR), RC);
+  return true;
+}
+
+
+// If MI resets high bits of a register and keeps the lower ones, replace it
+// with zero-extend byte/half, and-immediate, or extractu, as appropriate.
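+// E.g. (made-up registers) a 32-bit result whose bits [8..31] are known to
+// be zero and whose bits [0..7] equal the low byte of vreg7 can be produced
+// by A2_zxtb of vreg7; widths of 16 use A2_zxth, widths below 10 use
+// A2_andir with the matching mask, and anything else uses S2_extractu.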
+bool BitSimplification::genExtractLow(MachineInstr *MI, + BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + case Hexagon::S2_extractu: + return false; + } + if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) { + int32_t Imm = MI->getOperand(2).getImm(); + if (isInt<10>(Imm)) + return false; + } + + if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) + return false; + unsigned W = RC.width(); + while (W > 0 && RC[W-1].is(0)) + W--; + if (W == 0 || W == RC.width()) + return false; + unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb + : (W == 16) ? Hexagon::A2_zxth + : (W < 10) ? Hexagon::A2_andir + : Hexagon::S2_extractu; + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + for (auto &Op : MI->uses()) { + if (!Op.isReg()) + continue; + BitTracker::RegisterRef RS = Op; + if (!BT.has(RS.Reg)) + continue; + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + unsigned BN, BW; + if (!HBS::getSubregMask(RS, BN, BW, MRI)) + continue; + if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W)) + continue; + + unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + auto MIB = BuildMI(B, MI, DL, HII.get(NewOpc), NewR) + .addReg(RS.Reg, 0, RS.Sub); + if (NewOpc == Hexagon::A2_andir) + MIB.addImm((1 << W) - 1); + else if (NewOpc == Hexagon::S2_extractu) + MIB.addImm(W).addImm(0); + HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); + BT.put(BitTracker::RegisterRef(NewR), RC); + return true; + } + return false; +} + + +// Check for tstbit simplification opportunity, where the bit being checked +// can be tracked back to another register. For example: +// vreg2 = S2_lsr_i_r vreg1, 5 +// vreg3 = S2_tstbit_i vreg2, 0 +// => +// vreg3 = S2_tstbit_i vreg1, 5 +bool BitSimplification::simplifyTstbit(MachineInstr *MI, + BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { + unsigned Opc = MI->getOpcode(); + if (Opc != Hexagon::S2_tstbit_i) + return false; + + unsigned BN = MI->getOperand(2).getImm(); + BitTracker::RegisterRef RS = MI->getOperand(1); + unsigned F, W; + DebugLoc DL = MI->getDebugLoc(); + if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI)) + return false; + MachineBasicBlock &B = *MI->getParent(); + + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + const BitTracker::BitValue &V = SC[F+BN]; + if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) { + const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg); + // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is + // a double register, need to use a subregister and adjust bit + // number. + unsigned P = UINT_MAX; + BitTracker::RegisterRef RR(V.RefI.Reg, 0); + if (TC == &Hexagon::DoubleRegsRegClass) { + P = V.RefI.Pos; + RR.Sub = Hexagon::subreg_loreg; + if (P >= 32) { + P -= 32; + RR.Sub = Hexagon::subreg_hireg; + } + } else if (TC == &Hexagon::IntRegsRegClass) { + P = V.RefI.Pos; + } + if (P != UINT_MAX) { + unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); + BuildMI(B, MI, DL, HII.get(Hexagon::S2_tstbit_i), NewR) + .addReg(RR.Reg, 0, RR.Sub) + .addImm(P); + HBS::replaceReg(RD.Reg, NewR, MRI); + BT.put(NewR, RC); + return true; + } + } else if (V.is(0) || V.is(1)) { + unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); + unsigned NewOpc = V.is(0) ? 
Hexagon::TFR_PdFalse : Hexagon::TFR_PdTrue; + BuildMI(B, MI, DL, HII.get(NewOpc), NewR); + HBS::replaceReg(RD.Reg, NewR, MRI); + return true; + } + + return false; +} + + +bool BitSimplification::processBlock(MachineBasicBlock &B, + const RegisterSet &AVs) { + bool Changed = false; + RegisterSet AVB = AVs; + RegisterSet Defs; + + for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) { + MachineInstr *MI = &*I; + Defs.clear(); + HBS::getInstrDefs(*MI, Defs); + + unsigned Opc = MI->getOpcode(); + if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE) + continue; + + if (MI->mayStore()) { + bool T = genStoreUpperHalf(MI); + T = T || genStoreImmediate(MI); + Changed |= T; + continue; + } + + if (Defs.count() != 1) + continue; + const MachineOperand &Op0 = MI->getOperand(0); + if (!Op0.isReg() || !Op0.isDef()) + continue; + BitTracker::RegisterRef RD = Op0; + if (!BT.has(RD.Reg)) + continue; + const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); + const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg); + + if (FRC->getID() == Hexagon::DoubleRegsRegClassID) { + bool T = genPackhl(MI, RD, RC); + Changed |= T; + continue; + } + + if (FRC->getID() == Hexagon::IntRegsRegClassID) { + bool T = genExtractHalf(MI, RD, RC); + T = T || genCombineHalf(MI, RD, RC); + T = T || genExtractLow(MI, RD, RC); + Changed |= T; + continue; + } + + if (FRC->getID() == Hexagon::PredRegsRegClassID) { + bool T = simplifyTstbit(MI, RD, RC); + Changed |= T; + continue; + } + } + return Changed; +} + + +bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + auto &HRI = *HST.getRegisterInfo(); + auto &HII = *HST.getInstrInfo(); + + MDT = &getAnalysis<MachineDominatorTree>(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + bool Changed; + + Changed = DeadCodeElimination(MF, *MDT).run(); + + const HexagonEvaluator HE(HRI, MRI, HII, MF); + BitTracker BT(HE, MF); + DEBUG(BT.trace(true)); + BT.run(); + + MachineBasicBlock &Entry = MF.front(); + + RegisterSet AIG; // Available registers for IG. + ConstGeneration ImmG(BT, HII, MRI); + Changed |= visitBlock(Entry, ImmG, AIG); + + RegisterSet ARE; // Available registers for RIE. + RedundantInstrElimination RIE(BT, HII, MRI); + Changed |= visitBlock(Entry, RIE, ARE); + + RegisterSet ACG; // Available registers for CG. + CopyGeneration CopyG(BT, HII, MRI); + Changed |= visitBlock(Entry, CopyG, ACG); + + RegisterSet ACP; // Available registers for CP. + CopyPropagation CopyP(HRI, MRI); + Changed |= visitBlock(Entry, CopyP, ACP); + + Changed = DeadCodeElimination(MF, *MDT).run() || Changed; + + BT.run(); + RegisterSet ABS; // Available registers for BS. + BitSimplification BitS(BT, HII, MRI); + Changed |= visitBlock(Entry, BitS, ABS); + + Changed = DeadCodeElimination(MF, *MDT).run() || Changed; + + if (Changed) { + for (auto &B : MF) + for (auto &I : B) + I.clearKillInfo(); + DeadCodeElimination(MF, *MDT).run(); + } + return Changed; +} + + +// Recognize loops where the code at the end of the loop matches the code +// before the entry of the loop, and the matching code is such that is can +// be simplified. This pass relies on the bit simplification above and only +// prepares code in a way that can be handled by the bit simplifcation. 
+// +// This is the motivating testcase (and explanation): +// +// { +// loop0(.LBB0_2, r1) // %for.body.preheader +// r5:4 = memd(r0++#8) +// } +// { +// r3 = lsr(r4, #16) +// r7:6 = combine(r5, r5) +// } +// { +// r3 = insert(r5, #16, #16) +// r7:6 = vlsrw(r7:6, #16) +// } +// .LBB0_2: +// { +// memh(r2+#4) = r5 +// memh(r2+#6) = r6 # R6 is really R5.H +// } +// { +// r2 = add(r2, #8) +// memh(r2+#0) = r4 +// memh(r2+#2) = r3 # R3 is really R4.H +// } +// { +// r5:4 = memd(r0++#8) +// } +// { # "Shuffling" code that sets up R3 and R6 +// r3 = lsr(r4, #16) # so that their halves can be stored in the +// r7:6 = combine(r5, r5) # next iteration. This could be folded into +// } # the stores if the code was at the beginning +// { # of the loop iteration. Since the same code +// r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved +// r7:6 = vlsrw(r7:6, #16) # there. +// }:endloop0 +// +// +// The outcome: +// +// { +// loop0(.LBB0_2, r1) +// r5:4 = memd(r0++#8) +// } +// .LBB0_2: +// { +// memh(r2+#4) = r5 +// memh(r2+#6) = r5.h +// } +// { +// r2 = add(r2, #8) +// memh(r2+#0) = r4 +// memh(r2+#2) = r4.h +// } +// { +// r5:4 = memd(r0++#8) +// }:endloop0 + +namespace llvm { + FunctionPass *createHexagonLoopRescheduling(); + void initializeHexagonLoopReschedulingPass(PassRegistry&); +} + +namespace { + class HexagonLoopRescheduling : public MachineFunctionPass { + public: + static char ID; + HexagonLoopRescheduling() : MachineFunctionPass(ID), + HII(0), HRI(0), MRI(0), BTP(0) { + initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + MachineRegisterInfo *MRI; + BitTracker *BTP; + + struct LoopCand { + LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb, + MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {} + MachineBasicBlock *LB, *PB, *EB; + }; + typedef std::vector<MachineInstr*> InstrList; + struct InstrGroup { + BitTracker::RegisterRef Inp, Out; + InstrList Ins; + }; + struct PhiInfo { + PhiInfo(MachineInstr &P, MachineBasicBlock &B); + unsigned DefR; + BitTracker::RegisterRef LR, PR; + MachineBasicBlock *LB, *PB; + }; + + static unsigned getDefReg(const MachineInstr *MI); + bool isConst(unsigned Reg) const; + bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const; + bool isStoreInput(const MachineInstr *MI, unsigned DefR) const; + bool isShuffleOf(unsigned OutR, unsigned InpR) const; + bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2, + unsigned &InpR2) const; + void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB, + MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR); + bool processLoop(LoopCand &C); + }; +} + +char HexagonLoopRescheduling::ID = 0; + +INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched", + "Hexagon Loop Rescheduling", false, false) + + +HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P, + MachineBasicBlock &B) { + DefR = HexagonLoopRescheduling::getDefReg(&P); + LB = &B; + PB = nullptr; + for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) { + const MachineOperand &OpB = P.getOperand(i+1); + if (OpB.getMBB() == &B) { + LR = P.getOperand(i); + continue; + } + PB = OpB.getMBB(); + PR = P.getOperand(i); + } +} + + +unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) { + RegisterSet Defs; + HBS::getInstrDefs(*MI, Defs); + if (Defs.count() != 1) + return 0; + return Defs.find_first(); +} + + 
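+// Check whether every bit of Reg has a known constant value in the bit
+// tracker, e.g. (hypothetically) the result of an A2_tfrsi with an immediate
+// operand; registers with any unknown or symbolic bits are not considered
+// constant.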
+bool HexagonLoopRescheduling::isConst(unsigned Reg) const { + if (!BTP->has(Reg)) + return false; + const BitTracker::RegisterCell &RC = BTP->lookup(Reg); + for (unsigned i = 0, w = RC.width(); i < w; ++i) { + const BitTracker::BitValue &V = RC[i]; + if (!V.is(0) && !V.is(1)) + return false; + } + return true; +} + + +bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI, + unsigned DefR) const { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case TargetOpcode::COPY: + case Hexagon::S2_lsr_i_r: + case Hexagon::S2_asr_i_r: + case Hexagon::S2_asl_i_r: + case Hexagon::S2_lsr_i_p: + case Hexagon::S2_asr_i_p: + case Hexagon::S2_asl_i_p: + case Hexagon::S2_insert: + case Hexagon::A2_or: + case Hexagon::A2_orp: + case Hexagon::A2_and: + case Hexagon::A2_andp: + case Hexagon::A2_combinew: + case Hexagon::A4_combineri: + case Hexagon::A4_combineir: + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + case Hexagon::A2_combine_ll: + case Hexagon::A2_combine_lh: + case Hexagon::A2_combine_hl: + case Hexagon::A2_combine_hh: + return true; + } + return false; +} + + +bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI, + unsigned InpR) const { + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &Op = MI->getOperand(i); + if (!Op.isReg()) + continue; + if (Op.getReg() == InpR) + return i == n-1; + } + return false; +} + + +bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const { + if (!BTP->has(OutR) || !BTP->has(InpR)) + return false; + const BitTracker::RegisterCell &OutC = BTP->lookup(OutR); + for (unsigned i = 0, w = OutC.width(); i < w; ++i) { + const BitTracker::BitValue &V = OutC[i]; + if (V.Type != BitTracker::BitValue::Ref) + continue; + if (V.RefI.Reg != InpR) + return false; + } + return true; +} + + +bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1, + unsigned OutR2, unsigned &InpR2) const { + if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2)) + return false; + const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1); + const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2); + unsigned W = OutC1.width(); + unsigned MatchR = 0; + if (W != OutC2.width()) + return false; + for (unsigned i = 0; i < W; ++i) { + const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i]; + if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One) + return false; + if (V1.Type != BitTracker::BitValue::Ref) + continue; + if (V1.RefI.Pos != V2.RefI.Pos) + return false; + if (V1.RefI.Reg != InpR1) + return false; + if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2) + return false; + if (!MatchR) + MatchR = V2.RefI.Reg; + else if (V2.RefI.Reg != MatchR) + return false; + } + InpR2 = MatchR; + return true; +} + + +void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, + MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR, + unsigned NewPredR) { + DenseMap<unsigned,unsigned> RegMap; + + const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR); + unsigned PhiR = MRI->createVirtualRegister(PhiRC); + BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR) + .addReg(NewPredR) + .addMBB(&PB) + .addReg(G.Inp.Reg) + .addMBB(&LB); + RegMap.insert(std::make_pair(G.Inp.Reg, PhiR)); + + for (unsigned i = G.Ins.size(); i > 0; --i) { + const MachineInstr *SI = G.Ins[i-1]; + unsigned DR = getDefReg(SI); + const TargetRegisterClass *RC = MRI->getRegClass(DR); + unsigned NewDR = MRI->createVirtualRegister(RC); + DebugLoc DL = 
SI->getDebugLoc(); + + auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR); + for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) { + const MachineOperand &Op = SI->getOperand(j); + if (!Op.isReg()) { + MIB.addOperand(Op); + continue; + } + if (!Op.isUse()) + continue; + unsigned UseR = RegMap[Op.getReg()]; + MIB.addReg(UseR, 0, Op.getSubReg()); + } + RegMap.insert(std::make_pair(DR, NewDR)); + } + + HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI); +} + + +bool HexagonLoopRescheduling::processLoop(LoopCand &C) { + DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n"); + std::vector<PhiInfo> Phis; + for (auto &I : *C.LB) { + if (!I.isPHI()) + break; + unsigned PR = getDefReg(&I); + if (isConst(PR)) + continue; + bool BadUse = false, GoodUse = false; + for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseI = UI->getParent(); + if (UseI->getParent() != C.LB) { + BadUse = true; + break; + } + if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR)) + GoodUse = true; + } + if (BadUse || !GoodUse) + continue; + + Phis.push_back(PhiInfo(I, *C.LB)); + } + + DEBUG({ + dbgs() << "Phis: {"; + for (auto &I : Phis) { + dbgs() << ' ' << PrintReg(I.DefR, HRI) << "=phi(" + << PrintReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber() + << ',' << PrintReg(I.LR.Reg, HRI, I.LR.Sub) << ":b" + << I.LB->getNumber() << ')'; + } + dbgs() << " }\n"; + }); + + if (Phis.empty()) + return false; + + bool Changed = false; + InstrList ShufIns; + + // Go backwards in the block: for each bit shuffling instruction, check + // if that instruction could potentially be moved to the front of the loop: + // the output of the loop cannot be used in a non-shuffling instruction + // in this loop. + for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E; ++I) { + if (I->isTerminator()) + continue; + if (I->isPHI()) + break; + + RegisterSet Defs; + HBS::getInstrDefs(*I, Defs); + if (Defs.count() != 1) + continue; + unsigned DefR = Defs.find_first(); + if (!TargetRegisterInfo::isVirtualRegister(DefR)) + continue; + if (!isBitShuffle(&*I, DefR)) + continue; + + bool BadUse = false; + for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseI = UI->getParent(); + if (UseI->getParent() == C.LB) { + if (UseI->isPHI()) { + // If the use is in a phi node in this loop, then it should be + // the value corresponding to the back edge. + unsigned Idx = UI.getOperandNo(); + if (UseI->getOperand(Idx+1).getMBB() != C.LB) + BadUse = true; + } else { + auto F = std::find(ShufIns.begin(), ShufIns.end(), UseI); + if (F == ShufIns.end()) + BadUse = true; + } + } else { + // There is a use outside of the loop, but there is no epilog block + // suitable for a copy-out. + if (C.EB == nullptr) + BadUse = true; + } + if (BadUse) + break; + } + + if (BadUse) + continue; + ShufIns.push_back(&*I); + } + + // Partition the list of shuffling instructions into instruction groups, + // where each group has to be moved as a whole (i.e. a group is a chain of + // dependent instructions). A group produces a single live output register, + // which is meant to be the input of the loop phi node (although this is + // not checked here yet). It also uses a single register as its input, + // which is some value produced in the loop body. After moving the group + // to the beginning of the loop, that input register would need to be + // the loop-carried register (through a phi node) instead of the (currently + // loop-carried) output register. 
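+  // A made-up example of a group: the pair
+  //   vreg6 = S2_lsr_i_r vreg4, #16
+  //   vreg7 = A2_combine_ll vreg6, vreg4
+  // forms one group whose single input is vreg4 and whose output is vreg7
+  // (assuming vreg7 is the value flowing into the loop phi).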
+ typedef std::vector<InstrGroup> InstrGroupList; + InstrGroupList Groups; + + for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) { + MachineInstr *SI = ShufIns[i]; + if (SI == nullptr) + continue; + + InstrGroup G; + G.Ins.push_back(SI); + G.Out.Reg = getDefReg(SI); + RegisterSet Inputs; + HBS::getInstrUses(*SI, Inputs); + + for (unsigned j = i+1; j < n; ++j) { + MachineInstr *MI = ShufIns[j]; + if (MI == nullptr) + continue; + RegisterSet Defs; + HBS::getInstrDefs(*MI, Defs); + // If this instruction does not define any pending inputs, skip it. + if (!Defs.intersects(Inputs)) + continue; + // Otherwise, add it to the current group and remove the inputs that + // are defined by MI. + G.Ins.push_back(MI); + Inputs.remove(Defs); + // Then add all registers used by MI. + HBS::getInstrUses(*MI, Inputs); + ShufIns[j] = nullptr; + } + + // Only add a group if it requires at most one register. + if (Inputs.count() > 1) + continue; + auto LoopInpEq = [G] (const PhiInfo &P) -> bool { + return G.Out.Reg == P.LR.Reg; + }; + if (std::find_if(Phis.begin(), Phis.end(), LoopInpEq) == Phis.end()) + continue; + + G.Inp.Reg = Inputs.find_first(); + Groups.push_back(G); + } + + DEBUG({ + for (unsigned i = 0, n = Groups.size(); i < n; ++i) { + InstrGroup &G = Groups[i]; + dbgs() << "Group[" << i << "] inp: " + << PrintReg(G.Inp.Reg, HRI, G.Inp.Sub) + << " out: " << PrintReg(G.Out.Reg, HRI, G.Out.Sub) << "\n"; + for (unsigned j = 0, m = G.Ins.size(); j < m; ++j) + dbgs() << " " << *G.Ins[j]; + } + }); + + for (unsigned i = 0, n = Groups.size(); i < n; ++i) { + InstrGroup &G = Groups[i]; + if (!isShuffleOf(G.Out.Reg, G.Inp.Reg)) + continue; + auto LoopInpEq = [G] (const PhiInfo &P) -> bool { + return G.Out.Reg == P.LR.Reg; + }; + auto F = std::find_if(Phis.begin(), Phis.end(), LoopInpEq); + if (F == Phis.end()) + continue; + unsigned PredR = 0; + if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PredR)) { + const MachineInstr *DefPredR = MRI->getVRegDef(F->PR.Reg); + unsigned Opc = DefPredR->getOpcode(); + if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi) + continue; + if (!DefPredR->getOperand(1).isImm()) + continue; + if (DefPredR->getOperand(1).getImm() != 0) + continue; + const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg); + if (RC != MRI->getRegClass(F->PR.Reg)) { + PredR = MRI->createVirtualRegister(RC); + unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi + : Hexagon::A2_tfrpi; + auto T = C.PB->getFirstTerminator(); + DebugLoc DL = (T != C.PB->end()) ? 
T->getDebugLoc() : DebugLoc(); + BuildMI(*C.PB, T, DL, HII->get(TfrI), PredR) + .addImm(0); + } else { + PredR = F->PR.Reg; + } + } + assert(MRI->getRegClass(PredR) == MRI->getRegClass(G.Inp.Reg)); + moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PredR); + Changed = true; + } + + return Changed; +} + + +bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) { + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + HII = HST.getInstrInfo(); + HRI = HST.getRegisterInfo(); + MRI = &MF.getRegInfo(); + const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); + BitTracker BT(HE, MF); + DEBUG(BT.trace(true)); + BT.run(); + BTP = &BT; + + std::vector<LoopCand> Cand; + + for (auto &B : MF) { + if (B.pred_size() != 2 || B.succ_size() != 2) + continue; + MachineBasicBlock *PB = nullptr; + bool IsLoop = false; + for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) { + if (*PI != &B) + PB = *PI; + else + IsLoop = true; + } + if (!IsLoop) + continue; + + MachineBasicBlock *EB = nullptr; + for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) { + if (*SI == &B) + continue; + // Set EP to the epilog block, if it has only 1 predecessor (i.e. the + // edge from B to EP is non-critical. + if ((*SI)->pred_size() == 1) + EB = *SI; + break; + } + + Cand.push_back(LoopCand(&B, PB, EB)); + } + + bool Changed = false; + for (auto &C : Cand) + Changed |= processLoop(C); + + return Changed; +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createHexagonLoopRescheduling() { + return new HexagonLoopRescheduling(); +} + +FunctionPass *llvm::createHexagonBitSimplify() { + return new HexagonBitSimplify(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp new file mode 100644 index 0000000..d5848dc --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -0,0 +1,1175 @@ +//===--- HexagonBitTracker.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonTargetMachine.h" +#include "HexagonBitTracker.h" + +using namespace llvm; + +typedef BitTracker BT; + +HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, + MachineRegisterInfo &mri, + const HexagonInstrInfo &tii, + MachineFunction &mf) + : MachineEvaluator(tri, mri), MF(mf), MFI(*mf.getFrameInfo()), TII(tii) { + // Populate the VRX map (VR to extension-type). + // Go over all the formal parameters of the function. If a given parameter + // P is sign- or zero-extended, locate the virtual register holding that + // parameter and create an entry in the VRX map indicating the type of ex- + // tension (and the source type). + // This is a bit complicated to do accurately, since the memory layout in- + // formation is necessary to precisely determine whether an aggregate para- + // meter will be passed in a register or in memory. 
What is given in MRI + // is the association between the physical register that is live-in (i.e. + // holds an argument), and the virtual register that this value will be + // copied into. This, by itself, is not sufficient to map back the virtual + // register to a formal parameter from Function (since consecutive live-ins + // from MRI may not correspond to consecutive formal parameters from Func- + // tion). To avoid the complications with in-memory arguments, only consi- + // der the initial sequence of formal parameters that are known to be + // passed via registers. + unsigned AttrIdx = 0; + unsigned InVirtReg, InPhysReg = 0; + const Function &F = *MF.getFunction(); + typedef Function::const_arg_iterator arg_iterator; + for (arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { + AttrIdx++; + const Argument &Arg = *I; + Type *ATy = Arg.getType(); + unsigned Width = 0; + if (ATy->isIntegerTy()) + Width = ATy->getIntegerBitWidth(); + else if (ATy->isPointerTy()) + Width = 32; + // If pointer size is not set through target data, it will default to + // Module::AnyPointerSize. + if (Width == 0 || Width > 64) + break; + InPhysReg = getNextPhysReg(InPhysReg, Width); + if (!InPhysReg) + break; + InVirtReg = getVirtRegFor(InPhysReg); + if (!InVirtReg) + continue; + AttributeSet Attrs = F.getAttributes(); + if (Attrs.hasAttribute(AttrIdx, Attribute::SExt)) + VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::SExt, Width))); + else if (Attrs.hasAttribute(AttrIdx, Attribute::ZExt)) + VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::ZExt, Width))); + } +} + + +BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const { + if (Sub == 0) + return MachineEvaluator::mask(Reg, 0); + using namespace Hexagon; + const TargetRegisterClass *RC = MRI.getRegClass(Reg); + unsigned ID = RC->getID(); + uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub)); + switch (ID) { + case DoubleRegsRegClassID: + case VecDblRegsRegClassID: + case VecDblRegs128BRegClassID: + return (Sub == subreg_loreg) ? BT::BitMask(0, RW-1) + : BT::BitMask(RW, 2*RW-1); + default: + break; + } +#ifndef NDEBUG + dbgs() << PrintReg(Reg, &TRI, Sub) << '\n'; +#endif + llvm_unreachable("Unexpected register/subregister"); +} + +namespace { +class RegisterRefs { + std::vector<BT::RegisterRef> Vector; + +public: + RegisterRefs(const MachineInstr *MI) : Vector(MI->getNumOperands()) { + for (unsigned i = 0, n = Vector.size(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg()) + Vector[i] = BT::RegisterRef(MO); + // For indices that don't correspond to registers, the entry will + // remain constructed via the default constructor. + } + } + + size_t size() const { return Vector.size(); } + const BT::RegisterRef &operator[](unsigned n) const { + // The main purpose of this operator is to assert with bad argument. + assert(n < Vector.size()); + return Vector[n]; + } +}; +} + +bool HexagonEvaluator::evaluate(const MachineInstr *MI, + const CellMapType &Inputs, CellMapType &Outputs) const { + unsigned NumDefs = 0; + + // Sanity verification: there should not be any defs with subregisters. + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + NumDefs++; + assert(MO.getSubReg() == 0); + } + + if (NumDefs == 0) + return false; + + if (MI->mayLoad()) + return evaluateLoad(MI, Inputs, Outputs); + + // Check COPY instructions that copy formal parameters into virtual + // registers. 
Such parameters can be sign- or zero-extended at the + // call site, and we should take advantage of this knowledge. The MRI + // keeps a list of pairs of live-in physical and virtual registers, + // which provides information about which virtual registers will hold + // the argument values. The function will still contain instructions + // defining those virtual registers, and in practice those are COPY + // instructions from a physical to a virtual register. In such cases, + // applying the argument extension to the virtual register can be seen + // as simply mirroring the extension that had already been applied to + // the physical register at the call site. If the defining instruction + // was not a COPY, it would not be clear how to mirror that extension + // on the callee's side. For that reason, only check COPY instructions + // for potential extensions. + if (MI->isCopy()) { + if (evaluateFormalCopy(MI, Inputs, Outputs)) + return true; + } + + // Beyond this point, if any operand is a global, skip that instruction. + // The reason is that certain instructions that can take an immediate + // operand can also have a global symbol in that operand. To avoid + // checking what kind of operand a given instruction has individually + // for each instruction, do it here. Global symbols as operands gene- + // rally do not provide any useful information. + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isGlobal() || MO.isBlockAddress() || MO.isSymbol() || MO.isJTI() || + MO.isCPI()) + return false; + } + + RegisterRefs Reg(MI); + unsigned Opc = MI->getOpcode(); + using namespace Hexagon; + #define op(i) MI->getOperand(i) + #define rc(i) RegisterCell::ref(getCell(Reg[i],Inputs)) + #define im(i) MI->getOperand(i).getImm() + + // If the instruction has no register operands, skip it. + if (Reg.size() == 0) + return false; + + // Record result for register in operand 0. + auto rr0 = [this,Reg] (const BT::RegisterCell &Val, CellMapType &Outputs) + -> bool { + putCell(Reg[0], Val, Outputs); + return true; + }; + // Get the cell corresponding to the N-th operand. + auto cop = [this,&Reg,&MI,&Inputs] (unsigned N, uint16_t W) + -> BT::RegisterCell { + const MachineOperand &Op = MI->getOperand(N); + if (Op.isImm()) + return eIMM(Op.getImm(), W); + if (!Op.isReg()) + return RegisterCell::self(0, W); + assert(getRegBitWidth(Reg[N]) == W && "Register width mismatch"); + return rc(N); + }; + // Extract RW low bits of the cell. + auto lo = [this] (const BT::RegisterCell &RC, uint16_t RW) + -> BT::RegisterCell { + assert(RW <= RC.width()); + return eXTR(RC, 0, RW); + }; + // Extract RW high bits of the cell. + auto hi = [this] (const BT::RegisterCell &RC, uint16_t RW) + -> BT::RegisterCell { + uint16_t W = RC.width(); + assert(RW <= W); + return eXTR(RC, W-RW, W); + }; + // Extract N-th halfword (counting from the least significant position). + auto half = [this] (const BT::RegisterCell &RC, unsigned N) + -> BT::RegisterCell { + assert(N*16+16 <= RC.width()); + return eXTR(RC, N*16, N*16+16); + }; + // Shuffle bits (pick even/odd from cells and merge into result). 
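+  // e.g. (reading the lambda below): with BW = 8 and Odd = false, as used for
+  // S2_shuffeb, the result is built from the even-numbered bytes of the two
+  // inputs, starting at the least significant byte:
+  //   res = { Rt.b0, Rs.b0, Rt.b2, Rs.b2, ... }
+  // Odd = true picks the odd-numbered bytes instead.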
+ auto shuffle = [this] (const BT::RegisterCell &Rs, const BT::RegisterCell &Rt, + uint16_t BW, bool Odd) -> BT::RegisterCell { + uint16_t I = Odd, Ws = Rs.width(); + assert(Ws == Rt.width()); + RegisterCell RC = eXTR(Rt, I*BW, I*BW+BW).cat(eXTR(Rs, I*BW, I*BW+BW)); + I += 2; + while (I*BW < Ws) { + RC.cat(eXTR(Rt, I*BW, I*BW+BW)).cat(eXTR(Rs, I*BW, I*BW+BW)); + I += 2; + } + return RC; + }; + + // The bitwidth of the 0th operand. In most (if not all) of the + // instructions below, the 0th operand is the defined register. + // Pre-compute the bitwidth here, because it is needed in many cases + // cases below. + uint16_t W0 = (Reg[0].Reg != 0) ? getRegBitWidth(Reg[0]) : 0; + + switch (Opc) { + // Transfer immediate: + + case A2_tfrsi: + case A2_tfrpi: + case CONST32: + case CONST32_Float_Real: + case CONST32_Int_Real: + case CONST64_Float_Real: + case CONST64_Int_Real: + return rr0(eIMM(im(1), W0), Outputs); + case TFR_PdFalse: + return rr0(RegisterCell(W0).fill(0, W0, BT::BitValue::Zero), Outputs); + case TFR_PdTrue: + return rr0(RegisterCell(W0).fill(0, W0, BT::BitValue::One), Outputs); + case TFR_FI: { + int FI = op(1).getIndex(); + int Off = op(2).getImm(); + unsigned A = MFI.getObjectAlignment(FI) + std::abs(Off); + unsigned L = Log2_32(A); + RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0); + RC.fill(0, L, BT::BitValue::Zero); + return rr0(RC, Outputs); + } + + // Transfer register: + + case A2_tfr: + case A2_tfrp: + case C2_pxfer_map: + return rr0(rc(1), Outputs); + case C2_tfrpr: { + uint16_t RW = W0; + uint16_t PW = 8; // XXX Pred size: getRegBitWidth(Reg[1]); + assert(PW <= RW); + RegisterCell PC = eXTR(rc(1), 0, PW); + RegisterCell RC = RegisterCell(RW).insert(PC, BT::BitMask(0, PW-1)); + RC.fill(PW, RW, BT::BitValue::Zero); + return rr0(RC, Outputs); + } + case C2_tfrrp: { + RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0); + W0 = 8; // XXX Pred size + return rr0(eINS(RC, eXTR(rc(1), 0, W0), 0), Outputs); + } + + // Arithmetic: + + case A2_abs: + case A2_absp: + // TODO + break; + + case A2_addsp: { + uint16_t W1 = getRegBitWidth(Reg[1]); + assert(W0 == 64 && W1 == 32); + RegisterCell CW = RegisterCell(W0).insert(rc(1), BT::BitMask(0, W1-1)); + RegisterCell RC = eADD(eSXT(CW, W1), rc(2)); + return rr0(RC, Outputs); + } + case A2_add: + case A2_addp: + return rr0(eADD(rc(1), rc(2)), Outputs); + case A2_addi: + return rr0(eADD(rc(1), eIMM(im(2), W0)), Outputs); + case S4_addi_asl_ri: { + RegisterCell RC = eADD(eIMM(im(1), W0), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_addi_lsr_ri: { + RegisterCell RC = eADD(eIMM(im(1), W0), eLSR(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_addaddi: { + RegisterCell RC = eADD(rc(1), eADD(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case M4_mpyri_addi: { + RegisterCell M = eMLS(rc(2), eIMM(im(3), W0)); + RegisterCell RC = eADD(eIMM(im(1), W0), lo(M, W0)); + return rr0(RC, Outputs); + } + case M4_mpyrr_addi: { + RegisterCell M = eMLS(rc(2), rc(3)); + RegisterCell RC = eADD(eIMM(im(1), W0), lo(M, W0)); + return rr0(RC, Outputs); + } + case M4_mpyri_addr_u2: { + RegisterCell M = eMLS(eIMM(im(2), W0), rc(3)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M4_mpyri_addr: { + RegisterCell M = eMLS(rc(2), eIMM(im(3), W0)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M4_mpyrr_addr: { + RegisterCell M = eMLS(rc(2), rc(3)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case S4_subaddi: { + RegisterCell 
RC = eADD(rc(1), eSUB(eIMM(im(2), W0), rc(3))); + return rr0(RC, Outputs); + } + case M2_accii: { + RegisterCell RC = eADD(rc(1), eADD(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case M2_acci: { + RegisterCell RC = eADD(rc(1), eADD(rc(2), rc(3))); + return rr0(RC, Outputs); + } + case M2_subacc: { + RegisterCell RC = eADD(rc(1), eSUB(rc(2), rc(3))); + return rr0(RC, Outputs); + } + case S2_addasl_rrri: { + RegisterCell RC = eADD(rc(1), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case C4_addipc: { + RegisterCell RPC = RegisterCell::self(Reg[0].Reg, W0); + RPC.fill(0, 2, BT::BitValue::Zero); + return rr0(eADD(RPC, eIMM(im(2), W0)), Outputs); + } + case A2_sub: + case A2_subp: + return rr0(eSUB(rc(1), rc(2)), Outputs); + case A2_subri: + return rr0(eSUB(eIMM(im(1), W0), rc(2)), Outputs); + case S4_subi_asl_ri: { + RegisterCell RC = eSUB(eIMM(im(1), W0), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_subi_lsr_ri: { + RegisterCell RC = eSUB(eIMM(im(1), W0), eLSR(rc(2), im(3))); + return rr0(RC, Outputs); + } + case M2_naccii: { + RegisterCell RC = eSUB(rc(1), eADD(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case M2_nacci: { + RegisterCell RC = eSUB(rc(1), eADD(rc(2), rc(3))); + return rr0(RC, Outputs); + } + // 32-bit negation is done by "Rd = A2_subri 0, Rs" + case A2_negp: + return rr0(eSUB(eIMM(0, W0), rc(1)), Outputs); + + case M2_mpy_up: { + RegisterCell M = eMLS(rc(1), rc(2)); + return rr0(hi(M, W0), Outputs); + } + case M2_dpmpyss_s0: + return rr0(eMLS(rc(1), rc(2)), Outputs); + case M2_dpmpyss_acc_s0: + return rr0(eADD(rc(1), eMLS(rc(2), rc(3))), Outputs); + case M2_dpmpyss_nac_s0: + return rr0(eSUB(rc(1), eMLS(rc(2), rc(3))), Outputs); + case M2_mpyi: { + RegisterCell M = eMLS(rc(1), rc(2)); + return rr0(lo(M, W0), Outputs); + } + case M2_macsip: { + RegisterCell M = eMLS(rc(2), eIMM(im(3), W0)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M2_macsin: { + RegisterCell M = eMLS(rc(2), eIMM(im(3), W0)); + RegisterCell RC = eSUB(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M2_maci: { + RegisterCell M = eMLS(rc(2), rc(3)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M2_mpysmi: { + RegisterCell M = eMLS(rc(1), eIMM(im(2), W0)); + return rr0(lo(M, 32), Outputs); + } + case M2_mpysin: { + RegisterCell M = eMLS(rc(1), eIMM(-im(2), W0)); + return rr0(lo(M, 32), Outputs); + } + case M2_mpysip: { + RegisterCell M = eMLS(rc(1), eIMM(im(2), W0)); + return rr0(lo(M, 32), Outputs); + } + case M2_mpyu_up: { + RegisterCell M = eMLU(rc(1), rc(2)); + return rr0(hi(M, W0), Outputs); + } + case M2_dpmpyuu_s0: + return rr0(eMLU(rc(1), rc(2)), Outputs); + case M2_dpmpyuu_acc_s0: + return rr0(eADD(rc(1), eMLU(rc(2), rc(3))), Outputs); + case M2_dpmpyuu_nac_s0: + return rr0(eSUB(rc(1), eMLU(rc(2), rc(3))), Outputs); + //case M2_mpysu_up: + + // Logical/bitwise: + + case A2_andir: + return rr0(eAND(rc(1), eIMM(im(2), W0)), Outputs); + case A2_and: + case A2_andp: + return rr0(eAND(rc(1), rc(2)), Outputs); + case A4_andn: + case A4_andnp: + return rr0(eAND(rc(1), eNOT(rc(2))), Outputs); + case S4_andi_asl_ri: { + RegisterCell RC = eAND(eIMM(im(1), W0), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_andi_lsr_ri: { + RegisterCell RC = eAND(eIMM(im(1), W0), eLSR(rc(2), im(3))); + return rr0(RC, Outputs); + } + case M4_and_and: + return rr0(eAND(rc(1), eAND(rc(2), rc(3))), Outputs); + case M4_and_andn: + return rr0(eAND(rc(1), eAND(rc(2), eNOT(rc(3)))), 
Outputs); + case M4_and_or: + return rr0(eAND(rc(1), eORL(rc(2), rc(3))), Outputs); + case M4_and_xor: + return rr0(eAND(rc(1), eXOR(rc(2), rc(3))), Outputs); + case A2_orir: + return rr0(eORL(rc(1), eIMM(im(2), W0)), Outputs); + case A2_or: + case A2_orp: + return rr0(eORL(rc(1), rc(2)), Outputs); + case A4_orn: + case A4_ornp: + return rr0(eORL(rc(1), eNOT(rc(2))), Outputs); + case S4_ori_asl_ri: { + RegisterCell RC = eORL(eIMM(im(1), W0), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_ori_lsr_ri: { + RegisterCell RC = eORL(eIMM(im(1), W0), eLSR(rc(2), im(3))); + return rr0(RC, Outputs); + } + case M4_or_and: + return rr0(eORL(rc(1), eAND(rc(2), rc(3))), Outputs); + case M4_or_andn: + return rr0(eORL(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case S4_or_andi: + case S4_or_andix: { + RegisterCell RC = eORL(rc(1), eAND(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case S4_or_ori: { + RegisterCell RC = eORL(rc(1), eORL(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case M4_or_or: + return rr0(eORL(rc(1), eORL(rc(2), rc(3))), Outputs); + case M4_or_xor: + return rr0(eORL(rc(1), eXOR(rc(2), rc(3))), Outputs); + case A2_xor: + case A2_xorp: + return rr0(eXOR(rc(1), rc(2)), Outputs); + case M4_xor_and: + return rr0(eXOR(rc(1), eAND(rc(2), rc(3))), Outputs); + case M4_xor_andn: + return rr0(eXOR(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case M4_xor_or: + return rr0(eXOR(rc(1), eORL(rc(2), rc(3))), Outputs); + case M4_xor_xacc: + return rr0(eXOR(rc(1), eXOR(rc(2), rc(3))), Outputs); + case A2_not: + case A2_notp: + return rr0(eNOT(rc(1)), Outputs); + + case S2_asl_i_r: + case S2_asl_i_p: + return rr0(eASL(rc(1), im(2)), Outputs); + case A2_aslh: + return rr0(eASL(rc(1), 16), Outputs); + case S2_asl_i_r_acc: + case S2_asl_i_p_acc: + return rr0(eADD(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_r_nac: + case S2_asl_i_p_nac: + return rr0(eSUB(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_r_and: + case S2_asl_i_p_and: + return rr0(eAND(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_r_or: + case S2_asl_i_p_or: + return rr0(eORL(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_r_xacc: + case S2_asl_i_p_xacc: + return rr0(eXOR(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_vh: + case S2_asl_i_vw: + // TODO + break; + + case S2_asr_i_r: + case S2_asr_i_p: + return rr0(eASR(rc(1), im(2)), Outputs); + case A2_asrh: + return rr0(eASR(rc(1), 16), Outputs); + case S2_asr_i_r_acc: + case S2_asr_i_p_acc: + return rr0(eADD(rc(1), eASR(rc(2), im(3))), Outputs); + case S2_asr_i_r_nac: + case S2_asr_i_p_nac: + return rr0(eSUB(rc(1), eASR(rc(2), im(3))), Outputs); + case S2_asr_i_r_and: + case S2_asr_i_p_and: + return rr0(eAND(rc(1), eASR(rc(2), im(3))), Outputs); + case S2_asr_i_r_or: + case S2_asr_i_p_or: + return rr0(eORL(rc(1), eASR(rc(2), im(3))), Outputs); + case S2_asr_i_r_rnd: { + // The input is first sign-extended to 64 bits, then the output + // is truncated back to 32 bits. 
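+    // In other words, Rd = trunc32(((sext64(Rs) >> #u5) + 1) >> 1), with
+    // arithmetic shifts throughout, which is what the cells below compute.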
+ assert(W0 == 32); + RegisterCell XC = eSXT(rc(1).cat(eIMM(0, W0)), W0); + RegisterCell RC = eASR(eADD(eASR(XC, im(2)), eIMM(1, 2*W0)), 1); + return rr0(eXTR(RC, 0, W0), Outputs); + } + case S2_asr_i_r_rnd_goodsyntax: { + int64_t S = im(2); + if (S == 0) + return rr0(rc(1), Outputs); + // Result: S2_asr_i_r_rnd Rs, u5-1 + RegisterCell XC = eSXT(rc(1).cat(eIMM(0, W0)), W0); + RegisterCell RC = eLSR(eADD(eASR(XC, S-1), eIMM(1, 2*W0)), 1); + return rr0(eXTR(RC, 0, W0), Outputs); + } + case S2_asr_r_vh: + case S2_asr_i_vw: + case S2_asr_i_svw_trun: + // TODO + break; + + case S2_lsr_i_r: + case S2_lsr_i_p: + return rr0(eLSR(rc(1), im(2)), Outputs); + case S2_lsr_i_r_acc: + case S2_lsr_i_p_acc: + return rr0(eADD(rc(1), eLSR(rc(2), im(3))), Outputs); + case S2_lsr_i_r_nac: + case S2_lsr_i_p_nac: + return rr0(eSUB(rc(1), eLSR(rc(2), im(3))), Outputs); + case S2_lsr_i_r_and: + case S2_lsr_i_p_and: + return rr0(eAND(rc(1), eLSR(rc(2), im(3))), Outputs); + case S2_lsr_i_r_or: + case S2_lsr_i_p_or: + return rr0(eORL(rc(1), eLSR(rc(2), im(3))), Outputs); + case S2_lsr_i_r_xacc: + case S2_lsr_i_p_xacc: + return rr0(eXOR(rc(1), eLSR(rc(2), im(3))), Outputs); + + case S2_clrbit_i: { + RegisterCell RC = rc(1); + RC[im(2)] = BT::BitValue::Zero; + return rr0(RC, Outputs); + } + case S2_setbit_i: { + RegisterCell RC = rc(1); + RC[im(2)] = BT::BitValue::One; + return rr0(RC, Outputs); + } + case S2_togglebit_i: { + RegisterCell RC = rc(1); + uint16_t BX = im(2); + RC[BX] = RC[BX].is(0) ? BT::BitValue::One + : RC[BX].is(1) ? BT::BitValue::Zero + : BT::BitValue::self(); + return rr0(RC, Outputs); + } + + case A4_bitspliti: { + uint16_t W1 = getRegBitWidth(Reg[1]); + uint16_t BX = im(2); + // Res.uw[1] = Rs[bx+1:], Res.uw[0] = Rs[0:bx] + const BT::BitValue Zero = BT::BitValue::Zero; + RegisterCell RZ = RegisterCell(W0).fill(BX, W1, Zero) + .fill(W1+(W1-BX), W0, Zero); + RegisterCell BF1 = eXTR(rc(1), 0, BX), BF2 = eXTR(rc(1), BX, W1); + RegisterCell RC = eINS(eINS(RZ, BF1, 0), BF2, W1); + return rr0(RC, Outputs); + } + case S4_extract: + case S4_extractp: + case S2_extractu: + case S2_extractup: { + uint16_t Wd = im(2), Of = im(3); + assert(Wd <= W0); + if (Wd == 0) + return rr0(eIMM(0, W0), Outputs); + // If the width extends beyond the register size, pad the register + // with 0 bits. + RegisterCell Pad = (Wd+Of > W0) ? rc(1).cat(eIMM(0, Wd+Of-W0)) : rc(1); + RegisterCell Ext = eXTR(Pad, Of, Wd+Of); + // Ext is short, need to extend it with 0s or sign bit. + RegisterCell RC = RegisterCell(W0).insert(Ext, BT::BitMask(0, Wd-1)); + if (Opc == S2_extractu || Opc == S2_extractup) + return rr0(eZXT(RC, Wd), Outputs); + return rr0(eSXT(RC, Wd), Outputs); + } + case S2_insert: + case S2_insertp: { + uint16_t Wd = im(3), Of = im(4); + assert(Wd < W0 && Of < W0); + // If Wd+Of exceeds W0, the inserted bits are truncated. 
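+    // For example, with W0 = 32, Wd = 8 and Of = 28, Wd is clamped to 4 and
+    // only the low 4 bits of the inserted operand reach the destination.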
+ if (Wd+Of > W0) + Wd = W0-Of; + if (Wd == 0) + return rr0(rc(1), Outputs); + return rr0(eINS(rc(1), eXTR(rc(2), 0, Wd), Of), Outputs); + } + + // Bit permutations: + + case A2_combineii: + case A4_combineii: + case A4_combineir: + case A4_combineri: + case A2_combinew: + assert(W0 % 2 == 0); + return rr0(cop(2, W0/2).cat(cop(1, W0/2)), Outputs); + case A2_combine_ll: + case A2_combine_lh: + case A2_combine_hl: + case A2_combine_hh: { + assert(W0 == 32); + assert(getRegBitWidth(Reg[1]) == 32 && getRegBitWidth(Reg[2]) == 32); + // Low half in the output is 0 for _ll and _hl, 1 otherwise: + unsigned LoH = !(Opc == A2_combine_ll || Opc == A2_combine_hl); + // High half in the output is 0 for _ll and _lh, 1 otherwise: + unsigned HiH = !(Opc == A2_combine_ll || Opc == A2_combine_lh); + RegisterCell R1 = rc(1); + RegisterCell R2 = rc(2); + RegisterCell RC = half(R2, LoH).cat(half(R1, HiH)); + return rr0(RC, Outputs); + } + case S2_packhl: { + assert(W0 == 64); + assert(getRegBitWidth(Reg[1]) == 32 && getRegBitWidth(Reg[2]) == 32); + RegisterCell R1 = rc(1); + RegisterCell R2 = rc(2); + RegisterCell RC = half(R2, 0).cat(half(R1, 0)).cat(half(R2, 1)) + .cat(half(R1, 1)); + return rr0(RC, Outputs); + } + case S2_shuffeb: { + RegisterCell RC = shuffle(rc(1), rc(2), 8, false); + return rr0(RC, Outputs); + } + case S2_shuffeh: { + RegisterCell RC = shuffle(rc(1), rc(2), 16, false); + return rr0(RC, Outputs); + } + case S2_shuffob: { + RegisterCell RC = shuffle(rc(1), rc(2), 8, true); + return rr0(RC, Outputs); + } + case S2_shuffoh: { + RegisterCell RC = shuffle(rc(1), rc(2), 16, true); + return rr0(RC, Outputs); + } + case C2_mask: { + uint16_t WR = W0; + uint16_t WP = 8; // XXX Pred size: getRegBitWidth(Reg[1]); + assert(WR == 64 && WP == 8); + RegisterCell R1 = rc(1); + RegisterCell RC(WR); + for (uint16_t i = 0; i < WP; ++i) { + const BT::BitValue &V = R1[i]; + BT::BitValue F = (V.is(0) || V.is(1)) ? V : BT::BitValue::self(); + RC.fill(i*8, i*8+8, F); + } + return rr0(RC, Outputs); + } + + // Mux: + + case C2_muxii: + case C2_muxir: + case C2_muxri: + case C2_mux: { + BT::BitValue PC0 = rc(1)[0]; + RegisterCell R2 = cop(2, W0); + RegisterCell R3 = cop(3, W0); + if (PC0.is(0) || PC0.is(1)) + return rr0(RegisterCell::ref(PC0 ? R2 : R3), Outputs); + R2.meet(R3, Reg[0].Reg); + return rr0(R2, Outputs); + } + case C2_vmux: + // TODO + break; + + // Sign- and zero-extension: + + case A2_sxtb: + return rr0(eSXT(rc(1), 8), Outputs); + case A2_sxth: + return rr0(eSXT(rc(1), 16), Outputs); + case A2_sxtw: { + uint16_t W1 = getRegBitWidth(Reg[1]); + assert(W0 == 64 && W1 == 32); + RegisterCell RC = eSXT(rc(1).cat(eIMM(0, W1)), W1); + return rr0(RC, Outputs); + } + case A2_zxtb: + return rr0(eZXT(rc(1), 8), Outputs); + case A2_zxth: + return rr0(eZXT(rc(1), 16), Outputs); + + // Bit count: + + case S2_cl0: + case S2_cl0p: + // Always produce a 32-bit result. 
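+    // (cl0 counts leading zero bits; the count goes into a 32-bit register
+    // even for the 64-bit "p" forms, hence the fixed width of 32 below.)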
+ return rr0(eCLB(rc(1), 0/*bit*/, 32), Outputs); + case S2_cl1: + case S2_cl1p: + return rr0(eCLB(rc(1), 1/*bit*/, 32), Outputs); + case S2_clb: + case S2_clbp: { + uint16_t W1 = getRegBitWidth(Reg[1]); + RegisterCell R1 = rc(1); + BT::BitValue TV = R1[W1-1]; + if (TV.is(0) || TV.is(1)) + return rr0(eCLB(R1, TV, 32), Outputs); + break; + } + case S2_ct0: + case S2_ct0p: + return rr0(eCTB(rc(1), 0/*bit*/, 32), Outputs); + case S2_ct1: + case S2_ct1p: + return rr0(eCTB(rc(1), 1/*bit*/, 32), Outputs); + case S5_popcountp: + // TODO + break; + + case C2_all8: { + RegisterCell P1 = rc(1); + bool Has0 = false, All1 = true; + for (uint16_t i = 0; i < 8/*XXX*/; ++i) { + if (!P1[i].is(1)) + All1 = false; + if (!P1[i].is(0)) + continue; + Has0 = true; + break; + } + if (!Has0 && !All1) + break; + RegisterCell RC(W0); + RC.fill(0, W0, (All1 ? BT::BitValue::One : BT::BitValue::Zero)); + return rr0(RC, Outputs); + } + case C2_any8: { + RegisterCell P1 = rc(1); + bool Has1 = false, All0 = true; + for (uint16_t i = 0; i < 8/*XXX*/; ++i) { + if (!P1[i].is(0)) + All0 = false; + if (!P1[i].is(1)) + continue; + Has1 = true; + break; + } + if (!Has1 && !All0) + break; + RegisterCell RC(W0); + RC.fill(0, W0, (Has1 ? BT::BitValue::One : BT::BitValue::Zero)); + return rr0(RC, Outputs); + } + case C2_and: + return rr0(eAND(rc(1), rc(2)), Outputs); + case C2_andn: + return rr0(eAND(rc(1), eNOT(rc(2))), Outputs); + case C2_not: + return rr0(eNOT(rc(1)), Outputs); + case C2_or: + return rr0(eORL(rc(1), rc(2)), Outputs); + case C2_orn: + return rr0(eORL(rc(1), eNOT(rc(2))), Outputs); + case C2_xor: + return rr0(eXOR(rc(1), rc(2)), Outputs); + case C4_and_and: + return rr0(eAND(rc(1), eAND(rc(2), rc(3))), Outputs); + case C4_and_andn: + return rr0(eAND(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case C4_and_or: + return rr0(eAND(rc(1), eORL(rc(2), rc(3))), Outputs); + case C4_and_orn: + return rr0(eAND(rc(1), eORL(rc(2), eNOT(rc(3)))), Outputs); + case C4_or_and: + return rr0(eORL(rc(1), eAND(rc(2), rc(3))), Outputs); + case C4_or_andn: + return rr0(eORL(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case C4_or_or: + return rr0(eORL(rc(1), eORL(rc(2), rc(3))), Outputs); + case C4_or_orn: + return rr0(eORL(rc(1), eORL(rc(2), eNOT(rc(3)))), Outputs); + case C2_bitsclr: + case C2_bitsclri: + case C2_bitsset: + case C4_nbitsclr: + case C4_nbitsclri: + case C4_nbitsset: + // TODO + break; + case S2_tstbit_i: + case S4_ntstbit_i: { + BT::BitValue V = rc(1)[im(2)]; + if (V.is(0) || V.is(1)) { + // If instruction is S2_tstbit_i, test for 1, otherwise test for 0. + bool TV = (Opc == S2_tstbit_i); + BT::BitValue F = V.is(TV) ? BT::BitValue::One : BT::BitValue::Zero; + return rr0(RegisterCell(W0).fill(0, W0, F), Outputs); + } + break; + } + + default: + return MachineEvaluator::evaluate(MI, Inputs, Outputs); + } + #undef im + #undef rc + #undef op + return false; +} + + +bool HexagonEvaluator::evaluate(const MachineInstr *BI, + const CellMapType &Inputs, BranchTargetList &Targets, + bool &FallsThru) const { + // We need to evaluate one branch at a time. TII::AnalyzeBranch checks + // all the branches in a basic block at once, so we cannot use it. + unsigned Opc = BI->getOpcode(); + bool SimpleBranch = false; + bool Negated = false; + switch (Opc) { + case Hexagon::J2_jumpf: + case Hexagon::J2_jumpfnew: + case Hexagon::J2_jumpfnewpt: + Negated = true; + case Hexagon::J2_jumpt: + case Hexagon::J2_jumptnew: + case Hexagon::J2_jumptnewpt: + // Simple branch: if([!]Pn) jump ... + // i.e. Op0 = predicate, Op1 = branch target. 
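+    // For example, "if (!p0) jump target" arrives here as J2_jumpf with
+    // Negated = true, Op0 = p0 and Op1 = the target basic block.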
+ SimpleBranch = true; + break; + case Hexagon::J2_jump: + Targets.insert(BI->getOperand(0).getMBB()); + FallsThru = false; + return true; + default: + // If the branch is of unknown type, assume that all successors are + // executable. + return false; + } + + if (!SimpleBranch) + return false; + + // BI is a conditional branch if we got here. + RegisterRef PR = BI->getOperand(0); + RegisterCell PC = getCell(PR, Inputs); + const BT::BitValue &Test = PC[0]; + + // If the condition is neither true nor false, then it's unknown. + if (!Test.is(0) && !Test.is(1)) + return false; + + // "Test.is(!Negated)" means "branch condition is true". + if (!Test.is(!Negated)) { + // Condition known to be false. + FallsThru = true; + return true; + } + + Targets.insert(BI->getOperand(1).getMBB()); + FallsThru = false; + return true; +} + + +bool HexagonEvaluator::evaluateLoad(const MachineInstr *MI, + const CellMapType &Inputs, CellMapType &Outputs) const { + if (TII.isPredicated(MI)) + return false; + assert(MI->mayLoad() && "A load that mayn't?"); + unsigned Opc = MI->getOpcode(); + + uint16_t BitNum; + bool SignEx; + using namespace Hexagon; + + switch (Opc) { + default: + return false; + +#if 0 + // memb_fifo + case L2_loadalignb_pbr: + case L2_loadalignb_pcr: + case L2_loadalignb_pi: + // memh_fifo + case L2_loadalignh_pbr: + case L2_loadalignh_pcr: + case L2_loadalignh_pi: + // membh + case L2_loadbsw2_pbr: + case L2_loadbsw2_pci: + case L2_loadbsw2_pcr: + case L2_loadbsw2_pi: + case L2_loadbsw4_pbr: + case L2_loadbsw4_pci: + case L2_loadbsw4_pcr: + case L2_loadbsw4_pi: + // memubh + case L2_loadbzw2_pbr: + case L2_loadbzw2_pci: + case L2_loadbzw2_pcr: + case L2_loadbzw2_pi: + case L2_loadbzw4_pbr: + case L2_loadbzw4_pci: + case L2_loadbzw4_pcr: + case L2_loadbzw4_pi: +#endif + + case L2_loadrbgp: + case L2_loadrb_io: + case L2_loadrb_pbr: + case L2_loadrb_pci: + case L2_loadrb_pcr: + case L2_loadrb_pi: + case L4_loadrb_abs: + case L4_loadrb_ap: + case L4_loadrb_rr: + case L4_loadrb_ur: + BitNum = 8; + SignEx = true; + break; + + case L2_loadrubgp: + case L2_loadrub_io: + case L2_loadrub_pbr: + case L2_loadrub_pci: + case L2_loadrub_pcr: + case L2_loadrub_pi: + case L4_loadrub_abs: + case L4_loadrub_ap: + case L4_loadrub_rr: + case L4_loadrub_ur: + BitNum = 8; + SignEx = false; + break; + + case L2_loadrhgp: + case L2_loadrh_io: + case L2_loadrh_pbr: + case L2_loadrh_pci: + case L2_loadrh_pcr: + case L2_loadrh_pi: + case L4_loadrh_abs: + case L4_loadrh_ap: + case L4_loadrh_rr: + case L4_loadrh_ur: + BitNum = 16; + SignEx = true; + break; + + case L2_loadruhgp: + case L2_loadruh_io: + case L2_loadruh_pbr: + case L2_loadruh_pci: + case L2_loadruh_pcr: + case L2_loadruh_pi: + case L4_loadruh_rr: + case L4_loadruh_abs: + case L4_loadruh_ap: + case L4_loadruh_ur: + BitNum = 16; + SignEx = false; + break; + + case L2_loadrigp: + case L2_loadri_io: + case L2_loadri_pbr: + case L2_loadri_pci: + case L2_loadri_pcr: + case L2_loadri_pi: + case L2_loadw_locked: + case L4_loadri_abs: + case L4_loadri_ap: + case L4_loadri_rr: + case L4_loadri_ur: + case LDriw_pred: + BitNum = 32; + SignEx = true; + break; + + case L2_loadrdgp: + case L2_loadrd_io: + case L2_loadrd_pbr: + case L2_loadrd_pci: + case L2_loadrd_pcr: + case L2_loadrd_pi: + case L4_loadd_locked: + case L4_loadrd_abs: + case L4_loadrd_ap: + case L4_loadrd_rr: + case L4_loadrd_ur: + BitNum = 64; + SignEx = true; + break; + } + + const MachineOperand &MD = MI->getOperand(0); + assert(MD.isReg() && MD.isDef()); + RegisterRef RD = MD; + + uint16_t W = 
getRegBitWidth(RD); + assert(W >= BitNum && BitNum > 0); + RegisterCell Res(W); + + for (uint16_t i = 0; i < BitNum; ++i) + Res[i] = BT::BitValue::self(BT::BitRef(RD.Reg, i)); + + if (SignEx) { + const BT::BitValue &Sign = Res[BitNum-1]; + for (uint16_t i = BitNum; i < W; ++i) + Res[i] = BT::BitValue::ref(Sign); + } else { + for (uint16_t i = BitNum; i < W; ++i) + Res[i] = BT::BitValue::Zero; + } + + putCell(RD, Res, Outputs); + return true; +} + + +bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr *MI, + const CellMapType &Inputs, CellMapType &Outputs) const { + // If MI defines a formal parameter, but is not a copy (loads are handled + // in evaluateLoad), then it's not clear what to do. + assert(MI->isCopy()); + + RegisterRef RD = MI->getOperand(0); + RegisterRef RS = MI->getOperand(1); + assert(RD.Sub == 0); + if (!TargetRegisterInfo::isPhysicalRegister(RS.Reg)) + return false; + RegExtMap::const_iterator F = VRX.find(RD.Reg); + if (F == VRX.end()) + return false; + + uint16_t EW = F->second.Width; + // Store RD's cell into the map. This will associate the cell with a virtual + // register, and make zero-/sign-extends possible (otherwise we would be ex- + // tending "self" bit values, which will have no effect, since "self" values + // cannot be references to anything). + putCell(RD, getCell(RS, Inputs), Outputs); + + RegisterCell Res; + // Read RD's cell from the outputs instead of RS's cell from the inputs: + if (F->second.Type == ExtType::SExt) + Res = eSXT(getCell(RD, Outputs), EW); + else if (F->second.Type == ExtType::ZExt) + Res = eZXT(getCell(RD, Outputs), EW); + + putCell(RD, Res, Outputs); + return true; +} + + +unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const { + using namespace Hexagon; + bool Is64 = DoubleRegsRegClass.contains(PReg); + assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg)); + + static const unsigned Phys32[] = { R0, R1, R2, R3, R4, R5 }; + static const unsigned Phys64[] = { D0, D1, D2 }; + const unsigned Num32 = sizeof(Phys32)/sizeof(unsigned); + const unsigned Num64 = sizeof(Phys64)/sizeof(unsigned); + + // Return the first parameter register of the required width. + if (PReg == 0) + return (Width <= 32) ? Phys32[0] : Phys64[0]; + + // Set Idx32, Idx64 in such a way that Idx+1 would give the index of the + // next register. + unsigned Idx32 = 0, Idx64 = 0; + if (!Is64) { + while (Idx32 < Num32) { + if (Phys32[Idx32] == PReg) + break; + Idx32++; + } + Idx64 = Idx32/2; + } else { + while (Idx64 < Num64) { + if (Phys64[Idx64] == PReg) + break; + Idx64++; + } + Idx32 = Idx64*2+1; + } + + if (Width <= 32) + return (Idx32+1 < Num32) ? Phys32[Idx32+1] : 0; + return (Idx64+1 < Num64) ? Phys64[Idx64+1] : 0; +} + + +unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const { + typedef MachineRegisterInfo::livein_iterator iterator; + for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) { + if (I->first == PReg) + return I->second; + } + return 0; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h new file mode 100644 index 0000000..897af2d --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h @@ -0,0 +1,64 @@ +//===--- HexagonBitTracker.h ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONBITTRACKER_H +#define HEXAGONBITTRACKER_H + +#include "BitTracker.h" +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + class HexagonInstrInfo; + class HexagonRegisterInfo; + +struct HexagonEvaluator : public BitTracker::MachineEvaluator { + typedef BitTracker::CellMapType CellMapType; + typedef BitTracker::RegisterRef RegisterRef; + typedef BitTracker::RegisterCell RegisterCell; + typedef BitTracker::BranchTargetList BranchTargetList; + + HexagonEvaluator(const HexagonRegisterInfo &tri, MachineRegisterInfo &mri, + const HexagonInstrInfo &tii, MachineFunction &mf); + + bool evaluate(const MachineInstr *MI, const CellMapType &Inputs, + CellMapType &Outputs) const override; + bool evaluate(const MachineInstr *BI, const CellMapType &Inputs, + BranchTargetList &Targets, bool &FallsThru) const override; + + BitTracker::BitMask mask(unsigned Reg, unsigned Sub) const override; + + MachineFunction &MF; + MachineFrameInfo &MFI; + const HexagonInstrInfo &TII; + +private: + bool evaluateLoad(const MachineInstr *MI, const CellMapType &Inputs, + CellMapType &Outputs) const; + bool evaluateFormalCopy(const MachineInstr *MI, const CellMapType &Inputs, + CellMapType &Outputs) const; + + unsigned getNextPhysReg(unsigned PReg, unsigned Width) const; + unsigned getVirtRegFor(unsigned PReg) const; + + // Type of formal parameter extension. + struct ExtType { + enum { SExt, ZExt }; + char Type; + uint16_t Width; + ExtType() : Type(0), Width(0) {} + ExtType(char t, uint16_t w) : Type(t), Width(w) {} + }; + // Map VR -> extension type. + typedef DenseMap<unsigned, ExtType> RegExtMap; + RegExtMap VRX; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp new file mode 100644 index 0000000..efafdd0 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -0,0 +1,247 @@ +//===-- HexagonCFGOptimizer.cpp - CFG optimizations -----------------------===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagon_cfg" + +namespace llvm { + FunctionPass *createHexagonCFGOptimizer(); + void initializeHexagonCFGOptimizerPass(PassRegistry&); +} + + +namespace { + +class HexagonCFGOptimizer : public MachineFunctionPass { + +private: + void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*); + + public: + static char ID; + HexagonCFGOptimizer() : MachineFunctionPass(ID) { + initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry()); + } + + const char *getPassName() const override { + return "Hexagon CFG Optimizer"; + } + bool runOnMachineFunction(MachineFunction &Fn) override; +}; + + +char HexagonCFGOptimizer::ID = 0; + +static bool IsConditionalBranch(int Opc) { + return (Opc == Hexagon::J2_jumpt) || (Opc == Hexagon::J2_jumpf) + || (Opc == Hexagon::J2_jumptnewpt) || (Opc == Hexagon::J2_jumpfnewpt); +} + + +static bool IsUnconditionalJump(int Opc) { + return (Opc == Hexagon::J2_jump); +} + + +void +HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI, + MachineBasicBlock* NewTarget) { + const TargetInstrInfo *TII = + MI->getParent()->getParent()->getSubtarget().getInstrInfo(); + int NewOpcode = 0; + switch(MI->getOpcode()) { + case Hexagon::J2_jumpt: + NewOpcode = Hexagon::J2_jumpf; + break; + + case Hexagon::J2_jumpf: + NewOpcode = Hexagon::J2_jumpt; + break; + + case Hexagon::J2_jumptnewpt: + NewOpcode = Hexagon::J2_jumpfnewpt; + break; + + case Hexagon::J2_jumpfnewpt: + NewOpcode = Hexagon::J2_jumptnewpt; + break; + + default: + llvm_unreachable("Cannot handle this case"); + } + + MI->setDesc(TII->get(NewOpcode)); + MI->getOperand(1).setMBB(NewTarget); +} + + +bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock *MBB = &*MBBb; + + // Traverse the basic block. + MachineBasicBlock::iterator MII = MBB->getFirstTerminator(); + if (MII != MBB->end()) { + MachineInstr *MI = MII; + int Opc = MI->getOpcode(); + if (IsConditionalBranch(Opc)) { + + // + // (Case 1) Transform the code if the following condition occurs: + // BB1: if (p0) jump BB3 + // ...falls-through to BB2 ... + // BB2: jump BB4 + // ...next block in layout is BB3... + // BB3: ... + // + // Transform this to: + // BB1: if (!p0) jump BB4 + // Remove BB2 + // BB3: ... + // + // (Case 2) A variation occurs when BB3 contains a JMP to BB4: + // BB1: if (p0) jump BB3 + // ...falls-through to BB2 ... + // BB2: jump BB4 + // ...other basic blocks ... + // BB4: + // ...not a fall-thru + // BB3: ... + // jump BB4 + // + // Transform this to: + // BB1: if (!p0) jump BB4 + // Remove BB2 + // BB3: ... + // BB4: ... 
+ // + unsigned NumSuccs = MBB->succ_size(); + MachineBasicBlock::succ_iterator SI = MBB->succ_begin(); + MachineBasicBlock* FirstSucc = *SI; + MachineBasicBlock* SecondSucc = *(++SI); + MachineBasicBlock* LayoutSucc = nullptr; + MachineBasicBlock* JumpAroundTarget = nullptr; + + if (MBB->isLayoutSuccessor(FirstSucc)) { + LayoutSucc = FirstSucc; + JumpAroundTarget = SecondSucc; + } else if (MBB->isLayoutSuccessor(SecondSucc)) { + LayoutSucc = SecondSucc; + JumpAroundTarget = FirstSucc; + } else { + // Odd case...cannot handle. + } + + // The target of the unconditional branch must be JumpAroundTarget. + // TODO: If not, we should not invert the unconditional branch. + MachineBasicBlock* CondBranchTarget = nullptr; + if ((MI->getOpcode() == Hexagon::J2_jumpt) || + (MI->getOpcode() == Hexagon::J2_jumpf)) { + CondBranchTarget = MI->getOperand(1).getMBB(); + } + + if (!LayoutSucc || (CondBranchTarget != JumpAroundTarget)) { + continue; + } + + if ((NumSuccs == 2) && LayoutSucc && (LayoutSucc->pred_size() == 1)) { + + // Ensure that BB2 has one instruction -- an unconditional jump. + if ((LayoutSucc->size() == 1) && + IsUnconditionalJump(LayoutSucc->front().getOpcode())) { + MachineBasicBlock* UncondTarget = + LayoutSucc->front().getOperand(0).getMBB(); + // Check if the layout successor of BB2 is BB3. + bool case1 = LayoutSucc->isLayoutSuccessor(JumpAroundTarget); + bool case2 = JumpAroundTarget->isSuccessor(UncondTarget) && + JumpAroundTarget->size() >= 1 && + IsUnconditionalJump(JumpAroundTarget->back().getOpcode()) && + JumpAroundTarget->pred_size() == 1 && + JumpAroundTarget->succ_size() == 1; + + if (case1 || case2) { + InvertAndChangeJumpTarget(MI, UncondTarget); + MBB->replaceSuccessor(JumpAroundTarget, UncondTarget); + + // Remove the unconditional branch in LayoutSucc. + LayoutSucc->erase(LayoutSucc->begin()); + LayoutSucc->replaceSuccessor(UncondTarget, JumpAroundTarget); + + // This code performs the conversion for case 2, which moves + // the block to the fall-thru case (BB3 in the code above). + if (case2 && !case1) { + JumpAroundTarget->moveAfter(LayoutSucc); + // only move a block if it doesn't have a fall-thru. otherwise + // the CFG will be incorrect. + if (!UncondTarget->canFallThrough()) { + UncondTarget->moveAfter(JumpAroundTarget); + } + } + + // + // Correct live-in information. Is used by post-RA scheduler + // The live-in to LayoutSucc is now all values live-in to + // JumpAroundTarget. 
+ // + std::vector<MachineBasicBlock::RegisterMaskPair> OrigLiveIn( + LayoutSucc->livein_begin(), LayoutSucc->livein_end()); + std::vector<MachineBasicBlock::RegisterMaskPair> NewLiveIn( + JumpAroundTarget->livein_begin(), + JumpAroundTarget->livein_end()); + for (const auto &OrigLI : OrigLiveIn) + LayoutSucc->removeLiveIn(OrigLI.PhysReg); + for (const auto &NewLI : NewLiveIn) + LayoutSucc->addLiveIn(NewLI); + } + } + } + } + } + } + return true; +} +} + + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +static void initializePassOnce(PassRegistry &Registry) { + PassInfo *PI = new PassInfo("Hexagon CFG Optimizer", "hexagon-cfg", + &HexagonCFGOptimizer::ID, nullptr, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonCFGOptimizerPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + +FunctionPass *llvm::createHexagonCFGOptimizer() { + return new HexagonCFGOptimizer(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td new file mode 100644 index 0000000..e61b2a7 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConv.td @@ -0,0 +1,35 @@ +//===- HexagonCallingConv.td - Calling Conventions Hexagon -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the Hexagon architectures. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Return Value Calling Conventions +//===----------------------------------------------------------------------===// + +// Hexagon 32-bit C return-value convention. +def RetCC_Hexagon32 : CallingConv<[ + CCIfType<[i32, f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>, + CCIfType<[i64, f64], CCAssignToReg<[D0, D1, D2]>>, + + // Alternatively, they are assigned to the stack in 4-byte aligned units. + CCAssignToStack<4, 4> +]>; + +// Hexagon 32-bit C Calling convention. +def CC_Hexagon32 : CallingConv<[ + // All arguments get passed in integer registers if there is space. + CCIfType<[f32, i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>, + CCIfType<[f64, i64], CCAssignToReg<[D0, D1, D2]>>, + + // Alternatively, they are assigned to the stack in 4-byte aligned units. + CCAssignToStack<4, 4> +]>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp new file mode 100644 index 0000000..931db66 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -0,0 +1,1310 @@ +//===--- HexagonCommonGEP.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "commgep" + +#include "llvm/Pass.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" + +#include <map> +#include <set> +#include <vector> + +#include "HexagonTargetMachine.h" + +using namespace llvm; + +static cl::opt<bool> OptSpeculate("commgep-speculate", cl::init(true), + cl::Hidden, cl::ZeroOrMore); + +static cl::opt<bool> OptEnableInv("commgep-inv", cl::init(true), cl::Hidden, + cl::ZeroOrMore); + +static cl::opt<bool> OptEnableConst("commgep-const", cl::init(true), + cl::Hidden, cl::ZeroOrMore); + +namespace llvm { + void initializeHexagonCommonGEPPass(PassRegistry&); +} + +namespace { + struct GepNode; + typedef std::set<GepNode*> NodeSet; + typedef std::map<GepNode*,Value*> NodeToValueMap; + typedef std::vector<GepNode*> NodeVect; + typedef std::map<GepNode*,NodeVect> NodeChildrenMap; + typedef std::set<Use*> UseSet; + typedef std::map<GepNode*,UseSet> NodeToUsesMap; + + // Numbering map for gep nodes. Used to keep track of ordering for + // gep nodes. + struct NodeOrdering { + NodeOrdering() : LastNum(0) {} + + void insert(const GepNode *N) { Map.insert(std::make_pair(N, ++LastNum)); } + void clear() { Map.clear(); } + + bool operator()(const GepNode *N1, const GepNode *N2) const { + auto F1 = Map.find(N1), F2 = Map.find(N2); + assert(F1 != Map.end() && F2 != Map.end()); + return F1->second < F2->second; + } + + private: + std::map<const GepNode *, unsigned> Map; + unsigned LastNum; + }; + + class HexagonCommonGEP : public FunctionPass { + public: + static char ID; + HexagonCommonGEP() : FunctionPass(ID) { + initializeHexagonCommonGEPPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnFunction(Function &F); + virtual const char *getPassName() const { + return "Hexagon Common GEP"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<PostDominatorTree>(); + AU.addPreserved<PostDominatorTree>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + FunctionPass::getAnalysisUsage(AU); + } + + private: + typedef std::map<Value*,GepNode*> ValueToNodeMap; + typedef std::vector<Value*> ValueVect; + typedef std::map<GepNode*,ValueVect> NodeToValuesMap; + + void getBlockTraversalOrder(BasicBlock *Root, ValueVect &Order); + bool isHandledGepForm(GetElementPtrInst *GepI); + void processGepInst(GetElementPtrInst *GepI, ValueToNodeMap &NM); + void collect(); + void common(); + + BasicBlock *recalculatePlacement(GepNode *Node, NodeChildrenMap &NCM, + NodeToValueMap &Loc); + BasicBlock *recalculatePlacementRec(GepNode *Node, NodeChildrenMap &NCM, + NodeToValueMap &Loc); + bool isInvariantIn(Value *Val, Loop *L); + bool isInvariantIn(GepNode *Node, Loop *L); + bool isInMainPath(BasicBlock *B, Loop *L); + BasicBlock *adjustForInvariance(GepNode *Node, NodeChildrenMap &NCM, + NodeToValueMap &Loc); + 
void separateChainForNode(GepNode *Node, Use *U, NodeToValueMap &Loc); + void separateConstantChains(GepNode *Node, NodeChildrenMap &NCM, + NodeToValueMap &Loc); + void computeNodePlacement(NodeToValueMap &Loc); + + Value *fabricateGEP(NodeVect &NA, BasicBlock::iterator At, + BasicBlock *LocB); + void getAllUsersForNode(GepNode *Node, ValueVect &Values, + NodeChildrenMap &NCM); + void materialize(NodeToValueMap &Loc); + + void removeDeadCode(); + + NodeVect Nodes; + NodeToUsesMap Uses; + NodeOrdering NodeOrder; // Node ordering, for deterministic behavior. + SpecificBumpPtrAllocator<GepNode> *Mem; + LLVMContext *Ctx; + LoopInfo *LI; + DominatorTree *DT; + PostDominatorTree *PDT; + Function *Fn; + }; +} + + +char HexagonCommonGEP::ID = 0; +INITIALIZE_PASS_BEGIN(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP", + false, false) + +namespace { + struct GepNode { + enum { + None = 0, + Root = 0x01, + Internal = 0x02, + Used = 0x04 + }; + + uint32_t Flags; + union { + GepNode *Parent; + Value *BaseVal; + }; + Value *Idx; + Type *PTy; // Type of the pointer operand. + + GepNode() : Flags(0), Parent(0), Idx(0), PTy(0) {} + GepNode(const GepNode *N) : Flags(N->Flags), Idx(N->Idx), PTy(N->PTy) { + if (Flags & Root) + BaseVal = N->BaseVal; + else + Parent = N->Parent; + } + friend raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN); + }; + + + Type *next_type(Type *Ty, Value *Idx) { + // Advance the type. + if (!Ty->isStructTy()) { + Type *NexTy = cast<SequentialType>(Ty)->getElementType(); + return NexTy; + } + // Otherwise it is a struct type. 
+ ConstantInt *CI = dyn_cast<ConstantInt>(Idx); + assert(CI && "Struct type with non-constant index"); + int64_t i = CI->getValue().getSExtValue(); + Type *NextTy = cast<StructType>(Ty)->getElementType(i); + return NextTy; + } + + + raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN) { + OS << "{ {"; + bool Comma = false; + if (GN.Flags & GepNode::Root) { + OS << "root"; + Comma = true; + } + if (GN.Flags & GepNode::Internal) { + if (Comma) + OS << ','; + OS << "internal"; + Comma = true; + } + if (GN.Flags & GepNode::Used) { + if (Comma) + OS << ','; + OS << "used"; + Comma = true; + } + OS << "} "; + if (GN.Flags & GepNode::Root) + OS << "BaseVal:" << GN.BaseVal->getName() << '(' << GN.BaseVal << ')'; + else + OS << "Parent:" << GN.Parent; + + OS << " Idx:"; + if (ConstantInt *CI = dyn_cast<ConstantInt>(GN.Idx)) + OS << CI->getValue().getSExtValue(); + else if (GN.Idx->hasName()) + OS << GN.Idx->getName(); + else + OS << "<anon> =" << *GN.Idx; + + OS << " PTy:"; + if (GN.PTy->isStructTy()) { + StructType *STy = cast<StructType>(GN.PTy); + if (!STy->isLiteral()) + OS << GN.PTy->getStructName(); + else + OS << "<anon-struct>:" << *STy; + } + else + OS << *GN.PTy; + OS << " }"; + return OS; + } + + + template <typename NodeContainer> + void dump_node_container(raw_ostream &OS, const NodeContainer &S) { + typedef typename NodeContainer::const_iterator const_iterator; + for (const_iterator I = S.begin(), E = S.end(); I != E; ++I) + OS << *I << ' ' << **I << '\n'; + } + + raw_ostream &operator<< (raw_ostream &OS, + const NodeVect &S) LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<< (raw_ostream &OS, const NodeVect &S) { + dump_node_container(OS, S); + return OS; + } + + + raw_ostream &operator<< (raw_ostream &OS, + const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){ + typedef NodeToUsesMap::const_iterator const_iterator; + for (const_iterator I = M.begin(), E = M.end(); I != E; ++I) { + const UseSet &Us = I->second; + OS << I->first << " -> #" << Us.size() << '{'; + for (UseSet::const_iterator J = Us.begin(), F = Us.end(); J != F; ++J) { + User *R = (*J)->getUser(); + if (R->hasName()) + OS << ' ' << R->getName(); + else + OS << " <?>(" << *R << ')'; + } + OS << " }\n"; + } + return OS; + } + + + struct in_set { + in_set(const NodeSet &S) : NS(S) {} + bool operator() (GepNode *N) const { + return NS.find(N) != NS.end(); + } + private: + const NodeSet &NS; + }; +} + + +inline void *operator new(size_t, SpecificBumpPtrAllocator<GepNode> &A) { + return A.Allocate(); +} + + +void HexagonCommonGEP::getBlockTraversalOrder(BasicBlock *Root, + ValueVect &Order) { + // Compute block ordering for a typical DT-based traversal of the flow + // graph: "before visiting a block, all of its dominators must have been + // visited". + + Order.push_back(Root); + DomTreeNode *DTN = DT->getNode(Root); + typedef GraphTraits<DomTreeNode*> GTN; + typedef GTN::ChildIteratorType Iter; + for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I) + getBlockTraversalOrder((*I)->getBlock(), Order); +} + + +bool HexagonCommonGEP::isHandledGepForm(GetElementPtrInst *GepI) { + // No vector GEPs. + if (!GepI->getType()->isPointerTy()) + return false; + // No GEPs without any indices. (Is this possible?) 
+ if (GepI->idx_begin() == GepI->idx_end()) + return false; + return true; +} + + +void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, + ValueToNodeMap &NM) { + DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n'); + GepNode *N = new (*Mem) GepNode; + Value *PtrOp = GepI->getPointerOperand(); + ValueToNodeMap::iterator F = NM.find(PtrOp); + if (F == NM.end()) { + N->BaseVal = PtrOp; + N->Flags |= GepNode::Root; + } else { + // If PtrOp was a GEP instruction, it must have already been processed. + // The ValueToNodeMap entry for it is the last gep node in the generated + // chain. Link to it here. + N->Parent = F->second; + } + N->PTy = PtrOp->getType(); + N->Idx = *GepI->idx_begin(); + + // Collect the list of users of this GEP instruction. Will add it to the + // last node created for it. + UseSet Us; + for (Value::user_iterator UI = GepI->user_begin(), UE = GepI->user_end(); + UI != UE; ++UI) { + // Check if this gep is used by anything other than other geps that + // we will process. + if (isa<GetElementPtrInst>(*UI)) { + GetElementPtrInst *UserG = cast<GetElementPtrInst>(*UI); + if (isHandledGepForm(UserG)) + continue; + } + Us.insert(&UI.getUse()); + } + Nodes.push_back(N); + NodeOrder.insert(N); + + // Skip the first index operand, since we only handle 0. This dereferences + // the pointer operand. + GepNode *PN = N; + Type *PtrTy = cast<PointerType>(PtrOp->getType())->getElementType(); + for (User::op_iterator OI = GepI->idx_begin()+1, OE = GepI->idx_end(); + OI != OE; ++OI) { + Value *Op = *OI; + GepNode *Nx = new (*Mem) GepNode; + Nx->Parent = PN; // Link Nx to the previous node. + Nx->Flags |= GepNode::Internal; + Nx->PTy = PtrTy; + Nx->Idx = Op; + Nodes.push_back(Nx); + NodeOrder.insert(Nx); + PN = Nx; + + PtrTy = next_type(PtrTy, Op); + } + + // After last node has been created, update the use information. + if (!Us.empty()) { + PN->Flags |= GepNode::Used; + Uses[PN].insert(Us.begin(), Us.end()); + } + + // Link the last node with the originating GEP instruction. This is to + // help with linking chained GEP instructions. + NM.insert(std::make_pair(GepI, PN)); +} + + +void HexagonCommonGEP::collect() { + // Establish depth-first traversal order of the dominator tree. + ValueVect BO; + getBlockTraversalOrder(&Fn->front(), BO); + + // The creation of gep nodes requires DT-traversal. When processing a GEP + // instruction that uses another GEP instruction as the base pointer, the + // gep node for the base pointer should already exist. 
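+  // For example (hypothetical IR), in
+  //   %a = getelementptr %struct.S, %struct.S* %p, i32 0, i32 1
+  //   %b = getelementptr i16, i16* %a, i32 %i
+  // the definition of %a dominates its use in %b, so the node chain for %a is
+  // already in NM when %b is visited.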
+ ValueToNodeMap NM; + for (ValueVect::iterator I = BO.begin(), E = BO.end(); I != E; ++I) { + BasicBlock *B = cast<BasicBlock>(*I); + for (BasicBlock::iterator J = B->begin(), F = B->end(); J != F; ++J) { + if (!isa<GetElementPtrInst>(J)) + continue; + GetElementPtrInst *GepI = cast<GetElementPtrInst>(J); + if (isHandledGepForm(GepI)) + processGepInst(GepI, NM); + } + } + + DEBUG(dbgs() << "Gep nodes after initial collection:\n" << Nodes); +} + + +namespace { + void invert_find_roots(const NodeVect &Nodes, NodeChildrenMap &NCM, + NodeVect &Roots) { + typedef NodeVect::const_iterator const_iterator; + for (const_iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { + GepNode *N = *I; + if (N->Flags & GepNode::Root) { + Roots.push_back(N); + continue; + } + GepNode *PN = N->Parent; + NCM[PN].push_back(N); + } + } + + void nodes_for_root(GepNode *Root, NodeChildrenMap &NCM, NodeSet &Nodes) { + NodeVect Work; + Work.push_back(Root); + Nodes.insert(Root); + + while (!Work.empty()) { + NodeVect::iterator First = Work.begin(); + GepNode *N = *First; + Work.erase(First); + NodeChildrenMap::iterator CF = NCM.find(N); + if (CF != NCM.end()) { + Work.insert(Work.end(), CF->second.begin(), CF->second.end()); + Nodes.insert(CF->second.begin(), CF->second.end()); + } + } + } +} + + +namespace { + typedef std::set<NodeSet> NodeSymRel; + typedef std::pair<GepNode*,GepNode*> NodePair; + typedef std::set<NodePair> NodePairSet; + + const NodeSet *node_class(GepNode *N, NodeSymRel &Rel) { + for (NodeSymRel::iterator I = Rel.begin(), E = Rel.end(); I != E; ++I) + if (I->count(N)) + return &*I; + return 0; + } + + // Create an ordered pair of GepNode pointers. The pair will be used in + // determining equality. The only purpose of the ordering is to eliminate + // duplication due to the commutativity of equality/non-equality. + NodePair node_pair(GepNode *N1, GepNode *N2) { + uintptr_t P1 = uintptr_t(N1), P2 = uintptr_t(N2); + if (P1 <= P2) + return std::make_pair(N1, N2); + return std::make_pair(N2, N1); + } + + unsigned node_hash(GepNode *N) { + // Include everything except flags and parent. + FoldingSetNodeID ID; + ID.AddPointer(N->Idx); + ID.AddPointer(N->PTy); + return ID.ComputeHash(); + } + + bool node_eq(GepNode *N1, GepNode *N2, NodePairSet &Eq, NodePairSet &Ne) { + // Don't cache the result for nodes with different hashes. The hash + // comparison is fast enough. + if (node_hash(N1) != node_hash(N2)) + return false; + + NodePair NP = node_pair(N1, N2); + NodePairSet::iterator FEq = Eq.find(NP); + if (FEq != Eq.end()) + return true; + NodePairSet::iterator FNe = Ne.find(NP); + if (FNe != Ne.end()) + return false; + // Not previously compared. + bool Root1 = N1->Flags & GepNode::Root; + bool Root2 = N2->Flags & GepNode::Root; + NodePair P = node_pair(N1, N2); + // If the Root flag has different values, the nodes are different. + // If both nodes are root nodes, but their base pointers differ, + // they are different. + if (Root1 != Root2 || (Root1 && N1->BaseVal != N2->BaseVal)) { + Ne.insert(P); + return false; + } + // Here the root flags are identical, and for root nodes the + // base pointers are equal, so the root nodes are equal. + // For non-root nodes, compare their parent nodes. + if (Root1 || node_eq(N1->Parent, N2->Parent, Eq, Ne)) { + Eq.insert(P); + return true; + } + return false; + } +} + + +void HexagonCommonGEP::common() { + // The essence of this commoning is finding gep nodes that are equal. + // To do this we need to compare all pairs of nodes. 
To save time, + // first, partition the set of all nodes into sets of potentially equal + // nodes, and then compare pairs from within each partition. + typedef std::map<unsigned,NodeSet> NodeSetMap; + NodeSetMap MaybeEq; + + for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { + GepNode *N = *I; + unsigned H = node_hash(N); + MaybeEq[H].insert(N); + } + + // Compute the equivalence relation for the gep nodes. Use two caches, + // one for equality and the other for non-equality. + NodeSymRel EqRel; // Equality relation (as set of equivalence classes). + NodePairSet Eq, Ne; // Caches. + for (NodeSetMap::iterator I = MaybeEq.begin(), E = MaybeEq.end(); + I != E; ++I) { + NodeSet &S = I->second; + for (NodeSet::iterator NI = S.begin(), NE = S.end(); NI != NE; ++NI) { + GepNode *N = *NI; + // If node already has a class, then the class must have been created + // in a prior iteration of this loop. Since equality is transitive, + // nothing more will be added to that class, so skip it. + if (node_class(N, EqRel)) + continue; + + // Create a new class candidate now. + NodeSet C; + for (NodeSet::iterator NJ = std::next(NI); NJ != NE; ++NJ) + if (node_eq(N, *NJ, Eq, Ne)) + C.insert(*NJ); + // If Tmp is empty, N would be the only element in it. Don't bother + // creating a class for it then. + if (!C.empty()) { + C.insert(N); // Finalize the set before adding it to the relation. + std::pair<NodeSymRel::iterator, bool> Ins = EqRel.insert(C); + (void)Ins; + assert(Ins.second && "Cannot add a class"); + } + } + } + + DEBUG({ + dbgs() << "Gep node equality:\n"; + for (NodePairSet::iterator I = Eq.begin(), E = Eq.end(); I != E; ++I) + dbgs() << "{ " << I->first << ", " << I->second << " }\n"; + + dbgs() << "Gep equivalence classes:\n"; + for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) { + dbgs() << '{'; + const NodeSet &S = *I; + for (NodeSet::const_iterator J = S.begin(), F = S.end(); J != F; ++J) { + if (J != S.begin()) + dbgs() << ','; + dbgs() << ' ' << *J; + } + dbgs() << " }\n"; + } + }); + + + // Create a projection from a NodeSet to the minimal element in it. + typedef std::map<const NodeSet*,GepNode*> ProjMap; + ProjMap PM; + for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) { + const NodeSet &S = *I; + GepNode *Min = *std::min_element(S.begin(), S.end(), NodeOrder); + std::pair<ProjMap::iterator,bool> Ins = PM.insert(std::make_pair(&S, Min)); + (void)Ins; + assert(Ins.second && "Cannot add minimal element"); + + // Update the min element's flags, and user list. + uint32_t Flags = 0; + UseSet &MinUs = Uses[Min]; + for (NodeSet::iterator J = S.begin(), F = S.end(); J != F; ++J) { + GepNode *N = *J; + uint32_t NF = N->Flags; + // If N is used, append all original values of N to the list of + // original values of Min. + if (NF & GepNode::Used) + MinUs.insert(Uses[N].begin(), Uses[N].end()); + Flags |= NF; + } + if (MinUs.empty()) + Uses.erase(Min); + + // The collected flags should include all the flags from the min element. + assert((Min->Flags & Flags) == Min->Flags); + Min->Flags = Flags; + } + + // Commoning: for each non-root gep node, replace "Parent" with the + // selected (minimum) node from the corresponding equivalence class. + // If a given parent does not have an equivalence class, leave it + // unchanged (it means that it's the only element in its class). 
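+ // Illustrative example (not part of the original comments): two identical
+ // GEPs "getelementptr %p, i32 0, i32 1, i32 %i" produce two parallel node
+ // chains with equal hashes and equal indices. The loop below re-parents
+ // the children of the non-minimal nodes onto the minimal node of each
+ // class, and the cleanup that follows erases the now-unused duplicates.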
+ for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { + GepNode *N = *I; + if (N->Flags & GepNode::Root) + continue; + const NodeSet *PC = node_class(N->Parent, EqRel); + if (!PC) + continue; + ProjMap::iterator F = PM.find(PC); + if (F == PM.end()) + continue; + // Found a replacement, use it. + GepNode *Rep = F->second; + N->Parent = Rep; + } + + DEBUG(dbgs() << "Gep nodes after commoning:\n" << Nodes); + + // Finally, erase the nodes that are no longer used. + NodeSet Erase; + for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { + GepNode *N = *I; + const NodeSet *PC = node_class(N, EqRel); + if (!PC) + continue; + ProjMap::iterator F = PM.find(PC); + if (F == PM.end()) + continue; + if (N == F->second) + continue; + // Node for removal. + Erase.insert(*I); + } + NodeVect::iterator NewE = std::remove_if(Nodes.begin(), Nodes.end(), + in_set(Erase)); + Nodes.resize(std::distance(Nodes.begin(), NewE)); + + DEBUG(dbgs() << "Gep nodes after post-commoning cleanup:\n" << Nodes); +} + + +namespace { + template <typename T> + BasicBlock *nearest_common_dominator(DominatorTree *DT, T &Blocks) { + DEBUG({ + dbgs() << "NCD of {"; + for (typename T::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + if (!*I) + continue; + BasicBlock *B = cast<BasicBlock>(*I); + dbgs() << ' ' << B->getName(); + } + dbgs() << " }\n"; + }); + + // Allow null basic blocks in Blocks. In such cases, return 0. + typename T::iterator I = Blocks.begin(), E = Blocks.end(); + if (I == E || !*I) + return 0; + BasicBlock *Dom = cast<BasicBlock>(*I); + while (++I != E) { + BasicBlock *B = cast_or_null<BasicBlock>(*I); + Dom = B ? DT->findNearestCommonDominator(Dom, B) : 0; + if (!Dom) + return 0; + } + DEBUG(dbgs() << "computed:" << Dom->getName() << '\n'); + return Dom; + } + + template <typename T> + BasicBlock *nearest_common_dominatee(DominatorTree *DT, T &Blocks) { + // If two blocks, A and B, dominate a block C, then A dominates B, + // or B dominates A. + typename T::iterator I = Blocks.begin(), E = Blocks.end(); + // Find the first non-null block. + while (I != E && !*I) + ++I; + if (I == E) + return DT->getRoot(); + BasicBlock *DomB = cast<BasicBlock>(*I); + while (++I != E) { + if (!*I) + continue; + BasicBlock *B = cast<BasicBlock>(*I); + if (DT->dominates(B, DomB)) + continue; + if (!DT->dominates(DomB, B)) + return 0; + DomB = B; + } + return DomB; + } + + // Find the first use in B of any value from Values. If no such use, + // return B->end(). + template <typename T> + BasicBlock::iterator first_use_of_in_block(T &Values, BasicBlock *B) { + BasicBlock::iterator FirstUse = B->end(), BEnd = B->end(); + typedef typename T::iterator iterator; + for (iterator I = Values.begin(), E = Values.end(); I != E; ++I) { + Value *V = *I; + // If V is used in a PHI node, the use belongs to the incoming block, + // not the block with the PHI node. In the incoming block, the use + // would be considered as being at the end of it, so it cannot + // influence the position of the first use (which is assumed to be + // at the end to start with). 
+ if (isa<PHINode>(V)) + continue; + if (!isa<Instruction>(V)) + continue; + Instruction *In = cast<Instruction>(V); + if (In->getParent() != B) + continue; + BasicBlock::iterator It = In->getIterator(); + if (std::distance(FirstUse, BEnd) < std::distance(It, BEnd)) + FirstUse = It; + } + return FirstUse; + } + + bool is_empty(const BasicBlock *B) { + return B->empty() || (&*B->begin() == B->getTerminator()); + } +} + + +BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node, + NodeChildrenMap &NCM, NodeToValueMap &Loc) { + DEBUG(dbgs() << "Loc for node:" << Node << '\n'); + // Recalculate the placement for Node, assuming that the locations of + // its children in Loc are valid. + // Return 0 if there is no valid placement for Node (for example, it + // uses an index value that is not available at the location required + // to dominate all children, etc.). + + // Find the nearest common dominator for: + // - all users, if the node is used, and + // - all children. + ValueVect Bs; + if (Node->Flags & GepNode::Used) { + // Append all blocks with uses of the original values to the + // block vector Bs. + NodeToUsesMap::iterator UF = Uses.find(Node); + assert(UF != Uses.end() && "Used node with no use information"); + UseSet &Us = UF->second; + for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) { + Use *U = *I; + User *R = U->getUser(); + if (!isa<Instruction>(R)) + continue; + BasicBlock *PB = isa<PHINode>(R) + ? cast<PHINode>(R)->getIncomingBlock(*U) + : cast<Instruction>(R)->getParent(); + Bs.push_back(PB); + } + } + // Append the location of each child. + NodeChildrenMap::iterator CF = NCM.find(Node); + if (CF != NCM.end()) { + NodeVect &Cs = CF->second; + for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) { + GepNode *CN = *I; + NodeToValueMap::iterator LF = Loc.find(CN); + // If the child is only used in GEP instructions (i.e. is not used in + // non-GEP instructions), the nearest dominator computed for it may + // have been null. In such case it won't have a location available. + if (LF == Loc.end()) + continue; + Bs.push_back(LF->second); + } + } + + BasicBlock *DomB = nearest_common_dominator(DT, Bs); + if (!DomB) + return 0; + // Check if the index used by Node dominates the computed dominator. + Instruction *IdxI = dyn_cast<Instruction>(Node->Idx); + if (IdxI && !DT->dominates(IdxI->getParent(), DomB)) + return 0; + + // Avoid putting nodes into empty blocks. + while (is_empty(DomB)) { + DomTreeNode *N = (*DT)[DomB]->getIDom(); + if (!N) + break; + DomB = N->getBlock(); + } + + // Otherwise, DomB is fine. Update the location map. + Loc[Node] = DomB; + return DomB; +} + + +BasicBlock *HexagonCommonGEP::recalculatePlacementRec(GepNode *Node, + NodeChildrenMap &NCM, NodeToValueMap &Loc) { + DEBUG(dbgs() << "LocRec begin for node:" << Node << '\n'); + // Recalculate the placement of Node, after recursively recalculating the + // placements of all its children. 
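+ // Illustrative sketch (not in the original comments): if this node's
+ // original GEP is used in blocks B1 and B2 and its only child ends up
+ // placed in B3, recalculatePlacement() will put this node at the nearest
+ // common dominator of { B1, B2, B3 }, then walk up past empty blocks.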
+ NodeChildrenMap::iterator CF = NCM.find(Node); + if (CF != NCM.end()) { + NodeVect &Cs = CF->second; + for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) + recalculatePlacementRec(*I, NCM, Loc); + } + BasicBlock *LB = recalculatePlacement(Node, NCM, Loc); + DEBUG(dbgs() << "LocRec end for node:" << Node << '\n'); + return LB; +} + + +bool HexagonCommonGEP::isInvariantIn(Value *Val, Loop *L) { + if (isa<Constant>(Val) || isa<Argument>(Val)) + return true; + Instruction *In = dyn_cast<Instruction>(Val); + if (!In) + return false; + BasicBlock *HdrB = L->getHeader(), *DefB = In->getParent(); + return DT->properlyDominates(DefB, HdrB); +} + + +bool HexagonCommonGEP::isInvariantIn(GepNode *Node, Loop *L) { + if (Node->Flags & GepNode::Root) + if (!isInvariantIn(Node->BaseVal, L)) + return false; + return isInvariantIn(Node->Idx, L); +} + + +bool HexagonCommonGEP::isInMainPath(BasicBlock *B, Loop *L) { + BasicBlock *HB = L->getHeader(); + BasicBlock *LB = L->getLoopLatch(); + // B must post-dominate the loop header or dominate the loop latch. + if (PDT->dominates(B, HB)) + return true; + if (LB && DT->dominates(B, LB)) + return true; + return false; +} + + +namespace { + BasicBlock *preheader(DominatorTree *DT, Loop *L) { + if (BasicBlock *PH = L->getLoopPreheader()) + return PH; + if (!OptSpeculate) + return 0; + DomTreeNode *DN = DT->getNode(L->getHeader()); + if (!DN) + return 0; + return DN->getIDom()->getBlock(); + } +} + + +BasicBlock *HexagonCommonGEP::adjustForInvariance(GepNode *Node, + NodeChildrenMap &NCM, NodeToValueMap &Loc) { + // Find the "topmost" location for Node: it must be dominated by both, + // its parent (or the BaseVal, if it's a root node), and by the index + // value. + ValueVect Bs; + if (Node->Flags & GepNode::Root) { + if (Instruction *PIn = dyn_cast<Instruction>(Node->BaseVal)) + Bs.push_back(PIn->getParent()); + } else { + Bs.push_back(Loc[Node->Parent]); + } + if (Instruction *IIn = dyn_cast<Instruction>(Node->Idx)) + Bs.push_back(IIn->getParent()); + BasicBlock *TopB = nearest_common_dominatee(DT, Bs); + + // Traverse the loop nest upwards until we find a loop in which Node + // is no longer invariant, or until we get to the upper limit of Node's + // placement. The traversal will also stop when a suitable "preheader" + // cannot be found for a given loop. The "preheader" may actually be + // a regular block outside of the loop (i.e. not guarded), in which case + // the Node will be speculated. + // For nodes that are not in the main path of the containing loop (i.e. + // are not executed in each iteration), do not move them out of the loop. + BasicBlock *LocB = cast_or_null<BasicBlock>(Loc[Node]); + if (LocB) { + Loop *Lp = LI->getLoopFor(LocB); + while (Lp) { + if (!isInvariantIn(Node, Lp) || !isInMainPath(LocB, Lp)) + break; + BasicBlock *NewLoc = preheader(DT, Lp); + if (!NewLoc || !DT->dominates(TopB, NewLoc)) + break; + Lp = Lp->getParentLoop(); + LocB = NewLoc; + } + } + Loc[Node] = LocB; + + // Recursively compute the locations of all children nodes. 
+ NodeChildrenMap::iterator CF = NCM.find(Node); + if (CF != NCM.end()) { + NodeVect &Cs = CF->second; + for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) + adjustForInvariance(*I, NCM, Loc); + } + return LocB; +} + + +namespace { + struct LocationAsBlock { + LocationAsBlock(const NodeToValueMap &L) : Map(L) {} + const NodeToValueMap ⤅ + }; + + raw_ostream &operator<< (raw_ostream &OS, + const LocationAsBlock &Loc) LLVM_ATTRIBUTE_UNUSED ; + raw_ostream &operator<< (raw_ostream &OS, const LocationAsBlock &Loc) { + for (NodeToValueMap::const_iterator I = Loc.Map.begin(), E = Loc.Map.end(); + I != E; ++I) { + OS << I->first << " -> "; + BasicBlock *B = cast<BasicBlock>(I->second); + OS << B->getName() << '(' << B << ')'; + OS << '\n'; + } + return OS; + } + + inline bool is_constant(GepNode *N) { + return isa<ConstantInt>(N->Idx); + } +} + + +void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U, + NodeToValueMap &Loc) { + User *R = U->getUser(); + DEBUG(dbgs() << "Separating chain for node (" << Node << ") user: " + << *R << '\n'); + BasicBlock *PB = cast<Instruction>(R)->getParent(); + + GepNode *N = Node; + GepNode *C = 0, *NewNode = 0; + while (is_constant(N) && !(N->Flags & GepNode::Root)) { + // XXX if (single-use) dont-replicate; + GepNode *NewN = new (*Mem) GepNode(N); + Nodes.push_back(NewN); + Loc[NewN] = PB; + + if (N == Node) + NewNode = NewN; + NewN->Flags &= ~GepNode::Used; + if (C) + C->Parent = NewN; + C = NewN; + N = N->Parent; + } + if (!NewNode) + return; + + // Move over all uses that share the same user as U from Node to NewNode. + NodeToUsesMap::iterator UF = Uses.find(Node); + assert(UF != Uses.end()); + UseSet &Us = UF->second; + UseSet NewUs; + for (UseSet::iterator I = Us.begin(); I != Us.end(); ) { + User *S = (*I)->getUser(); + UseSet::iterator Nx = std::next(I); + if (S == R) { + NewUs.insert(*I); + Us.erase(I); + } + I = Nx; + } + if (Us.empty()) { + Node->Flags &= ~GepNode::Used; + Uses.erase(UF); + } + + // Should at least have U in NewUs. + NewNode->Flags |= GepNode::Used; + DEBUG(dbgs() << "new node: " << NewNode << " " << *NewNode << '\n'); + assert(!NewUs.empty()); + Uses[NewNode] = NewUs; +} + + +void HexagonCommonGEP::separateConstantChains(GepNode *Node, + NodeChildrenMap &NCM, NodeToValueMap &Loc) { + // First approximation: extract all chains. + NodeSet Ns; + nodes_for_root(Node, NCM, Ns); + + DEBUG(dbgs() << "Separating constant chains for node: " << Node << '\n'); + // Collect all used nodes together with the uses from loads and stores, + // where the GEP node could be folded into the load/store instruction. + NodeToUsesMap FNs; // Foldable nodes. + for (NodeSet::iterator I = Ns.begin(), E = Ns.end(); I != E; ++I) { + GepNode *N = *I; + if (!(N->Flags & GepNode::Used)) + continue; + NodeToUsesMap::iterator UF = Uses.find(N); + assert(UF != Uses.end()); + UseSet &Us = UF->second; + // Loads/stores that use the node N. + UseSet LSs; + for (UseSet::iterator J = Us.begin(), F = Us.end(); J != F; ++J) { + Use *U = *J; + User *R = U->getUser(); + // We're interested in uses that provide the address. It can happen + // that the value may also be provided via GEP, but we won't handle + // those cases here for now. 
+ if (LoadInst *Ld = dyn_cast<LoadInst>(R)) { + unsigned PtrX = LoadInst::getPointerOperandIndex(); + if (&Ld->getOperandUse(PtrX) == U) + LSs.insert(U); + } else if (StoreInst *St = dyn_cast<StoreInst>(R)) { + unsigned PtrX = StoreInst::getPointerOperandIndex(); + if (&St->getOperandUse(PtrX) == U) + LSs.insert(U); + } + } + // Even if the total use count is 1, separating the chain may still be + // beneficial, since the constant chain may be longer than the GEP alone + // would be (e.g. if the parent node has a constant index and also has + // other children). + if (!LSs.empty()) + FNs.insert(std::make_pair(N, LSs)); + } + + DEBUG(dbgs() << "Nodes with foldable users:\n" << FNs); + + for (NodeToUsesMap::iterator I = FNs.begin(), E = FNs.end(); I != E; ++I) { + GepNode *N = I->first; + UseSet &Us = I->second; + for (UseSet::iterator J = Us.begin(), F = Us.end(); J != F; ++J) + separateChainForNode(N, *J, Loc); + } +} + + +void HexagonCommonGEP::computeNodePlacement(NodeToValueMap &Loc) { + // Compute the inverse of the Node.Parent links. Also, collect the set + // of root nodes. + NodeChildrenMap NCM; + NodeVect Roots; + invert_find_roots(Nodes, NCM, Roots); + + // Compute the initial placement determined by the users' locations, and + // the locations of the child nodes. + for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) + recalculatePlacementRec(*I, NCM, Loc); + + DEBUG(dbgs() << "Initial node placement:\n" << LocationAsBlock(Loc)); + + if (OptEnableInv) { + for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) + adjustForInvariance(*I, NCM, Loc); + + DEBUG(dbgs() << "Node placement after adjustment for invariance:\n" + << LocationAsBlock(Loc)); + } + if (OptEnableConst) { + for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) + separateConstantChains(*I, NCM, Loc); + } + DEBUG(dbgs() << "Node use information:\n" << Uses); + + // At the moment, there is no further refinement of the initial placement. + // Such a refinement could include splitting the nodes if they are placed + // too far from some of its users. + + DEBUG(dbgs() << "Final node placement:\n" << LocationAsBlock(Loc)); +} + + +Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, + BasicBlock *LocB) { + DEBUG(dbgs() << "Fabricating GEP in " << LocB->getName() + << " for nodes:\n" << NA); + unsigned Num = NA.size(); + GepNode *RN = NA[0]; + assert((RN->Flags & GepNode::Root) && "Creating GEP for non-root"); + + Value *NewInst = 0; + Value *Input = RN->BaseVal; + Value **IdxList = new Value*[Num+1]; + unsigned nax = 0; + do { + unsigned IdxC = 0; + // If the type of the input of the first node is not a pointer, + // we need to add an artificial i32 0 to the indices (because the + // actual input in the IR will be a pointer). + if (!NA[nax]->PTy->isPointerTy()) { + Type *Int32Ty = Type::getInt32Ty(*Ctx); + IdxList[IdxC++] = ConstantInt::get(Int32Ty, 0); + } + + // Keep adding indices from NA until we have to stop and generate + // an "intermediate" GEP. + while (++nax <= Num) { + GepNode *N = NA[nax-1]; + IdxList[IdxC++] = N->Idx; + if (nax < Num) { + // We have to stop, if the expected type of the output of this node + // is not the same as the input type of the next node. 
+ Type *NextTy = next_type(N->PTy, N->Idx); + if (NextTy != NA[nax]->PTy) + break; + } + } + ArrayRef<Value*> A(IdxList, IdxC); + Type *InpTy = Input->getType(); + Type *ElTy = cast<PointerType>(InpTy->getScalarType())->getElementType(); + NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", &*At); + DEBUG(dbgs() << "new GEP: " << *NewInst << '\n'); + Input = NewInst; + } while (nax <= Num); + + delete[] IdxList; + return NewInst; +} + + +void HexagonCommonGEP::getAllUsersForNode(GepNode *Node, ValueVect &Values, + NodeChildrenMap &NCM) { + NodeVect Work; + Work.push_back(Node); + + while (!Work.empty()) { + NodeVect::iterator First = Work.begin(); + GepNode *N = *First; + Work.erase(First); + if (N->Flags & GepNode::Used) { + NodeToUsesMap::iterator UF = Uses.find(N); + assert(UF != Uses.end() && "No use information for used node"); + UseSet &Us = UF->second; + for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) + Values.push_back((*I)->getUser()); + } + NodeChildrenMap::iterator CF = NCM.find(N); + if (CF != NCM.end()) { + NodeVect &Cs = CF->second; + Work.insert(Work.end(), Cs.begin(), Cs.end()); + } + } +} + + +void HexagonCommonGEP::materialize(NodeToValueMap &Loc) { + DEBUG(dbgs() << "Nodes before materialization:\n" << Nodes << '\n'); + NodeChildrenMap NCM; + NodeVect Roots; + // Compute the inversion again, since computing placement could alter + // "parent" relation between nodes. + invert_find_roots(Nodes, NCM, Roots); + + while (!Roots.empty()) { + NodeVect::iterator First = Roots.begin(); + GepNode *Root = *First, *Last = *First; + Roots.erase(First); + + NodeVect NA; // Nodes to assemble. + // Append to NA all child nodes up to (and including) the first child + // that: + // (1) has more than 1 child, or + // (2) is used, or + // (3) has a child located in a different block. + bool LastUsed = false; + unsigned LastCN = 0; + // The location may be null if the computation failed (it can legitimately + // happen for nodes created from dead GEPs). + Value *LocV = Loc[Last]; + if (!LocV) + continue; + BasicBlock *LastB = cast<BasicBlock>(LocV); + do { + NA.push_back(Last); + LastUsed = (Last->Flags & GepNode::Used); + if (LastUsed) + break; + NodeChildrenMap::iterator CF = NCM.find(Last); + LastCN = (CF != NCM.end()) ? CF->second.size() : 0; + if (LastCN != 1) + break; + GepNode *Child = CF->second.front(); + BasicBlock *ChildB = cast_or_null<BasicBlock>(Loc[Child]); + if (ChildB != 0 && LastB != ChildB) + break; + Last = Child; + } while (true); + + BasicBlock::iterator InsertAt = LastB->getTerminator()->getIterator(); + if (LastUsed || LastCN > 0) { + ValueVect Urs; + getAllUsersForNode(Root, Urs, NCM); + BasicBlock::iterator FirstUse = first_use_of_in_block(Urs, LastB); + if (FirstUse != LastB->end()) + InsertAt = FirstUse; + } + + // Generate a new instruction for NA. + Value *NewInst = fabricateGEP(NA, InsertAt, LastB); + + // Convert all the children of Last node into roots, and append them + // to the Roots list. + if (LastCN > 0) { + NodeVect &Cs = NCM[Last]; + for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) { + GepNode *CN = *I; + CN->Flags &= ~GepNode::Internal; + CN->Flags |= GepNode::Root; + CN->BaseVal = NewInst; + Roots.push_back(CN); + } + } + + // Lastly, if the Last node was used, replace all uses with the new GEP. + // The uses reference the original GEP values. 
+ if (LastUsed) { + NodeToUsesMap::iterator UF = Uses.find(Last); + assert(UF != Uses.end() && "No use information found"); + UseSet &Us = UF->second; + for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) { + Use *U = *I; + U->set(NewInst); + } + } + } +} + + +void HexagonCommonGEP::removeDeadCode() { + ValueVect BO; + BO.push_back(&Fn->front()); + + for (unsigned i = 0; i < BO.size(); ++i) { + BasicBlock *B = cast<BasicBlock>(BO[i]); + DomTreeNode *N = DT->getNode(B); + typedef GraphTraits<DomTreeNode*> GTN; + typedef GTN::ChildIteratorType Iter; + for (Iter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) + BO.push_back((*I)->getBlock()); + } + + for (unsigned i = BO.size(); i > 0; --i) { + BasicBlock *B = cast<BasicBlock>(BO[i-1]); + BasicBlock::InstListType &IL = B->getInstList(); + typedef BasicBlock::InstListType::reverse_iterator reverse_iterator; + ValueVect Ins; + for (reverse_iterator I = IL.rbegin(), E = IL.rend(); I != E; ++I) + Ins.push_back(&*I); + for (ValueVect::iterator I = Ins.begin(), E = Ins.end(); I != E; ++I) { + Instruction *In = cast<Instruction>(*I); + if (isInstructionTriviallyDead(In)) + In->eraseFromParent(); + } + } +} + + +bool HexagonCommonGEP::runOnFunction(Function &F) { + // For now bail out on C++ exception handling. + for (Function::iterator A = F.begin(), Z = F.end(); A != Z; ++A) + for (BasicBlock::iterator I = A->begin(), E = A->end(); I != E; ++I) + if (isa<InvokeInst>(I) || isa<LandingPadInst>(I)) + return false; + + Fn = &F; + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + PDT = &getAnalysis<PostDominatorTree>(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + Ctx = &F.getContext(); + + Nodes.clear(); + Uses.clear(); + NodeOrder.clear(); + + SpecificBumpPtrAllocator<GepNode> Allocator; + Mem = &Allocator; + + collect(); + common(); + + NodeToValueMap Loc; + computeNodePlacement(Loc); + materialize(Loc); + removeDeadCode(); + +#ifdef XDEBUG + // Run this only when expensive checks are enabled. + verifyFunction(F); +#endif + return true; +} + + +namespace llvm { + FunctionPass *createHexagonCommonGEP() { + return new HexagonCommonGEP(); + } +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp new file mode 100644 index 0000000..9fd863f --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -0,0 +1,754 @@ +//===------- HexagonCopyToCombine.cpp - Hexagon Copy-To-Combine Pass ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass replaces transfer instructions by combine instructions. +// We walk along a basic block and look for two combinable instructions and try +// to move them together. If we can move them next to each other we do so and +// replace them with a combine instruction. 
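+// For example (an illustrative sketch with made-up register numbers, not
+// taken from the original comments):
+//   %R2 = A2_tfrsi #0
+//   %R3 = A2_tfr %R5
+// can, when nothing in between reads or clobbers the registers involved, be
+// replaced with
+//   %D1 = A4_combineri %R5, #0
+// where D1 is the double register formed by the even/odd pair R3:R2.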
+//===----------------------------------------------------------------------===// +#include "llvm/PassSupport.h" +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-copy-combine" + +static +cl::opt<bool> IsCombinesDisabled("disable-merge-into-combines", + cl::Hidden, cl::ZeroOrMore, + cl::init(false), + cl::desc("Disable merging into combines")); +static +cl::opt<unsigned> +MaxNumOfInstsBetweenNewValueStoreAndTFR("max-num-inst-between-tfr-and-nv-store", + cl::Hidden, cl::init(4), + cl::desc("Maximum distance between a tfr feeding a store we " + "consider the store still to be newifiable")); + +namespace llvm { + FunctionPass *createHexagonCopyToCombine(); + void initializeHexagonCopyToCombinePass(PassRegistry&); +} + + +namespace { + +class HexagonCopyToCombine : public MachineFunctionPass { + const HexagonInstrInfo *TII; + const TargetRegisterInfo *TRI; + bool ShouldCombineAggressively; + + DenseSet<MachineInstr *> PotentiallyNewifiableTFR; +public: + static char ID; + + HexagonCopyToCombine() : MachineFunctionPass(ID) { + initializeHexagonCopyToCombinePass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const override { + return "Hexagon Copy-To-Combine Pass"; + } + + bool runOnMachineFunction(MachineFunction &Fn) override; + +private: + MachineInstr *findPairable(MachineInstr *I1, bool &DoInsertAtI1); + + void findPotentialNewifiableTFRs(MachineBasicBlock &); + + void combine(MachineInstr *I1, MachineInstr *I2, + MachineBasicBlock::iterator &MI, bool DoInsertAtI1); + + bool isSafeToMoveTogether(MachineInstr *I1, MachineInstr *I2, + unsigned I1DestReg, unsigned I2DestReg, + bool &DoInsertAtI1); + + void emitCombineRR(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineRI(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineIR(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitCombineII(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); +}; + +} // End anonymous namespace. + +char HexagonCopyToCombine::ID = 0; + +INITIALIZE_PASS(HexagonCopyToCombine, "hexagon-copy-combine", + "Hexagon Copy-To-Combine Pass", false, false) + +static bool isCombinableInstType(MachineInstr *MI, + const HexagonInstrInfo *TII, + bool ShouldCombineAggressively) { + switch(MI->getOpcode()) { + case Hexagon::A2_tfr: { + // A COPY instruction can be combined if its arguments are IntRegs (32bit). 
+ const MachineOperand &Op0 = MI->getOperand(0);
+ const MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg() && Op1.isReg());
+
+ unsigned DestReg = Op0.getReg();
+ unsigned SrcReg = Op1.getReg();
+ return Hexagon::IntRegsRegClass.contains(DestReg) &&
+ Hexagon::IntRegsRegClass.contains(SrcReg);
+ }
+
+ case Hexagon::A2_tfrsi: {
+ // A transfer-immediate can be combined if its argument is a signed 8-bit
+ // value.
+ const MachineOperand &Op0 = MI->getOperand(0);
+ const MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg());
+
+ unsigned DestReg = Op0.getReg();
+ // Ensure that TargetFlags are MO_NO_FLAG for a global. This is a
+ // workaround for an ABI bug that prevents GOT relocations on combine
+ // instructions.
+ if (!Op1.isImm() && Op1.getTargetFlags() != HexagonII::MO_NO_FLAG)
+ return false;
+
+ // Only combine constant extended A2_tfrsi if we are in aggressive mode.
+ bool NotExt = Op1.isImm() && isInt<8>(Op1.getImm());
+ return Hexagon::IntRegsRegClass.contains(DestReg) &&
+ (ShouldCombineAggressively || NotExt);
+ }
+
+ default:
+ break;
+ }
+
+ return false;
+}
+
+template <unsigned N>
+static bool isGreaterThanNBitTFRI(const MachineInstr *I) {
+ if (I->getOpcode() == Hexagon::TFRI64_V4 ||
+ I->getOpcode() == Hexagon::A2_tfrsi) {
+ const MachineOperand &Op = I->getOperand(1);
+ return !Op.isImm() || !isInt<N>(Op.getImm());
+ }
+ return false;
+}
+
+/// areCombinableOperations - Returns true if the two instructions can be merged
+/// into a combine (ignoring register constraints).
+static bool areCombinableOperations(const TargetRegisterInfo *TRI,
+ MachineInstr *HighRegInst,
+ MachineInstr *LowRegInst) {
+ unsigned HiOpc = HighRegInst->getOpcode();
+ unsigned LoOpc = LowRegInst->getOpcode();
+ (void)HiOpc; // Fix compiler warning
+ (void)LoOpc; // Fix compiler warning
+ assert((HiOpc == Hexagon::A2_tfr || HiOpc == Hexagon::A2_tfrsi) &&
+ (LoOpc == Hexagon::A2_tfr || LoOpc == Hexagon::A2_tfrsi) &&
+ "Assume individual instructions are of a combinable type");
+
+ // There is no combine of two constant extended values.
+ if (isGreaterThanNBitTFRI<8>(HighRegInst) &&
+ isGreaterThanNBitTFRI<6>(LowRegInst))
+ return false;
+
+ return true;
+}
+
+static bool isEvenReg(unsigned Reg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ Hexagon::IntRegsRegClass.contains(Reg));
+ return (Reg - Hexagon::R0) % 2 == 0;
+}
+
+static void removeKillInfo(MachineInstr *MI, unsigned RegNotKilled) {
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ MachineOperand &Op = MI->getOperand(I);
+ if (!Op.isReg() || Op.getReg() != RegNotKilled || !Op.isKill())
+ continue;
+ Op.setIsKill(false);
+ }
+}
+
+/// isUnsafeToMoveAcross - Returns true if it is unsafe to move a copy
+/// instruction from \p UseReg to \p DestReg over the instruction \p I.
+static bool isUnsafeToMoveAcross(MachineInstr *I, unsigned UseReg,
+ unsigned DestReg,
+ const TargetRegisterInfo *TRI) {
+ return (UseReg && (I->modifiesRegister(UseReg, TRI))) ||
+ I->modifiesRegister(DestReg, TRI) ||
+ I->readsRegister(DestReg, TRI) ||
+ I->hasUnmodeledSideEffects() ||
+ I->isInlineAsm() || I->isDebugValue();
+}
+
+static unsigned UseReg(const MachineOperand& MO) {
+ return MO.isReg() ? MO.getReg() : 0;
+}
+
+/// isSafeToMoveTogether - Returns true if it is safe to move I1 next to I2 such
+/// that the two instructions can be paired in a combine.
+bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1,
+ MachineInstr *I2,
+ unsigned I1DestReg,
+ unsigned I2DestReg,
+ bool &DoInsertAtI1) {
+ unsigned I2UseReg = UseReg(I2->getOperand(1));
+
+ // It is not safe to move I1 and I2 into one combine if I2 has a true
+ // dependence on I1.
+ if (I2UseReg && I1->modifiesRegister(I2UseReg, TRI))
+ return false;
+
+ bool isSafe = true;
+
+ // First try to move I2 towards I1.
+ {
+ // A reverse_iterator instantiated like below starts before I2, and I1
+ // respectively.
+ // Look at instructions I in between I2 and (excluding) I1.
+ MachineBasicBlock::reverse_iterator I(I2),
+ End = --(MachineBasicBlock::reverse_iterator(I1));
+ // At O3 we got better results (dhrystone!) by being more conservative.
+ if (!ShouldCombineAggressively)
+ End = MachineBasicBlock::reverse_iterator(I1);
+ // If I2 kills its operand and we move I2 over an instruction that also
+ // uses I2's use reg we need to modify that (first) instruction to now kill
+ // this reg.
+ unsigned KilledOperand = 0;
+ if (I2->killsRegister(I2UseReg))
+ KilledOperand = I2UseReg;
+ MachineInstr *KillingInstr = nullptr;
+
+ for (; I != End; ++I) {
+ // If the intervening instruction I:
+ // * modifies I2's use reg
+ // * modifies I2's def reg
+ // * reads I2's def reg
+ // * or has unmodelled side effects
+ // we can't move I2 across it.
+ if (isUnsafeToMoveAcross(&*I, I2UseReg, I2DestReg, TRI)) {
+ isSafe = false;
+ break;
+ }
+
+ // Update first use of the killed operand.
+ if (!KillingInstr && KilledOperand &&
+ I->readsRegister(KilledOperand, TRI))
+ KillingInstr = &*I;
+ }
+ if (isSafe) {
+ // Update the intermediate instruction with the kill flag.
+ if (KillingInstr) {
+ bool Added = KillingInstr->addRegisterKilled(KilledOperand, TRI, true);
+ (void)Added; // suppress compiler warning
+ assert(Added && "Must successfully update kill flag");
+ removeKillInfo(I2, KilledOperand);
+ }
+ DoInsertAtI1 = true;
+ return true;
+ }
+ }
+
+ // Try to move I1 towards I2.
+ {
+ // Look at instructions I in between I1 and (excluding) I2.
+ MachineBasicBlock::iterator I(I1), End(I2);
+ // At O3 we got better results (dhrystone) by being more conservative here.
+ if (!ShouldCombineAggressively)
+ End = std::next(MachineBasicBlock::iterator(I2));
+ unsigned I1UseReg = UseReg(I1->getOperand(1));
+ // Track killed operands. If we move across an instruction that kills our
+ // operand, we need to update the kill information on the moved I1. It kills
+ // the operand now.
+ MachineInstr *KillingInstr = nullptr;
+ unsigned KilledOperand = 0;
+
+ while(++I != End) {
+ // If the intervening instruction I:
+ // * modifies I1's use reg
+ // * modifies I1's def reg
+ // * reads I1's def reg
+ // * or has unmodelled side effects
+ // We introduce this special case because llvm has no api to remove a
+ // kill flag for a register (a removeRegisterKilled() analogous to
+ // addRegisterKilled) that handles aliased registers correctly.
+ // * or has a killed aliased register use of I1's use reg
+ // %D4<def> = TFRI64 16
+ // %R6<def> = TFR %R9
+ // %R8<def> = KILL %R8, %D4<imp-use,kill>
+ // If we want to move R6 = across the KILL instruction we would have
+ // to remove the %D4<imp-use,kill> operand. For now, we are
+ // conservative and disallow the move.
+ // we can't move I1 across it.
+ if (isUnsafeToMoveAcross(I, I1UseReg, I1DestReg, TRI) ||
+ // Check for an aliased register kill. Bail out if we see one.
+ (!I->killsRegister(I1UseReg) && I->killsRegister(I1UseReg, TRI)))
+ return false;
+
+ // Check for an exact kill (registers match).
+ if (I1UseReg && I->killsRegister(I1UseReg)) {
+ assert(!KillingInstr && "Should only see one killing instruction");
+ KilledOperand = I1UseReg;
+ KillingInstr = &*I;
+ }
+ }
+ if (KillingInstr) {
+ removeKillInfo(KillingInstr, KilledOperand);
+ // Update I1 to set the kill flag. This flag will later be picked up by
+ // the new COMBINE instruction.
+ bool Added = I1->addRegisterKilled(KilledOperand, TRI);
+ (void)Added; // suppress compiler warning
+ assert(Added && "Must successfully update kill flag");
+ }
+ DoInsertAtI1 = false;
+ }
+
+ return true;
+}
+
+/// findPotentialNewifiableTFRs - Finds transfers that feed stores that could be
+/// newified. (A use of a 64-bit register define cannot be newified)
+void
+HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) {
+ DenseMap<unsigned, MachineInstr *> LastDef;
+ for (MachineBasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
+ MachineInstr *MI = I;
+ // Mark TFRs that feed a potential new value store as such.
+ if(TII->mayBeNewStore(MI)) {
+ // Look for uses of TFR instructions.
+ for (unsigned OpdIdx = 0, OpdE = MI->getNumOperands(); OpdIdx != OpdE;
+ ++OpdIdx) {
+ MachineOperand &Op = MI->getOperand(OpdIdx);
+
+ // Skip over anything except register uses.
+ if (!Op.isReg() || !Op.isUse() || !Op.getReg())
+ continue;
+
+ // Look for the defining instruction.
+ unsigned Reg = Op.getReg();
+ MachineInstr *DefInst = LastDef[Reg];
+ if (!DefInst)
+ continue;
+ if (!isCombinableInstType(DefInst, TII, ShouldCombineAggressively))
+ continue;
+
+ // Only close newifiable stores should influence the decision.
+ MachineBasicBlock::iterator It(DefInst);
+ unsigned NumInstsToDef = 0;
+ while (&*It++ != MI)
+ ++NumInstsToDef;
+
+ if (NumInstsToDef > MaxNumOfInstsBetweenNewValueStoreAndTFR)
+ continue;
+
+ PotentiallyNewifiableTFR.insert(DefInst);
+ }
+ // Skip to next instruction.
+ continue;
+ }
+
+ // Put instructions that last defined integer or double registers into the
+ // map.
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ MachineOperand &Op = MI->getOperand(I);
+ if (!Op.isReg() || !Op.isDef() || !Op.getReg())
+ continue;
+ unsigned Reg = Op.getReg();
+ if (Hexagon::DoubleRegsRegClass.contains(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ LastDef[*SubRegs] = MI;
+ }
+ } else if (Hexagon::IntRegsRegClass.contains(Reg))
+ LastDef[Reg] = MI;
+ }
+ }
+}
+
+bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
+
+ if (IsCombinesDisabled) return false;
+
+ bool HasChanged = false;
+
+ // Get target info.
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+
+ // Combine aggressively (for code size)
+ ShouldCombineAggressively =
+ MF.getTarget().getOptLevel() <= CodeGenOpt::Default;
+
+ // Traverse basic blocks.
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
+ ++BI) {
+ PotentiallyNewifiableTFR.clear();
+ findPotentialNewifiableTFRs(*BI);
+
+ // Traverse instructions in basic block.
+ for(MachineBasicBlock::iterator MI = BI->begin(), End = BI->end();
+ MI != End;) {
+ MachineInstr *I1 = MI++;
+ // Don't combine a TFR whose user could be newified (instructions that
+ // define double registers cannot be newified - Programmer's Ref Manual
+ // 5.4.2 New-value stores).
+ if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(I1))
+ continue;
+
+ // Ignore instructions that are not combinable.
+ if (!isCombinableInstType(I1, TII, ShouldCombineAggressively))
+ continue;
+
+ // Find a second instruction that can be merged into a combine
+ // instruction.
+ bool DoInsertAtI1 = false;
+ MachineInstr *I2 = findPairable(I1, DoInsertAtI1);
+ if (I2) {
+ HasChanged = true;
+ combine(I1, I2, MI, DoInsertAtI1);
+ }
+ }
+ }
+
+ return HasChanged;
+}
+
+/// findPairable - Returns an instruction that can be merged with \p I1 into a
+/// COMBINE instruction or 0 if no such instruction can be found. Returns true
+/// in \p DoInsertAtI1 if the combine must be inserted at instruction \p I1 and
+/// false if the combine must be inserted at the returned instruction.
+MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr *I1,
+ bool &DoInsertAtI1) {
+ MachineBasicBlock::iterator I2 = std::next(MachineBasicBlock::iterator(I1));
+ unsigned I1DestReg = I1->getOperand(0).getReg();
+
+ for (MachineBasicBlock::iterator End = I1->getParent()->end(); I2 != End;
+ ++I2) {
+ // Bail out early if we see a second definition of I1DestReg.
+ if (I2->modifiesRegister(I1DestReg, TRI))
+ break;
+
+ // Ignore non-combinable instructions.
+ if (!isCombinableInstType(I2, TII, ShouldCombineAggressively))
+ continue;
+
+ // Don't combine a TFR whose user could be newified.
+ if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(I2))
+ continue;
+
+ unsigned I2DestReg = I2->getOperand(0).getReg();
+
+ // Check that registers are adjacent and that the first destination register
+ // is even.
+ bool IsI1LowReg = (I2DestReg - I1DestReg) == 1;
+ bool IsI2LowReg = (I1DestReg - I2DestReg) == 1;
+ unsigned FirstRegIndex = IsI1LowReg ? I1DestReg : I2DestReg;
+ if ((!IsI1LowReg && !IsI2LowReg) || !isEvenReg(FirstRegIndex))
+ continue;
+
+ // Check that the two instructions are combinable. V4 allows more
+ // instructions to be merged into a combine.
+ // The order matters because in a TFRI we might be able to encode an int8
+ // as the hi reg operand but only a uint6 as the low reg operand.
+ if ((IsI2LowReg && !areCombinableOperations(TRI, I1, I2)) ||
+ (IsI1LowReg && !areCombinableOperations(TRI, I2, I1)))
+ break;
+
+ if (isSafeToMoveTogether(I1, I2, I1DestReg, I2DestReg,
+ DoInsertAtI1))
+ return I2;
+
+ // Not safe. Stop searching.
+ break;
+ }
+ return nullptr;
+}
+
+void HexagonCopyToCombine::combine(MachineInstr *I1, MachineInstr *I2,
+ MachineBasicBlock::iterator &MI,
+ bool DoInsertAtI1) {
+ // We are going to delete I2. If MI points to I2 advance it to the next
+ // instruction.
+ if ((MachineInstr *)MI == I2) ++MI;
+
+ // Figure out whether I1 or I2 goes into the lowreg part.
+ unsigned I1DestReg = I1->getOperand(0).getReg();
+ unsigned I2DestReg = I2->getOperand(0).getReg();
+ bool IsI1Loreg = (I2DestReg - I1DestReg) == 1;
+ unsigned LoRegDef = IsI1Loreg ? I1DestReg : I2DestReg;
+
+ // Get the double word register.
+ unsigned DoubleRegDest =
+ TRI->getMatchingSuperReg(LoRegDef, Hexagon::subreg_loreg,
+ &Hexagon::DoubleRegsRegClass);
+ assert(DoubleRegDest != 0 && "Expect a valid register");
+
+
+ // Setup source operands.
+ MachineOperand &LoOperand = IsI1Loreg ? I1->getOperand(1) :
+ I2->getOperand(1);
+ MachineOperand &HiOperand = IsI1Loreg ? I2->getOperand(1) :
+ I1->getOperand(1);
+
+ // Figure out which source is a register and which a constant.
+ bool IsHiReg = HiOperand.isReg();
+ bool IsLoReg = LoOperand.isReg();
+
+ MachineBasicBlock::iterator InsertPt(DoInsertAtI1 ?
I1 : I2); + // Emit combine. + if (IsHiReg && IsLoReg) + emitCombineRR(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else if (IsHiReg) + emitCombineRI(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else if (IsLoReg) + emitCombineIR(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else + emitCombineII(InsertPt, DoubleRegDest, HiOperand, LoOperand); + + I1->eraseFromParent(); + I2->eraseFromParent(); +} + +void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle globals. + if (HiOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) + .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addImm(LoOperand.getImm()); + return; + } + if (LoOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // Handle block addresses. + if (HiOperand.isBlockAddress()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) + .addBlockAddress(HiOperand.getBlockAddress(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addImm(LoOperand.getImm()); + return; + } + if (LoOperand.isBlockAddress()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addBlockAddress(LoOperand.getBlockAddress(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // Handle jump tables. + if (HiOperand.isJTI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) + .addJumpTableIndex(HiOperand.getIndex(), HiOperand.getTargetFlags()) + .addImm(LoOperand.getImm()); + return; + } + if (LoOperand.isJTI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addJumpTableIndex(LoOperand.getIndex(), LoOperand.getTargetFlags()); + return; + } + + // Handle constant pools. + if (HiOperand.isCPI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) + .addConstantPoolIndex(HiOperand.getIndex(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addImm(LoOperand.getImm()); + return; + } + if (LoOperand.isCPI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addConstantPoolIndex(LoOperand.getIndex(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // First preference should be given to Hexagon::A2_combineii instruction + // as it can include U6 (in Hexagon::A4_combineii) as well. + // In this instruction, HiOperand is const extended, if required. + if (isInt<8>(LoOperand.getImm())) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); + return; + } + + // In this instruction, LoOperand is const extended, if required. + if (isInt<8>(HiOperand.getImm())) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); + return; + } + + // Insert new combine instruction. 
+ // DoubleRegDest = combine #HiImm, #LoImm + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addImm(LoOperand.getImm()); +} + +void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned LoReg = LoOperand.getReg(); + unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill()); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle globals. + if (HiOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) + .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addReg(LoReg, LoRegKillFlag); + return; + } + // Handle block addresses. + if (HiOperand.isBlockAddress()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) + .addBlockAddress(HiOperand.getBlockAddress(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addReg(LoReg, LoRegKillFlag); + return; + } + // Handle jump tables. + if (HiOperand.isJTI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) + .addJumpTableIndex(HiOperand.getIndex(), HiOperand.getTargetFlags()) + .addReg(LoReg, LoRegKillFlag); + return; + } + // Handle constant pools. + if (HiOperand.isCPI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) + .addConstantPoolIndex(HiOperand.getIndex(), HiOperand.getOffset(), + HiOperand.getTargetFlags()) + .addReg(LoReg, LoRegKillFlag); + return; + } + // Insert new combine instruction. + // DoubleRegDest = combine #HiImm, LoReg + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) + .addImm(HiOperand.getImm()) + .addReg(LoReg, LoRegKillFlag); +} + +void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill()); + unsigned HiReg = HiOperand.getReg(); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Handle global. + if (LoOperand.isGlobal()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + // Handle block addresses. + if (LoOperand.isBlockAddress()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addBlockAddress(LoOperand.getBlockAddress(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + // Handle jump tables. + if (LoOperand.isJTI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg) + .addReg(HiOperand.getReg(), HiRegKillFlag) + .addJumpTableIndex(LoOperand.getIndex(), LoOperand.getTargetFlags()); + return; + } + // Handle constant pools. + if (LoOperand.isCPI()) { + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg) + .addReg(HiOperand.getReg(), HiRegKillFlag) + .addConstantPoolIndex(LoOperand.getIndex(), LoOperand.getOffset(), + LoOperand.getTargetFlags()); + return; + } + + // Insert new combine instruction. 
+ // DoubleRegDest = combine HiReg, #LoImm + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addImm(LoOperand.getImm()); +} + +void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill()); + unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill()); + unsigned LoReg = LoOperand.getReg(); + unsigned HiReg = HiOperand.getReg(); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + + // Insert new combine instruction. + // DoubleRegDest = combine HiReg, LoReg + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combinew), DoubleDestReg) + .addReg(HiReg, HiRegKillFlag) + .addReg(LoReg, LoRegKillFlag); +} + +FunctionPass *llvm::createHexagonCopyToCombine() { + return new HexagonCopyToCombine(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp new file mode 100644 index 0000000..ee0c318 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -0,0 +1,1063 @@ +//===--- HexagonEarlyIfConv.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a Hexagon-specific if-conversion pass that runs on the +// SSA form. +// In SSA it is not straightforward to represent instructions that condi- +// tionally define registers, since a conditionally-defined register may +// only be used under the same condition on which the definition was based. +// To avoid complications of this nature, this patch will only generate +// predicated stores, and speculate other instructions from the "if-conver- +// ted" block. +// The code will recognize CFG patterns where a block with a conditional +// branch "splits" into a "true block" and a "false block". Either of these +// could be omitted (in case of a triangle, for example). +// If after conversion of the side block(s) the CFG allows it, the resul- +// ting blocks may be merged. If the "join" block contained PHI nodes, they +// will be replaced with MUX (or MUX-like) instructions to maintain the +// semantics of the PHI. 
+// +// Example: +// +// %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1 +// %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0 +// J2_jumpt %vreg41<kill>, <BB#5>, %PC<imp-def,dead> +// J2_jump <BB#4>, %PC<imp-def,dead> +// Successors according to CFG: BB#4(62) BB#5(62) +// +// BB#4: derived from LLVM BB %if.then +// Predecessors according to CFG: BB#3 +// %vreg11<def> = A2_addp %vreg6, %vreg10 +// S2_storerd_io %vreg32, 16, %vreg11 +// Successors according to CFG: BB#5 +// +// BB#5: derived from LLVM BB %if.end +// Predecessors according to CFG: BB#3 BB#4 +// %vreg12<def> = PHI %vreg6, <BB#3>, %vreg11, <BB#4> +// %vreg13<def> = A2_addp %vreg7, %vreg12 +// %vreg42<def> = C2_cmpeqi %vreg9, 10 +// J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead> +// J2_jump <BB#6>, %PC<imp-def,dead> +// Successors according to CFG: BB#6(4) BB#3(124) +// +// would become: +// +// %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1 +// %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0 +// spec-> %vreg11<def> = A2_addp %vreg6, %vreg10 +// pred-> S2_pstorerdf_io %vreg41, %vreg32, 16, %vreg11 +// %vreg46<def> = MUX64_rr %vreg41, %vreg6, %vreg11 +// %vreg13<def> = A2_addp %vreg7, %vreg46 +// %vreg42<def> = C2_cmpeqi %vreg9, 10 +// J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead> +// J2_jump <BB#6>, %PC<imp-def,dead> +// Successors according to CFG: BB#6 BB#3 + +#define DEBUG_TYPE "hexagon-eif" + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "HexagonTargetMachine.h" + +#include <functional> +#include <set> +#include <vector> + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonEarlyIfConversion(); + void initializeHexagonEarlyIfConversionPass(PassRegistry& Registry); +} + +namespace { + cl::opt<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden, + cl::init(false), cl::desc("Enable branch probability info")); + cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden, + cl::desc("Size limit in Hexagon early if-conversion")); + + struct PrintMB { + PrintMB(const MachineBasicBlock *B) : MB(B) {} + const MachineBasicBlock *MB; + }; + raw_ostream &operator<< (raw_ostream &OS, const PrintMB &P) { + if (!P.MB) + return OS << "<none>"; + return OS << '#' << P.MB->getNumber(); + } + + struct FlowPattern { + FlowPattern() : SplitB(0), TrueB(0), FalseB(0), JoinB(0), PredR(0) {} + FlowPattern(MachineBasicBlock *B, unsigned PR, MachineBasicBlock *TB, + MachineBasicBlock *FB, MachineBasicBlock *JB) + : SplitB(B), TrueB(TB), FalseB(FB), JoinB(JB), PredR(PR) {} + + MachineBasicBlock *SplitB; + MachineBasicBlock *TrueB, *FalseB, *JoinB; + unsigned PredR; + }; + struct PrintFP { + PrintFP(const FlowPattern &P, const TargetRegisterInfo &T) + : FP(P), TRI(T) {} + const FlowPattern &FP; + const TargetRegisterInfo &TRI; + friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P); + }; + raw_ostream &operator<<(raw_ostream &OS, + const PrintFP &P) LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) { + OS << "{ SplitB:" << PrintMB(P.FP.SplitB) + << 
", PredR:" << PrintReg(P.FP.PredR, &P.TRI) + << ", TrueB:" << PrintMB(P.FP.TrueB) << ", FalseB:" + << PrintMB(P.FP.FalseB) + << ", JoinB:" << PrintMB(P.FP.JoinB) << " }"; + return OS; + } + + class HexagonEarlyIfConversion : public MachineFunctionPass { + public: + static char ID; + HexagonEarlyIfConversion() : MachineFunctionPass(ID), + TII(0), TRI(0), MFN(0), MRI(0), MDT(0), MLI(0) { + initializeHexagonEarlyIfConversionPass(*PassRegistry::getPassRegistry()); + } + const char *getPassName() const override { + return "Hexagon early if conversion"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + typedef DenseSet<MachineBasicBlock*> BlockSetType; + + bool isPreheader(const MachineBasicBlock *B) const; + bool matchFlowPattern(MachineBasicBlock *B, MachineLoop *L, + FlowPattern &FP); + bool visitBlock(MachineBasicBlock *B, MachineLoop *L); + bool visitLoop(MachineLoop *L); + + bool hasEHLabel(const MachineBasicBlock *B) const; + bool hasUncondBranch(const MachineBasicBlock *B) const; + bool isValidCandidate(const MachineBasicBlock *B) const; + bool usesUndefVReg(const MachineInstr *MI) const; + bool isValid(const FlowPattern &FP) const; + unsigned countPredicateDefs(const MachineBasicBlock *B) const; + unsigned computePhiCost(MachineBasicBlock *B) const; + bool isProfitable(const FlowPattern &FP) const; + bool isPredicableStore(const MachineInstr *MI) const; + bool isSafeToSpeculate(const MachineInstr *MI) const; + + unsigned getCondStoreOpcode(unsigned Opc, bool IfTrue) const; + void predicateInstr(MachineBasicBlock *ToB, MachineBasicBlock::iterator At, + MachineInstr *MI, unsigned PredR, bool IfTrue); + void predicateBlockNB(MachineBasicBlock *ToB, + MachineBasicBlock::iterator At, MachineBasicBlock *FromB, + unsigned PredR, bool IfTrue); + + void updatePhiNodes(MachineBasicBlock *WhereB, const FlowPattern &FP); + void convert(const FlowPattern &FP); + + void removeBlock(MachineBasicBlock *B); + void eliminatePhis(MachineBasicBlock *B); + void replacePhiEdges(MachineBasicBlock *OldB, MachineBasicBlock *NewB); + void mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB); + void simplifyFlowGraph(const FlowPattern &FP); + + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineFunction *MFN; + MachineRegisterInfo *MRI; + MachineDominatorTree *MDT; + MachineLoopInfo *MLI; + BlockSetType Deleted; + const MachineBranchProbabilityInfo *MBPI; + }; + + char HexagonEarlyIfConversion::ID = 0; +} + +INITIALIZE_PASS(HexagonEarlyIfConversion, "hexagon-eif", + "Hexagon early if conversion", false, false) + +bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const { + if (B->succ_size() != 1) + return false; + MachineBasicBlock *SB = *B->succ_begin(); + MachineLoop *L = MLI->getLoopFor(SB); + return L && SB == L->getHeader(); +} + + +bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, + MachineLoop *L, FlowPattern &FP) { + DEBUG(dbgs() << "Checking flow pattern at BB#" << B->getNumber() << "\n"); + + // Interested only in conditional branches, no .new, no new-value, etc. + // Check the terminators directly, it's easier than handling all responses + // from AnalyzeBranch. 
+ MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock::const_iterator T1I = B->getFirstTerminator(); + if (T1I == B->end()) + return false; + unsigned Opc = T1I->getOpcode(); + if (Opc != Hexagon::J2_jumpt && Opc != Hexagon::J2_jumpf) + return false; + unsigned PredR = T1I->getOperand(0).getReg(); + + // Get the layout successor, or 0 if B does not have one. + MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B)); + MachineBasicBlock *NextB = (NextBI != MFN->end()) ? &*NextBI : 0; + + MachineBasicBlock *T1B = T1I->getOperand(1).getMBB(); + MachineBasicBlock::const_iterator T2I = std::next(T1I); + // The second terminator should be an unconditional branch. + assert(T2I == B->end() || T2I->getOpcode() == Hexagon::J2_jump); + MachineBasicBlock *T2B = (T2I == B->end()) ? NextB + : T2I->getOperand(0).getMBB(); + if (T1B == T2B) { + // XXX merge if T1B == NextB, or convert branch to unconditional. + // mark as diamond with both sides equal? + return false; + } + // Loop could be null for both. + if (MLI->getLoopFor(T1B) != L || MLI->getLoopFor(T2B) != L) + return false; + + // Record the true/false blocks in such a way that "true" means "if (PredR)", + // and "false" means "if (!PredR)". + if (Opc == Hexagon::J2_jumpt) + TB = T1B, FB = T2B; + else + TB = T2B, FB = T1B; + + if (!MDT->properlyDominates(B, TB) || !MDT->properlyDominates(B, FB)) + return false; + + // Detect triangle first. In case of a triangle, one of the blocks TB/FB + // can fall through into the other, in other words, it will be executed + // in both cases. We only want to predicate the block that is executed + // conditionally. + unsigned TNP = TB->pred_size(), FNP = FB->pred_size(); + unsigned TNS = TB->succ_size(), FNS = FB->succ_size(); + + // A block is predicable if it has one predecessor (it must be B), and + // it has a single successor. In fact, the block has to end either with + // an unconditional branch (which can be predicated), or with a fall- + // through. + bool TOk = (TNP == 1) && (TNS == 1); + bool FOk = (FNP == 1) && (FNS == 1); + + // If neither is predicable, there is nothing interesting. + if (!TOk && !FOk) + return false; + + MachineBasicBlock *TSB = (TNS > 0) ? *TB->succ_begin() : 0; + MachineBasicBlock *FSB = (FNS > 0) ? *FB->succ_begin() : 0; + MachineBasicBlock *JB = 0; + + if (TOk) { + if (FOk) { + if (TSB == FSB) + JB = TSB; + // Diamond: "if (P) then TB; else FB;". + } else { + // TOk && !FOk + if (TSB == FB) { + JB = FB; + FB = 0; + } + } + } else { + // !TOk && FOk (at least one must be true by now). + if (FSB == TB) { + JB = TB; + TB = 0; + } + } + // Don't try to predicate loop preheaders. + if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) { + DEBUG(dbgs() << "One of blocks " << PrintMB(TB) << ", " << PrintMB(FB) + << " is a loop preheader. Skipping.\n"); + return false; + } + + FP = FlowPattern(B, PredR, TB, FB, JB); + DEBUG(dbgs() << "Detected " << PrintFP(FP, *TRI) << "\n"); + return true; +} + + +// KLUDGE: HexagonInstrInfo::AnalyzeBranch won't work on a block that +// contains EH_LABEL. +bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const { + for (auto &I : *B) + if (I.isEHLabel()) + return true; + return false; +} + + +// KLUDGE: HexagonInstrInfo::AnalyzeBranch may be unable to recognize +// that a block can never fall-through. 
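+// The test used instead is deliberately simple: if one of the terminators is
+// a barrier (for example an unconditional J2_jump), the block cannot fall
+// through, so no fall-through successor needs to be considered.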
+bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B) + const { + MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); + while (I != E) { + if (I->isBarrier()) + return true; + ++I; + } + return false; +} + + +bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B) + const { + if (!B) + return true; + if (B->isEHPad() || B->hasAddressTaken()) + return false; + if (B->succ_size() == 0) + return false; + + for (auto &MI : *B) { + if (MI.isDebugValue()) + continue; + if (MI.isConditionalBranch()) + return false; + unsigned Opc = MI.getOpcode(); + bool IsJMP = (Opc == Hexagon::J2_jump); + if (!isPredicableStore(&MI) && !IsJMP && !isSafeToSpeculate(&MI)) + return false; + // Look for predicate registers defined by this instruction. It's ok + // to speculate such an instruction, but the predicate register cannot + // be used outside of this block (or else it won't be possible to + // update the use of it after predication). PHI uses will be updated + // to use a result of a MUX, and a MUX cannot be created for predicate + // registers. + for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef()) + continue; + unsigned R = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + if (MRI->getRegClass(R) != &Hexagon::PredRegsRegClass) + continue; + for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U) + if (U->getParent()->isPHI()) + return false; + } + } + return true; +} + + +bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const { + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isUse()) + continue; + unsigned R = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + const MachineInstr *DefI = MRI->getVRegDef(R); + // "Undefined" virtual registers are actually defined via IMPLICIT_DEF. + assert(DefI && "Expecting a reaching def in MRI"); + if (DefI->isImplicitDef()) + return true; + } + return false; +} + + +bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const { + if (hasEHLabel(FP.SplitB)) // KLUDGE: see function definition + return false; + if (FP.TrueB && !isValidCandidate(FP.TrueB)) + return false; + if (FP.FalseB && !isValidCandidate(FP.FalseB)) + return false; + // Check the PHIs in the join block. If any of them use a register + // that is defined as IMPLICIT_DEF, do not convert this. This can + // legitimately happen if one side of the split never executes, but + // the compiler is unable to prove it. That side may then seem to + // provide an "undef" value to the join block, however it will never + // execute at run-time. If we convert this case, the "undef" will + // be used in a MUX instruction, and that may seem like actually + // using an undefined value to other optimizations. This could lead + // to trouble further down the optimization stream, cause assertions + // to fail, etc. 
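+  // A made-up example of the case being rejected:
+  //   BB#4 (TrueB):  %vreg20<def> = IMPLICIT_DEF
+  //   BB#5 (JoinB):  %vreg21<def> = PHI %vreg20, <BB#4>, %vreg8, <BB#3>
+  // Converting this would materialize a MUX that reads %vreg20, turning a
+  // value that is never actually consumed at run-time into an explicit use
+  // of an undefined register.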
+  if (FP.JoinB) {
+    const MachineBasicBlock &B = *FP.JoinB;
+    for (auto &MI : B) {
+      if (!MI.isPHI())
+        break;
+      if (usesUndefVReg(&MI))
+        return false;
+      unsigned DefR = MI.getOperand(0).getReg();
+      const TargetRegisterClass *RC = MRI->getRegClass(DefR);
+      if (RC == &Hexagon::PredRegsRegClass)
+        return false;
+    }
+  }
+  return true;
+}
+
+
+unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const {
+  assert(B->pred_size() <= 2);
+  if (B->pred_size() < 2)
+    return 0;
+
+  unsigned Cost = 0;
+  MachineBasicBlock::const_iterator I, E = B->getFirstNonPHI();
+  for (I = B->begin(); I != E; ++I) {
+    const MachineOperand &RO1 = I->getOperand(1);
+    const MachineOperand &RO3 = I->getOperand(3);
+    assert(RO1.isReg() && RO3.isReg());
+    // Must have a MUX if the phi uses a subregister.
+    if (RO1.getSubReg() != 0 || RO3.getSubReg() != 0) {
+      Cost++;
+      continue;
+    }
+    MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg());
+    MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg());
+    if (!TII->isPredicable(Def1) || !TII->isPredicable(Def3))
+      Cost++;
+  }
+  return Cost;
+}
+
+
+unsigned HexagonEarlyIfConversion::countPredicateDefs(
+      const MachineBasicBlock *B) const {
+  unsigned PredDefs = 0;
+  for (auto &MI : *B) {
+    for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) {
+      if (!MO->isReg() || !MO->isDef())
+        continue;
+      unsigned R = MO->getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(R))
+        continue;
+      if (MRI->getRegClass(R) == &Hexagon::PredRegsRegClass)
+        PredDefs++;
+    }
+  }
+  return PredDefs;
+}
+
+
+bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
+  if (FP.TrueB && FP.FalseB) {
+
+    // Do not if-convert if the branch is one-sided.
+    if (MBPI) {
+      BranchProbability Prob(9, 10);
+      if (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob)
+        return false;
+      if (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob)
+        return false;
+    }
+
+    // If both sides are predicable, convert them if they join, and the
+    // join block has no other predecessors.
+    MachineBasicBlock *TSB = *FP.TrueB->succ_begin();
+    MachineBasicBlock *FSB = *FP.FalseB->succ_begin();
+    if (TSB != FSB)
+      return false;
+    if (TSB->pred_size() != 2)
+      return false;
+  }
+
+  // Calculate the total size of the predicated blocks.
+  // Assume instruction counts without branches to be the approximation of
+  // the code size. If the predicated blocks are smaller than a packet size,
+  // approximate the spare room in the packet that could be filled with the
+  // predicated/speculated instructions.
+  unsigned TS = 0, FS = 0, Spare = 0;
+  if (FP.TrueB) {
+    TS = std::distance(FP.TrueB->begin(), FP.TrueB->getFirstTerminator());
+    if (TS < HEXAGON_PACKET_SIZE)
+      Spare += HEXAGON_PACKET_SIZE-TS;
+  }
+  if (FP.FalseB) {
+    FS = std::distance(FP.FalseB->begin(), FP.FalseB->getFirstTerminator());
+    if (FS < HEXAGON_PACKET_SIZE)
+      Spare += HEXAGON_PACKET_SIZE-FS;
+  }
+  unsigned TotalIn = TS+FS;
+  DEBUG(dbgs() << "Total number of instructions to be predicated/speculated: "
+               << TotalIn << ", spare room: " << Spare << "\n");
+  if (TotalIn >= SizeLimit+Spare)
+    return false;
+
+  // Count the number of PHI nodes that will need to be updated (converted
+  // to MUX). Those can be later converted to predicated instructions, so
+  // they aren't always adding extra cost.
+  // KLUDGE: Also, count the number of predicate register definitions in
+  // each block. The scheduler may increase the pressure of these and cause
+  // expensive spills (e.g. bitmnp01).
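+  // A worked example with invented block sizes, the default SizeLimit of 6
+  // and a packet size of 4: TrueB and FalseB with two non-branch instructions
+  // each give TS = FS = 2, Spare = (4-2)+(4-2) = 4 and TotalIn = 4, so the
+  // check above passes (4 < 6+4). If the phis then require six extra muxes,
+  // TotalIn+TotalPh = 10 is no longer below SizeLimit+Spare = 10 and the
+  // conversion is rejected below.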
+ unsigned TotalPh = 0; + unsigned PredDefs = countPredicateDefs(FP.SplitB); + if (FP.JoinB) { + TotalPh = computePhiCost(FP.JoinB); + PredDefs += countPredicateDefs(FP.JoinB); + } else { + if (FP.TrueB && FP.TrueB->succ_size() > 0) { + MachineBasicBlock *SB = *FP.TrueB->succ_begin(); + TotalPh += computePhiCost(SB); + PredDefs += countPredicateDefs(SB); + } + if (FP.FalseB && FP.FalseB->succ_size() > 0) { + MachineBasicBlock *SB = *FP.FalseB->succ_begin(); + TotalPh += computePhiCost(SB); + PredDefs += countPredicateDefs(SB); + } + } + DEBUG(dbgs() << "Total number of extra muxes from converted phis: " + << TotalPh << "\n"); + if (TotalIn+TotalPh >= SizeLimit+Spare) + return false; + + DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs << "\n"); + if (PredDefs > 4) + return false; + + return true; +} + + +bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B, + MachineLoop *L) { + bool Changed = false; + + // Visit all dominated blocks from the same loop first, then process B. + MachineDomTreeNode *N = MDT->getNode(B); + typedef GraphTraits<MachineDomTreeNode*> GTN; + // We will change CFG/DT during this traversal, so take precautions to + // avoid problems related to invalidated iterators. In fact, processing + // a child C of B cannot cause another child to be removed, but it can + // cause a new child to be added (which was a child of C before C itself + // was removed. This new child C, however, would have been processed + // prior to processing B, so there is no need to process it again. + // Simply keep a list of children of B, and traverse that list. + typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType; + DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N)); + for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) { + MachineBasicBlock *SB = (*I)->getBlock(); + if (!Deleted.count(SB)) + Changed |= visitBlock(SB, L); + } + // When walking down the dominator tree, we want to traverse through + // blocks from nested (other) loops, because they can dominate blocks + // that are in L. Skip the non-L blocks only after the tree traversal. + if (MLI->getLoopFor(B) != L) + return Changed; + + FlowPattern FP; + if (!matchFlowPattern(B, L, FP)) + return Changed; + + if (!isValid(FP)) { + DEBUG(dbgs() << "Conversion is not valid\n"); + return Changed; + } + if (!isProfitable(FP)) { + DEBUG(dbgs() << "Conversion is not profitable\n"); + return Changed; + } + + convert(FP); + simplifyFlowGraph(FP); + return true; +} + + +bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) { + MachineBasicBlock *HB = L ? L->getHeader() : 0; + DEBUG((L ? dbgs() << "Visiting loop H:" << PrintMB(HB) + : dbgs() << "Visiting function") << "\n"); + bool Changed = false; + if (L) { + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) + Changed |= visitLoop(*I); + } + + MachineBasicBlock *EntryB = GraphTraits<MachineFunction*>::getEntryNode(MFN); + Changed |= visitBlock(L ? HB : EntryB, L); + return Changed; +} + + +bool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr *MI) + const { + // Exclude post-increment stores. Those return a value, so we cannot + // predicate them. 
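+  // For example, a post-increment store such as S2_storerb_pi also defines
+  // the updated base register and is therefore not listed below, while the
+  // plain S2_storerb_io is accepted because predicated forms of it exist
+  // (see getCondStoreOpcode).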
+ unsigned Opc = MI->getOpcode(); + using namespace Hexagon; + switch (Opc) { + // Store byte: + case S2_storerb_io: case S4_storerb_rr: + case S2_storerbabs: case S4_storeirb_io: case S2_storerbgp: + // Store halfword: + case S2_storerh_io: case S4_storerh_rr: + case S2_storerhabs: case S4_storeirh_io: case S2_storerhgp: + // Store upper halfword: + case S2_storerf_io: case S4_storerf_rr: + case S2_storerfabs: case S2_storerfgp: + // Store word: + case S2_storeri_io: case S4_storeri_rr: + case S2_storeriabs: case S4_storeiri_io: case S2_storerigp: + // Store doubleword: + case S2_storerd_io: case S4_storerd_rr: + case S2_storerdabs: case S2_storerdgp: + return true; + } + return false; +} + + +bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI) + const { + if (MI->mayLoad() || MI->mayStore()) + return false; + if (MI->isCall() || MI->isBarrier() || MI->isBranch()) + return false; + if (MI->hasUnmodeledSideEffects()) + return false; + + return true; +} + + +unsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc, + bool IfTrue) const { + // Exclude post-increment stores. + using namespace Hexagon; + switch (Opc) { + case S2_storerb_io: + return IfTrue ? S2_pstorerbt_io : S2_pstorerbf_io; + case S4_storerb_rr: + return IfTrue ? S4_pstorerbt_rr : S4_pstorerbf_rr; + case S2_storerbabs: + case S2_storerbgp: + return IfTrue ? S4_pstorerbt_abs : S4_pstorerbf_abs; + case S4_storeirb_io: + return IfTrue ? S4_storeirbt_io : S4_storeirbf_io; + case S2_storerh_io: + return IfTrue ? S2_pstorerht_io : S2_pstorerhf_io; + case S4_storerh_rr: + return IfTrue ? S4_pstorerht_rr : S4_pstorerhf_rr; + case S2_storerhabs: + case S2_storerhgp: + return IfTrue ? S4_pstorerht_abs : S4_pstorerhf_abs; + case S2_storerf_io: + return IfTrue ? S2_pstorerft_io : S2_pstorerff_io; + case S4_storerf_rr: + return IfTrue ? S4_pstorerft_rr : S4_pstorerff_rr; + case S2_storerfabs: + case S2_storerfgp: + return IfTrue ? S4_pstorerft_abs : S4_pstorerff_abs; + case S4_storeirh_io: + return IfTrue ? S4_storeirht_io : S4_storeirhf_io; + case S2_storeri_io: + return IfTrue ? S2_pstorerit_io : S2_pstorerif_io; + case S4_storeri_rr: + return IfTrue ? S4_pstorerit_rr : S4_pstorerif_rr; + case S2_storeriabs: + case S2_storerigp: + return IfTrue ? S4_pstorerit_abs : S4_pstorerif_abs; + case S4_storeiri_io: + return IfTrue ? S4_storeirit_io : S4_storeirif_io; + case S2_storerd_io: + return IfTrue ? S2_pstorerdt_io : S2_pstorerdf_io; + case S4_storerd_rr: + return IfTrue ? S4_pstorerdt_rr : S4_pstorerdf_rr; + case S2_storerdabs: + case S2_storerdgp: + return IfTrue ? S4_pstorerdt_abs : S4_pstorerdf_abs; + } + llvm_unreachable("Unexpected opcode"); + return 0; +} + + +void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB, + MachineBasicBlock::iterator At, MachineInstr *MI, + unsigned PredR, bool IfTrue) { + DebugLoc DL; + if (At != ToB->end()) + DL = At->getDebugLoc(); + else if (!ToB->empty()) + DL = ToB->back().getDebugLoc(); + + unsigned Opc = MI->getOpcode(); + + if (isPredicableStore(MI)) { + unsigned COpc = getCondStoreOpcode(Opc, IfTrue); + assert(COpc); + MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, TII->get(COpc)) + .addReg(PredR); + for (MIOperands MO(MI); MO.isValid(); ++MO) + MIB.addOperand(*MO); + + // Set memory references. 
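+    // Carrying the original memory operands over keeps the alias information
+    // visible to later passes. Using the values from the file header example,
+    //   S2_storerd_io %vreg32, 16, %vreg11
+    // predicated on %vreg41 being false becomes
+    //   S2_pstorerdf_io %vreg41, %vreg32, 16, %vreg11
+    // with the same memory operands attached.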
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + MIB.setMemRefs(MMOBegin, MMOEnd); + + MI->eraseFromParent(); + return; + } + + if (Opc == Hexagon::J2_jump) { + MachineBasicBlock *TB = MI->getOperand(0).getMBB(); + const MCInstrDesc &D = TII->get(IfTrue ? Hexagon::J2_jumpt + : Hexagon::J2_jumpf); + BuildMI(*ToB, At, DL, D) + .addReg(PredR) + .addMBB(TB); + MI->eraseFromParent(); + return; + } + + // Print the offending instruction unconditionally as we are about to + // abort. + dbgs() << *MI; + llvm_unreachable("Unexpected instruction"); +} + + +// Predicate/speculate non-branch instructions from FromB into block ToB. +// Leave the branches alone, they will be handled later. Btw, at this point +// FromB should have at most one branch, and it should be unconditional. +void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB, + MachineBasicBlock::iterator At, MachineBasicBlock *FromB, + unsigned PredR, bool IfTrue) { + DEBUG(dbgs() << "Predicating block " << PrintMB(FromB) << "\n"); + MachineBasicBlock::iterator End = FromB->getFirstTerminator(); + MachineBasicBlock::iterator I, NextI; + + for (I = FromB->begin(); I != End; I = NextI) { + assert(!I->isPHI()); + NextI = std::next(I); + if (isSafeToSpeculate(&*I)) + ToB->splice(At, FromB, I); + else + predicateInstr(ToB, At, &*I, PredR, IfTrue); + } +} + + +void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB, + const FlowPattern &FP) { + // Visit all PHI nodes in the WhereB block and generate MUX instructions + // in the split block. Update the PHI nodes with the values of the MUX. + auto NonPHI = WhereB->getFirstNonPHI(); + for (auto I = WhereB->begin(); I != NonPHI; ++I) { + MachineInstr *PN = &*I; + // Registers and subregisters corresponding to TrueB, FalseB and SplitB. + unsigned TR = 0, TSR = 0, FR = 0, FSR = 0, SR = 0, SSR = 0; + for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { + const MachineOperand &RO = PN->getOperand(i), &BO = PN->getOperand(i+1); + if (BO.getMBB() == FP.SplitB) + SR = RO.getReg(), SSR = RO.getSubReg(); + else if (BO.getMBB() == FP.TrueB) + TR = RO.getReg(), TSR = RO.getSubReg(); + else if (BO.getMBB() == FP.FalseB) + FR = RO.getReg(), FSR = RO.getSubReg(); + else + continue; + PN->RemoveOperand(i+1); + PN->RemoveOperand(i); + } + if (TR == 0) + TR = SR, TSR = SSR; + else if (FR == 0) + FR = SR, FSR = SSR; + assert(TR && FR); + + using namespace Hexagon; + unsigned DR = PN->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(DR); + const MCInstrDesc &D = RC == &IntRegsRegClass ? 
TII->get(C2_mux) + : TII->get(MUX64_rr); + + MachineBasicBlock::iterator MuxAt = FP.SplitB->getFirstTerminator(); + DebugLoc DL; + if (MuxAt != FP.SplitB->end()) + DL = MuxAt->getDebugLoc(); + unsigned MuxR = MRI->createVirtualRegister(RC); + BuildMI(*FP.SplitB, MuxAt, DL, D, MuxR) + .addReg(FP.PredR) + .addReg(TR, 0, TSR) + .addReg(FR, 0, FSR); + + PN->addOperand(MachineOperand::CreateReg(MuxR, false)); + PN->addOperand(MachineOperand::CreateMBB(FP.SplitB)); + } +} + + +void HexagonEarlyIfConversion::convert(const FlowPattern &FP) { + MachineBasicBlock *TSB = 0, *FSB = 0; + MachineBasicBlock::iterator OldTI = FP.SplitB->getFirstTerminator(); + assert(OldTI != FP.SplitB->end()); + DebugLoc DL = OldTI->getDebugLoc(); + + if (FP.TrueB) { + TSB = *FP.TrueB->succ_begin(); + predicateBlockNB(FP.SplitB, OldTI, FP.TrueB, FP.PredR, true); + } + if (FP.FalseB) { + FSB = *FP.FalseB->succ_begin(); + MachineBasicBlock::iterator At = FP.SplitB->getFirstTerminator(); + predicateBlockNB(FP.SplitB, At, FP.FalseB, FP.PredR, false); + } + + // Regenerate new terminators in the split block and update the successors. + // First, remember any information that may be needed later and remove the + // existing terminators/successors from the split block. + MachineBasicBlock *SSB = 0; + FP.SplitB->erase(OldTI, FP.SplitB->end()); + while (FP.SplitB->succ_size() > 0) { + MachineBasicBlock *T = *FP.SplitB->succ_begin(); + // It's possible that the split block had a successor that is not a pre- + // dicated block. This could only happen if there was only one block to + // be predicated. Example: + // split_b: + // if (p) jump true_b + // jump unrelated2_b + // unrelated1_b: + // ... + // unrelated2_b: ; can have other predecessors, so it's not "false_b" + // jump other_b + // true_b: ; only reachable from split_b, can be predicated + // ... + // + // Find this successor (SSB) if it exists. + if (T != FP.TrueB && T != FP.FalseB) { + assert(!SSB); + SSB = T; + } + FP.SplitB->removeSuccessor(FP.SplitB->succ_begin()); + } + + // Insert new branches and update the successors of the split block. This + // may create unconditional branches to the layout successor, etc., but + // that will be cleaned up later. For now, make sure that correct code is + // generated. + if (FP.JoinB) { + assert(!SSB || SSB == FP.JoinB); + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump)) + .addMBB(FP.JoinB); + FP.SplitB->addSuccessor(FP.JoinB); + } else { + bool HasBranch = false; + if (TSB) { + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jumpt)) + .addReg(FP.PredR) + .addMBB(TSB); + FP.SplitB->addSuccessor(TSB); + HasBranch = true; + } + if (FSB) { + const MCInstrDesc &D = HasBranch ? TII->get(Hexagon::J2_jump) + : TII->get(Hexagon::J2_jumpf); + MachineInstrBuilder MIB = BuildMI(*FP.SplitB, FP.SplitB->end(), DL, D); + if (!HasBranch) + MIB.addReg(FP.PredR); + MIB.addMBB(FSB); + FP.SplitB->addSuccessor(FSB); + } + if (SSB) { + // This cannot happen if both TSB and FSB are set. [TF]SB are the + // successor blocks of the TrueB and FalseB (or null of the TrueB + // or FalseB block is null). SSB is the potential successor block + // of the SplitB that is neither TrueB nor FalseB. + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump)) + .addMBB(SSB); + FP.SplitB->addSuccessor(SSB); + } + } + + // What is left to do is to update the PHI nodes that could have entries + // referring to predicated blocks. 
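+  // In the example from the file header, the join-block PHI
+  //   %vreg12<def> = PHI %vreg6, <BB#3>, %vreg11, <BB#4>
+  // has its incoming entries replaced by a single value produced by a
+  //   MUX64_rr %vreg41, %vreg6, %vreg11
+  // inserted into the split block by updatePhiNodes below.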
+ if (FP.JoinB) { + updatePhiNodes(FP.JoinB, FP); + } else { + if (TSB) + updatePhiNodes(TSB, FP); + if (FSB) + updatePhiNodes(FSB, FP); + // Nothing to update in SSB, since SSB's predecessors haven't changed. + } +} + + +void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) { + DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n"); + + // Transfer the immediate dominator information from B to its descendants. + MachineDomTreeNode *N = MDT->getNode(B); + MachineDomTreeNode *IDN = N->getIDom(); + if (IDN) { + MachineBasicBlock *IDB = IDN->getBlock(); + typedef GraphTraits<MachineDomTreeNode*> GTN; + typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType; + DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N)); + for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) { + MachineBasicBlock *SB = (*I)->getBlock(); + MDT->changeImmediateDominator(SB, IDB); + } + } + + while (B->succ_size() > 0) + B->removeSuccessor(B->succ_begin()); + + for (auto I = B->pred_begin(), E = B->pred_end(); I != E; ++I) + (*I)->removeSuccessor(B, true); + + Deleted.insert(B); + MDT->eraseNode(B); + MFN->erase(B->getIterator()); +} + + +void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) { + DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n"); + MachineBasicBlock::iterator I, NextI, NonPHI = B->getFirstNonPHI(); + for (I = B->begin(); I != NonPHI; I = NextI) { + NextI = std::next(I); + MachineInstr *PN = &*I; + assert(PN->getNumOperands() == 3 && "Invalid phi node"); + MachineOperand &UO = PN->getOperand(1); + unsigned UseR = UO.getReg(), UseSR = UO.getSubReg(); + unsigned DefR = PN->getOperand(0).getReg(); + unsigned NewR = UseR; + if (UseSR) { + // MRI.replaceVregUsesWith does not allow to update the subregister, + // so instead of doing the use-iteration here, create a copy into a + // "non-subregistered" register. 
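+      // Sketch with invented registers: for the single-entry PHI
+      //   %vreg12<def> = PHI %vreg9:subreg_loreg, <BB#2>
+      // a copy  %vreg21<def> = COPY %vreg9:subreg_loreg  is emitted here,
+      // and all uses of %vreg12 are then rewritten to use %vreg21.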
+ DebugLoc DL = PN->getDebugLoc(); + const TargetRegisterClass *RC = MRI->getRegClass(DefR); + NewR = MRI->createVirtualRegister(RC); + NonPHI = BuildMI(*B, NonPHI, DL, TII->get(TargetOpcode::COPY), NewR) + .addReg(UseR, 0, UseSR); + } + MRI->replaceRegWith(DefR, NewR); + B->erase(I); + } +} + + +void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB, + MachineBasicBlock *NewB) { + for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) { + MachineBasicBlock *SB = *I; + MachineBasicBlock::iterator P, N = SB->getFirstNonPHI(); + for (P = SB->begin(); P != N; ++P) { + MachineInstr *PN = &*P; + for (MIOperands MO(PN); MO.isValid(); ++MO) + if (MO->isMBB() && MO->getMBB() == OldB) + MO->setMBB(NewB); + } + } +} + + +void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB, + MachineBasicBlock *SuccB) { + DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and " + << PrintMB(SuccB) << "\n"); + bool TermOk = hasUncondBranch(SuccB); + eliminatePhis(SuccB); + TII->RemoveBranch(*PredB); + PredB->removeSuccessor(SuccB); + PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end()); + MachineBasicBlock::succ_iterator I, E = SuccB->succ_end(); + for (I = SuccB->succ_begin(); I != E; ++I) + PredB->addSuccessor(*I); + PredB->normalizeSuccProbs(); + replacePhiEdges(SuccB, PredB); + removeBlock(SuccB); + if (!TermOk) + PredB->updateTerminator(); +} + + +void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) { + if (FP.TrueB) + removeBlock(FP.TrueB); + if (FP.FalseB) + removeBlock(FP.FalseB); + + FP.SplitB->updateTerminator(); + if (FP.SplitB->succ_size() != 1) + return; + + MachineBasicBlock *SB = *FP.SplitB->succ_begin(); + if (SB->pred_size() != 1) + return; + + // By now, the split block has only one successor (SB), and SB has only + // one predecessor. We can try to merge them. We will need to update ter- + // minators in FP.Split+SB, and that requires working AnalyzeBranch, which + // fails on Hexagon for blocks that have EH_LABELs. However, if SB ends + // with an unconditional branch, we won't need to touch the terminators. + if (!hasEHLabel(SB) || hasUncondBranch(SB)) + mergeBlocks(FP.SplitB, SB); +} + + +bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) { + auto &ST = MF.getSubtarget(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + MFN = &MF; + MRI = &MF.getRegInfo(); + MDT = &getAnalysis<MachineDominatorTree>(); + MLI = &getAnalysis<MachineLoopInfo>(); + MBPI = EnableHexagonBP ? 
&getAnalysis<MachineBranchProbabilityInfo>() : + nullptr; + + Deleted.clear(); + bool Changed = false; + + for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) + Changed |= visitLoop(*I); + Changed |= visitLoop(0); + + return Changed; +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// +FunctionPass *llvm::createHexagonEarlyIfConversion() { + return new HexagonEarlyIfConversion(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp new file mode 100644 index 0000000..ce10aea --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -0,0 +1,1357 @@ +//===--- HexagonExpandCondsets.cpp ----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Replace mux instructions with the corresponding legal instructions. +// It is meant to work post-SSA, but still on virtual registers. It was +// originally placed between register coalescing and machine instruction +// scheduler. +// In this place in the optimization sequence, live interval analysis had +// been performed, and the live intervals should be preserved. A large part +// of the code deals with preserving the liveness information. +// +// Liveness tracking aside, the main functionality of this pass is divided +// into two steps. The first step is to replace an instruction +// vreg0 = C2_mux vreg0, vreg1, vreg2 +// with a pair of conditional transfers +// vreg0 = A2_tfrt vreg0, vreg1 +// vreg0 = A2_tfrf vreg0, vreg2 +// It is the intention that the execution of this pass could be terminated +// after this step, and the code generated would be functionally correct. +// +// If the uses of the source values vreg1 and vreg2 are kills, and their +// definitions are predicable, then in the second step, the conditional +// transfers will then be rewritten as predicated instructions. E.g. +// vreg0 = A2_or vreg1, vreg2 +// vreg3 = A2_tfrt vreg99, vreg0<kill> +// will be rewritten as +// vreg3 = A2_port vreg99, vreg1, vreg2 +// +// This replacement has two variants: "up" and "down". Consider this case: +// vreg0 = A2_or vreg1, vreg2 +// ... [intervening instructions] ... +// vreg3 = A2_tfrt vreg99, vreg0<kill> +// variant "up": +// vreg3 = A2_port vreg99, vreg1, vreg2 +// ... [intervening instructions, vreg0->vreg3] ... +// [deleted] +// variant "down": +// [deleted] +// ... [intervening instructions] ... +// vreg3 = A2_port vreg99, vreg1, vreg2 +// +// Both, one or none of these variants may be valid, and checks are made +// to rule out inapplicable variants. +// +// As an additional optimization, before either of the two steps above is +// executed, the pass attempts to coalesce the target register with one of +// the source registers, e.g. given an instruction +// vreg3 = C2_mux vreg0, vreg1, vreg2 +// vreg3 will be coalesced with either vreg1 or vreg2. 
If this succeeds, +// the instruction would then be (for example) +// vreg3 = C2_mux vreg0, vreg3, vreg2 +// and, under certain circumstances, this could result in only one predicated +// instruction: +// vreg3 = A2_tfrf vreg0, vreg2 +// + +#define DEBUG_TYPE "expand-condsets" +#include "HexagonTargetMachine.h" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit", + cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions")); +static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit", + cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings")); + +namespace llvm { + void initializeHexagonExpandCondsetsPass(PassRegistry&); + FunctionPass *createHexagonExpandCondsets(); +} + +namespace { + class HexagonExpandCondsets : public MachineFunctionPass { + public: + static char ID; + HexagonExpandCondsets() : + MachineFunctionPass(ID), HII(0), TRI(0), MRI(0), + LIS(0), CoaLimitActive(false), + TfrLimitActive(false), CoaCounter(0), TfrCounter(0) { + if (OptCoaLimit.getPosition()) + CoaLimitActive = true, CoaLimit = OptCoaLimit; + if (OptTfrLimit.getPosition()) + TfrLimitActive = true, TfrLimit = OptTfrLimit; + initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); + } + + virtual const char *getPassName() const { + return "Hexagon Expand Condsets"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreserved<SlotIndexes>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + const HexagonInstrInfo *HII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + LiveIntervals *LIS; + + bool CoaLimitActive, TfrLimitActive; + unsigned CoaLimit, TfrLimit, CoaCounter, TfrCounter; + + struct RegisterRef { + RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()), + Sub(Op.getSubReg()) {} + RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {} + bool operator== (RegisterRef RR) const { + return Reg == RR.Reg && Sub == RR.Sub; + } + bool operator!= (RegisterRef RR) const { return !operator==(RR); } + unsigned Reg, Sub; + }; + + typedef DenseMap<unsigned,unsigned> ReferenceMap; + enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) }; + enum { Exec_Then = 0x10, Exec_Else = 0x20 }; + unsigned getMaskForSub(unsigned Sub); + bool isCondset(const MachineInstr *MI); + + void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec); + bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec); + + LiveInterval::iterator nextSegment(LiveInterval &LI, SlotIndex S); + LiveInterval::iterator prevSegment(LiveInterval &LI, SlotIndex S); + void makeDefined(unsigned Reg, SlotIndex S, bool SetDef); + void makeUndead(unsigned Reg, SlotIndex S); + void shrinkToUses(unsigned Reg, LiveInterval &LI); + void updateKillFlags(unsigned Reg, LiveInterval &LI); + void terminateSegment(LiveInterval::iterator LT, SlotIndex S, + LiveInterval &LI); + void 
addInstrToLiveness(MachineInstr *MI); + void removeInstrFromLiveness(MachineInstr *MI); + + unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond); + MachineInstr *genTfrFor(MachineOperand &SrcOp, unsigned DstR, + unsigned DstSR, const MachineOperand &PredOp, bool Cond); + bool split(MachineInstr *MI); + bool splitInBlock(MachineBasicBlock &B); + + bool isPredicable(MachineInstr *MI); + MachineInstr *getReachingDefForPred(RegisterRef RD, + MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond); + bool canMoveOver(MachineInstr *MI, ReferenceMap &Defs, ReferenceMap &Uses); + bool canMoveMemTo(MachineInstr *MI, MachineInstr *ToI, bool IsDown); + void predicateAt(RegisterRef RD, MachineInstr *MI, + MachineBasicBlock::iterator Where, unsigned PredR, bool Cond); + void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR, + bool Cond, MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Last); + bool predicate(MachineInstr *TfrI, bool Cond); + bool predicateInBlock(MachineBasicBlock &B); + + void postprocessUndefImplicitUses(MachineBasicBlock &B); + void removeImplicitUses(MachineInstr *MI); + void removeImplicitUses(MachineBasicBlock &B); + + bool isIntReg(RegisterRef RR, unsigned &BW); + bool isIntraBlocks(LiveInterval &LI); + bool coalesceRegisters(RegisterRef R1, RegisterRef R2); + bool coalesceSegments(MachineFunction &MF); + }; +} + +char HexagonExpandCondsets::ID = 0; + + +unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) { + switch (Sub) { + case Hexagon::subreg_loreg: + return Sub_Low; + case Hexagon::subreg_hireg: + return Sub_High; + case Hexagon::NoSubRegister: + return Sub_None; + } + llvm_unreachable("Invalid subregister"); +} + + +bool HexagonExpandCondsets::isCondset(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::C2_mux: + case Hexagon::C2_muxii: + case Hexagon::C2_muxir: + case Hexagon::C2_muxri: + case Hexagon::MUX64_rr: + return true; + break; + } + return false; +} + + +void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map, + unsigned Exec) { + unsigned Mask = getMaskForSub(RR.Sub) | Exec; + ReferenceMap::iterator F = Map.find(RR.Reg); + if (F == Map.end()) + Map.insert(std::make_pair(RR.Reg, Mask)); + else + F->second |= Mask; +} + + +bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map, + unsigned Exec) { + ReferenceMap::iterator F = Map.find(RR.Reg); + if (F == Map.end()) + return false; + unsigned Mask = getMaskForSub(RR.Sub) | Exec; + if (Mask & F->second) + return true; + return false; +} + + +LiveInterval::iterator HexagonExpandCondsets::nextSegment(LiveInterval &LI, + SlotIndex S) { + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (I->start >= S) + return I; + } + return LI.end(); +} + + +LiveInterval::iterator HexagonExpandCondsets::prevSegment(LiveInterval &LI, + SlotIndex S) { + LiveInterval::iterator P = LI.end(); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (I->end > S) + return P; + P = I; + } + return P; +} + + +/// Find the implicit use of register Reg in slot index S, and make sure +/// that the "defined" flag is set to SetDef. While the mux expansion is +/// going on, predicated instructions will have implicit uses of the +/// registers that are being defined. This is to keep any preceding +/// definitions live. If there is no preceding definition, the implicit +/// use will be marked as "undef", otherwise it will be "defined". 
This +/// function is used to update the flag. +void HexagonExpandCondsets::makeDefined(unsigned Reg, SlotIndex S, + bool SetDef) { + if (!S.isRegister()) + return; + MachineInstr *MI = LIS->getInstructionFromIndex(S); + assert(MI && "Expecting instruction"); + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) + continue; + bool IsDef = !Op.isUndef(); + if (Op.isImplicit() && IsDef != SetDef) + Op.setIsUndef(!SetDef); + } +} + + +void HexagonExpandCondsets::makeUndead(unsigned Reg, SlotIndex S) { + // If S is a block boundary, then there can still be a dead def reaching + // this point. Instead of traversing the CFG, queue start points of all + // live segments that begin with a register, and end at a block boundary. + // This may "resurrect" some truly dead definitions, but doing so is + // harmless. + SmallVector<MachineInstr*,8> Defs; + if (S.isBlock()) { + LiveInterval &LI = LIS->getInterval(Reg); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (!I->start.isRegister() || !I->end.isBlock()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(I->start); + Defs.push_back(MI); + } + } else if (S.isRegister()) { + MachineInstr *MI = LIS->getInstructionFromIndex(S); + Defs.push_back(MI); + } + + for (unsigned i = 0, n = Defs.size(); i < n; ++i) { + MachineInstr *MI = Defs[i]; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + Op.setIsDead(false); + } + } +} + + +/// Shrink the segments in the live interval for a given register to the last +/// use before each subsequent def. Unlike LiveIntervals::shrinkToUses, this +/// function will not mark any definitions of Reg as dead. The reason for this +/// is that this function is used while a MUX instruction is being expanded, +/// or while a conditional copy is undergoing predication. During these +/// processes, there may be defs present in the instruction sequence that have +/// not yet been removed, or there may be missing uses that have not yet been +/// added. We want to utilize LiveIntervals::shrinkToUses as much as possible, +/// but since it does not extend any intervals that are too short, we need to +/// pre-emptively extend them here in anticipation of further changes. +void HexagonExpandCondsets::shrinkToUses(unsigned Reg, LiveInterval &LI) { + SmallVector<MachineInstr*,4> Deads; + LIS->shrinkToUses(&LI, &Deads); + // Need to undo the deadification made by "shrinkToUses". It's easier to + // do it here, since we have a list of all instructions that were just + // marked as dead. + for (unsigned i = 0, n = Deads.size(); i < n; ++i) { + MachineInstr *MI = Deads[i]; + // Clear the "dead" flag. + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + Op.setIsDead(false); + } + // Extend the live segment to the beginning of the next one. + LiveInterval::iterator End = LI.end(); + SlotIndex S = LIS->getInstructionIndex(MI).getRegSlot(); + LiveInterval::iterator T = LI.FindSegmentContaining(S); + assert(T != End); + LiveInterval::iterator N = std::next(T); + if (N != End) + T->end = N->start; + else + T->end = LIS->getMBBEndIdx(MI->getParent()); + } + updateKillFlags(Reg, LI); +} + + +/// Given an updated live interval LI for register Reg, update the kill flags +/// in instructions using Reg to reflect the liveness changes. 
+void HexagonExpandCondsets::updateKillFlags(unsigned Reg, LiveInterval &LI) { + MRI->clearKillFlags(Reg); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + SlotIndex EX = I->end; + if (!EX.isRegister()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(EX); + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) + continue; + // Only set the kill flag on the first encountered use of Reg in this + // instruction. + Op.setIsKill(true); + break; + } + } +} + + +/// When adding a new instruction to liveness, the newly added definition +/// will start a new live segment. This may happen at a position that falls +/// within an existing live segment. In such case that live segment needs to +/// be truncated to make room for the new segment. Ultimately, the truncation +/// will occur at the last use, but for now the segment can be terminated +/// right at the place where the new segment will start. The segments will be +/// shrunk-to-uses later. +void HexagonExpandCondsets::terminateSegment(LiveInterval::iterator LT, + SlotIndex S, LiveInterval &LI) { + // Terminate the live segment pointed to by LT within a live interval LI. + if (LT == LI.end()) + return; + + VNInfo *OldVN = LT->valno; + SlotIndex EX = LT->end; + LT->end = S; + // If LT does not end at a block boundary, the termination is done. + if (!EX.isBlock()) + return; + + // If LT ended at a block boundary, it's possible that its value number + // is picked up at the beginning other blocks. Create a new value number + // and change such blocks to use it instead. + VNInfo *NewVN = 0; + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (!I->start.isBlock() || I->valno != OldVN) + continue; + // Generate on-demand a new value number that is defined by the + // block beginning (i.e. -phi). + if (!NewVN) + NewVN = LI.getNextValue(I->start, LIS->getVNInfoAllocator()); + I->valno = NewVN; + } +} + + +/// Add the specified instruction to live intervals. This function is used +/// to update the live intervals while the program code is being changed. +/// Neither the expansion of a MUX, nor the predication are atomic, and this +/// function is used to update the live intervals while these transformations +/// are being done. +void HexagonExpandCondsets::addInstrToLiveness(MachineInstr *MI) { + SlotIndex MX = LIS->isNotInMIMap(MI) ? LIS->InsertMachineInstrInMaps(MI) + : LIS->getInstructionIndex(MI); + DEBUG(dbgs() << "adding liveness info for instr\n " << MX << " " << *MI); + + MX = MX.getRegSlot(); + bool Predicated = HII->isPredicated(MI); + MachineBasicBlock *MB = MI->getParent(); + + // Strip all implicit uses from predicated instructions. They will be + // added again, according to the updated information. + if (Predicated) + removeImplicitUses(MI); + + // For each def in MI we need to insert a new live segment starting at MX + // into the interval. If there already exists a live segment in the interval + // that contains MX, we need to terminate it at MX. + SmallVector<RegisterRef,2> Defs; + for (auto &Op : MI->operands()) + if (Op.isReg() && Op.isDef()) + Defs.push_back(RegisterRef(Op)); + + for (unsigned i = 0, n = Defs.size(); i < n; ++i) { + unsigned DefR = Defs[i].Reg; + LiveInterval &LID = LIS->getInterval(DefR); + DEBUG(dbgs() << "adding def " << PrintReg(DefR, TRI) + << " with interval\n " << LID << "\n"); + // If MX falls inside of an existing live segment, terminate it. 
+ LiveInterval::iterator LT = LID.FindSegmentContaining(MX); + if (LT != LID.end()) + terminateSegment(LT, MX, LID); + DEBUG(dbgs() << "after terminating segment\n " << LID << "\n"); + + // Create a new segment starting from MX. + LiveInterval::iterator P = prevSegment(LID, MX), N = nextSegment(LID, MX); + SlotIndex EX; + VNInfo *VN = LID.getNextValue(MX, LIS->getVNInfoAllocator()); + if (N == LID.end()) { + // There is no live segment after MX. End this segment at the end of + // the block. + EX = LIS->getMBBEndIdx(MB); + } else { + // If the next segment starts at the block boundary, end the new segment + // at the boundary of the preceding block (i.e. the previous index). + // Otherwise, end the segment at the beginning of the next segment. In + // either case it will be "shrunk-to-uses" later. + EX = N->start.isBlock() ? N->start.getPrevIndex() : N->start; + } + if (Predicated) { + // Predicated instruction will have an implicit use of the defined + // register. This is necessary so that this definition will not make + // any previous definitions dead. If there are no previous live + // segments, still add the implicit use, but make it "undef". + // Because of the implicit use, the preceding definition is not + // dead. Mark is as such (if necessary). + MachineOperand ImpUse = MachineOperand::CreateReg(DefR, false, true); + ImpUse.setSubReg(Defs[i].Sub); + bool Undef = false; + if (P == LID.end()) + Undef = true; + else { + // If the previous segment extends to the end of the previous block, + // the end index may actually be the beginning of this block. If + // the previous segment ends at a block boundary, move it back by one, + // to get the proper block for it. + SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end; + MachineBasicBlock *PB = LIS->getMBBFromIndex(PE); + if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) + Undef = true; + } + if (!Undef) { + makeUndead(DefR, P->valno->def); + // We are adding a live use, so extend the previous segment to + // include it. + P->end = MX; + } else { + ImpUse.setIsUndef(true); + } + + if (!MI->readsRegister(DefR)) + MI->addOperand(ImpUse); + if (N != LID.end()) + makeDefined(DefR, N->start, true); + } + LiveRange::Segment NR = LiveRange::Segment(MX, EX, VN); + LID.addSegment(NR); + DEBUG(dbgs() << "added a new segment " << NR << "\n " << LID << "\n"); + shrinkToUses(DefR, LID); + DEBUG(dbgs() << "updated imp-uses: " << *MI); + LID.verify(); + } + + // For each use in MI: + // - If there is no live segment that contains MX for the used register, + // extend the previous one. Ignore implicit uses. + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.isImplicit() || Op.isUndef()) + continue; + unsigned UseR = Op.getReg(); + LiveInterval &LIU = LIS->getInterval(UseR); + // Find the last segment P that starts before MX. + LiveInterval::iterator P = LIU.FindSegmentContaining(MX); + if (P == LIU.end()) + P = prevSegment(LIU, MX); + + assert(P != LIU.end() && "MI uses undefined register?"); + SlotIndex EX = P->end; + // If P contains MX, there is not much to do. + if (EX > MX) { + Op.setIsKill(false); + continue; + } + // Otherwise, extend P to "next(MX)". + P->end = MX.getNextIndex(); + Op.setIsKill(true); + // Get the old "kill" instruction, and remove the kill flag. 
+ if (MachineInstr *KI = LIS->getInstructionFromIndex(MX)) + KI->clearRegisterKills(UseR, nullptr); + shrinkToUses(UseR, LIU); + LIU.verify(); + } +} + + +/// Update the live interval information to reflect the removal of the given +/// instruction from the program. As with "addInstrToLiveness", this function +/// is called while the program code is being changed. +void HexagonExpandCondsets::removeInstrFromLiveness(MachineInstr *MI) { + SlotIndex MX = LIS->getInstructionIndex(MI).getRegSlot(); + DEBUG(dbgs() << "removing instr\n " << MX << " " << *MI); + + // For each def in MI: + // If MI starts a live segment, merge this segment with the previous segment. + // + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + unsigned DefR = Op.getReg(); + LiveInterval &LID = LIS->getInterval(DefR); + LiveInterval::iterator LT = LID.FindSegmentContaining(MX); + assert(LT != LID.end() && "Expecting live segments"); + DEBUG(dbgs() << "removing def at " << MX << " of " << PrintReg(DefR, TRI) + << " with interval\n " << LID << "\n"); + if (LT->start != MX) + continue; + + VNInfo *MVN = LT->valno; + if (LT != LID.begin()) { + // If the current live segment is not the first, the task is easy. If + // the previous segment continues into the current block, extend it to + // the end of the current one, and merge the value numbers. + // Otherwise, remove the current segment, and make the end of it "undef". + LiveInterval::iterator P = std::prev(LT); + SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end; + MachineBasicBlock *MB = MI->getParent(); + MachineBasicBlock *PB = LIS->getMBBFromIndex(PE); + if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) { + makeDefined(DefR, LT->end, false); + LID.removeSegment(*LT); + } else { + // Make the segments adjacent, so that merge-vn can also merge the + // segments. + P->end = LT->start; + makeUndead(DefR, P->valno->def); + LID.MergeValueNumberInto(MVN, P->valno); + } + } else { + LiveInterval::iterator N = std::next(LT); + LiveInterval::iterator RmB = LT, RmE = N; + while (N != LID.end()) { + // Iterate until the first register-based definition is found + // (i.e. skip all block-boundary entries). + LiveInterval::iterator Next = std::next(N); + if (N->start.isRegister()) { + makeDefined(DefR, N->start, false); + break; + } + if (N->end.isRegister()) { + makeDefined(DefR, N->end, false); + RmE = Next; + break; + } + RmE = Next; + N = Next; + } + // Erase the segments in one shot to avoid invalidating iterators. + LID.segments.erase(RmB, RmE); + } + + bool VNUsed = false; + for (LiveInterval::iterator I = LID.begin(), E = LID.end(); I != E; ++I) { + if (I->valno != MVN) + continue; + VNUsed = true; + break; + } + if (!VNUsed) + MVN->markUnused(); + + DEBUG(dbgs() << "new interval: "); + if (!LID.empty()) { + DEBUG(dbgs() << LID << "\n"); + LID.verify(); + } else { + DEBUG(dbgs() << "<empty>\n"); + LIS->removeInterval(DefR); + } + } + + // For uses there is nothing to do. The intervals will be updated via + // shrinkToUses. 
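+  // E.g. after erasing  %vreg0<def> = A2_tfrt %vreg1, %vreg2<kill>  (made-up
+  // registers), the intervals of %vreg1 and %vreg2 still reach the erased
+  // slot; the used registers are collected first so that shrinkToUses can
+  // trim each interval back to its last remaining real use.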
+  SmallVector<unsigned,4> Uses;
+  for (auto &Op : MI->operands()) {
+    if (!Op.isReg() || !Op.isUse())
+      continue;
+    unsigned R = Op.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(R))
+      continue;
+    Uses.push_back(R);
+  }
+  LIS->RemoveMachineInstrFromMaps(MI);
+  MI->eraseFromParent();
+  for (unsigned i = 0, n = Uses.size(); i < n; ++i) {
+    LiveInterval &LI = LIS->getInterval(Uses[i]);
+    shrinkToUses(Uses[i], LI);
+  }
+}
+
+
+/// Get the opcode for a conditional transfer of the value in SO (source
+/// operand). The condition (true/false) is given in Cond.
+unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
+      bool Cond) {
+  using namespace Hexagon;
+  if (SO.isReg()) {
+    unsigned PhysR;
+    RegisterRef RS = SO;
+    if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) {
+      const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg);
+      assert(VC->begin() != VC->end() && "Empty register class");
+      PhysR = *VC->begin();
+    } else {
+      assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg));
+      PhysR = RS.Reg;
+    }
+    unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub);
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS);
+    switch (RC->getSize()) {
+      case 4:
+        return Cond ? A2_tfrt : A2_tfrf;
+      case 8:
+        return Cond ? A2_tfrpt : A2_tfrpf;
+    }
+    llvm_unreachable("Invalid register operand");
+  }
+  if (SO.isImm() || SO.isFPImm())
+    return Cond ? C2_cmoveit : C2_cmoveif;
+  llvm_unreachable("Unexpected source operand");
+}
+
+
+/// Generate a conditional transfer, copying the value SrcOp to the
+/// destination register DstR:DstSR, and using the predicate register from
+/// PredOp. The Cond argument specifies whether the predicate is to be
+/// if(PredOp), or if(!PredOp).
+MachineInstr *HexagonExpandCondsets::genTfrFor(MachineOperand &SrcOp,
+      unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, bool Cond) {
+  MachineInstr *MI = SrcOp.getParent();
+  MachineBasicBlock &B = *MI->getParent();
+  MachineBasicBlock::iterator At = MI;
+  DebugLoc DL = MI->getDebugLoc();
+
+  // Don't avoid identity copies here (i.e. if the source and the destination
+  // are the same registers). It is actually better to generate them here,
+  // since this would cause the copy to potentially be predicated in the next
+  // step. The predication will remove such a copy if it is unable to
+  // predicate.
+
+  unsigned Opc = getCondTfrOpcode(SrcOp, Cond);
+  MachineInstr *TfrI = BuildMI(B, At, DL, HII->get(Opc))
+        .addReg(DstR, RegState::Define, DstSR)
+        .addOperand(PredOp)
+        .addOperand(SrcOp);
+  // We don't want any kills yet.
+  TfrI->clearKillInfo();
+  DEBUG(dbgs() << "created an initial copy: " << *TfrI);
+  return TfrI;
+}
+
+
+/// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function
+/// performs all necessary changes to complete the replacement.
+bool HexagonExpandCondsets::split(MachineInstr *MI) {
+  if (TfrLimitActive) {
+    if (TfrCounter >= TfrLimit)
+      return false;
+    TfrCounter++;
+  }
+  DEBUG(dbgs() << "\nsplitting BB#" << MI->getParent()->getNumber()
+               << ": " << *MI);
+  MachineOperand &MD = MI->getOperand(0);  // Definition
+  MachineOperand &MP = MI->getOperand(1);  // Predicate register
+  assert(MD.isDef());
+  unsigned DR = MD.getReg(), DSR = MD.getSubReg();
+
+  // First, create the two individual conditional transfers, and add each
+  // of them to the live intervals information. Do that first and then remove
+  // the old instruction from live intervals.
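+  // As in the file header example, a
+  //   %vreg0 = C2_mux %vreg0, %vreg1, %vreg2
+  // becomes the pair
+  //   %vreg0 = A2_tfrt %vreg0, %vreg1
+  //   %vreg0 = A2_tfrf %vreg0, %vreg2
+  // and each new transfer is registered with LiveIntervals before the mux
+  // itself is removed.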
+ if (MachineInstr *TfrT = genTfrFor(MI->getOperand(2), DR, DSR, MP, true)) + addInstrToLiveness(TfrT); + if (MachineInstr *TfrF = genTfrFor(MI->getOperand(3), DR, DSR, MP, false)) + addInstrToLiveness(TfrF); + removeInstrFromLiveness(MI); + + return true; +} + + +/// Split all MUX instructions in the given block into pairs of contitional +/// transfers. +bool HexagonExpandCondsets::splitInBlock(MachineBasicBlock &B) { + bool Changed = false; + MachineBasicBlock::iterator I, E, NextI; + for (I = B.begin(), E = B.end(); I != E; I = NextI) { + NextI = std::next(I); + if (isCondset(I)) + Changed |= split(I); + } + return Changed; +} + + +bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) { + if (HII->isPredicated(MI) || !HII->isPredicable(MI)) + return false; + if (MI->hasUnmodeledSideEffects() || MI->mayStore()) + return false; + // Reject instructions with multiple defs (e.g. post-increment loads). + bool HasDef = false; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + if (HasDef) + return false; + HasDef = true; + } + for (auto &Mo : MI->memoperands()) + if (Mo->isVolatile()) + return false; + return true; +} + + +/// Find the reaching definition for a predicated use of RD. The RD is used +/// under the conditions given by PredR and Cond, and this function will ignore +/// definitions that set RD under the opposite conditions. +MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD, + MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) { + MachineBasicBlock &B = *UseIt->getParent(); + MachineBasicBlock::iterator I = UseIt, S = B.begin(); + if (I == S) + return 0; + + bool PredValid = true; + do { + --I; + MachineInstr *MI = &*I; + // Check if this instruction can be ignored, i.e. if it is predicated + // on the complementary condition. + if (PredValid && HII->isPredicated(MI)) { + if (MI->readsRegister(PredR) && (Cond != HII->isPredicatedTrue(MI))) + continue; + } + + // Check the defs. If the PredR is defined, invalidate it. If RD is + // defined, return the instruction or 0, depending on the circumstances. + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + RegisterRef RR = Op; + if (RR.Reg == PredR) { + PredValid = false; + continue; + } + if (RR.Reg != RD.Reg) + continue; + // If the "Reg" part agrees, there is still the subregister to check. + // If we are looking for vreg1:loreg, we can skip vreg1:hireg, but + // not vreg1 (w/o subregisters). + if (RR.Sub == RD.Sub) + return MI; + if (RR.Sub == 0 || RD.Sub == 0) + return 0; + // We have different subregisters, so we can continue looking. + } + } while (I != S); + + return 0; +} + + +/// Check if the instruction MI can be safely moved over a set of instructions +/// whose side-effects (in terms of register defs and uses) are expressed in +/// the maps Defs and Uses. These maps reflect the conditional defs and uses +/// that depend on the same predicate register to allow moving instructions +/// over instructions predicated on the opposite condition. +bool HexagonExpandCondsets::canMoveOver(MachineInstr *MI, ReferenceMap &Defs, + ReferenceMap &Uses) { + // In order to be able to safely move MI over instructions that define + // "Defs" and use "Uses", no def operand from MI can be defined or used + // and no use operand can be defined. + for (auto &Op : MI->operands()) { + if (!Op.isReg()) + continue; + RegisterRef RR = Op; + // For physical register we would need to check register aliases, etc. 
+ // and we don't want to bother with that. It would be of little value + // before the actual register rewriting (from virtual to physical). + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return false; + // No redefs for any operand. + if (isRefInMap(RR, Defs, Exec_Then)) + return false; + // For defs, there cannot be uses. + if (Op.isDef() && isRefInMap(RR, Uses, Exec_Then)) + return false; + } + return true; +} + + +/// Check if the instruction accessing memory (TheI) can be moved to the +/// location ToI. +bool HexagonExpandCondsets::canMoveMemTo(MachineInstr *TheI, MachineInstr *ToI, + bool IsDown) { + bool IsLoad = TheI->mayLoad(), IsStore = TheI->mayStore(); + if (!IsLoad && !IsStore) + return true; + if (HII->areMemAccessesTriviallyDisjoint(TheI, ToI)) + return true; + if (TheI->hasUnmodeledSideEffects()) + return false; + + MachineBasicBlock::iterator StartI = IsDown ? TheI : ToI; + MachineBasicBlock::iterator EndI = IsDown ? ToI : TheI; + bool Ordered = TheI->hasOrderedMemoryRef(); + + // Search for aliased memory reference in (StartI, EndI). + for (MachineBasicBlock::iterator I = std::next(StartI); I != EndI; ++I) { + MachineInstr *MI = &*I; + if (MI->hasUnmodeledSideEffects()) + return false; + bool L = MI->mayLoad(), S = MI->mayStore(); + if (!L && !S) + continue; + if (Ordered && MI->hasOrderedMemoryRef()) + return false; + + bool Conflict = (L && IsStore) || S; + if (Conflict) + return false; + } + return true; +} + + +/// Generate a predicated version of MI (where the condition is given via +/// PredR and Cond) at the point indicated by Where. +void HexagonExpandCondsets::predicateAt(RegisterRef RD, MachineInstr *MI, + MachineBasicBlock::iterator Where, unsigned PredR, bool Cond) { + // The problem with updating live intervals is that we can move one def + // past another def. In particular, this can happen when moving an A2_tfrt + // over an A2_tfrf defining the same register. From the point of view of + // live intervals, these two instructions are two separate definitions, + // and each one starts another live segment. LiveIntervals's "handleMove" + // does not allow such moves, so we need to handle it ourselves. To avoid + // invalidating liveness data while we are using it, the move will be + // implemented in 4 steps: (1) add a clone of the instruction MI at the + // target location, (2) update liveness, (3) delete the old instruction, + // and (4) update liveness again. + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction. + unsigned Opc = MI->getOpcode(); + unsigned PredOpc = HII->getCondOpcode(Opc, !Cond); + MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc)); + unsigned Ox = 0, NP = MI->getNumOperands(); + // Skip all defs from MI first. + while (Ox < NP) { + MachineOperand &MO = MI->getOperand(Ox); + if (!MO.isReg() || !MO.isDef()) + break; + Ox++; + } + // Add the new def, then the predicate register, then the rest of the + // operands. 
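+ // For illustration (hypothetical operands): predicating
+ //   vreg0 = L2_loadri_io vreg1, 0
+ // on vreg2 yields the conditional opcode with its operands ordered as
+ //   vreg0 = <cond-opcode> vreg2, vreg1, 0
+ // i.e. the def first, then the predicate register, then the original
+ // non-implicit inputs.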
+ MB.addReg(RD.Reg, RegState::Define, RD.Sub); + MB.addReg(PredR); + while (Ox < NP) { + MachineOperand &MO = MI->getOperand(Ox); + if (!MO.isReg() || !MO.isImplicit()) + MB.addOperand(MO); + Ox++; + } + + MachineFunction &MF = *B.getParent(); + MachineInstr::mmo_iterator I = MI->memoperands_begin(); + unsigned NR = std::distance(I, MI->memoperands_end()); + MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR); + for (unsigned i = 0; i < NR; ++i) + MemRefs[i] = *I++; + MB.setMemRefs(MemRefs, MemRefs+NR); + + MachineInstr *NewI = MB; + NewI->clearKillInfo(); + addInstrToLiveness(NewI); +} + + +/// In the range [First, Last], rename all references to the "old" register RO +/// to the "new" register RN, but only in instructions predicated on the given +/// condition. +void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN, + unsigned PredR, bool Cond, MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Last) { + MachineBasicBlock::iterator End = std::next(Last); + for (MachineBasicBlock::iterator I = First; I != End; ++I) { + MachineInstr *MI = &*I; + // Do not touch instructions that are not predicated, or are predicated + // on the opposite condition. + if (!HII->isPredicated(MI)) + continue; + if (!MI->readsRegister(PredR) || (Cond != HII->isPredicatedTrue(MI))) + continue; + + for (auto &Op : MI->operands()) { + if (!Op.isReg() || RO != RegisterRef(Op)) + continue; + Op.setReg(RN.Reg); + Op.setSubReg(RN.Sub); + // In practice, this isn't supposed to see any defs. + assert(!Op.isDef() && "Not expecting a def"); + } + } +} + + +/// For a given conditional copy, predicate the definition of the source of +/// the copy under the given condition (using the same predicate register as +/// the copy). +bool HexagonExpandCondsets::predicate(MachineInstr *TfrI, bool Cond) { + // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi). + unsigned Opc = TfrI->getOpcode(); + (void)Opc; + assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf); + DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false") + << ": " << *TfrI); + + MachineOperand &MD = TfrI->getOperand(0); + MachineOperand &MP = TfrI->getOperand(1); + MachineOperand &MS = TfrI->getOperand(2); + // The source operand should be a <kill>. This is not strictly necessary, + // but it makes things a lot simpler. Otherwise, we would need to rename + // some registers, which would complicate the transformation considerably. + if (!MS.isKill()) + return false; + + RegisterRef RT(MS); + unsigned PredR = MP.getReg(); + MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond); + if (!DefI || !isPredicable(DefI)) + return false; + + DEBUG(dbgs() << "Source def: " << *DefI); + + // Collect the information about registers defined and used between the + // DefI and the TfrI. + // Map: reg -> bitmask of subregs + ReferenceMap Uses, Defs; + MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI; + + // Check if the predicate register is valid between DefI and TfrI. + // If it is, we can then ignore instructions predicated on the negated + // conditions when collecting def and use information. + bool PredValid = true; + for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { + if (!I->modifiesRegister(PredR, 0)) + continue; + PredValid = false; + break; + } + + for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { + MachineInstr *MI = &*I; + // If this instruction is predicated on the same register, it could + // potentially be ignored. 
+ // By default assume that the instruction executes on the same condition + // as TfrI (Exec_Then), and also on the opposite one (Exec_Else). + unsigned Exec = Exec_Then | Exec_Else; + if (PredValid && HII->isPredicated(MI) && MI->readsRegister(PredR)) + Exec = (Cond == HII->isPredicatedTrue(MI)) ? Exec_Then : Exec_Else; + + for (auto &Op : MI->operands()) { + if (!Op.isReg()) + continue; + // We don't want to deal with physical registers. The reason is that + // they can be aliased with other physical registers. Aliased virtual + // registers must share the same register number, and can only differ + // in the subregisters, which we are keeping track of. Physical + // registers ters no longer have subregisters---their super- and + // subregisters are other physical registers, and we are not checking + // that. + RegisterRef RR = Op; + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return false; + + ReferenceMap &Map = Op.isDef() ? Defs : Uses; + addRefToMap(RR, Map, Exec); + } + } + + // The situation: + // RT = DefI + // ... + // RD = TfrI ..., RT + + // If the register-in-the-middle (RT) is used or redefined between + // DefI and TfrI, we may not be able proceed with this transformation. + // We can ignore a def that will not execute together with TfrI, and a + // use that will. If there is such a use (that does execute together with + // TfrI), we will not be able to move DefI down. If there is a use that + // executed if TfrI's condition is false, then RT must be available + // unconditionally (cannot be predicated). + // Essentially, we need to be able to rename RT to RD in this segment. + if (isRefInMap(RT, Defs, Exec_Then) || isRefInMap(RT, Uses, Exec_Else)) + return false; + RegisterRef RD = MD; + // If the predicate register is defined between DefI and TfrI, the only + // potential thing to do would be to move the DefI down to TfrI, and then + // predicate. The reaching def (DefI) must be movable down to the location + // of the TfrI. + // If the target register of the TfrI (RD) is not used or defined between + // DefI and TfrI, consider moving TfrI up to DefI. + bool CanUp = canMoveOver(TfrI, Defs, Uses); + bool CanDown = canMoveOver(DefI, Defs, Uses); + // The TfrI does not access memory, but DefI could. Check if it's safe + // to move DefI down to TfrI. + if (DefI->mayLoad() || DefI->mayStore()) + if (!canMoveMemTo(DefI, TfrI, true)) + CanDown = false; + + DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no") + << ", can move down: " << (CanDown ? "yes\n" : "no\n")); + MachineBasicBlock::iterator PastDefIt = std::next(DefIt); + if (CanUp) + predicateAt(RD, DefI, PastDefIt, PredR, Cond); + else if (CanDown) + predicateAt(RD, DefI, TfrIt, PredR, Cond); + else + return false; + + if (RT != RD) + renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt); + + // Delete the user of RT first (it should work either way, but this order + // of deleting is more natural). + removeInstrFromLiveness(TfrI); + removeInstrFromLiveness(DefI); + return true; +} + + +/// Predicate all cases of conditional copies in the specified block. 
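+/// For illustration (register numbers are only illustrative), a sequence
+/// such as
+///   vreg3 = A2_addi vreg1, 1
+///   vreg0 = A2_tfrt vreg2, vreg3<kill>
+/// can, when the checks in predicate() succeed, be collapsed into a single
+/// add predicated on vreg2.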
+bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B) { + bool Changed = false; + MachineBasicBlock::iterator I, E, NextI; + for (I = B.begin(), E = B.end(); I != E; I = NextI) { + NextI = std::next(I); + unsigned Opc = I->getOpcode(); + if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) { + bool Done = predicate(I, (Opc == Hexagon::A2_tfrt)); + if (!Done) { + // If we didn't predicate I, we may need to remove it in case it is + // an "identity" copy, e.g. vreg1 = A2_tfrt vreg2, vreg1. + if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))) + removeInstrFromLiveness(I); + } + Changed |= Done; + } + } + return Changed; +} + + +void HexagonExpandCondsets::removeImplicitUses(MachineInstr *MI) { + for (unsigned i = MI->getNumOperands(); i > 0; --i) { + MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && MO.isUse() && MO.isImplicit()) + MI->RemoveOperand(i-1); + } +} + + +void HexagonExpandCondsets::removeImplicitUses(MachineBasicBlock &B) { + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + if (HII->isPredicated(MI)) + removeImplicitUses(MI); + } +} + + +void HexagonExpandCondsets::postprocessUndefImplicitUses(MachineBasicBlock &B) { + // Implicit uses that are "undef" are only meaningful (outside of the + // internals of this pass) when the instruction defines a subregister, + // and the implicit-undef use applies to the defined register. In such + // cases, the proper way to record the information in the IR is to mark + // the definition as "undef", which will be interpreted as "read-undef". + typedef SmallSet<unsigned,2> RegisterSet; + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + RegisterSet Undefs; + for (unsigned i = MI->getNumOperands(); i > 0; --i) { + MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.isUndef()) { + MI->RemoveOperand(i-1); + Undefs.insert(MO.getReg()); + } + } + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || !Op.getSubReg()) + continue; + if (Undefs.count(Op.getReg())) + Op.setIsUndef(true); + } + } +} + + +bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) { + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return false; + const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg); + if (RC == &Hexagon::IntRegsRegClass) { + BW = 32; + return true; + } + if (RC == &Hexagon::DoubleRegsRegClass) { + BW = (RR.Sub != 0) ? 32 : 64; + return true; + } + return false; +} + + +bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) { + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + LiveRange::Segment &LR = *I; + // Range must start at a register... + if (!LR.start.isRegister()) + return false; + // ...and end in a register or in a dead slot. + if (!LR.end.isRegister() && !LR.end.isDead()) + return false; + } + return true; +} + + +bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { + if (CoaLimitActive) { + if (CoaCounter >= CoaLimit) + return false; + CoaCounter++; + } + unsigned BW1, BW2; + if (!isIntReg(R1, BW1) || !isIntReg(R2, BW2) || BW1 != BW2) + return false; + if (MRI->isLiveIn(R1.Reg)) + return false; + if (MRI->isLiveIn(R2.Reg)) + return false; + + LiveInterval &L1 = LIS->getInterval(R1.Reg); + LiveInterval &L2 = LIS->getInterval(R2.Reg); + bool Overlap = L1.overlaps(L2); + + DEBUG(dbgs() << "compatible registers: (" + << (Overlap ? 
"overlap" : "disjoint") << ")\n " + << PrintReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n " + << PrintReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n"); + if (R1.Sub || R2.Sub) + return false; + if (Overlap) + return false; + + // Coalescing could have a negative impact on scheduling, so try to limit + // to some reasonable extent. Only consider coalescing segments, when one + // of them does not cross basic block boundaries. + if (!isIntraBlocks(L1) && !isIntraBlocks(L2)) + return false; + + MRI->replaceRegWith(R2.Reg, R1.Reg); + + // Move all live segments from L2 to L1. + typedef DenseMap<VNInfo*,VNInfo*> ValueInfoMap; + ValueInfoMap VM; + for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E; ++I) { + VNInfo *NewVN, *OldVN = I->valno; + ValueInfoMap::iterator F = VM.find(OldVN); + if (F == VM.end()) { + NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator()); + VM.insert(std::make_pair(OldVN, NewVN)); + } else { + NewVN = F->second; + } + L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN)); + } + while (L2.begin() != L2.end()) + L2.removeSegment(*L2.begin()); + + updateKillFlags(R1.Reg, L1); + DEBUG(dbgs() << "coalesced: " << L1 << "\n"); + L1.verify(); + + return true; +} + + +/// Attempt to coalesce one of the source registers to a MUX intruction with +/// the destination register. This could lead to having only one predicated +/// instruction in the end instead of two. +bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) { + SmallVector<MachineInstr*,16> Condsets; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock &B = *I; + for (MachineBasicBlock::iterator J = B.begin(), F = B.end(); J != F; ++J) { + MachineInstr *MI = &*J; + if (!isCondset(MI)) + continue; + MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3); + if (!S1.isReg() && !S2.isReg()) + continue; + Condsets.push_back(MI); + } + } + + bool Changed = false; + for (unsigned i = 0, n = Condsets.size(); i < n; ++i) { + MachineInstr *CI = Condsets[i]; + RegisterRef RD = CI->getOperand(0); + RegisterRef RP = CI->getOperand(1); + MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3); + bool Done = false; + // Consider this case: + // vreg1 = instr1 ... + // vreg2 = instr2 ... + // vreg0 = C2_mux ..., vreg1, vreg2 + // If vreg0 was coalesced with vreg1, we could end up with the following + // code: + // vreg0 = instr1 ... + // vreg2 = instr2 ... + // vreg0 = A2_tfrf ..., vreg2 + // which will later become: + // vreg0 = instr1 ... + // vreg0 = instr2_cNotPt ... + // i.e. there will be an unconditional definition (instr1) of vreg0 + // followed by a conditional one. The output dependency was there before + // and it unavoidable, but if instr1 is predicable, we will no longer be + // able to predicate it here. + // To avoid this scenario, don't coalesce the destination register with + // a source register that is defined by a predicable instruction. 
+ if (S1.isReg()) { + RegisterRef RS = S1; + MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true); + if (!RDef || !HII->isPredicable(RDef)) + Done = coalesceRegisters(RD, RegisterRef(S1)); + } + if (!Done && S2.isReg()) { + RegisterRef RS = S2; + MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false); + if (!RDef || !HII->isPredicable(RDef)) + Done = coalesceRegisters(RD, RegisterRef(S2)); + } + Changed |= Done; + } + return Changed; +} + + +bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { + HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo()); + TRI = MF.getSubtarget().getRegisterInfo(); + LIS = &getAnalysis<LiveIntervals>(); + MRI = &MF.getRegInfo(); + + bool Changed = false; + + // Try to coalesce the target of a mux with one of its sources. + // This could eliminate a register copy in some circumstances. + Changed |= coalesceSegments(MF); + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + // First, simply split all muxes into a pair of conditional transfers + // and update the live intervals to reflect the new arrangement. + // This is done mainly to make the live interval update simpler, than it + // would be while trying to predicate instructions at the same time. + Changed |= splitInBlock(*I); + // Traverse all blocks and collapse predicable instructions feeding + // conditional transfers into predicated instructions. + // Walk over all the instructions again, so we may catch pre-existing + // cases that were not created in the previous step. + Changed |= predicateInBlock(*I); + } + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + postprocessUndefImplicitUses(*I); + return Changed; +} + + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "Hexagon Expand Condsets"; + PassInfo *PI = new PassInfo(Name, "expand-condsets", + &HexagonExpandCondsets::ID, 0, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonExpandCondsetsPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + + +FunctionPass *llvm::createHexagonExpandCondsets() { + return new HexagonExpandCondsets(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp new file mode 100644 index 0000000..6e2dbc0 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp @@ -0,0 +1,357 @@ +//===-- HexagonExpandPredSpillCode.cpp - Expand Predicate Spill Code ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// The Hexagon processor has no instructions that load or store predicate +// registers directly. So, when these registers must be spilled a general +// purpose register must be found and the value copied to/from it from/to +// the predicate register. This code currently does not use the register +// scavenger mechanism available in the allocator. There are two registers +// reserved to allow spilling/restoring predicate registers. One is used to +// hold the predicate value. 
The other is used when stack frame offsets are +// too large. +// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + + +namespace llvm { + FunctionPass *createHexagonExpandPredSpillCode(); + void initializeHexagonExpandPredSpillCodePass(PassRegistry&); +} + + +namespace { + +class HexagonExpandPredSpillCode : public MachineFunctionPass { + public: + static char ID; + HexagonExpandPredSpillCode() : MachineFunctionPass(ID) { + PassRegistry &Registry = *PassRegistry::getPassRegistry(); + initializeHexagonExpandPredSpillCodePass(Registry); + } + + const char *getPassName() const override { + return "Hexagon Expand Predicate Spill Code"; + } + bool runOnMachineFunction(MachineFunction &Fn) override; +}; + + +char HexagonExpandPredSpillCode::ID = 0; + + +bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { + + const HexagonSubtarget &QST = Fn.getSubtarget<HexagonSubtarget>(); + const HexagonInstrInfo *TII = QST.getInstrInfo(); + + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock *MBB = &*MBBb; + // Traverse the basic block. + for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); + ++MII) { + MachineInstr *MI = MII; + int Opc = MI->getOpcode(); + if (Opc == Hexagon::S2_storerb_pci_pseudo || + Opc == Hexagon::S2_storerh_pci_pseudo || + Opc == Hexagon::S2_storeri_pci_pseudo || + Opc == Hexagon::S2_storerd_pci_pseudo || + Opc == Hexagon::S2_storerf_pci_pseudo) { + unsigned Opcode; + if (Opc == Hexagon::S2_storerd_pci_pseudo) + Opcode = Hexagon::S2_storerd_pci; + else if (Opc == Hexagon::S2_storeri_pci_pseudo) + Opcode = Hexagon::S2_storeri_pci; + else if (Opc == Hexagon::S2_storerh_pci_pseudo) + Opcode = Hexagon::S2_storerh_pci; + else if (Opc == Hexagon::S2_storerf_pci_pseudo) + Opcode = Hexagon::S2_storerf_pci; + else if (Opc == Hexagon::S2_storerb_pci_pseudo) + Opcode = Hexagon::S2_storerb_pci; + else + llvm_unreachable("wrong Opc"); + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op3 = MI->getOperand(3); // Modifier value. + MachineOperand &Op4 = MI->getOperand(4); + // Emit a "C6 = Rn, C6 is the control register for M0". + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), + Hexagon::C6)->addOperand(Op3); + // Replace the pseude circ_ldd by the real circ_ldd. 
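+ // For illustration: the modifier value from the pseudo is first copied
+ // into the circular-addressing control register (C6) via A2_tfrrcr, and
+ // the real S2_storer*_pci built below then reads it through M0, which is
+ // added as an implicit (killed) use.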
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op0); + NewMI->addOperand(Op1); + NewMI->addOperand(Op4); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + NewMI->addOperand(Op2); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::L2_loadrd_pci_pseudo || + Opc == Hexagon::L2_loadri_pci_pseudo || + Opc == Hexagon::L2_loadrh_pci_pseudo || + Opc == Hexagon::L2_loadruh_pci_pseudo|| + Opc == Hexagon::L2_loadrb_pci_pseudo || + Opc == Hexagon::L2_loadrub_pci_pseudo) { + unsigned Opcode; + if (Opc == Hexagon::L2_loadrd_pci_pseudo) + Opcode = Hexagon::L2_loadrd_pci; + else if (Opc == Hexagon::L2_loadri_pci_pseudo) + Opcode = Hexagon::L2_loadri_pci; + else if (Opc == Hexagon::L2_loadrh_pci_pseudo) + Opcode = Hexagon::L2_loadrh_pci; + else if (Opc == Hexagon::L2_loadruh_pci_pseudo) + Opcode = Hexagon::L2_loadruh_pci; + else if (Opc == Hexagon::L2_loadrb_pci_pseudo) + Opcode = Hexagon::L2_loadrb_pci; + else if (Opc == Hexagon::L2_loadrub_pci_pseudo) + Opcode = Hexagon::L2_loadrub_pci; + else + llvm_unreachable("wrong Opc"); + + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op4 = MI->getOperand(4); // Modifier value. + MachineOperand &Op5 = MI->getOperand(5); + // Emit a "C6 = Rn, C6 is the control register for M0". + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), + Hexagon::C6)->addOperand(Op4); + // Replace the pseude circ_ldd by the real circ_ldd. + MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op1); + NewMI->addOperand(Op0); + NewMI->addOperand(Op2); + NewMI->addOperand(Op5); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::L2_loadrd_pbr_pseudo || + Opc == Hexagon::L2_loadri_pbr_pseudo || + Opc == Hexagon::L2_loadrh_pbr_pseudo || + Opc == Hexagon::L2_loadruh_pbr_pseudo|| + Opc == Hexagon::L2_loadrb_pbr_pseudo || + Opc == Hexagon::L2_loadrub_pbr_pseudo) { + unsigned Opcode; + if (Opc == Hexagon::L2_loadrd_pbr_pseudo) + Opcode = Hexagon::L2_loadrd_pbr; + else if (Opc == Hexagon::L2_loadri_pbr_pseudo) + Opcode = Hexagon::L2_loadri_pbr; + else if (Opc == Hexagon::L2_loadrh_pbr_pseudo) + Opcode = Hexagon::L2_loadrh_pbr; + else if (Opc == Hexagon::L2_loadruh_pbr_pseudo) + Opcode = Hexagon::L2_loadruh_pbr; + else if (Opc == Hexagon::L2_loadrb_pbr_pseudo) + Opcode = Hexagon::L2_loadrb_pbr; + else if (Opc == Hexagon::L2_loadrub_pbr_pseudo) + Opcode = Hexagon::L2_loadrub_pbr; + else + llvm_unreachable("wrong Opc"); + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op4 = MI->getOperand(4); // Modifier value. + // Emit a "C6 = Rn, C6 is the control register for M0". + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), + Hexagon::C6)->addOperand(Op4); + // Replace the pseudo brev_ldd by the real brev_ldd. 
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op1); + NewMI->addOperand(Op0); + NewMI->addOperand(Op2); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::S2_storerd_pbr_pseudo || + Opc == Hexagon::S2_storeri_pbr_pseudo || + Opc == Hexagon::S2_storerh_pbr_pseudo || + Opc == Hexagon::S2_storerb_pbr_pseudo || + Opc == Hexagon::S2_storerf_pbr_pseudo) { + unsigned Opcode; + if (Opc == Hexagon::S2_storerd_pbr_pseudo) + Opcode = Hexagon::S2_storerd_pbr; + else if (Opc == Hexagon::S2_storeri_pbr_pseudo) + Opcode = Hexagon::S2_storeri_pbr; + else if (Opc == Hexagon::S2_storerh_pbr_pseudo) + Opcode = Hexagon::S2_storerh_pbr; + else if (Opc == Hexagon::S2_storerf_pbr_pseudo) + Opcode = Hexagon::S2_storerf_pbr; + else if (Opc == Hexagon::S2_storerb_pbr_pseudo) + Opcode = Hexagon::S2_storerb_pbr; + else + llvm_unreachable("wrong Opc"); + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op3 = MI->getOperand(3); // Modifier value. + // Emit a "C6 = Rn, C6 is the control register for M0". + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), + Hexagon::C6)->addOperand(Op3); + // Replace the pseudo brev_ldd by the real brev_ldd. + MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op0); + NewMI->addOperand(Op1); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + NewMI->addOperand(Op2); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::STriw_pred) { + // STriw_pred [R30], ofst, SrcReg; + unsigned FP = MI->getOperand(0).getReg(); + assert(FP == QST.getRegisterInfo()->getFrameRegister() && + "Not a Frame Pointer, Nor a Spill Slot"); + assert(MI->getOperand(1).isImm() && "Not an offset"); + int Offset = MI->getOperand(1).getImm(); + int SrcReg = MI->getOperand(2).getReg(); + assert(Hexagon::PredRegsRegClass.contains(SrcReg) && + "Not a predicate register"); + if (!TII->isValidOffset(Hexagon::S2_storeri_io, Offset)) { + if (!TII->isValidOffset(Hexagon::A2_addi, Offset)) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::CONST32_Int_Real), + HEXAGON_RESERVED_REG_1).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_add), + HEXAGON_RESERVED_REG_1) + .addReg(FP).addReg(HEXAGON_RESERVED_REG_1); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr), + HEXAGON_RESERVED_REG_2).addReg(SrcReg); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::S2_storeri_io)) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0).addReg(HEXAGON_RESERVED_REG_2); + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_addi), + HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr), + HEXAGON_RESERVED_REG_2).addReg(SrcReg); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::S2_storeri_io)) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0) + .addReg(HEXAGON_RESERVED_REG_2); + } + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr), + HEXAGON_RESERVED_REG_2).addReg(SrcReg); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::S2_storeri_io)). 
+ addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2); + } + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::LDriw_pred) { + // DstReg = LDriw_pred [R30], ofst. + int DstReg = MI->getOperand(0).getReg(); + assert(Hexagon::PredRegsRegClass.contains(DstReg) && + "Not a predicate register"); + unsigned FP = MI->getOperand(1).getReg(); + assert(FP == QST.getRegisterInfo()->getFrameRegister() && + "Not a Frame Pointer, Nor a Spill Slot"); + assert(MI->getOperand(2).isImm() && "Not an offset"); + int Offset = MI->getOperand(2).getImm(); + if (!TII->isValidOffset(Hexagon::L2_loadri_io, Offset)) { + if (!TII->isValidOffset(Hexagon::A2_addi, Offset)) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::CONST32_Int_Real), + HEXAGON_RESERVED_REG_1).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_add), + HEXAGON_RESERVED_REG_1) + .addReg(FP) + .addReg(HEXAGON_RESERVED_REG_1); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io), + HEXAGON_RESERVED_REG_2) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp), + DstReg).addReg(HEXAGON_RESERVED_REG_2); + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_addi), + HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io), + HEXAGON_RESERVED_REG_2) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp), + DstReg).addReg(HEXAGON_RESERVED_REG_2); + } + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io), + HEXAGON_RESERVED_REG_2).addReg(FP).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp), + DstReg).addReg(HEXAGON_RESERVED_REG_2); + } + MII = MBB->erase(MI); + --MII; + } + } + } + + return true; +} + +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "Hexagon Expand Predicate Spill Code"; + PassInfo *PI = new PassInfo(Name, "hexagon-spill-pred", + &HexagonExpandPredSpillCode::ID, + nullptr, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonExpandPredSpillCodePass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + +FunctionPass* +llvm::createHexagonExpandPredSpillCode() { + return new HexagonExpandPredSpillCode(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp new file mode 100644 index 0000000..d0c7f9c --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -0,0 +1,188 @@ +//===---- HexagonFixupHwLoops.cpp - Fixup HW loops too far from LOOPn. ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// The loop start address in the LOOPn instruction is encoded as a distance +// from the LOOPn instruction itself. If the start address is too far from +// the LOOPn instruction, the instruction needs to use a constant extender. +// This pass will identify and convert such LOOPn instructions to a proper +// form. 
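+// For illustration: a J2_loop0r/J2_loop0i whose start label ends up farther
+// away than the allowed range is replaced by its constant-extended form
+// (J2_loop0rext/J2_loop0iext), which can encode the larger distance.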
+//===----------------------------------------------------------------------===// + + +#include "llvm/ADT/DenseMap.h" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/PassSupport.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +static cl::opt<unsigned> MaxLoopRange( + "hexagon-loop-range", cl::Hidden, cl::init(200), + cl::desc("Restrict range of loopN instructions (testing only)")); + +namespace llvm { + FunctionPass *createHexagonFixupHwLoops(); + void initializeHexagonFixupHwLoopsPass(PassRegistry&); +} + +namespace { + struct HexagonFixupHwLoops : public MachineFunctionPass { + public: + static char ID; + + HexagonFixupHwLoops() : MachineFunctionPass(ID) { + initializeHexagonFixupHwLoopsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "Hexagon Hardware Loop Fixup"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + /// \brief Check the offset between each loop instruction and + /// the loop basic block to determine if we can use the LOOP instruction + /// or if we need to set the LC/SA registers explicitly. + bool fixupLoopInstrs(MachineFunction &MF); + + /// \brief Replace loop instruction with the constant extended + /// version if the loop label is too far from the loop instruction. + void useExtLoopInstr(MachineFunction &MF, + MachineBasicBlock::iterator &MII); + }; + + char HexagonFixupHwLoops::ID = 0; +} + +INITIALIZE_PASS(HexagonFixupHwLoops, "hwloopsfixup", + "Hexagon Hardware Loops Fixup", false, false) + +FunctionPass *llvm::createHexagonFixupHwLoops() { + return new HexagonFixupHwLoops(); +} + +/// \brief Returns true if the instruction is a hardware loop instruction. +static bool isHardwareLoop(const MachineInstr *MI) { + return MI->getOpcode() == Hexagon::J2_loop0r || + MI->getOpcode() == Hexagon::J2_loop0i || + MI->getOpcode() == Hexagon::J2_loop1r || + MI->getOpcode() == Hexagon::J2_loop1i; +} + +bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) { + return fixupLoopInstrs(MF); +} + +/// \brief For Hexagon, if the loop label is to far from the +/// loop instruction then we need to set the LC0 and SA0 registers +/// explicitly instead of using LOOP(start,count). This function +/// checks the distance, and generates register assignments if needed. +/// +/// This function makes two passes over the basic blocks. The first +/// pass computes the offset of the basic block from the start. +/// The second pass checks all the loop instructions. +bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) { + + // Offset of the current instruction from the start. + unsigned InstOffset = 0; + // Map for each basic block to it's first instruction. + DenseMap<const MachineBasicBlock *, unsigned> BlockToInstOffset; + + const HexagonInstrInfo *HII = + static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo()); + + // First pass - compute the offset of each basic block. + for (const MachineBasicBlock &MBB : MF) { + if (MBB.getAlignment()) { + // Although we don't know the exact layout of the final code, we need + // to account for alignment padding somehow. 
This heuristic pads each + // aligned basic block according to the alignment value. + int ByteAlign = (1u << MBB.getAlignment()) - 1; + InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign); + } + + BlockToInstOffset[&MBB] = InstOffset; + for (const MachineInstr &MI : MBB) + InstOffset += HII->getSize(&MI); + } + + // Second pass - check each loop instruction to see if it needs to be + // converted. + InstOffset = 0; + bool Changed = false; + for (MachineBasicBlock &MBB : MF) { + InstOffset = BlockToInstOffset[&MBB]; + + // Loop over all the instructions. + MachineBasicBlock::iterator MII = MBB.begin(); + MachineBasicBlock::iterator MIE = MBB.end(); + while (MII != MIE) { + InstOffset += HII->getSize(&*MII); + if (MII->isDebugValue()) { + ++MII; + continue; + } + if (isHardwareLoop(MII)) { + assert(MII->getOperand(0).isMBB() && + "Expect a basic block as loop operand"); + int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()]; + if ((unsigned)abs(diff) > MaxLoopRange) { + useExtLoopInstr(MF, MII); + MII = MBB.erase(MII); + Changed = true; + } else { + ++MII; + } + } else { + ++MII; + } + } + } + + return Changed; +} + +/// \brief Replace loop instructions with the constant extended version. +void HexagonFixupHwLoops::useExtLoopInstr(MachineFunction &MF, + MachineBasicBlock::iterator &MII) { + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + MachineBasicBlock *MBB = MII->getParent(); + DebugLoc DL = MII->getDebugLoc(); + MachineInstrBuilder MIB; + unsigned newOp; + switch (MII->getOpcode()) { + case Hexagon::J2_loop0r: + newOp = Hexagon::J2_loop0rext; + break; + case Hexagon::J2_loop0i: + newOp = Hexagon::J2_loop0iext; + break; + case Hexagon::J2_loop1r: + newOp = Hexagon::J2_loop1rext; + break; + case Hexagon::J2_loop1i: + newOp = Hexagon::J2_loop1iext; + break; + default: + llvm_unreachable("Invalid Hardware Loop Instruction."); + } + MIB = BuildMI(*MBB, MII, DL, TII->get(newOp)); + + for (unsigned i = 0; i < MII->getNumOperands(); ++i) + MIB.addOperand(MII->getOperand(i)); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp new file mode 100644 index 0000000..7a52a1c --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -0,0 +1,1479 @@ +//===-- HexagonFrameLowering.cpp - Define frame lowering ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-pei" + +#include "HexagonFrameLowering.h" +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" + +// Hexagon stack frame layout as defined by the ABI: +// +// Incoming arguments +// passed via stack +// | +// | +// SP during function's FP during function's | +// +-- runtime (top of stack) runtime (bottom) --+ | +// | | | +// --++---------------------+------------------+-----------------++-+------- +// | parameter area for | variable-size | fixed-size |LR| arg +// | called functions | local objects | local objects |FP| +// --+----------------------+------------------+-----------------+--+------- +// <- size known -> <- size unknown -> <- size known -> +// +// Low address High address +// +// <--- stack growth +// +// +// - In any circumstances, the outgoing function arguments are always accessi- +// ble using the SP, and the incoming arguments are accessible using the FP. +// - If the local objects are not aligned, they can always be accessed using +// the FP. +// - If there are no variable-sized objects, the local objects can always be +// accessed using the SP, regardless whether they are aligned or not. (The +// alignment padding will be at the bottom of the stack (highest address), +// and so the offset with respect to the SP will be known at the compile- +// -time.) +// +// The only complication occurs if there are both, local aligned objects, and +// dynamically allocated (variable-sized) objects. The alignment pad will be +// placed between the FP and the local objects, thus preventing the use of the +// FP to access the local objects. At the same time, the variable-sized objects +// will be between the SP and the local objects, thus introducing an unknown +// distance from the SP to the locals. +// +// To avoid this problem, a new register is created that holds the aligned +// address of the bottom of the stack, referred in the sources as AP (aligned +// pointer). The AP will be equal to "FP-p", where "p" is the smallest pad +// that aligns AP to the required boundary (a maximum of the alignments of +// all stack objects, fixed- and variable-sized). All local objects[1] will +// then use AP as the base pointer. +// [1] The exception is with "fixed" stack objects. "Fixed" stack objects get +// their name from being allocated at fixed locations on the stack, relative +// to the FP. In the presence of dynamic allocation and local alignment, such +// objects can only be accessed through the FP. 
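+// For example (numbers are only illustrative): with FP = 0x1004 and a
+// required alignment of 16 bytes, the pad is p = 4 and AP = FP-4 = 0x1000.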
+// +// Illustration of the AP: +// FP --+ +// | +// ---------------+---------------------+-----+-----------------------++-+-- +// Rest of the | Local stack objects | Pad | Fixed stack objects |LR| +// stack frame | (aligned) | | (CSR, spills, etc.) |FP| +// ---------------+---------------------+-----+-----------------+-----+--+-- +// |<-- Multiple of the -->| +// stack alignment +-- AP +// +// The AP is set up at the beginning of the function. Since it is not a dedi- +// cated (reserved) register, it needs to be kept live throughout the function +// to be available as the base register for local object accesses. +// Normally, an address of a stack objects is obtained by a pseudo-instruction +// TFR_FI. To access local objects with the AP register present, a different +// pseudo-instruction needs to be used: TFR_FIA. The TFR_FIA takes one extra +// argument compared to TFR_FI: the first input register is the AP register. +// This keeps the register live between its definition and its uses. + +// The AP register is originally set up using pseudo-instruction ALIGNA: +// AP = ALIGNA A +// where +// A - required stack alignment +// The alignment value must be the maximum of all alignments required by +// any stack object. + +// The dynamic allocation uses a pseudo-instruction ALLOCA: +// Rd = ALLOCA Rs, A +// where +// Rd - address of the allocated space +// Rs - minimum size (the actual allocated can be larger to accommodate +// alignment) +// A - required alignment + + +using namespace llvm; + +static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret", + cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target")); + + +static cl::opt<int> NumberScavengerSlots("number-scavenger-slots", + cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2), + cl::ZeroOrMore); + +static cl::opt<int> SpillFuncThreshold("spill-func-threshold", + cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"), + cl::init(6), cl::ZeroOrMore); + +static cl::opt<int> SpillFuncThresholdOs("spill-func-threshold-Os", + cl::Hidden, cl::desc("Specify Os spill func threshold"), + cl::init(1), cl::ZeroOrMore); + +static cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame", + cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Enable stack frame shrink wrapping")); + +static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX), + cl::Hidden, cl::ZeroOrMore, cl::desc("Max count of stack frame " + "shrink-wraps")); + +static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true), + cl::Hidden, cl::desc("Use allocframe more conservatively")); + + +namespace llvm { + void initializeHexagonCallFrameInformationPass(PassRegistry&); + FunctionPass *createHexagonCallFrameInformation(); +} + +namespace { + class HexagonCallFrameInformation : public MachineFunctionPass { + public: + static char ID; + HexagonCallFrameInformation() : MachineFunctionPass(ID) { + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializeHexagonCallFrameInformationPass(PR); + } + bool runOnMachineFunction(MachineFunction &MF) override; + }; + + char HexagonCallFrameInformation::ID = 0; +} + +bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) { + auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering(); + bool NeedCFI = MF.getMMI().hasDebugInfo() || + MF.getFunction()->needsUnwindTableEntry(); + + if (!NeedCFI) + return false; + HFI.insertCFIInstructions(MF); + return true; +} + +INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi", + 
"Hexagon call frame information", false, false) + +FunctionPass *llvm::createHexagonCallFrameInformation() { + return new HexagonCallFrameInformation(); +} + + +namespace { + /// Map a register pair Reg to the subregister that has the greater "number", + /// i.e. D3 (aka R7:6) will be mapped to R7, etc. + unsigned getMax32BitSubRegister(unsigned Reg, const TargetRegisterInfo &TRI, + bool hireg = true) { + if (Reg < Hexagon::D0 || Reg > Hexagon::D15) + return Reg; + + unsigned RegNo = 0; + for (MCSubRegIterator SubRegs(Reg, &TRI); SubRegs.isValid(); ++SubRegs) { + if (hireg) { + if (*SubRegs > RegNo) + RegNo = *SubRegs; + } else { + if (!RegNo || *SubRegs < RegNo) + RegNo = *SubRegs; + } + } + return RegNo; + } + + /// Returns the callee saved register with the largest id in the vector. + unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo &TRI) { + assert(Hexagon::R1 > 0 && + "Assume physical registers are encoded as positive integers"); + if (CSI.empty()) + return 0; + + unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI); + for (unsigned I = 1, E = CSI.size(); I < E; ++I) { + unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI); + if (Reg > Max) + Max = Reg; + } + return Max; + } + + /// Checks if the basic block contains any instruction that needs a stack + /// frame to be already in place. + bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR) { + for (auto &I : MBB) { + const MachineInstr *MI = &I; + if (MI->isCall()) + return true; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::ALLOCA: + case Hexagon::ALIGNA: + return true; + default: + break; + } + // Check individual operands. + for (const MachineOperand &MO : MI->operands()) { + // While the presence of a frame index does not prove that a stack + // frame will be required, all frame indexes should be within alloc- + // frame/deallocframe. Otherwise, the code that translates a frame + // index into an offset would have to be aware of the placement of + // the frame creation/destruction instructions. + if (MO.isFI()) + return true; + if (!MO.isReg()) + continue; + unsigned R = MO.getReg(); + // Virtual registers will need scavenging, which then may require + // a stack slot. + if (TargetRegisterInfo::isVirtualRegister(R)) + return true; + if (CSR[R]) + return true; + } + } + return false; + } + + /// Returns true if MBB has a machine instructions that indicates a tail call + /// in the block. + bool hasTailCall(const MachineBasicBlock &MBB) { + MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(); + unsigned RetOpc = I->getOpcode(); + return RetOpc == Hexagon::TCRETURNi || RetOpc == Hexagon::TCRETURNr; + } + + /// Returns true if MBB contains an instruction that returns. + bool hasReturn(const MachineBasicBlock &MBB) { + for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; ++I) + if (I->isReturn()) + return true; + return false; + } +} + + +/// Implements shrink-wrapping of the stack frame. By default, stack frame +/// is created in the function entry block, and is cleaned up in every block +/// that returns. This function finds alternate blocks: one for the frame +/// setup (prolog) and one for the cleanup (epilog). 
+void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, + MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const { + static unsigned ShrinkCounter = 0; + + if (ShrinkLimit.getPosition()) { + if (ShrinkCounter >= ShrinkLimit) + return; + ShrinkCounter++; + } + + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HRI = *HST.getRegisterInfo(); + + MachineDominatorTree MDT; + MDT.runOnMachineFunction(MF); + MachinePostDominatorTree MPT; + MPT.runOnMachineFunction(MF); + + typedef DenseMap<unsigned,unsigned> UnsignedMap; + UnsignedMap RPO; + typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType; + RPOTType RPOT(&MF); + unsigned RPON = 0; + for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) + RPO[(*I)->getNumber()] = RPON++; + + // Don't process functions that have loops, at least for now. Placement + // of prolog and epilog must take loop structure into account. For simpli- + // city don't do it right now. + for (auto &I : MF) { + unsigned BN = RPO[I.getNumber()]; + for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE; ++SI) { + // If found a back-edge, return. + if (RPO[(*SI)->getNumber()] <= BN) + return; + } + } + + // Collect the set of blocks that need a stack frame to execute. Scan + // each block for uses/defs of callee-saved registers, calls, etc. + SmallVector<MachineBasicBlock*,16> SFBlocks; + BitVector CSR(Hexagon::NUM_TARGET_REGS); + for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P) + CSR[*P] = true; + + for (auto &I : MF) + if (needsStackFrame(I, CSR)) + SFBlocks.push_back(&I); + + DEBUG({ + dbgs() << "Blocks needing SF: {"; + for (auto &B : SFBlocks) + dbgs() << " BB#" << B->getNumber(); + dbgs() << " }\n"; + }); + // No frame needed? + if (SFBlocks.empty()) + return; + + // Pick a common dominator and a common post-dominator. + MachineBasicBlock *DomB = SFBlocks[0]; + for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) { + DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]); + if (!DomB) + break; + } + MachineBasicBlock *PDomB = SFBlocks[0]; + for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) { + PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]); + if (!PDomB) + break; + } + DEBUG({ + dbgs() << "Computed dom block: BB#"; + if (DomB) dbgs() << DomB->getNumber(); + else dbgs() << "<null>"; + dbgs() << ", computed pdom block: BB#"; + if (PDomB) dbgs() << PDomB->getNumber(); + else dbgs() << "<null>"; + dbgs() << "\n"; + }); + if (!DomB || !PDomB) + return; + + // Make sure that DomB dominates PDomB and PDomB post-dominates DomB. + if (!MDT.dominates(DomB, PDomB)) { + DEBUG(dbgs() << "Dom block does not dominate pdom block\n"); + return; + } + if (!MPT.dominates(PDomB, DomB)) { + DEBUG(dbgs() << "PDom block does not post-dominate dom block\n"); + return; + } + + // Finally, everything seems right. + PrologB = DomB; + EpilogB = PDomB; +} + +/// Perform most of the PEI work here: +/// - saving/restoring of the callee-saved registers, +/// - stack frame creation and destruction. +/// Normally, this work is distributed among various functions, but doing it +/// in one place allows shrink-wrapping of the stack frame. 
+void HexagonFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HRI = *HST.getRegisterInfo(); + + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr; + if (EnableShrinkWrapping) + findShrunkPrologEpilog(MF, PrologB, EpilogB); + + insertCSRSpillsInBlock(*PrologB, CSI, HRI); + insertPrologueInBlock(*PrologB); + + if (EpilogB) { + insertCSRRestoresInBlock(*EpilogB, CSI, HRI); + insertEpilogueInBlock(*EpilogB); + } else { + for (auto &B : MF) + if (B.isReturnBlock()) + insertCSRRestoresInBlock(B, CSI, HRI); + + for (auto &B : MF) + if (B.isReturnBlock()) + insertEpilogueInBlock(B); + } +} + + +void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + auto &HII = *HST.getInstrInfo(); + auto &HRI = *HST.getRegisterInfo(); + DebugLoc dl; + + unsigned MaxAlign = std::max(MFI->getMaxAlignment(), getStackAlignment()); + + // Calculate the total stack frame size. + // Get the number of bytes to allocate from the FrameInfo. + unsigned FrameSize = MFI->getStackSize(); + // Round up the max call frame size to the max alignment on the stack. + unsigned MaxCFA = RoundUpToAlignment(MFI->getMaxCallFrameSize(), MaxAlign); + MFI->setMaxCallFrameSize(MaxCFA); + + FrameSize = MaxCFA + RoundUpToAlignment(FrameSize, MaxAlign); + MFI->setStackSize(FrameSize); + + bool AlignStack = (MaxAlign > getStackAlignment()); + + // Get the number of bytes to allocate from the FrameInfo. + unsigned NumBytes = MFI->getStackSize(); + unsigned SP = HRI.getStackRegister(); + unsigned MaxCF = MFI->getMaxCallFrameSize(); + MachineBasicBlock::iterator InsertPt = MBB.begin(); + + auto *FuncInfo = MF.getInfo<HexagonMachineFunctionInfo>(); + auto &AdjustRegs = FuncInfo->getAllocaAdjustInsts(); + + for (auto MI : AdjustRegs) { + assert((MI->getOpcode() == Hexagon::ALLOCA) && "Expected alloca"); + expandAlloca(MI, HII, SP, MaxCF); + MI->eraseFromParent(); + } + + if (!hasFP(MF)) + return; + + // Check for overflow. + // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used? + const unsigned int ALLOCFRAME_MAX = 16384; + + // Create a dummy memory operand to avoid allocframe from being treated as + // a volatile memory reference. + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore, + 4, 4); + + if (NumBytes >= ALLOCFRAME_MAX) { + // Emit allocframe(#0). + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) + .addImm(0) + .addMemOperand(MMO); + + // Subtract offset from frame pointer. + // We use a caller-saved non-parameter register for that. 
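+ // For illustration: for a frame of 20000 bytes this emits
+ //   allocframe(#0)
+ //   Rx = #20000        (CONST32 into the scratch register)
+ //   r29 = sub(r29, Rx)
+ // because allocframe cannot encode an immediate of 16384 or more.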
+ unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg(); + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32_Int_Real), + CallerSavedReg).addImm(NumBytes); + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP) + .addReg(SP) + .addReg(CallerSavedReg); + } else { + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) + .addImm(NumBytes) + .addMemOperand(MMO); + } + + if (AlignStack) { + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP) + .addReg(SP) + .addImm(-int64_t(MaxAlign)); + } +} + +void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { + MachineFunction &MF = *MBB.getParent(); + if (!hasFP(MF)) + return; + + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HII = *HST.getInstrInfo(); + auto &HRI = *HST.getRegisterInfo(); + unsigned SP = HRI.getStackRegister(); + + MachineInstr *RetI = nullptr; + for (auto &I : MBB) { + if (!I.isReturn()) + continue; + RetI = &I; + break; + } + unsigned RetOpc = RetI ? RetI->getOpcode() : 0; + + MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator(); + DebugLoc DL; + if (InsertPt != MBB.end()) + DL = InsertPt->getDebugLoc(); + else if (!MBB.empty()) + DL = std::prev(MBB.end())->getDebugLoc(); + + // Handle EH_RETURN. + if (RetOpc == Hexagon::EH_RETURN_JMPR) { + BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe)); + BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::A2_add), SP) + .addReg(SP) + .addReg(Hexagon::R28); + return; + } + + // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc- + // frame instruction if we encounter it. + if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4) { + MachineBasicBlock::iterator It = RetI; + ++It; + // Delete all instructions after the RESTORE (except labels). + while (It != MBB.end()) { + if (!It->isLabel()) + It = MBB.erase(It); + else + ++It; + } + return; + } + + // It is possible that the restoring code is a call to a library function. + // All of the restore* functions include "deallocframe", so we need to make + // sure that we don't add an extra one. + bool NeedsDeallocframe = true; + if (!MBB.empty() && InsertPt != MBB.begin()) { + MachineBasicBlock::iterator PrevIt = std::prev(InsertPt); + unsigned COpc = PrevIt->getOpcode(); + if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4) + NeedsDeallocframe = false; + } + + if (!NeedsDeallocframe) + return; + // If the returning instruction is JMPret, replace it with dealloc_return, + // otherwise just add deallocframe. The function could be returning via a + // tail call. + if (RetOpc != Hexagon::JMPret || DisableDeallocRet) { + BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe)); + return; + } + unsigned NewOpc = Hexagon::L4_return; + MachineInstr *NewI = BuildMI(MBB, RetI, DL, HII.get(NewOpc)); + // Transfer the function live-out registers. 
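+ // For illustration: a "deallocframe; jumpr r31" ending collapses here into
+ // a single dealloc_return (L4_return); copyImplicitOps keeps the implicit
+ // live-out uses from the original return on the new instruction.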
+ NewI->copyImplicitOps(MF, RetI); + MBB.erase(RetI); +} + + +namespace { + bool IsAllocFrame(MachineBasicBlock::const_iterator It) { + if (!It->isBundle()) + return It->getOpcode() == Hexagon::S2_allocframe; + auto End = It->getParent()->instr_end(); + MachineBasicBlock::const_instr_iterator I = It.getInstrIterator(); + while (++I != End && I->isBundled()) + if (I->getOpcode() == Hexagon::S2_allocframe) + return true; + return false; + } + + MachineBasicBlock::iterator FindAllocFrame(MachineBasicBlock &B) { + for (auto &I : B) + if (IsAllocFrame(I)) + return I; + return B.end(); + } +} + + +void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const { + for (auto &B : MF) { + auto AF = FindAllocFrame(B); + if (AF == B.end()) + continue; + insertCFIInstructionsAt(B, ++AF); + } +} + + +void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator At) const { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineModuleInfo &MMI = MF.getMMI(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + auto &HII = *HST.getInstrInfo(); + auto &HRI = *HST.getRegisterInfo(); + + // If CFI instructions have debug information attached, something goes + // wrong with the final assembly generation: the prolog_end is placed + // in a wrong location. + DebugLoc DL; + const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION); + + MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); + + if (hasFP(MF)) { + unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true); + unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true); + + // Define CFA via an offset from the value of FP. + // + // -8 -4 0 (SP) + // --+----+----+--------------------- + // | FP | LR | increasing addresses --> + // --+----+----+--------------------- + // | +-- Old SP (before allocframe) + // +-- New FP (after allocframe) + // + // MCCFIInstruction::createDefCfa subtracts the offset from the register. + // MCCFIInstruction::createOffset takes the offset without sign change. + auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(DefCfa)); + // R31 (return addr) = CFA - 4 + auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffR31)); + // R30 (frame ptr) = CFA - 8 + auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffR30)); + } + + static unsigned int RegsToMove[] = { + Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2, + Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18, + Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22, + Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26, + Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9, + Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13, + Hexagon::NoRegister + }; + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) { + unsigned Reg = RegsToMove[i]; + auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool { + return C.getReg() == Reg; + }; + auto F = std::find_if(CSI.begin(), CSI.end(), IfR); + if (F == CSI.end()) + continue; + + // Subtract 8 to make room for R30 and R31, which are added above. 
+    unsigned FrameReg;
+    int64_t Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg) - 8;
+
+    if (Reg < Hexagon::D0 || Reg > Hexagon::D15) {
+      unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true);
+      auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg,
+                                                   Offset);
+      BuildMI(MBB, At, DL, CFID)
+          .addCFIIndex(MMI.addFrameInst(OffReg));
+    } else {
+      // Split the double regs into subregs, and generate appropriate
+      // cfi_offsets.
+      // The only reason we split the double regs is that llvm-mc does not
+      // understand paired registers for cfi_offset.
+      // E.g. .cfi_offset r1:0, -64
+
+      unsigned HiReg = HRI.getSubReg(Reg, Hexagon::subreg_hireg);
+      unsigned LoReg = HRI.getSubReg(Reg, Hexagon::subreg_loreg);
+      unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
+      unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
+      auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,
+                                                  Offset+4);
+      BuildMI(MBB, At, DL, CFID)
+          .addCFIIndex(MMI.addFrameInst(OffHi));
+      auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg,
+                                                  Offset);
+      BuildMI(MBB, At, DL, CFID)
+          .addCFIIndex(MMI.addFrameInst(OffLo));
+    }
+  }
+}
+
+
+bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
+  auto &MFI = *MF.getFrameInfo();
+  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+
+  bool HasFixed = MFI.getNumFixedObjects();
+  bool HasPrealloc = const_cast<MachineFrameInfo&>(MFI)
+                        .getLocalFrameObjectCount();
+  bool HasExtraAlign = HRI.needsStackRealignment(MF);
+  bool HasAlloca = MFI.hasVarSizedObjects();
+
+  // Insert ALLOCFRAME if we need to, or at -O0 for the debugger. We think
+  // that this shouldn't be required, but we are doing so now because gcc
+  // does and gdb can't break at the start of the function without it. We
+  // will remove it if this turns out to be a gdb bug.
+  //
+  if (MF.getTarget().getOptLevel() == CodeGenOpt::None)
+    return true;
+
+  // By default we want to use SP (since it's always there). FP requires
+  // some setup (i.e. ALLOCFRAME).
+  // Fixed and preallocated objects need FP if the distance from them to
+  // the SP is unknown (as it is with alloca or aligna).
+ if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign)) + return true; + + if (MFI.getStackSize() > 0) { + if (UseAllocframe) + return true; + } + + if (MFI.hasCalls() || + MF.getInfo<HexagonMachineFunctionInfo>()->hasClobberLR()) + return true; + + return false; +} + + +enum SpillKind { + SK_ToMem, + SK_FromMem, + SK_FromMemTailcall +}; + +static const char * +getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType) { + const char * V4SpillToMemoryFunctions[] = { + "__save_r16_through_r17", + "__save_r16_through_r19", + "__save_r16_through_r21", + "__save_r16_through_r23", + "__save_r16_through_r25", + "__save_r16_through_r27" }; + + const char * V4SpillFromMemoryFunctions[] = { + "__restore_r16_through_r17_and_deallocframe", + "__restore_r16_through_r19_and_deallocframe", + "__restore_r16_through_r21_and_deallocframe", + "__restore_r16_through_r23_and_deallocframe", + "__restore_r16_through_r25_and_deallocframe", + "__restore_r16_through_r27_and_deallocframe" }; + + const char * V4SpillFromMemoryTailcallFunctions[] = { + "__restore_r16_through_r17_and_deallocframe_before_tailcall", + "__restore_r16_through_r19_and_deallocframe_before_tailcall", + "__restore_r16_through_r21_and_deallocframe_before_tailcall", + "__restore_r16_through_r23_and_deallocframe_before_tailcall", + "__restore_r16_through_r25_and_deallocframe_before_tailcall", + "__restore_r16_through_r27_and_deallocframe_before_tailcall" + }; + + const char **SpillFunc = nullptr; + + switch(SpillType) { + case SK_ToMem: + SpillFunc = V4SpillToMemoryFunctions; + break; + case SK_FromMem: + SpillFunc = V4SpillFromMemoryFunctions; + break; + case SK_FromMemTailcall: + SpillFunc = V4SpillFromMemoryTailcallFunctions; + break; + } + assert(SpillFunc && "Unknown spill kind"); + + // Spill all callee-saved registers up to the highest register used. + switch (MaxReg) { + case Hexagon::R17: + return SpillFunc[0]; + case Hexagon::R19: + return SpillFunc[1]; + case Hexagon::R21: + return SpillFunc[2]; + case Hexagon::R23: + return SpillFunc[3]; + case Hexagon::R25: + return SpillFunc[4]; + case Hexagon::R27: + return SpillFunc[5]; + default: + llvm_unreachable("Unhandled maximum callee save register"); + } + return 0; +} + +/// Adds all callee-saved registers up to MaxReg to the instruction. +static void addCalleeSaveRegistersAsImpOperand(MachineInstr *Inst, + unsigned MaxReg, bool IsDef) { + // Add the callee-saved registers as implicit uses. + for (unsigned R = Hexagon::R16; R <= MaxReg; ++R) { + MachineOperand ImpUse = MachineOperand::CreateReg(R, IsDef, true); + Inst->addOperand(ImpUse); + } +} + + +int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, unsigned &FrameReg) const { + auto &MFI = *MF.getFrameInfo(); + auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + + // Large parts of this code are shared with HRI::eliminateFrameIndex. + int Offset = MFI.getObjectOffset(FI); + bool HasAlloca = MFI.hasVarSizedObjects(); + bool HasExtraAlign = HRI.needsStackRealignment(MF); + bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None; + + unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister(); + unsigned AP = 0; + if (const MachineInstr *AI = getAlignaInstr(MF)) + AP = AI->getOperand(0).getReg(); + unsigned FrameSize = MFI.getStackSize(); + + bool UseFP = false, UseAP = false; // Default: use SP (except at -O0). + // Use FP at -O0, except when there are objects with extra alignment. 
+ // That additional alignment requirement may cause a pad to be inserted, + // which will make it impossible to use FP to access objects located + // past the pad. + if (NoOpt && !HasExtraAlign) + UseFP = true; + if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) { + // Fixed and preallocated objects will be located before any padding + // so FP must be used to access them. + UseFP |= (HasAlloca || HasExtraAlign); + } else { + if (HasAlloca) { + if (HasExtraAlign) + UseAP = true; + else + UseFP = true; + } + } + + // If FP was picked, then there had better be FP. + bool HasFP = hasFP(MF); + assert((HasFP || !UseFP) && "This function must have frame pointer"); + + // Having FP implies allocframe. Allocframe will store extra 8 bytes: + // FP/LR. If the base register is used to access an object across these + // 8 bytes, then the offset will need to be adjusted by 8. + // + // After allocframe: + // HexagonISelLowering adds 8 to ---+ + // the offsets of all stack-based | + // arguments (*) | + // | + // getObjectOffset < 0 0 8 getObjectOffset >= 8 + // ------------------------+-----+------------------------> increasing + // <local objects> |FP/LR| <input arguments> addresses + // -----------------+------+-----+------------------------> + // | | + // SP/AP point --+ +-- FP points here (**) + // somewhere on + // this side of FP/LR + // + // (*) See LowerFormalArguments. The FP/LR is assumed to be present. + // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR. + + // The lowering assumes that FP/LR is present, and so the offsets of + // the formal arguments start at 8. If FP/LR is not there we need to + // reduce the offset by 8. + if (Offset > 0 && !HasFP) + Offset -= 8; + + if (UseFP) + FrameReg = FP; + else if (UseAP) + FrameReg = AP; + else + FrameReg = SP; + + // Calculate the actual offset in the instruction. If there is no FP + // (in other words, no allocframe), then SP will not be adjusted (i.e. + // there will be no SP -= FrameSize), so the frame size should not be + // added to the calculated offset. + int RealOffset = Offset; + if (!UseFP && !UseAP && HasFP) + RealOffset = FrameSize+Offset; + return RealOffset; +} + + +bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, + const CSIVect &CSI, const HexagonRegisterInfo &HRI) const { + if (CSI.empty()) + return true; + + MachineBasicBlock::iterator MI = MBB.begin(); + MachineFunction &MF = *MBB.getParent(); + auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + + if (useSpillFunction(MF, CSI)) { + unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI); + const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem); + // Call spill function. + DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); + MachineInstr *SaveRegsCall = + BuildMI(MBB, MI, DL, HII.get(Hexagon::SAVE_REGISTERS_CALL_V4)) + .addExternalSymbol(SpillFun); + // Add callee-saved registers as use. + addCalleeSaveRegistersAsImpOperand(SaveRegsCall, MaxReg, false); + // Add live in registers. + for (unsigned I = 0; I < CSI.size(); ++I) + MBB.addLiveIn(CSI[I].getReg()); + return true; + } + + for (unsigned i = 0, n = CSI.size(); i < n; ++i) { + unsigned Reg = CSI[i].getReg(); + // Add live in registers. We treat eh_return callee saved register r0 - r3 + // specially. They are not really callee saved registers as they are not + // supposed to be killed. 
+    bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
+    int FI = CSI[i].getFrameIdx();
+    const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
+    HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
+    if (IsKill)
+      MBB.addLiveIn(Reg);
+  }
+  return true;
+}
+
+
+bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
+      const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
+  if (CSI.empty())
+    return false;
+
+  MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
+  MachineFunction &MF = *MBB.getParent();
+  auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+
+  if (useRestoreFunction(MF, CSI)) {
+    bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);
+    unsigned MaxR = getMaxCalleeSavedReg(CSI, HRI);
+    SpillKind Kind = HasTC ? SK_FromMemTailcall : SK_FromMem;
+    const char *RestoreFn = getSpillFunctionFor(MaxR, Kind);
+
+    // Call spill function.
+    DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()
+                                  : MBB.getLastNonDebugInstr()->getDebugLoc();
+    MachineInstr *DeallocCall = nullptr;
+
+    if (HasTC) {
+      unsigned ROpc = Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
+      DeallocCall = BuildMI(MBB, MI, DL, HII.get(ROpc))
+          .addExternalSymbol(RestoreFn);
+    } else {
+      // The block has a return.
+      MachineBasicBlock::iterator It = MBB.getFirstTerminator();
+      assert(It->isReturn() && std::next(It) == MBB.end());
+      unsigned ROpc = Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
+      DeallocCall = BuildMI(MBB, It, DL, HII.get(ROpc))
+          .addExternalSymbol(RestoreFn);
+      // Transfer the function live-out registers.
+      DeallocCall->copyImplicitOps(MF, It);
+    }
+    addCalleeSaveRegistersAsImpOperand(DeallocCall, MaxR, true);
+    return true;
+  }
+
+  for (unsigned i = 0; i < CSI.size(); ++i) {
+    unsigned Reg = CSI[i].getReg();
+    const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
+    int FI = CSI[i].getFrameIdx();
+    HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
+  }
+  return true;
+}
+
+
+void HexagonFrameLowering::eliminateCallFramePseudoInstr(MachineFunction &MF,
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
+  MachineInstr &MI = *I;
+  unsigned Opc = MI.getOpcode();
+  (void)Opc; // Silence compiler warning.
+  assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) &&
+         "Cannot handle this call frame pseudo instruction");
+  MBB.erase(I);
+}
+
+
+void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
+    MachineFunction &MF, RegScavenger *RS) const {
+  // If this function uses an aligned stack and also has variable-sized stack
+  // objects, then we need to map all spill slots to fixed positions, so that
+  // they can be accessed through FP. Otherwise they would have to be accessed
+  // via AP, which may not be available at the particular place in the program.
+ MachineFrameInfo *MFI = MF.getFrameInfo(); + bool HasAlloca = MFI->hasVarSizedObjects(); + bool NeedsAlign = (MFI->getMaxAlignment() > getStackAlignment()); + + if (!HasAlloca || !NeedsAlign) + return; + + unsigned LFS = MFI->getLocalFrameSize(); + int Offset = -LFS; + for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (!MFI->isSpillSlotObjectIndex(i) || MFI->isDeadObjectIndex(i)) + continue; + int S = MFI->getObjectSize(i); + LFS += S; + Offset -= S; + MFI->mapLocalFrameObject(i, Offset); + } + + MFI->setLocalFrameSize(LFS); + unsigned A = MFI->getLocalFrameMaxAlign(); + assert(A <= 8 && "Unexpected local frame alignment"); + if (A == 0) + MFI->setLocalFrameMaxAlign(8); + MFI->setUseLocalStackAllocationBlock(true); +} + +/// Returns true if there is no caller saved registers available. +static bool needToReserveScavengingSpillSlots(MachineFunction &MF, + const HexagonRegisterInfo &HRI) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + const MCPhysReg *CallerSavedRegs = HRI.getCallerSavedRegs(&MF); + // Check for an unused caller-saved register. + for ( ; *CallerSavedRegs; ++CallerSavedRegs) { + MCPhysReg FreeReg = *CallerSavedRegs; + if (!MRI.reg_nodbg_empty(FreeReg)) + continue; + + // Check aliased register usage. + bool IsCurrentRegUsed = false; + for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI) + if (!MRI.reg_nodbg_empty(*AI)) { + IsCurrentRegUsed = true; + break; + } + if (IsCurrentRegUsed) + continue; + + // Neither directly used nor used through an aliased register. + return false; + } + // All caller-saved registers are used. + return true; +} + + +/// Replaces the predicate spill code pseudo instructions by valid instructions. +bool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF) + const { + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HII = *HST.getInstrInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + bool HasReplacedPseudoInst = false; + // Replace predicate spill pseudo instructions by real code. + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock *MBB = &*MBBb; + // Traverse the basic block. + MachineBasicBlock::iterator NextII; + for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); + MII = NextII) { + MachineInstr *MI = MII; + NextII = std::next(MII); + int Opc = MI->getOpcode(); + if (Opc == Hexagon::STriw_pred) { + HasReplacedPseudoInst = true; + // STriw_pred FI, 0, SrcReg; + unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + unsigned SrcReg = MI->getOperand(2).getReg(); + bool IsOrigSrcRegKilled = MI->getOperand(2).isKill(); + + assert(MI->getOperand(0).isFI() && "Expect a frame index"); + assert(Hexagon::PredRegsRegClass.contains(SrcReg) && + "Not a predicate register"); + + // Insert transfer to general purpose register. + // VirtReg = C2_tfrpr SrcPredReg + BuildMI(*MBB, MII, MI->getDebugLoc(), HII.get(Hexagon::C2_tfrpr), + VirtReg).addReg(SrcReg, getKillRegState(IsOrigSrcRegKilled)); + + // Change instruction to S2_storeri_io. 
+ // S2_storeri_io FI, 0, VirtReg + MI->setDesc(HII.get(Hexagon::S2_storeri_io)); + MI->getOperand(2).setReg(VirtReg); + MI->getOperand(2).setIsKill(); + + } else if (Opc == Hexagon::LDriw_pred) { + // DstReg = LDriw_pred FI, 0 + MachineOperand &M0 = MI->getOperand(0); + if (M0.isDead()) { + MBB->erase(MII); + continue; + } + + unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + unsigned DestReg = MI->getOperand(0).getReg(); + + assert(MI->getOperand(1).isFI() && "Expect a frame index"); + assert(Hexagon::PredRegsRegClass.contains(DestReg) && + "Not a predicate register"); + + // Change instruction to L2_loadri_io. + // VirtReg = L2_loadri_io FI, 0 + MI->setDesc(HII.get(Hexagon::L2_loadri_io)); + MI->getOperand(0).setReg(VirtReg); + + // Insert transfer to general purpose register. + // DestReg = C2_tfrrp VirtReg + const MCInstrDesc &D = HII.get(Hexagon::C2_tfrrp); + BuildMI(*MBB, std::next(MII), MI->getDebugLoc(), D, DestReg) + .addReg(VirtReg, getKillRegState(true)); + HasReplacedPseudoInst = true; + } + } + } + return HasReplacedPseudoInst; +} + + +void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); + auto &HRI = *HST.getRegisterInfo(); + + bool HasEHReturn = MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn(); + + // If we have a function containing __builtin_eh_return we want to spill and + // restore all callee saved registers. Pretend that they are used. + if (HasEHReturn) { + for (const MCPhysReg *CSRegs = HRI.getCalleeSavedRegs(&MF); *CSRegs; + ++CSRegs) + SavedRegs.set(*CSRegs); + } + + const TargetRegisterClass &RC = Hexagon::IntRegsRegClass; + + // Replace predicate register pseudo spill code. + bool HasReplacedPseudoInst = replacePredRegPseudoSpillCode(MF); + + // We need to reserve a a spill slot if scavenging could potentially require + // spilling a scavenged register. + if (HasReplacedPseudoInst && needToReserveScavengingSpillSlots(MF, HRI)) { + MachineFrameInfo *MFI = MF.getFrameInfo(); + for (int i=0; i < NumberScavengerSlots; i++) + RS->addScavengingFrameIndex( + MFI->CreateSpillStackObject(RC.getSize(), RC.getAlignment())); + } +} + + +#ifndef NDEBUG +static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) { + dbgs() << '{'; + for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) { + unsigned R = x; + dbgs() << ' ' << PrintReg(R, &TRI); + } + dbgs() << " }"; +} +#endif + + +bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const { + DEBUG(dbgs() << LLVM_FUNCTION_NAME << " on " + << MF.getFunction()->getName() << '\n'); + MachineFrameInfo *MFI = MF.getFrameInfo(); + BitVector SRegs(Hexagon::NUM_TARGET_REGS); + + // Generate a set of unique, callee-saved registers (SRegs), where each + // register in the set is maximal in terms of sub-/super-register relation, + // i.e. for each R in SRegs, no proper super-register of R is also in SRegs. + + // (1) For each callee-saved register, add that register and all of its + // sub-registers to SRegs. 
+ DEBUG(dbgs() << "Initial CS registers: {"); + for (unsigned i = 0, n = CSI.size(); i < n; ++i) { + unsigned R = CSI[i].getReg(); + DEBUG(dbgs() << ' ' << PrintReg(R, TRI)); + for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) + SRegs[*SR] = true; + } + DEBUG(dbgs() << " }\n"); + DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); + + // (2) For each reserved register, remove that register and all of its + // sub- and super-registers from SRegs. + BitVector Reserved = TRI->getReservedRegs(MF); + for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) { + unsigned R = x; + for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR) + SRegs[*SR] = false; + } + DEBUG(dbgs() << "Res: "; dump_registers(Reserved, *TRI); dbgs() << "\n"); + DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); + + // (3) Collect all registers that have at least one sub-register in SRegs, + // and also have no sub-registers that are reserved. These will be the can- + // didates for saving as a whole instead of their individual sub-registers. + // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.) + BitVector TmpSup(Hexagon::NUM_TARGET_REGS); + for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { + unsigned R = x; + for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) + TmpSup[*SR] = true; + } + for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) { + unsigned R = x; + for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) { + if (!Reserved[*SR]) + continue; + TmpSup[R] = false; + break; + } + } + DEBUG(dbgs() << "TmpSup: "; dump_registers(TmpSup, *TRI); dbgs() << "\n"); + + // (4) Include all super-registers found in (3) into SRegs. + SRegs |= TmpSup; + DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); + + // (5) For each register R in SRegs, if any super-register of R is in SRegs, + // remove R from SRegs. + for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { + unsigned R = x; + for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) { + if (!SRegs[*SR]) + continue; + SRegs[R] = false; + break; + } + } + DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); + + // Now, for each register that has a fixed stack slot, create the stack + // object for it. + CSI.clear(); + + typedef TargetFrameLowering::SpillSlot SpillSlot; + unsigned NumFixed; + int MinOffset = 0; // CS offsets are negative. + const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed); + for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) { + if (!SRegs[S->Reg]) + continue; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg); + int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), S->Offset); + MinOffset = std::min(MinOffset, S->Offset); + CSI.push_back(CalleeSavedInfo(S->Reg, FI)); + SRegs[S->Reg] = false; + } + + // There can be some registers that don't have fixed slots. For example, + // we need to store R0-R3 in functions with exception handling. For each + // such register, create a non-fixed stack object. 
+ for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { + unsigned R = x; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R); + int Off = MinOffset - RC->getSize(); + unsigned Align = std::min(RC->getAlignment(), getStackAlignment()); + assert(isPowerOf2_32(Align)); + Off &= -Align; + int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), Off); + MinOffset = std::min(MinOffset, Off); + CSI.push_back(CalleeSavedInfo(R, FI)); + SRegs[R] = false; + } + + DEBUG({ + dbgs() << "CS information: {"; + for (unsigned i = 0, n = CSI.size(); i < n; ++i) { + int FI = CSI[i].getFrameIdx(); + int Off = MFI->getObjectOffset(FI); + dbgs() << ' ' << PrintReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp"; + if (Off >= 0) + dbgs() << '+'; + dbgs() << Off; + } + dbgs() << " }\n"; + }); + +#ifndef NDEBUG + // Verify that all registers were handled. + bool MissedReg = false; + for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { + unsigned R = x; + dbgs() << PrintReg(R, TRI) << ' '; + MissedReg = true; + } + if (MissedReg) + llvm_unreachable("...there are unhandled callee-saved registers!"); +#endif + + return true; +} + + +void HexagonFrameLowering::expandAlloca(MachineInstr *AI, + const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const { + MachineBasicBlock &MB = *AI->getParent(); + DebugLoc DL = AI->getDebugLoc(); + unsigned A = AI->getOperand(2).getImm(); + + // Have + // Rd = alloca Rs, #A + // + // If Rs and Rd are different registers, use this sequence: + // Rd = sub(r29, Rs) + // r29 = sub(r29, Rs) + // Rd = and(Rd, #-A) ; if necessary + // r29 = and(r29, #-A) ; if necessary + // Rd = add(Rd, #CF) ; CF size aligned to at most A + // otherwise, do + // Rd = sub(r29, Rs) + // Rd = and(Rd, #-A) ; if necessary + // r29 = Rd + // Rd = add(Rd, #CF) ; CF size aligned to at most A + + MachineOperand &RdOp = AI->getOperand(0); + MachineOperand &RsOp = AI->getOperand(1); + unsigned Rd = RdOp.getReg(), Rs = RsOp.getReg(); + + // Rd = sub(r29, Rs) + BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd) + .addReg(SP) + .addReg(Rs); + if (Rs != Rd) { + // r29 = sub(r29, Rs) + BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP) + .addReg(SP) + .addReg(Rs); + } + if (A > 8) { + // Rd = and(Rd, #-A) + BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd) + .addReg(Rd) + .addImm(-int64_t(A)); + if (Rs != Rd) + BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP) + .addReg(SP) + .addImm(-int64_t(A)); + } + if (Rs == Rd) { + // r29 = Rd + BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP) + .addReg(Rd); + } + if (CF > 0) { + // Rd = add(Rd, #CF) + BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd) + .addReg(Rd) + .addImm(CF); + } +} + + +bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + if (!MFI->hasVarSizedObjects()) + return false; + unsigned MaxA = MFI->getMaxAlignment(); + if (MaxA <= getStackAlignment()) + return false; + return true; +} + + +const MachineInstr *HexagonFrameLowering::getAlignaInstr( + const MachineFunction &MF) const { + for (auto &B : MF) + for (auto &I : B) + if (I.getOpcode() == Hexagon::ALIGNA) + return &I; + return nullptr; +} + + +// FIXME: Use Function::optForSize(). 
+inline static bool isOptSize(const MachineFunction &MF) { + AttributeSet AF = MF.getFunction()->getAttributes(); + return AF.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize); +} + +inline static bool isMinSize(const MachineFunction &MF) { + return MF.getFunction()->optForMinSize(); +} + + +/// Determine whether the callee-saved register saves and restores should +/// be generated via inline code. If this function returns "true", inline +/// code will be generated. If this function returns "false", additional +/// checks are performed, which may still lead to the inline code. +bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF, + const CSIVect &CSI) const { + if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn()) + return true; + if (!isOptSize(MF) && !isMinSize(MF)) + if (MF.getTarget().getOptLevel() > CodeGenOpt::Default) + return true; + + // Check if CSI only has double registers, and if the registers form + // a contiguous block starting from D8. + BitVector Regs(Hexagon::NUM_TARGET_REGS); + for (unsigned i = 0, n = CSI.size(); i < n; ++i) { + unsigned R = CSI[i].getReg(); + if (!Hexagon::DoubleRegsRegClass.contains(R)) + return true; + Regs[R] = true; + } + int F = Regs.find_first(); + if (F != Hexagon::D8) + return true; + while (F >= 0) { + int N = Regs.find_next(F); + if (N >= 0 && N != F+1) + return true; + F = N; + } + + return false; +} + + +bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF, + const CSIVect &CSI) const { + if (shouldInlineCSR(MF, CSI)) + return false; + unsigned NumCSI = CSI.size(); + if (NumCSI <= 1) + return false; + + unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs + : SpillFuncThreshold; + return Threshold < NumCSI; +} + + +bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF, + const CSIVect &CSI) const { + if (shouldInlineCSR(MF, CSI)) + return false; + unsigned NumCSI = CSI.size(); + unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs-1 + : SpillFuncThreshold; + return Threshold < NumCSI; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h new file mode 100644 index 0000000..683b303 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h @@ -0,0 +1,109 @@ +//=- HexagonFrameLowering.h - Define frame lowering for Hexagon --*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H + +#include "Hexagon.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + +class HexagonInstrInfo; +class HexagonRegisterInfo; + +class HexagonFrameLowering : public TargetFrameLowering { +public: + explicit HexagonFrameLowering() + : TargetFrameLowering(StackGrowsDown, 8, 0, 1, true) {} + + // All of the prolog/epilog functionality, including saving and restoring + // callee-saved registers is handled in emitPrologue. This is to have the + // logic for shrink-wrapping in one place. 
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const + override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const + override {} + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const override { + return true; + } + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const override { + return true; + } + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = nullptr) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; + + bool targetHandlesStackFrameRounding() const override { + return true; + } + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; + bool hasFP(const MachineFunction &MF) const override; + + const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) + const override { + static const SpillSlot Offsets[] = { + { Hexagon::R17, -4 }, { Hexagon::R16, -8 }, { Hexagon::D8, -8 }, + { Hexagon::R19, -12 }, { Hexagon::R18, -16 }, { Hexagon::D9, -16 }, + { Hexagon::R21, -20 }, { Hexagon::R20, -24 }, { Hexagon::D10, -24 }, + { Hexagon::R23, -28 }, { Hexagon::R22, -32 }, { Hexagon::D11, -32 }, + { Hexagon::R25, -36 }, { Hexagon::R24, -40 }, { Hexagon::D12, -40 }, + { Hexagon::R27, -44 }, { Hexagon::R26, -48 }, { Hexagon::D13, -48 } + }; + NumEntries = array_lengthof(Offsets); + return Offsets; + } + + bool assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) + const override; + + bool needsAligna(const MachineFunction &MF) const; + const MachineInstr *getAlignaInstr(const MachineFunction &MF) const; + + void insertCFIInstructions(MachineFunction &MF) const; + +private: + typedef std::vector<CalleeSavedInfo> CSIVect; + + void expandAlloca(MachineInstr *AI, const HexagonInstrInfo &TII, + unsigned SP, unsigned CF) const; + void insertPrologueInBlock(MachineBasicBlock &MBB) const; + void insertEpilogueInBlock(MachineBasicBlock &MBB) const; + bool insertCSRSpillsInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, + const HexagonRegisterInfo &HRI) const; + bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, + const HexagonRegisterInfo &HRI) const; + void insertCFIInstructionsAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator At) const; + + void adjustForCalleeSavedRegsSpillCall(MachineFunction &MF) const; + bool replacePredRegPseudoSpillCode(MachineFunction &MF) const; + bool replaceVecPredRegPseudoSpillCode(MachineFunction &MF) const; + + void findShrunkPrologEpilog(MachineFunction &MF, MachineBasicBlock *&PrologB, + MachineBasicBlock *&EpilogB) const; + + bool shouldInlineCSR(llvm::MachineFunction &MF, const CSIVect &CSI) const; + bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const; + bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const; +}; + +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp new file mode 100644 index 0000000..f26e2ff --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -0,0 
+1,259 @@ +//===--- HexagonGenExtract.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<unsigned> ExtractCutoff("extract-cutoff", cl::init(~0U), + cl::Hidden, cl::desc("Cutoff for generating \"extract\"" + " instructions")); + +// This prevents generating extract instructions that have the offset of 0. +// One of the reasons for "extract" is to put a sequence of bits in a regis- +// ter, starting at offset 0 (so that these bits can then be used by an +// "insert"). If the bits are already at offset 0, it is better not to gene- +// rate "extract", since logical bit operations can be merged into compound +// instructions (as opposed to "extract"). +static cl::opt<bool> NoSR0("extract-nosr0", cl::init(true), cl::Hidden, + cl::desc("No extract instruction with offset 0")); + +static cl::opt<bool> NeedAnd("extract-needand", cl::init(true), cl::Hidden, + cl::desc("Require & in extract patterns")); + +namespace llvm { + void initializeHexagonGenExtractPass(PassRegistry&); + FunctionPass *createHexagonGenExtract(); +} + + +namespace { + class HexagonGenExtract : public FunctionPass { + public: + static char ID; + HexagonGenExtract() : FunctionPass(ID), ExtractCount(0) { + initializeHexagonGenExtractPass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const override { + return "Hexagon generate \"extract\" instructions"; + } + virtual bool runOnFunction(Function &F) override; + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<MachineFunctionAnalysis>(); + FunctionPass::getAnalysisUsage(AU); + } + private: + bool visitBlock(BasicBlock *B); + bool convert(Instruction *In); + + unsigned ExtractCount; + DominatorTree *DT; + }; + + char HexagonGenExtract::ID = 0; +} + +INITIALIZE_PASS_BEGIN(HexagonGenExtract, "hextract", "Hexagon generate " + "\"extract\" instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(HexagonGenExtract, "hextract", "Hexagon generate " + "\"extract\" instructions", false, false) + + +bool HexagonGenExtract::convert(Instruction *In) { + using namespace PatternMatch; + Value *BF = 0; + ConstantInt *CSL = 0, *CSR = 0, *CM = 0; + BasicBlock *BB = In->getParent(); + LLVMContext &Ctx = BB->getContext(); + bool LogicalSR; + + // (and (shl (lshr x, #sr), #sl), #m) + LogicalSR = true; + bool Match = match(In, m_And(m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CSL)), + m_ConstantInt(CM))); + + if (!Match) { + // (and (shl (ashr x, #sr), #sl), #m) + LogicalSR = false; + Match = match(In, m_And(m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CSL)), + m_ConstantInt(CM))); + } + if 
(!Match) {
+    // (and (shl x, #sl), #m)
+    LogicalSR = true;
+    CSR = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+    Match = match(In, m_And(m_Shl(m_Value(BF), m_ConstantInt(CSL)),
+                            m_ConstantInt(CM)));
+    if (Match && NoSR0)
+      return false;
+  }
+  if (!Match) {
+    // (and (lshr x, #sr), #m)
+    LogicalSR = true;
+    CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+    Match = match(In, m_And(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
+                            m_ConstantInt(CM)));
+  }
+  if (!Match) {
+    // (and (ashr x, #sr), #m)
+    LogicalSR = false;
+    CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+    Match = match(In, m_And(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
+                            m_ConstantInt(CM)));
+  }
+  if (!Match) {
+    CM = 0;
+    // (shl (lshr x, #sr), #sl)
+    LogicalSR = true;
+    Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
+                            m_ConstantInt(CSL)));
+  }
+  if (!Match) {
+    CM = 0;
+    // (shl (ashr x, #sr), #sl)
+    LogicalSR = false;
+    Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
+                            m_ConstantInt(CSL)));
+  }
+  if (!Match)
+    return false;
+
+  Type *Ty = BF->getType();
+  if (!Ty->isIntegerTy())
+    return false;
+  unsigned BW = Ty->getPrimitiveSizeInBits();
+  if (BW != 32 && BW != 64)
+    return false;
+
+  uint32_t SR = CSR->getZExtValue();
+  uint32_t SL = CSL->getZExtValue();
+
+  if (!CM) {
+    // If there was no and, and the shift left did not remove all potential
+    // sign bits created by the shift right, then extractu cannot reproduce
+    // this value.
+    if (!LogicalSR && (SR > SL))
+      return false;
+    APInt A = APInt(BW, ~0ULL).lshr(SR).shl(SL);
+    CM = ConstantInt::get(Ctx, A);
+  }
+
+  // CM is the shifted-left mask. Shift it back right to remove the zero
+  // bits on least-significant positions.
+  APInt M = CM->getValue().lshr(SL);
+  uint32_t T = M.countTrailingOnes();
+
+  // During the shifts some of the bits will be lost. Calculate how many
+  // of the original value will remain after shift right and then left.
+  uint32_t U = BW - std::max(SL, SR);
+  // The width of the extracted field is the minimum of the original bits
+  // that remain after the shifts and the number of contiguous 1s in the mask.
+  uint32_t W = std::min(U, T);
+  if (W == 0)
+    return false;
+
+  // Check if the extracted bits are contained within the mask that it is
+  // and-ed with. The extract operation will copy these bits, and so the
+  // mask cannot have any holes in it that would clear any of the bits of
+  // the extracted field.
+  if (!LogicalSR) {
+    // If the shift right was arithmetic, it could have included some 1 bits.
+    // It is still ok to generate extract, but only if the mask eliminates
+    // those bits (i.e. M does not have any bits set beyond U).
+    APInt C = APInt::getHighBitsSet(BW, BW-U);
+    if (M.intersects(C) || !APIntOps::isMask(W, M))
+      return false;
+  } else {
+    // Check if M starts with a contiguous sequence of W times 1 bits. Get
+    // the low U bits of M (which eliminates the 0 bits shifted in on the
+    // left), and check if the result is APInt's "mask":
+    if (!APIntOps::isMask(W, M.getLoBits(U)))
+      return false;
+  }
+
+  IRBuilder<> IRB(In);
+  Intrinsic::ID IntId = (BW == 32) ?
Intrinsic::hexagon_S2_extractu + : Intrinsic::hexagon_S2_extractup; + Module *Mod = BB->getParent()->getParent(); + Value *ExtF = Intrinsic::getDeclaration(Mod, IntId); + Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)}); + if (SL != 0) + NewIn = IRB.CreateShl(NewIn, SL, CSL->getName()); + In->replaceAllUsesWith(NewIn); + return true; +} + + +bool HexagonGenExtract::visitBlock(BasicBlock *B) { + // Depth-first, bottom-up traversal. + DomTreeNode *DTN = DT->getNode(B); + typedef GraphTraits<DomTreeNode*> GTN; + typedef GTN::ChildIteratorType Iter; + for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I) + visitBlock((*I)->getBlock()); + + // Allow limiting the number of generated extracts for debugging purposes. + bool HasCutoff = ExtractCutoff.getPosition(); + unsigned Cutoff = ExtractCutoff; + + bool Changed = false; + BasicBlock::iterator I = std::prev(B->end()), NextI, Begin = B->begin(); + while (true) { + if (HasCutoff && (ExtractCount >= Cutoff)) + return Changed; + bool Last = (I == Begin); + if (!Last) + NextI = std::prev(I); + Instruction *In = &*I; + bool Done = convert(In); + if (HasCutoff && Done) + ExtractCount++; + Changed |= Done; + if (Last) + break; + I = NextI; + } + return Changed; +} + + +bool HexagonGenExtract::runOnFunction(Function &F) { + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + bool Changed; + + // Traverse the function bottom-up, to see super-expressions before their + // sub-expressions. + BasicBlock *Entry = GraphTraits<Function*>::getEntryNode(&F); + Changed = visitBlock(Entry); + + return Changed; +} + + +FunctionPass *llvm::createHexagonGenExtract() { + return new HexagonGenExtract(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp new file mode 100644 index 0000000..64a2b6c --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -0,0 +1,1599 @@ +//===--- HexagonGenInsert.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexinsert" + +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include "Hexagon.h" +#include "HexagonRegisterInfo.h" +#include "HexagonTargetMachine.h" +#include "HexagonBitTracker.h" + +#include <map> +#include <vector> + +using namespace llvm; + +static cl::opt<unsigned> VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), + cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg# cutoff for insert generation.")); +// The distance cutoff is selected based on the precheckin-perf results: +// cutoffs 20, 25, 35, and 40 are worse than 30. 
+static cl::opt<unsigned> VRegDistCutoff("insert-dist-cutoff", cl::init(30U), + cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert " + "generation.")); + +static cl::opt<bool> OptTiming("insert-timing", cl::init(false), cl::Hidden, + cl::ZeroOrMore, cl::desc("Enable timing of insert generation")); +static cl::opt<bool> OptTimingDetail("insert-timing-detail", cl::init(false), + cl::Hidden, cl::ZeroOrMore, cl::desc("Enable detailed timing of insert " + "generation")); + +static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden, + cl::ZeroOrMore); +static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden, + cl::ZeroOrMore); +// Whether to construct constant values via "insert". Could eliminate constant +// extenders, but often not practical. +static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden, + cl::ZeroOrMore); + +namespace { + // The preprocessor gets confused when the DEBUG macro is passed larger + // chunks of code. Use this function to detect debugging. + inline bool isDebug() { +#ifndef NDEBUG + return ::llvm::DebugFlag && ::llvm::isCurrentDebugType(DEBUG_TYPE); +#else + return false; +#endif + } +} + + +namespace { + // Set of virtual registers, based on BitVector. + struct RegisterSet : private BitVector { + RegisterSet() = default; + explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} + + using BitVector::clear; + + unsigned find_first() const { + int First = BitVector::find_first(); + if (First < 0) + return 0; + return x2v(First); + } + + unsigned find_next(unsigned Prev) const { + int Next = BitVector::find_next(v2x(Prev)); + if (Next < 0) + return 0; + return x2v(Next); + } + + RegisterSet &insert(unsigned R) { + unsigned Idx = v2x(R); + ensure(Idx); + return static_cast<RegisterSet&>(BitVector::set(Idx)); + } + RegisterSet &remove(unsigned R) { + unsigned Idx = v2x(R); + if (Idx >= size()) + return *this; + return static_cast<RegisterSet&>(BitVector::reset(Idx)); + } + + RegisterSet &insert(const RegisterSet &Rs) { + return static_cast<RegisterSet&>(BitVector::operator|=(Rs)); + } + RegisterSet &remove(const RegisterSet &Rs) { + return static_cast<RegisterSet&>(BitVector::reset(Rs)); + } + + reference operator[](unsigned R) { + unsigned Idx = v2x(R); + ensure(Idx); + return BitVector::operator[](Idx); + } + bool operator[](unsigned R) const { + unsigned Idx = v2x(R); + assert(Idx < size()); + return BitVector::operator[](Idx); + } + bool has(unsigned R) const { + unsigned Idx = v2x(R); + if (Idx >= size()) + return false; + return BitVector::test(Idx); + } + + bool empty() const { + return !BitVector::any(); + } + bool includes(const RegisterSet &Rs) const { + // A.BitVector::test(B) <=> A-B != {} + return !Rs.BitVector::test(*this); + } + bool intersects(const RegisterSet &Rs) const { + return BitVector::anyCommon(Rs); + } + + private: + void ensure(unsigned Idx) { + if (size() <= Idx) + resize(std::max(Idx+1, 32U)); + } + static inline unsigned v2x(unsigned v) { + return TargetRegisterInfo::virtReg2Index(v); + } + static inline unsigned x2v(unsigned x) { + return TargetRegisterInfo::index2VirtReg(x); + } + }; + + + struct PrintRegSet { + PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) + : RS(S), TRI(RI) {} + friend raw_ostream &operator<< (raw_ostream &OS, + const PrintRegSet &P); + private: + const RegisterSet &RS; + const TargetRegisterInfo *TRI; + }; + + raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { + OS << '{'; + for (unsigned R = 
P.RS.find_first(); R; R = P.RS.find_next(R)) + OS << ' ' << PrintReg(R, P.TRI); + OS << " }"; + return OS; + } +} + + +namespace { + // A convenience class to associate unsigned numbers (such as virtual + // registers) with unsigned numbers. + struct UnsignedMap : public DenseMap<unsigned,unsigned> { + UnsignedMap() : BaseType() {} + private: + typedef DenseMap<unsigned,unsigned> BaseType; + }; + + // A utility to establish an ordering between virtual registers: + // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB] + // This is meant as a cache for the ordering of virtual registers defined + // by a potentially expensive comparison function, or obtained by a proce- + // dure that should not be repeated each time two registers are compared. + struct RegisterOrdering : public UnsignedMap { + RegisterOrdering() : UnsignedMap() {} + unsigned operator[](unsigned VR) const { + const_iterator F = find(VR); + assert(F != end()); + return F->second; + } + // Add operator(), so that objects of this class can be used as + // comparators in std::sort et al. + bool operator() (unsigned VR1, unsigned VR2) const { + return operator[](VR1) < operator[](VR2); + } + }; +} + + +namespace { + // Ordering of bit values. This class does not have operator[], but + // is supplies a comparison operator() for use in std:: algorithms. + // The order is as follows: + // - 0 < 1 < ref + // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg), + // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos. + struct BitValueOrdering { + BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {} + bool operator() (const BitTracker::BitValue &V1, + const BitTracker::BitValue &V2) const; + const RegisterOrdering &BaseOrd; + }; +} + + +bool BitValueOrdering::operator() (const BitTracker::BitValue &V1, + const BitTracker::BitValue &V2) const { + if (V1 == V2) + return false; + // V1==0 => true, V2==0 => false + if (V1.is(0) || V2.is(0)) + return V1.is(0); + // Neither of V1,V2 is 0, and V1!=V2. + // V2==1 => false, V1==1 => true + if (V2.is(1) || V1.is(1)) + return !V2.is(1); + // Both V1,V2 are refs. + unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg]; + if (Ind1 != Ind2) + return Ind1 < Ind2; + // If V1.Pos==V2.Pos + assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different"); + return V1.RefI.Pos < V2.RefI.Pos; +} + + +namespace { + // Cache for the BitTracker's cell map. Map lookup has a logarithmic + // complexity, this class will memoize the lookup results to reduce + // the access time for repeated lookups of the same cell. + struct CellMapShadow { + CellMapShadow(const BitTracker &T) : BT(T) {} + const BitTracker::RegisterCell &lookup(unsigned VR) { + unsigned RInd = TargetRegisterInfo::virtReg2Index(VR); + // Grow the vector to at least 32 elements. + if (RInd >= CVect.size()) + CVect.resize(std::max(RInd+16, 32U), 0); + const BitTracker::RegisterCell *CP = CVect[RInd]; + if (CP == 0) + CP = CVect[RInd] = &BT.lookup(VR); + return *CP; + } + + const BitTracker &BT; + + private: + typedef std::vector<const BitTracker::RegisterCell*> CellVectType; + CellVectType CVect; + }; +} + + +namespace { + // Comparator class for lexicographic ordering of virtual registers + // according to the corresponding BitTracker::RegisterCell objects. 
+ struct RegisterCellLexCompare { + RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M) + : BitOrd(BO), CM(M) {} + bool operator() (unsigned VR1, unsigned VR2) const; + private: + const BitValueOrdering &BitOrd; + CellMapShadow &CM; + }; + + // Comparator class for lexicographic ordering of virtual registers + // according to the specified bits of the corresponding BitTracker:: + // RegisterCell objects. + // Specifically, this class will be used to compare bit B of a register + // cell for a selected virtual register R with bit N of any register + // other than R. + struct RegisterCellBitCompareSel { + RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N, + const BitValueOrdering &BO, CellMapShadow &M) + : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {} + bool operator() (unsigned VR1, unsigned VR2) const; + private: + const unsigned SelR, SelB; + const unsigned BitN; + const BitValueOrdering &BitOrd; + CellMapShadow &CM; + }; +} + + +bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const { + // Ordering of registers, made up from two given orderings: + // - the ordering of the register numbers, and + // - the ordering of register cells. + // Def. R1 < R2 if: + // - cell(R1) < cell(R2), or + // - cell(R1) == cell(R2), and index(R1) < index(R2). + // + // For register cells, the ordering is lexicographic, with index 0 being + // the most significant. + if (VR1 == VR2) + return false; + + const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2); + uint16_t W1 = RC1.width(), W2 = RC2.width(); + for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) { + const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i]; + if (V1 != V2) + return BitOrd(V1, V2); + } + // Cells are equal up until the common length. + if (W1 != W2) + return W1 < W2; + + return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2]; +} + + +bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const { + if (VR1 == VR2) + return false; + const BitTracker::RegisterCell &RC1 = CM.lookup(VR1); + const BitTracker::RegisterCell &RC2 = CM.lookup(VR2); + uint16_t W1 = RC1.width(), W2 = RC2.width(); + uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN; + uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN; + // If Bit1 exceeds the width of VR1, then: + // - return false, if at the same time Bit2 exceeds VR2, or + // - return true, otherwise. + // (I.e. "a bit value that does not exist is less than any bit value + // that does exist".) + if (W1 <= Bit1) + return Bit2 < W2; + // If Bit1 is within VR1, but Bit2 is not within VR2, return false. + if (W2 <= Bit2) + return false; + + const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2]; + if (V1 != V2) + return BitOrd(V1, V2); + return false; +} + + +namespace { + class OrderedRegisterList { + typedef std::vector<unsigned> ListType; + public: + OrderedRegisterList(const RegisterOrdering &RO) : Ord(RO) {} + void insert(unsigned VR); + void remove(unsigned VR); + unsigned operator[](unsigned Idx) const { + assert(Idx < Seq.size()); + return Seq[Idx]; + } + unsigned size() const { + return Seq.size(); + } + + typedef ListType::iterator iterator; + typedef ListType::const_iterator const_iterator; + iterator begin() { return Seq.begin(); } + iterator end() { return Seq.end(); } + const_iterator begin() const { return Seq.begin(); } + const_iterator end() const { return Seq.end(); } + + // Convenience function to convert an iterator to the corresponding index. 
+ unsigned idx(iterator It) const { return It-begin(); } + private: + ListType Seq; + const RegisterOrdering &Ord; + }; + + + struct PrintORL { + PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI) + : RL(L), TRI(RI) {} + friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P); + private: + const OrderedRegisterList &RL; + const TargetRegisterInfo *TRI; + }; + + raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) { + OS << '('; + OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end(); + for (OrderedRegisterList::const_iterator I = B; I != E; ++I) { + if (I != B) + OS << ", "; + OS << PrintReg(*I, P.TRI); + } + OS << ')'; + return OS; + } +} + + +void OrderedRegisterList::insert(unsigned VR) { + iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord); + if (L == Seq.end()) + Seq.push_back(VR); + else + Seq.insert(L, VR); +} + + +void OrderedRegisterList::remove(unsigned VR) { + iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord); + assert(L != Seq.end()); + Seq.erase(L); +} + + +namespace { + // A record of the insert form. The fields correspond to the operands + // of the "insert" instruction: + // ... = insert(SrcR, InsR, #Wdh, #Off) + struct IFRecord { + IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0) + : SrcR(SR), InsR(IR), Wdh(W), Off(O) {} + unsigned SrcR, InsR; + uint16_t Wdh, Off; + }; + + struct PrintIFR { + PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI) + : IFR(R), TRI(RI) {} + private: + const IFRecord &IFR; + const TargetRegisterInfo *TRI; + friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P); + }; + + raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) { + unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR; + OS << '(' << PrintReg(SrcR, P.TRI) << ',' << PrintReg(InsR, P.TRI) + << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')'; + return OS; + } + + typedef std::pair<IFRecord,RegisterSet> IFRecordWithRegSet; +} + + +namespace llvm { + void initializeHexagonGenInsertPass(PassRegistry&); + FunctionPass *createHexagonGenInsert(); +} + + +namespace { + class HexagonGenInsert : public MachineFunctionPass { + public: + static char ID; + HexagonGenInsert() : MachineFunctionPass(ID), HII(0), HRI(0) { + initializeHexagonGenInsertPass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const { + return "Hexagon generate \"insert\" instructions"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + typedef DenseMap<std::pair<unsigned,unsigned>,unsigned> PairMapType; + + void buildOrderingMF(RegisterOrdering &RO) const; + void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const; + bool isIntClass(const TargetRegisterClass *RC) const; + bool isConstant(unsigned VR) const; + bool isSmallConstant(unsigned VR) const; + bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR, + uint16_t L, uint16_t S) const; + bool findSelfReference(unsigned VR) const; + bool findNonSelfReference(unsigned VR) const; + void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const; + void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const; + unsigned distance(const MachineBasicBlock *FromB, + const MachineBasicBlock *ToB, const UnsignedMap &RPO, + PairMapType &M) const; + unsigned 
distance(MachineBasicBlock::const_iterator FromI, + MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, + PairMapType &M) const; + bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs); + void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs); + void findRemovableRegisters(unsigned VR, IFRecord IF, + RegisterSet &RMs) const; + void computeRemovableRegisters(); + + void pruneEmptyLists(); + void pruneCoveredSets(unsigned VR); + void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M); + void pruneRegCopies(unsigned VR); + void pruneCandidates(); + void selectCandidates(); + bool generateInserts(); + + bool removeDeadCode(MachineDomTreeNode *N); + + // IFRecord coupled with a set of potentially removable registers: + typedef std::vector<IFRecordWithRegSet> IFListType; + typedef DenseMap<unsigned,IFListType> IFMapType; // vreg -> IFListType + + void dump_map() const; + + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + + MachineFunction *MFN; + MachineRegisterInfo *MRI; + MachineDominatorTree *MDT; + CellMapShadow *CMS; + + RegisterOrdering BaseOrd; + RegisterOrdering CellOrd; + IFMapType IFMap; + }; + + char HexagonGenInsert::ID = 0; +} + + +void HexagonGenInsert::dump_map() const { + typedef IFMapType::const_iterator iterator; + for (iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + dbgs() << " " << PrintReg(I->first, HRI) << ":\n"; + const IFListType &LL = I->second; + for (unsigned i = 0, n = LL.size(); i < n; ++i) + dbgs() << " " << PrintIFR(LL[i].first, HRI) << ", " + << PrintRegSet(LL[i].second, HRI) << '\n'; + } +} + + +void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const { + unsigned Index = 0; + typedef MachineFunction::const_iterator mf_iterator; + for (mf_iterator A = MFN->begin(), Z = MFN->end(); A != Z; ++A) { + const MachineBasicBlock &B = *A; + if (!CMS->BT.reached(&B)) + continue; + typedef MachineBasicBlock::const_iterator mb_iterator; + for (mb_iterator I = B.begin(), E = B.end(); I != E; ++I) { + const MachineInstr *MI = &*I; + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + unsigned R = MO.getReg(); + assert(MO.getSubReg() == 0 && "Unexpected subregister in definition"); + if (TargetRegisterInfo::isVirtualRegister(R)) + RO.insert(std::make_pair(R, Index++)); + } + } + } + } + // Since some virtual registers may have had their def and uses eliminated, + // they are no longer referenced in the code, and so they will not appear + // in the map. +} + + +void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB, + RegisterOrdering &RO) const { + // Create a vector of all virtual registers (collect them from the base + // ordering RB), and then sort it using the RegisterCell comparator. + BitValueOrdering BVO(RB); + RegisterCellLexCompare LexCmp(BVO, *CMS); + typedef std::vector<unsigned> SortableVectorType; + SortableVectorType VRs; + for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I) + VRs.push_back(I->first); + std::sort(VRs.begin(), VRs.end(), LexCmp); + // Transfer the results to the outgoing register ordering. 
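+ // The net effect is that RO maps each virtual register to its rank in
+ // the lexicographic ordering of the register cells.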
+ for (unsigned i = 0, n = VRs.size(); i < n; ++i)
+ RO.insert(std::make_pair(VRs[i], i));
+}
+
+
+inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const {
+ return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass;
+}
+
+
+bool HexagonGenInsert::isConstant(unsigned VR) const {
+ const BitTracker::RegisterCell &RC = CMS->lookup(VR);
+ uint16_t W = RC.width();
+ for (uint16_t i = 0; i < W; ++i) {
+ const BitTracker::BitValue &BV = RC[i];
+ if (BV.is(0) || BV.is(1))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+
+bool HexagonGenInsert::isSmallConstant(unsigned VR) const {
+ const BitTracker::RegisterCell &RC = CMS->lookup(VR);
+ uint16_t W = RC.width();
+ if (W > 64)
+ return false;
+ uint64_t V = 0, B = 1;
+ for (uint16_t i = 0; i < W; ++i) {
+ const BitTracker::BitValue &BV = RC[i];
+ if (BV.is(1))
+ V |= B;
+ else if (!BV.is(0))
+ return false;
+ B <<= 1;
+ }
+
+ // For 32-bit registers, consider: Rd = #s16.
+ if (W == 32)
+ return isInt<16>(V);
+
+ // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8).
+ return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V));
+}
+
+
+bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR,
+ unsigned InsR, uint16_t L, uint16_t S) const {
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstR);
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR);
+ const TargetRegisterClass *InsRC = MRI->getRegClass(InsR);
+ // Only integer (32-/64-bit) register classes are allowed.
+ if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC))
+ return false;
+ // The "source" register must be of the same class as DstR.
+ if (DstRC != SrcRC)
+ return false;
+ if (DstRC == InsRC)
+ return true;
+ // A 64-bit register can only be generated from other 64-bit registers.
+ if (DstRC == &Hexagon::DoubleRegsRegClass)
+ return false;
+ // Otherwise, the L and S cannot span a 32-bit word boundary.
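+ // (That is, the inserted bit range [S, S+L) must lie entirely within a
+ // single 32-bit half, which is what the check below enforces.)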
+ if (S < 32 && S+L > 32) + return false; + return true; +} + + +bool HexagonGenInsert::findSelfReference(unsigned VR) const { + const BitTracker::RegisterCell &RC = CMS->lookup(VR); + for (uint16_t i = 0, w = RC.width(); i < w; ++i) { + const BitTracker::BitValue &V = RC[i]; + if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR) + return true; + } + return false; +} + + +bool HexagonGenInsert::findNonSelfReference(unsigned VR) const { + BitTracker::RegisterCell RC = CMS->lookup(VR); + for (uint16_t i = 0, w = RC.width(); i < w; ++i) { + const BitTracker::BitValue &V = RC[i]; + if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR) + return true; + } + return false; +} + + +void HexagonGenInsert::getInstrDefs(const MachineInstr *MI, + RegisterSet &Defs) const { + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned R = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Defs.insert(R); + } +} + + +void HexagonGenInsert::getInstrUses(const MachineInstr *MI, + RegisterSet &Uses) const { + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned R = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Uses.insert(R); + } +} + + +unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB, + const MachineBasicBlock *ToB, const UnsignedMap &RPO, + PairMapType &M) const { + // Forward distance from the end of a block to the beginning of it does + // not make sense. This function should not be called with FromB == ToB. + assert(FromB != ToB); + + unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber(); + // If we have already computed it, return the cached result. + PairMapType::iterator F = M.find(std::make_pair(FromN, ToN)); + if (F != M.end()) + return F->second; + unsigned ToRPO = RPO.lookup(ToN); + + unsigned MaxD = 0; + typedef MachineBasicBlock::const_pred_iterator pred_iterator; + for (pred_iterator I = ToB->pred_begin(), E = ToB->pred_end(); I != E; ++I) { + const MachineBasicBlock *PB = *I; + // Skip back edges. Also, if FromB is a predecessor of ToB, the distance + // along that path will be 0, and we don't need to do any calculations + // on it. + if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO) + continue; + unsigned D = PB->size() + distance(FromB, PB, RPO, M); + if (D > MaxD) + MaxD = D; + } + + // Memoize the result for later lookup. 
+ M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD)); + return MaxD; +} + + +unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI, + MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, + PairMapType &M) const { + const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent(); + if (FB == TB) + return std::distance(FromI, ToI); + unsigned D1 = std::distance(TB->begin(), ToI); + unsigned D2 = distance(FB, TB, RPO, M); + unsigned D3 = std::distance(FromI, FB->end()); + return D1+D2+D3; +} + + +bool HexagonGenInsert::findRecordInsertForms(unsigned VR, + OrderedRegisterList &AVs) { + if (isDebug()) { + dbgs() << LLVM_FUNCTION_NAME << ": " << PrintReg(VR, HRI) + << " AVs: " << PrintORL(AVs, HRI) << "\n"; + } + if (AVs.size() == 0) + return false; + + typedef OrderedRegisterList::iterator iterator; + BitValueOrdering BVO(BaseOrd); + const BitTracker::RegisterCell &RC = CMS->lookup(VR); + uint16_t W = RC.width(); + + typedef std::pair<unsigned,uint16_t> RSRecord; // (reg,shift) + typedef std::vector<RSRecord> RSListType; + // Have a map, with key being the matching prefix length, and the value + // being the list of pairs (R,S), where R's prefix matches VR at S. + // (DenseMap<uint16_t,RSListType> fails to instantiate.) + typedef DenseMap<unsigned,RSListType> LRSMapType; + LRSMapType LM; + + // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S, + // and find matching prefixes from AVs with the rotated RC. Such a prefix + // would match a string of bits (of length L) in RC starting at S. + for (uint16_t S = 0; S < W; ++S) { + iterator B = AVs.begin(), E = AVs.end(); + // The registers in AVs are ordered according to the lexical order of + // the corresponding register cells. This means that the range of regis- + // ters in AVs that match a prefix of length L+1 will be contained in + // the range that matches a prefix of length L. This means that we can + // keep narrowing the search space as the prefix length goes up. This + // helps reduce the overall complexity of the search. + uint16_t L; + for (L = 0; L < W-S; ++L) { + // Compare against VR's bits starting at S, which emulates rotation + // of VR by S. + RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS); + iterator NewB = std::lower_bound(B, E, VR, RCB); + iterator NewE = std::upper_bound(NewB, E, VR, RCB); + // For the registers that are eliminated from the next range, L is + // the longest prefix matching VR at position S (their prefixes + // differ from VR at S+L). If L>0, record this information for later + // use. + if (L > 0) { + for (iterator I = B; I != NewB; ++I) + LM[L].push_back(std::make_pair(*I, S)); + for (iterator I = NewE; I != E; ++I) + LM[L].push_back(std::make_pair(*I, S)); + } + B = NewB, E = NewE; + if (B == E) + break; + } + // Record the final register range. If this range is non-empty, then + // L=W-S. + assert(B == E || L == W-S); + if (B != E) { + for (iterator I = B; I != E; ++I) + LM[L].push_back(std::make_pair(*I, S)); + // If B!=E, then we found a range of registers whose prefixes cover the + // rest of VR from position S. There is no need to further advance S. 
+ break; + } + } + + if (isDebug()) { + dbgs() << "Prefixes matching register " << PrintReg(VR, HRI) << "\n"; + for (LRSMapType::iterator I = LM.begin(), E = LM.end(); I != E; ++I) { + dbgs() << " L=" << I->first << ':'; + const RSListType &LL = I->second; + for (unsigned i = 0, n = LL.size(); i < n; ++i) + dbgs() << " (" << PrintReg(LL[i].first, HRI) << ",@" + << LL[i].second << ')'; + dbgs() << '\n'; + } + } + + + bool Recorded = false; + + for (iterator I = AVs.begin(), E = AVs.end(); I != E; ++I) { + unsigned SrcR = *I; + int FDi = -1, LDi = -1; // First/last different bit. + const BitTracker::RegisterCell &AC = CMS->lookup(SrcR); + uint16_t AW = AC.width(); + for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) { + if (RC[i] == AC[i]) + continue; + if (FDi == -1) + FDi = i; + LDi = i; + } + if (FDi == -1) + continue; // TODO (future): Record identical registers. + // Look for a register whose prefix could patch the range [FD..LD] + // where VR and SrcR differ. + uint16_t FD = FDi, LD = LDi; // Switch to unsigned type. + uint16_t MinL = LD-FD+1; + for (uint16_t L = MinL; L < W; ++L) { + LRSMapType::iterator F = LM.find(L); + if (F == LM.end()) + continue; + RSListType &LL = F->second; + for (unsigned i = 0, n = LL.size(); i < n; ++i) { + uint16_t S = LL[i].second; + // MinL is the minimum length of the prefix. Any length above MinL + // allows some flexibility as to where the prefix can start: + // given the extra length EL=L-MinL, the prefix must start between + // max(0,FD-EL) and FD. + if (S > FD) // Starts too late. + continue; + uint16_t EL = L-MinL; + uint16_t LowS = (EL < FD) ? FD-EL : 0; + if (S < LowS) // Starts too early. + continue; + unsigned InsR = LL[i].first; + if (!isValidInsertForm(VR, SrcR, InsR, L, S)) + continue; + if (isDebug()) { + dbgs() << PrintReg(VR, HRI) << " = insert(" << PrintReg(SrcR, HRI) + << ',' << PrintReg(InsR, HRI) << ",#" << L << ",#" + << S << ")\n"; + } + IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet()); + IFMap[VR].push_back(RR); + Recorded = true; + } + } + } + + return Recorded; +} + + +void HexagonGenInsert::collectInBlock(MachineBasicBlock *B, + OrderedRegisterList &AVs) { + if (isDebug()) + dbgs() << "visiting block BB#" << B->getNumber() << "\n"; + + // First, check if this block is reachable at all. If not, the bit tracker + // will not have any information about registers in it. + if (!CMS->BT.reached(B)) + return; + + bool DoConst = OptConst; + // Keep a separate set of registers defined in this block, so that we + // can remove them from the list of available registers once all DT + // successors have been processed. + RegisterSet BlockDefs, InsDefs; + for (MachineBasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) { + MachineInstr *MI = &*I; + InsDefs.clear(); + getInstrDefs(MI, InsDefs); + // Leave those alone. They are more transparent than "insert". + bool Skip = MI->isCopy() || MI->isRegSequence(); + + if (!Skip) { + // Visit all defined registers, and attempt to find the corresponding + // "insert" representations. + for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) { + // Do not collect registers that are known to be compile-time cons- + // tants, unless requested. + if (!DoConst && isConstant(VR)) + continue; + // If VR's cell contains a reference to VR, then VR cannot be defined + // via "insert". If VR is a constant that can be generated in a single + // instruction (without constant extenders), generating it via insert + // makes no sense. 
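+ // (A "small" constant is one that fits in #s16 for a 32-bit register,
+ // or in two #s8 halves for a 64-bit register; see isSmallConstant.)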
+ if (findSelfReference(VR) || isSmallConstant(VR)) + continue; + + findRecordInsertForms(VR, AVs); + } + } + + // Insert the defined registers into the list of available registers + // after they have been processed. + for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) + AVs.insert(VR); + BlockDefs.insert(InsDefs); + } + + MachineDomTreeNode *N = MDT->getNode(B); + typedef GraphTraits<MachineDomTreeNode*> GTN; + typedef GTN::ChildIteratorType ChildIter; + for (ChildIter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) { + MachineBasicBlock *SB = (*I)->getBlock(); + collectInBlock(SB, AVs); + } + + for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR)) + AVs.remove(VR); +} + + +void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF, + RegisterSet &RMs) const { + // For a given register VR and a insert form, find the registers that are + // used by the current definition of VR, and which would no longer be + // needed for it after the definition of VR is replaced with the insert + // form. These are the registers that could potentially become dead. + RegisterSet Regs[2]; + + unsigned S = 0; // Register set selector. + Regs[S].insert(VR); + + while (!Regs[S].empty()) { + // Breadth-first search. + unsigned OtherS = 1-S; + Regs[OtherS].clear(); + for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) { + Regs[S].remove(R); + if (R == IF.SrcR || R == IF.InsR) + continue; + // Check if a given register has bits that are references to any other + // registers. This is to detect situations where the instruction that + // defines register R takes register Q as an operand, but R itself does + // not contain any bits from Q. Loads are examples of how this could + // happen: + // R = load Q + // In this case (assuming we do not have any knowledge about the loaded + // value), we must not treat R as a "conveyance" of the bits from Q. + // (The information in BT about R's bits would have them as constants, + // in case of zero-extending loads, or refs to R.) + if (!findNonSelfReference(R)) + continue; + RMs.insert(R); + const MachineInstr *DefI = MRI->getVRegDef(R); + assert(DefI); + // Do not iterate past PHI nodes to avoid infinite loops. This can + // make the final set a bit less accurate, but the removable register + // sets are an approximation anyway. + if (DefI->isPHI()) + continue; + getInstrUses(DefI, Regs[OtherS]); + } + S = OtherS; + } + // The register VR is added to the list as a side-effect of the algorithm, + // but it is not "potentially removable". A potentially removable register + // is one that may become unused (dead) after conversion to the insert form + // IF, and obviously VR (or its replacement) will not become dead by apply- + // ing IF. + RMs.remove(VR); +} + + +void HexagonGenInsert::computeRemovableRegisters() { + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + IFListType &LL = I->second; + for (unsigned i = 0, n = LL.size(); i < n; ++i) + findRemovableRegisters(I->first, LL[i].first, LL[i].second); + } +} + + +void HexagonGenInsert::pruneEmptyLists() { + // Remove all entries from the map, where the register has no insert forms + // associated with it. 
+ typedef SmallVector<IFMapType::iterator,16> IterListType; + IterListType Prune; + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + if (I->second.size() == 0) + Prune.push_back(I); + } + for (unsigned i = 0, n = Prune.size(); i < n; ++i) + IFMap.erase(Prune[i]); +} + + +void HexagonGenInsert::pruneCoveredSets(unsigned VR) { + IFMapType::iterator F = IFMap.find(VR); + assert(F != IFMap.end()); + IFListType &LL = F->second; + + // First, examine the IF candidates for register VR whose removable-regis- + // ter sets are empty. This means that a given candidate will not help eli- + // minate any registers, but since "insert" is not a constant-extendable + // instruction, using such a candidate may reduce code size if the defini- + // tion of VR is constant-extended. + // If there exists a candidate with a non-empty set, the ones with empty + // sets will not be used and can be removed. + MachineInstr *DefVR = MRI->getVRegDef(VR); + bool DefEx = HII->isConstExtended(DefVR); + bool HasNE = false; + for (unsigned i = 0, n = LL.size(); i < n; ++i) { + if (LL[i].second.empty()) + continue; + HasNE = true; + break; + } + if (!DefEx || HasNE) { + // The definition of VR is not constant-extended, or there is a candidate + // with a non-empty set. Remove all candidates with empty sets. + auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool { + return IR.second.empty(); + }; + auto End = std::remove_if(LL.begin(), LL.end(), IsEmpty); + if (End != LL.end()) + LL.erase(End, LL.end()); + } else { + // The definition of VR is constant-extended, and all candidates have + // empty removable-register sets. Pick the maximum candidate, and remove + // all others. The "maximum" does not have any special meaning here, it + // is only so that the candidate that will remain on the list is selec- + // ted deterministically. + IFRecord MaxIF = LL[0].first; + for (unsigned i = 1, n = LL.size(); i < n; ++i) { + // If LL[MaxI] < LL[i], then MaxI = i. + const IFRecord &IF = LL[i].first; + unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR]; + unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR]; + if (M0 > R0) + continue; + if (M0 == R0) { + if (M1 > R1) + continue; + if (M1 == R1) { + if (MaxIF.Wdh > IF.Wdh) + continue; + if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off) + continue; + } + } + // MaxIF < IF. + MaxIF = IF; + } + // Remove everything except the maximum candidate. All register sets + // are empty, so no need to preserve anything. + LL.clear(); + LL.push_back(std::make_pair(MaxIF, RegisterSet())); + } + + // Now, remove those whose sets of potentially removable registers are + // contained in another IF candidate for VR. For example, given these + // candidates for vreg45, + // %vreg45: + // (%vreg44,%vreg41,#9,#8), { %vreg42 } + // (%vreg43,%vreg41,#9,#8), { %vreg42 %vreg44 } + // remove the first one, since it is contained in the second one. + for (unsigned i = 0, n = LL.size(); i < n; ) { + const RegisterSet &RMi = LL[i].second; + unsigned j = 0; + while (j < n) { + if (j != i && LL[j].second.includes(RMi)) + break; + j++; + } + if (j == n) { // RMi not contained in anything else. 
+ i++; + continue; + } + LL.erase(LL.begin()+i); + n = LL.size(); + } +} + + +void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, + PairMapType &M) { + IFMapType::iterator F = IFMap.find(VR); + assert(F != IFMap.end()); + IFListType &LL = F->second; + unsigned Cutoff = VRegDistCutoff; + const MachineInstr *DefV = MRI->getVRegDef(VR); + + for (unsigned i = LL.size(); i > 0; --i) { + unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR; + const MachineInstr *DefS = MRI->getVRegDef(SR); + const MachineInstr *DefI = MRI->getVRegDef(IR); + unsigned DSV = distance(DefS, DefV, RPO, M); + if (DSV < Cutoff) { + unsigned DIV = distance(DefI, DefV, RPO, M); + if (DIV < Cutoff) + continue; + } + LL.erase(LL.begin()+(i-1)); + } +} + + +void HexagonGenInsert::pruneRegCopies(unsigned VR) { + IFMapType::iterator F = IFMap.find(VR); + assert(F != IFMap.end()); + IFListType &LL = F->second; + + auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool { + return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32); + }; + auto End = std::remove_if(LL.begin(), LL.end(), IsCopy); + if (End != LL.end()) + LL.erase(End, LL.end()); +} + + +void HexagonGenInsert::pruneCandidates() { + // Remove candidates that are not beneficial, regardless of the final + // selection method. + // First, remove candidates whose potentially removable set is a subset + // of another candidate's set. + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) + pruneCoveredSets(I->first); + + UnsignedMap RPO; + typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType; + RPOTType RPOT(MFN); + unsigned RPON = 0; + for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) + RPO[(*I)->getNumber()] = RPON++; + + PairMapType Memo; // Memoization map for distance calculation. + // Remove candidates that would use registers defined too far away. + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) + pruneUsesTooFar(I->first, RPO, Memo); + + pruneEmptyLists(); + + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) + pruneRegCopies(I->first); +} + + +namespace { + // Class for comparing IF candidates for registers that have multiple of + // them. The smaller the candidate, according to this ordering, the better. + // First, compare the number of zeros in the associated potentially remova- + // ble register sets. "Zero" indicates that the register is very likely to + // become dead after this transformation. + // Second, compare "averages", i.e. use-count per size. The lower wins. + // After that, it does not really matter which one is smaller. Resolve + // the tie in some deterministic way. + struct IFOrdering { + IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO) + : UseC(UC), BaseOrd(BO) {} + bool operator() (const IFRecordWithRegSet &A, + const IFRecordWithRegSet &B) const; + private: + void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, + unsigned &Sum) const; + const UnsignedMap &UseC; + const RegisterOrdering &BaseOrd; + }; +} + + +bool IFOrdering::operator() (const IFRecordWithRegSet &A, + const IFRecordWithRegSet &B) const { + unsigned SizeA = 0, ZeroA = 0, SumA = 0; + unsigned SizeB = 0, ZeroB = 0, SumB = 0; + stats(A.second, SizeA, ZeroA, SumA); + stats(B.second, SizeB, ZeroB, SumB); + + // We will pick the minimum element. The more zeros, the better. + if (ZeroA != ZeroB) + return ZeroA > ZeroB; + // Compare SumA/SizeA with SumB/SizeB, lower is better. 
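+ // Cross-multiply instead of dividing, so the comparison is exact and is
+ // not affected by integer-division truncation:
+ //   SumA/SizeA < SumB/SizeB  <=>  SumA*SizeB < SumB*SizeA  (for sizes > 0).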
+ uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA; + if (AvgA != AvgB) + return AvgA < AvgB; + + // The sets compare identical so far. Resort to comparing the IF records. + // The actual values don't matter, this is only for determinism. + unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR]; + if (OSA != OSB) + return OSA < OSB; + unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR]; + if (OIA != OIB) + return OIA < OIB; + if (A.first.Wdh != B.first.Wdh) + return A.first.Wdh < B.first.Wdh; + return A.first.Off < B.first.Off; +} + + +void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, + unsigned &Sum) const { + for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) { + UnsignedMap::const_iterator F = UseC.find(R); + assert(F != UseC.end()); + unsigned UC = F->second; + if (UC == 0) + Zero++; + Sum += UC; + Size++; + } +} + + +void HexagonGenInsert::selectCandidates() { + // Some registers may have multiple valid candidates. Pick the best one + // (or decide not to use any). + + // Compute the "removability" measure of R: + // For each potentially removable register R, record the number of regis- + // ters with IF candidates, where R appears in at least one set. + RegisterSet AllRMs; + UnsignedMap UseC, RemC; + IFMapType::iterator End = IFMap.end(); + + for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { + const IFListType &LL = I->second; + RegisterSet TT; + for (unsigned i = 0, n = LL.size(); i < n; ++i) + TT.insert(LL[i].second); + for (unsigned R = TT.find_first(); R; R = TT.find_next(R)) + RemC[R]++; + AllRMs.insert(TT); + } + + for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) { + typedef MachineRegisterInfo::use_nodbg_iterator use_iterator; + typedef SmallSet<const MachineInstr*,16> InstrSet; + InstrSet UIs; + // Count as the number of instructions in which R is used, not the + // number of operands. + use_iterator E = MRI->use_nodbg_end(); + for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I) + UIs.insert(I->getParent()); + unsigned C = UIs.size(); + // Calculate a measure, which is the number of instructions using R, + // minus the "removability" count computed earlier. + unsigned D = RemC[R]; + UseC[R] = (C > D) ? C-D : 0; // doz + } + + + bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0; + if (!SelectAll0 && !SelectHas0) + SelectAll0 = true; + + // The smaller the number UseC for a given register R, the "less used" + // R is aside from the opportunities for removal offered by generating + // "insert" instructions. + // Iterate over the IF map, and for those registers that have multiple + // candidates, pick the minimum one according to IFOrdering. + IFOrdering IFO(UseC, BaseOrd); + for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { + IFListType &LL = I->second; + if (LL.empty()) + continue; + // Get the minimum element, remember it and clear the list. If the + // element found is adequate, we will put it back on the list, other- + // wise the list will remain empty, and the entry for this register + // will be removed (i.e. this register will not be replaced by insert). + IFListType::iterator MinI = std::min_element(LL.begin(), LL.end(), IFO); + assert(MinI != LL.end()); + IFRecordWithRegSet M = *MinI; + LL.clear(); + + // We want to make sure that this replacement will have a chance to be + // beneficial, and that means that we want to have indication that some + // register will be removed. 
The most likely registers to be eliminated + // are the use operands in the definition of I->first. Accept/reject a + // candidate based on how many of its uses it can potentially eliminate. + + RegisterSet Us; + const MachineInstr *DefI = MRI->getVRegDef(I->first); + getInstrUses(DefI, Us); + bool Accept = false; + + if (SelectAll0) { + bool All0 = true; + for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { + if (UseC[R] == 0) + continue; + All0 = false; + break; + } + Accept = All0; + } else if (SelectHas0) { + bool Has0 = false; + for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { + if (UseC[R] != 0) + continue; + Has0 = true; + break; + } + Accept = Has0; + } + if (Accept) + LL.push_back(M); + } + + // Remove candidates that add uses of removable registers, unless the + // removable registers are among replacement candidates. + // Recompute the removable registers, since some candidates may have + // been eliminated. + AllRMs.clear(); + for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { + const IFListType &LL = I->second; + if (LL.size() > 0) + AllRMs.insert(LL[0].second); + } + for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { + IFListType &LL = I->second; + if (LL.size() == 0) + continue; + unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR; + if (AllRMs[SR] || AllRMs[IR]) + LL.clear(); + } + + pruneEmptyLists(); +} + + +bool HexagonGenInsert::generateInserts() { + // Create a new register for each one from IFMap, and store them in the + // map. + UnsignedMap RegMap; + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + unsigned VR = I->first; + const TargetRegisterClass *RC = MRI->getRegClass(VR); + unsigned NewVR = MRI->createVirtualRegister(RC); + RegMap[VR] = NewVR; + } + + // We can generate the "insert" instructions using potentially stale re- + // gisters: SrcR and InsR for a given VR may be among other registers that + // are also replaced. This is fine, we will do the mass "rauw" a bit later. + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + MachineInstr *MI = MRI->getVRegDef(I->first); + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned NewR = RegMap[I->first]; + bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass; + const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert) + : HII->get(Hexagon::S2_insertp); + IFRecord IF = I->second[0].first; + unsigned Wdh = IF.Wdh, Off = IF.Off; + unsigned InsS = 0; + if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) { + InsS = Hexagon::subreg_loreg; + if (Off >= 32) { + InsS = Hexagon::subreg_hireg; + Off -= 32; + } + } + // Advance to the proper location for inserting instructions. This could + // be B.end(). 
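+ // If MI is a PHI, new instructions cannot be placed among the PHI nodes,
+ // so insert at the first non-PHI instruction instead.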
+ MachineBasicBlock::iterator At = MI; + if (MI->isPHI()) + At = B.getFirstNonPHI(); + + BuildMI(B, At, DL, D, NewR) + .addReg(IF.SrcR) + .addReg(IF.InsR, 0, InsS) + .addImm(Wdh) + .addImm(Off); + + MRI->clearKillFlags(IF.SrcR); + MRI->clearKillFlags(IF.InsR); + } + + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + MachineInstr *DefI = MRI->getVRegDef(I->first); + MRI->replaceRegWith(I->first, RegMap[I->first]); + DefI->eraseFromParent(); + } + + return true; +} + + +bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { + bool Changed = false; + typedef GraphTraits<MachineDomTreeNode*> GTN; + for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) + Changed |= removeDeadCode(*I); + + MachineBasicBlock *B = N->getBlock(); + std::vector<MachineInstr*> Instrs; + for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) + Instrs.push_back(&*I); + + for (auto I = Instrs.begin(), E = Instrs.end(); I != E; ++I) { + MachineInstr *MI = *I; + unsigned Opc = MI->getOpcode(); + // Do not touch lifetime markers. This is why the target-independent DCE + // cannot be used. + if (Opc == TargetOpcode::LIFETIME_START || + Opc == TargetOpcode::LIFETIME_END) + continue; + bool Store = false; + if (MI->isInlineAsm() || !MI->isSafeToMove(nullptr, Store)) + continue; + + bool AllDead = true; + SmallVector<unsigned,2> Regs; + for (ConstMIOperands Op(MI); Op.isValid(); ++Op) { + if (!Op->isReg() || !Op->isDef()) + continue; + unsigned R = Op->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R) || + !MRI->use_nodbg_empty(R)) { + AllDead = false; + break; + } + Regs.push_back(R); + } + if (!AllDead) + continue; + + B->erase(MI); + for (unsigned I = 0, N = Regs.size(); I != N; ++I) + MRI->markUsesInDebugValueAsUndef(Regs[I]); + Changed = true; + } + + return Changed; +} + + +bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { + bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail; + bool Changed = false; + TimerGroup __G("hexinsert"); + NamedRegionTimer __T("hexinsert", Timing && !TimingDetail); + + // Sanity check: one, but not both. + assert(!OptSelectAll0 || !OptSelectHas0); + + IFMap.clear(); + BaseOrd.clear(); + CellOrd.clear(); + + const auto &ST = MF.getSubtarget<HexagonSubtarget>(); + HII = ST.getInstrInfo(); + HRI = ST.getRegisterInfo(); + MFN = &MF; + MRI = &MF.getRegInfo(); + MDT = &getAnalysis<MachineDominatorTree>(); + + // Clean up before any further processing, so that dead code does not + // get used in a newly generated "insert" instruction. Have a custom + // version of DCE that preserves lifetime markers. Without it, merging + // of stack objects can fail to recognize and merge disjoint objects + // leading to unnecessary stack growth. + Changed = removeDeadCode(MDT->getRootNode()); + + const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); + BitTracker BTLoc(HE, MF); + BTLoc.trace(isDebug()); + BTLoc.run(); + CellMapShadow MS(BTLoc); + CMS = &MS; + + buildOrderingMF(BaseOrd); + buildOrderingBT(BaseOrd, CellOrd); + + if (isDebug()) { + dbgs() << "Cell ordering:\n"; + for (RegisterOrdering::iterator I = CellOrd.begin(), E = CellOrd.end(); + I != E; ++I) { + unsigned VR = I->first, Pos = I->second; + dbgs() << PrintReg(VR, HRI) << " -> " << Pos << "\n"; + } + } + + // Collect candidates for conversion into the insert forms. 
+ MachineBasicBlock *RootB = MDT->getRoot(); + OrderedRegisterList AvailR(CellOrd); + + { + NamedRegionTimer _T("collection", "hexinsert", TimingDetail); + collectInBlock(RootB, AvailR); + // Complete the information gathered in IFMap. + computeRemovableRegisters(); + } + + if (isDebug()) { + dbgs() << "Candidates after collection:\n"; + dump_map(); + } + + if (IFMap.empty()) + return Changed; + + { + NamedRegionTimer _T("pruning", "hexinsert", TimingDetail); + pruneCandidates(); + } + + if (isDebug()) { + dbgs() << "Candidates after pruning:\n"; + dump_map(); + } + + if (IFMap.empty()) + return Changed; + + { + NamedRegionTimer _T("selection", "hexinsert", TimingDetail); + selectCandidates(); + } + + if (isDebug()) { + dbgs() << "Candidates after selection:\n"; + dump_map(); + } + + // Filter out vregs beyond the cutoff. + if (VRegIndexCutoff.getPosition()) { + unsigned Cutoff = VRegIndexCutoff; + typedef SmallVector<IFMapType::iterator,16> IterListType; + IterListType Out; + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + unsigned Idx = TargetRegisterInfo::virtReg2Index(I->first); + if (Idx >= Cutoff) + Out.push_back(I); + } + for (unsigned i = 0, n = Out.size(); i < n; ++i) + IFMap.erase(Out[i]); + } + if (IFMap.empty()) + return Changed; + + { + NamedRegionTimer _T("generation", "hexinsert", TimingDetail); + generateInserts(); + } + + return true; +} + + +FunctionPass *llvm::createHexagonGenInsert() { + return new HexagonGenInsert(); +} + + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert", + "Hexagon generate \"insert\" instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(HexagonGenInsert, "hexinsert", + "Hexagon generate \"insert\" instructions", false, false) diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp new file mode 100644 index 0000000..c059d56 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp @@ -0,0 +1,319 @@ +//===--- HexagonGenMux.cpp ------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// During instruction selection, MUX instructions are generated for +// conditional assignments. Since such assignments often present an +// opportunity to predicate instructions, HexagonExpandCondsets +// expands MUXes into pairs of conditional transfers, and then proceeds +// with predication of the producers/consumers of the registers involved. +// This happens after exiting from the SSA form, but before the machine +// instruction scheduler. After the scheduler and after the register +// allocation there can be cases of pairs of conditional transfers +// resulting from a MUX where neither of them was further predicated. If +// these transfers are now placed far enough from the instruction defining +// the predicate register, they cannot use the .new form. In such cases it +// is better to collapse them back to a single MUX instruction. 
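+// For example, a leftover pair of conditional transfers such as
+//     if (p0) r5 = r3
+//     if (!p0) r5 = r4
+// that ended up too far from the definition of p0 to use the .new form
+// can be rewritten as a single instruction:
+//     r5 = mux(p0, r3, r4)
+// (Register names above are illustrative only.)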
+ +#define DEBUG_TYPE "hexmux" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "HexagonTargetMachine.h" + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonGenMux(); + void initializeHexagonGenMuxPass(PassRegistry& Registry); +} + +namespace { + class HexagonGenMux : public MachineFunctionPass { + public: + static char ID; + HexagonGenMux() : MachineFunctionPass(ID), HII(0), HRI(0) { + initializeHexagonGenMuxPass(*PassRegistry::getPassRegistry()); + } + const char *getPassName() const override { + return "Hexagon generate mux instructions"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + + struct CondsetInfo { + unsigned PredR; + unsigned TrueX, FalseX; + CondsetInfo() : PredR(0), TrueX(UINT_MAX), FalseX(UINT_MAX) {} + }; + struct DefUseInfo { + BitVector Defs, Uses; + DefUseInfo() : Defs(), Uses() {} + DefUseInfo(const BitVector &D, const BitVector &U) : Defs(D), Uses(U) {} + }; + struct MuxInfo { + MachineBasicBlock::iterator At; + unsigned DefR, PredR; + MachineOperand *SrcT, *SrcF; + MachineInstr *Def1, *Def2; + MuxInfo(MachineBasicBlock::iterator It, unsigned DR, unsigned PR, + MachineOperand *TOp, MachineOperand *FOp, + MachineInstr *D1, MachineInstr *D2) + : At(It), DefR(DR), PredR(PR), SrcT(TOp), SrcF(FOp), Def1(D1), + Def2(D2) {} + }; + typedef DenseMap<MachineInstr*,unsigned> InstrIndexMap; + typedef DenseMap<unsigned,DefUseInfo> DefUseInfoMap; + typedef SmallVector<MuxInfo,4> MuxInfoList; + + bool isRegPair(unsigned Reg) const { + return Hexagon::DoubleRegsRegClass.contains(Reg); + } + void getSubRegs(unsigned Reg, BitVector &SRs) const; + void expandReg(unsigned Reg, BitVector &Set) const; + void getDefsUses(const MachineInstr *MI, BitVector &Defs, + BitVector &Uses) const; + void buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X, + DefUseInfoMap &DUM); + bool isCondTransfer(unsigned Opc) const; + unsigned getMuxOpcode(const MachineOperand &Src1, + const MachineOperand &Src2) const; + bool genMuxInBlock(MachineBasicBlock &B); + }; + + char HexagonGenMux::ID = 0; +} + +INITIALIZE_PASS(HexagonGenMux, "hexagon-mux", + "Hexagon generate mux instructions", false, false) + + +void HexagonGenMux::getSubRegs(unsigned Reg, BitVector &SRs) const { + for (MCSubRegIterator I(Reg, HRI); I.isValid(); ++I) + SRs[*I] = true; +} + + +void HexagonGenMux::expandReg(unsigned Reg, BitVector &Set) const { + if (isRegPair(Reg)) + getSubRegs(Reg, Set); + else + Set[Reg] = true; +} + + +void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs, + BitVector &Uses) const { + // First, get the implicit defs and uses for this instruction. + unsigned Opc = MI->getOpcode(); + const MCInstrDesc &D = HII->get(Opc); + if (const MCPhysReg *R = D.ImplicitDefs) + while (*R) + expandReg(*R++, Defs); + if (const MCPhysReg *R = D.ImplicitUses) + while (*R) + expandReg(*R++, Uses); + + // Look over all operands, and collect explicit defs and uses. + for (ConstMIOperands Mo(MI); Mo.isValid(); ++Mo) { + if (!Mo->isReg() || Mo->isImplicit()) + continue; + unsigned R = Mo->getReg(); + BitVector &Set = Mo->isDef() ? 
Defs : Uses; + expandReg(R, Set); + } +} + + +void HexagonGenMux::buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X, + DefUseInfoMap &DUM) { + unsigned Index = 0; + unsigned NR = HRI->getNumRegs(); + BitVector Defs(NR), Uses(NR); + + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + I2X.insert(std::make_pair(MI, Index)); + Defs.reset(); + Uses.reset(); + getDefsUses(MI, Defs, Uses); + DUM.insert(std::make_pair(Index, DefUseInfo(Defs, Uses))); + Index++; + } +} + + +bool HexagonGenMux::isCondTransfer(unsigned Opc) const { + switch (Opc) { + case Hexagon::A2_tfrt: + case Hexagon::A2_tfrf: + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmoveif: + return true; + } + return false; +} + + +unsigned HexagonGenMux::getMuxOpcode(const MachineOperand &Src1, + const MachineOperand &Src2) const { + bool IsReg1 = Src1.isReg(), IsReg2 = Src2.isReg(); + if (IsReg1) + return IsReg2 ? Hexagon::C2_mux : Hexagon::C2_muxir; + if (IsReg2) + return Hexagon::C2_muxri; + + // Neither is a register. The first source is extendable, but the second + // is not (s8). + if (Src2.isImm() && isInt<8>(Src2.getImm())) + return Hexagon::C2_muxii; + + return 0; +} + + +bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { + bool Changed = false; + InstrIndexMap I2X; + DefUseInfoMap DUM; + buildMaps(B, I2X, DUM); + + typedef DenseMap<unsigned,CondsetInfo> CondsetMap; + CondsetMap CM; + MuxInfoList ML; + + MachineBasicBlock::iterator NextI, End = B.end(); + for (MachineBasicBlock::iterator I = B.begin(); I != End; I = NextI) { + MachineInstr *MI = &*I; + NextI = std::next(I); + unsigned Opc = MI->getOpcode(); + if (!isCondTransfer(Opc)) + continue; + unsigned DR = MI->getOperand(0).getReg(); + if (isRegPair(DR)) + continue; + + unsigned PR = MI->getOperand(1).getReg(); + unsigned Idx = I2X.lookup(MI); + CondsetMap::iterator F = CM.find(DR); + bool IfTrue = HII->isPredicatedTrue(Opc); + + // If there is no record of a conditional transfer for this register, + // or the predicate register differs, create a new record for it. + if (F != CM.end() && F->second.PredR != PR) { + CM.erase(F); + F = CM.end(); + } + if (F == CM.end()) { + auto It = CM.insert(std::make_pair(DR, CondsetInfo())); + F = It.first; + F->second.PredR = PR; + } + CondsetInfo &CI = F->second; + if (IfTrue) + CI.TrueX = Idx; + else + CI.FalseX = Idx; + if (CI.TrueX == UINT_MAX || CI.FalseX == UINT_MAX) + continue; + + // There is now a complete definition of DR, i.e. we have the predicate + // register, the definition if-true, and definition if-false. + + // First, check if both definitions are far enough from the definition + // of the predicate register. + unsigned MinX = std::min(CI.TrueX, CI.FalseX); + unsigned MaxX = std::max(CI.TrueX, CI.FalseX); + unsigned SearchX = (MaxX > 4) ? MaxX-4 : 0; + bool NearDef = false; + for (unsigned X = SearchX; X < MaxX; ++X) { + const DefUseInfo &DU = DUM.lookup(X); + if (!DU.Defs[PR]) + continue; + NearDef = true; + break; + } + if (NearDef) + continue; + + // The predicate register is not defined in the last few instructions. + // Check if the conversion to MUX is possible (either "up", i.e. at the + // place of the earlier partial definition, or "down", where the later + // definition is located). Examine all defs and uses between these two + // definitions. + // SR1, SR2 - source registers from the first and the second definition. 
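+ // Schematically, with Def1 at MinX and Def2 at MaxX:
+ //     DR = SR1 (predicated)   <- "up" would place the mux here
+ //     ...                     <- no def of PR or DR, no use of DR;
+ //                                a def of SR1 blocks "down", a def of
+ //                                SR2 blocks "up"
+ //     DR = SR2 (predicated)   <- "down" would place the mux here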
+ MachineBasicBlock::iterator It1 = B.begin(), It2 = B.begin(); + std::advance(It1, MinX); + std::advance(It2, MaxX); + MachineInstr *Def1 = It1, *Def2 = It2; + MachineOperand *Src1 = &Def1->getOperand(2), *Src2 = &Def2->getOperand(2); + unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0; + unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0; + bool Failure = false, CanUp = true, CanDown = true; + for (unsigned X = MinX+1; X < MaxX; X++) { + const DefUseInfo &DU = DUM.lookup(X); + if (DU.Defs[PR] || DU.Defs[DR] || DU.Uses[DR]) { + Failure = true; + break; + } + if (CanDown && DU.Defs[SR1]) + CanDown = false; + if (CanUp && DU.Defs[SR2]) + CanUp = false; + } + if (Failure || (!CanUp && !CanDown)) + continue; + + MachineOperand *SrcT = (MinX == CI.TrueX) ? Src1 : Src2; + MachineOperand *SrcF = (MinX == CI.FalseX) ? Src1 : Src2; + // Prefer "down", since this will move the MUX farther away from the + // predicate definition. + MachineBasicBlock::iterator At = CanDown ? Def2 : Def1; + ML.push_back(MuxInfo(At, DR, PR, SrcT, SrcF, Def1, Def2)); + } + + for (unsigned I = 0, N = ML.size(); I < N; ++I) { + MuxInfo &MX = ML[I]; + MachineBasicBlock &B = *MX.At->getParent(); + DebugLoc DL = MX.At->getDebugLoc(); + unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF); + if (!MxOpc) + continue; + BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR) + .addReg(MX.PredR) + .addOperand(*MX.SrcT) + .addOperand(*MX.SrcF); + B.erase(MX.Def1); + B.erase(MX.Def2); + Changed = true; + } + + return Changed; +} + +bool HexagonGenMux::runOnMachineFunction(MachineFunction &MF) { + HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + bool Changed = false; + for (auto &I : MF) + Changed |= genMuxInBlock(I); + return Changed; +} + +FunctionPass *llvm::createHexagonGenMux() { + return new HexagonGenMux(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp new file mode 100644 index 0000000..d9675b5 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -0,0 +1,525 @@ +//===--- HexagonGenPredicate.cpp ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "gen-pred" + +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "HexagonTargetMachine.h" + +#include <functional> +#include <queue> +#include <set> +#include <vector> + +using namespace llvm; + +namespace llvm { + void initializeHexagonGenPredicatePass(PassRegistry& Registry); + FunctionPass *createHexagonGenPredicate(); +} + +namespace { + struct Register { + unsigned R, S; + Register(unsigned r = 0, unsigned s = 0) : R(r), S(s) {} + Register(const MachineOperand &MO) : R(MO.getReg()), S(MO.getSubReg()) {} + bool operator== (const Register &Reg) const { + return R == Reg.R && S == Reg.S; + } + bool operator< (const Register &Reg) const { + return R < Reg.R || (R == Reg.R && S < Reg.S); + } + }; + struct PrintRegister { + PrintRegister(Register R, const TargetRegisterInfo &I) : Reg(R), TRI(I) {} + friend raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR); + private: + Register Reg; + const TargetRegisterInfo &TRI; + }; + raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) + LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) { + return OS << PrintReg(PR.Reg.R, &PR.TRI, PR.Reg.S); + } + + class HexagonGenPredicate : public MachineFunctionPass { + public: + static char ID; + HexagonGenPredicate() : MachineFunctionPass(ID), TII(0), TRI(0), MRI(0) { + initializeHexagonGenPredicatePass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const { + return "Hexagon generate predicate operations"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + typedef SetVector<MachineInstr*> VectOfInst; + typedef std::set<Register> SetOfReg; + typedef std::map<Register,Register> RegToRegMap; + + const HexagonInstrInfo *TII; + const HexagonRegisterInfo *TRI; + MachineRegisterInfo *MRI; + SetOfReg PredGPRs; + VectOfInst PUsers; + RegToRegMap G2P; + + bool isPredReg(unsigned R); + void collectPredicateGPR(MachineFunction &MF); + void processPredicateGPR(const Register &Reg); + unsigned getPredForm(unsigned Opc); + bool isConvertibleToPredForm(const MachineInstr *MI); + bool isScalarCmp(unsigned Opc); + bool isScalarPred(Register PredReg); + Register getPredRegFor(const Register &Reg); + bool convertToPredForm(MachineInstr *MI); + bool eliminatePredCopies(MachineFunction &MF); + }; + + char HexagonGenPredicate::ID = 0; +} + +INITIALIZE_PASS_BEGIN(HexagonGenPredicate, "hexagon-gen-pred", + "Hexagon generate predicate operations", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(HexagonGenPredicate, "hexagon-gen-pred", + "Hexagon generate predicate operations", false, false) + +bool HexagonGenPredicate::isPredReg(unsigned R) { + if (!TargetRegisterInfo::isVirtualRegister(R)) + return false; + const TargetRegisterClass *RC = 
MRI->getRegClass(R); + return RC == &Hexagon::PredRegsRegClass; +} + + +unsigned HexagonGenPredicate::getPredForm(unsigned Opc) { + using namespace Hexagon; + + switch (Opc) { + case A2_and: + case A2_andp: + return C2_and; + case A4_andn: + case A4_andnp: + return C2_andn; + case M4_and_and: + return C4_and_and; + case M4_and_andn: + return C4_and_andn; + case M4_and_or: + return C4_and_or; + + case A2_or: + case A2_orp: + return C2_or; + case A4_orn: + case A4_ornp: + return C2_orn; + case M4_or_and: + return C4_or_and; + case M4_or_andn: + return C4_or_andn; + case M4_or_or: + return C4_or_or; + + case A2_xor: + case A2_xorp: + return C2_xor; + + case C2_tfrrp: + return COPY; + } + // The opcode corresponding to 0 is TargetOpcode::PHI. We can use 0 here + // to denote "none", but we need to make sure that none of the valid opcodes + // that we return will ever be 0. + assert(PHI == 0 && "Use different value for <none>"); + return 0; +} + + +bool HexagonGenPredicate::isConvertibleToPredForm(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + if (getPredForm(Opc) != 0) + return true; + + // Comparisons against 0 are also convertible. This does not apply to + // A4_rcmpeqi or A4_rcmpneqi, since they produce values 0 or 1, which + // may not match the value that the predicate register would have if + // it was converted to a predicate form. + switch (Opc) { + case Hexagon::C2_cmpeqi: + case Hexagon::C4_cmpneqi: + if (MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) + return true; + break; + } + return false; +} + + +void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) { + for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) { + MachineBasicBlock &B = *A; + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::C2_tfrpr: + case TargetOpcode::COPY: + if (isPredReg(MI->getOperand(1).getReg())) { + Register RD = MI->getOperand(0); + if (TargetRegisterInfo::isVirtualRegister(RD.R)) + PredGPRs.insert(RD); + } + break; + } + } + } +} + + +void HexagonGenPredicate::processPredicateGPR(const Register &Reg) { + DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " + << PrintReg(Reg.R, TRI, Reg.S) << "\n"); + typedef MachineRegisterInfo::use_iterator use_iterator; + use_iterator I = MRI->use_begin(Reg.R), E = MRI->use_end(); + if (I == E) { + DEBUG(dbgs() << "Dead reg: " << PrintReg(Reg.R, TRI, Reg.S) << '\n'); + MachineInstr *DefI = MRI->getVRegDef(Reg.R); + DefI->eraseFromParent(); + return; + } + + for (; I != E; ++I) { + MachineInstr *UseI = I->getParent(); + if (isConvertibleToPredForm(UseI)) + PUsers.insert(UseI); + } +} + + +Register HexagonGenPredicate::getPredRegFor(const Register &Reg) { + // Create a predicate register for a given Reg. The newly created register + // will have its value copied from Reg, so that it can be later used as + // an operand in other instructions. 
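+ // If Reg was defined by a transfer out of a predicate (C2_tfrpr or a
+ // COPY from a predicate register), that predicate register is reused.
+ // Otherwise a new predicate virtual register is created and a COPY from
+ // Reg into it is placed right after Reg's (convertible) definition.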
+ assert(TargetRegisterInfo::isVirtualRegister(Reg.R)); + RegToRegMap::iterator F = G2P.find(Reg); + if (F != G2P.end()) + return F->second; + + DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " << PrintRegister(Reg, *TRI)); + MachineInstr *DefI = MRI->getVRegDef(Reg.R); + assert(DefI); + unsigned Opc = DefI->getOpcode(); + if (Opc == Hexagon::C2_tfrpr || Opc == TargetOpcode::COPY) { + assert(DefI->getOperand(0).isDef() && DefI->getOperand(1).isUse()); + Register PR = DefI->getOperand(1); + G2P.insert(std::make_pair(Reg, PR)); + DEBUG(dbgs() << " -> " << PrintRegister(PR, *TRI) << '\n'); + return PR; + } + + MachineBasicBlock &B = *DefI->getParent(); + DebugLoc DL = DefI->getDebugLoc(); + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + unsigned NewPR = MRI->createVirtualRegister(PredRC); + + // For convertible instructions, do not modify them, so that they can + // be converted later. Generate a copy from Reg to NewPR. + if (isConvertibleToPredForm(DefI)) { + MachineBasicBlock::iterator DefIt = DefI; + BuildMI(B, std::next(DefIt), DL, TII->get(TargetOpcode::COPY), NewPR) + .addReg(Reg.R, 0, Reg.S); + G2P.insert(std::make_pair(Reg, Register(NewPR))); + DEBUG(dbgs() << " -> !" << PrintRegister(Register(NewPR), *TRI) << '\n'); + return Register(NewPR); + } + + llvm_unreachable("Invalid argument"); +} + + +bool HexagonGenPredicate::isScalarCmp(unsigned Opc) { + switch (Opc) { + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpeqp: + case Hexagon::C2_cmpgtp: + case Hexagon::C2_cmpgtup: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtui: + case Hexagon::C2_cmpgei: + case Hexagon::C2_cmpgeui: + case Hexagon::C4_cmpneqi: + case Hexagon::C4_cmpltei: + case Hexagon::C4_cmplteui: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: + case Hexagon::A4_cmpbeq: + case Hexagon::A4_cmpbeqi: + case Hexagon::A4_cmpbgtu: + case Hexagon::A4_cmpbgtui: + case Hexagon::A4_cmpbgt: + case Hexagon::A4_cmpbgti: + case Hexagon::A4_cmpheq: + case Hexagon::A4_cmphgt: + case Hexagon::A4_cmphgtu: + case Hexagon::A4_cmpheqi: + case Hexagon::A4_cmphgti: + case Hexagon::A4_cmphgtui: + return true; + } + return false; +} + + +bool HexagonGenPredicate::isScalarPred(Register PredReg) { + std::queue<Register> WorkQ; + WorkQ.push(PredReg); + + while (!WorkQ.empty()) { + Register PR = WorkQ.front(); + WorkQ.pop(); + const MachineInstr *DefI = MRI->getVRegDef(PR.R); + if (!DefI) + return false; + unsigned DefOpc = DefI->getOpcode(); + switch (DefOpc) { + case TargetOpcode::COPY: { + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + if (MRI->getRegClass(PR.R) != PredRC) + return false; + // If it is a copy between two predicate registers, fall through. + } + case Hexagon::C2_and: + case Hexagon::C2_andn: + case Hexagon::C4_and_and: + case Hexagon::C4_and_andn: + case Hexagon::C4_and_or: + case Hexagon::C2_or: + case Hexagon::C2_orn: + case Hexagon::C4_or_and: + case Hexagon::C4_or_andn: + case Hexagon::C4_or_or: + case Hexagon::C4_or_orn: + case Hexagon::C2_xor: + // Add operands to the queue. + for (ConstMIOperands Mo(DefI); Mo.isValid(); ++Mo) + if (Mo->isReg() && Mo->isUse()) + WorkQ.push(Register(Mo->getReg())); + break; + + // All non-vector compares are ok, everything else is bad. 
+ default: + return isScalarCmp(DefOpc); + } + } + + return true; +} + + +bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) { + DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " << MI << " " << *MI); + + unsigned Opc = MI->getOpcode(); + assert(isConvertibleToPredForm(MI)); + unsigned NumOps = MI->getNumOperands(); + for (unsigned i = 0; i < NumOps; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + Register Reg(MO); + if (Reg.S && Reg.S != Hexagon::subreg_loreg) + return false; + if (!PredGPRs.count(Reg)) + return false; + } + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + unsigned NewOpc = getPredForm(Opc); + // Special case for comparisons against 0. + if (NewOpc == 0) { + switch (Opc) { + case Hexagon::C2_cmpeqi: + NewOpc = Hexagon::C2_not; + break; + case Hexagon::C4_cmpneqi: + NewOpc = TargetOpcode::COPY; + break; + default: + return false; + } + + // If it's a scalar predicate register, then all bits in it are + // the same. Otherwise, to determine whether all bits are 0 or not + // we would need to use any8. + Register PR = getPredRegFor(MI->getOperand(1)); + if (!isScalarPred(PR)) + return false; + // This will skip the immediate argument when creating the predicate + // version instruction. + NumOps = 2; + } + + // Some sanity: check that def is in operand #0. + MachineOperand &Op0 = MI->getOperand(0); + assert(Op0.isDef()); + Register OutR(Op0); + + // Don't use getPredRegFor, since it will create an association between + // the argument and a created predicate register (i.e. it will insert a + // copy if a new predicate register is created). + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + Register NewPR = MRI->createVirtualRegister(PredRC); + MachineInstrBuilder MIB = BuildMI(B, MI, DL, TII->get(NewOpc), NewPR.R); + + // Add predicate counterparts of the GPRs. + for (unsigned i = 1; i < NumOps; ++i) { + Register GPR = MI->getOperand(i); + Register Pred = getPredRegFor(GPR); + MIB.addReg(Pred.R, 0, Pred.S); + } + DEBUG(dbgs() << "generated: " << *MIB); + + // Generate a copy-out: NewGPR = NewPR, and replace all uses of OutR + // with NewGPR. + const TargetRegisterClass *RC = MRI->getRegClass(OutR.R); + unsigned NewOutR = MRI->createVirtualRegister(RC); + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), NewOutR) + .addReg(NewPR.R, 0, NewPR.S); + MRI->replaceRegWith(OutR.R, NewOutR); + MI->eraseFromParent(); + + // If the processed instruction was C2_tfrrp (i.e. Rn = Pm; Pk = Rn), + // then the output will be a predicate register. Do not visit the + // users of it. + if (!isPredReg(NewOutR)) { + Register R(NewOutR); + PredGPRs.insert(R); + processPredicateGPR(R); + } + return true; +} + + +bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) { + DEBUG(dbgs() << LLVM_FUNCTION_NAME << "\n"); + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + bool Changed = false; + VectOfInst Erase; + + // First, replace copies + // IntR = PredR1 + // PredR2 = IntR + // with + // PredR2 = PredR1 + // Such sequences can be generated when a copy-into-pred is generated from + // a gpr register holding a result of a convertible instruction. After + // the convertible instruction is converted, its predicate result will be + // copied back into the original gpr. 
+ + for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) { + MachineBasicBlock &B = *A; + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + if (I->getOpcode() != TargetOpcode::COPY) + continue; + Register DR = I->getOperand(0); + Register SR = I->getOperand(1); + if (!TargetRegisterInfo::isVirtualRegister(DR.R)) + continue; + if (!TargetRegisterInfo::isVirtualRegister(SR.R)) + continue; + if (MRI->getRegClass(DR.R) != PredRC) + continue; + if (MRI->getRegClass(SR.R) != PredRC) + continue; + assert(!DR.S && !SR.S && "Unexpected subregister"); + MRI->replaceRegWith(DR.R, SR.R); + Erase.insert(I); + Changed = true; + } + } + + for (VectOfInst::iterator I = Erase.begin(), E = Erase.end(); I != E; ++I) + (*I)->eraseFromParent(); + + return Changed; +} + + +bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + TRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + MRI = &MF.getRegInfo(); + PredGPRs.clear(); + PUsers.clear(); + G2P.clear(); + + bool Changed = false; + collectPredicateGPR(MF); + for (SetOfReg::iterator I = PredGPRs.begin(), E = PredGPRs.end(); I != E; ++I) + processPredicateGPR(*I); + + bool Again; + do { + Again = false; + VectOfInst Processed, Copy; + + typedef VectOfInst::iterator iterator; + Copy = PUsers; + for (iterator I = Copy.begin(), E = Copy.end(); I != E; ++I) { + MachineInstr *MI = *I; + bool Done = convertToPredForm(MI); + if (Done) { + Processed.insert(MI); + Again = true; + } + } + Changed |= Again; + + auto Done = [Processed] (MachineInstr *MI) -> bool { + return Processed.count(MI); + }; + PUsers.remove_if(Done); + } while (Again); + + Changed |= eliminatePredCopies(MF); + return Changed; +} + + +FunctionPass *llvm::createHexagonGenPredicate() { + return new HexagonGenPredicate(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp new file mode 100644 index 0000000..d20a809 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -0,0 +1,1965 @@ +//===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass identifies loops where we can generate the Hexagon hardware +// loop instruction. The hardware loop can perform loop branches with a +// zero-cycle overhead. +// +// The pattern that defines the induction variable can changed depending on +// prior optimizations. For example, the IndVarSimplify phase run by 'opt' +// normalizes induction variables, and the Loop Strength Reduction pass +// run by 'llc' may also make changes to the induction variable. +// The pattern detected by this phase is due to running Strength Reduction. +// +// Criteria for hardware loops: +// - Countable loops (w/ ind. var for a trip count) +// - Assumes loops are normalized by IndVarSimplify +// - Try inner-most loops first +// - No function calls in loops. 
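+//    (Calls that are known to never return are the one exception; see
+//    isInvalidLoopOperation.)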
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallSet.h" +#include "Hexagon.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include <algorithm> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "hwloops" + +#ifndef NDEBUG +static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1)); + +// Option to create preheader only for a specific function. +static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden, + cl::init("")); +#endif + +// Option to create a preheader if one doesn't exist. +static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader", + cl::Hidden, cl::init(true), + cl::desc("Add a preheader to a hardware loop if one doesn't exist")); + +STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); + +namespace llvm { + FunctionPass *createHexagonHardwareLoops(); + void initializeHexagonHardwareLoopsPass(PassRegistry&); +} + +namespace { + class CountValue; + struct HexagonHardwareLoops : public MachineFunctionPass { + MachineLoopInfo *MLI; + MachineRegisterInfo *MRI; + MachineDominatorTree *MDT; + const HexagonInstrInfo *TII; +#ifndef NDEBUG + static int Counter; +#endif + + public: + static char ID; + + HexagonHardwareLoops() : MachineFunctionPass(ID) { + initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { return "Hexagon Hardware Loops"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + typedef std::map<unsigned, MachineInstr *> LoopFeederMap; + + /// Kinds of comparisons in the compare instructions. + struct Comparison { + enum Kind { + EQ = 0x01, + NE = 0x02, + L = 0x04, + G = 0x08, + U = 0x40, + LTs = L, + LEs = L | EQ, + GTs = G, + GEs = G | EQ, + LTu = L | U, + LEu = L | EQ | U, + GTu = G | U, + GEu = G | EQ | U + }; + + static Kind getSwappedComparison(Kind Cmp) { + assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator"); + if ((Cmp & L) || (Cmp & G)) + return (Kind)(Cmp ^ (L|G)); + return Cmp; + } + + static Kind getNegatedComparison(Kind Cmp) { + if ((Cmp & L) || (Cmp & G)) + return (Kind)((Cmp ^ (L | G)) ^ EQ); + if ((Cmp & NE) || (Cmp & EQ)) + return (Kind)(Cmp ^ (EQ | NE)); + return (Kind)0; + } + + static bool isSigned(Kind Cmp) { + return (Cmp & (L | G) && !(Cmp & U)); + } + + static bool isUnsigned(Kind Cmp) { + return (Cmp & U); + } + + }; + + /// \brief Find the register that contains the loop controlling + /// induction variable. + /// If successful, it will return true and set the \p Reg, \p IVBump + /// and \p IVOp arguments. Otherwise it will return false. 
+ /// The returned induction register is the register R that follows the + /// following induction pattern: + /// loop: + /// R = phi ..., [ R.next, LatchBlock ] + /// R.next = R + #bump + /// if (R.next < #N) goto loop + /// IVBump is the immediate value added to R, and IVOp is the instruction + /// "R.next = R + #bump". + bool findInductionRegister(MachineLoop *L, unsigned &Reg, + int64_t &IVBump, MachineInstr *&IVOp) const; + + /// \brief Return the comparison kind for the specified opcode. + Comparison::Kind getComparisonKind(unsigned CondOpc, + MachineOperand *InitialValue, + const MachineOperand *Endvalue, + int64_t IVBump) const; + + /// \brief Analyze the statements in a loop to determine if the loop + /// has a computable trip count and, if so, return a value that represents + /// the trip count expression. + CountValue *getLoopTripCount(MachineLoop *L, + SmallVectorImpl<MachineInstr *> &OldInsts); + + /// \brief Return the expression that represents the number of times + /// a loop iterates. The function takes the operands that represent the + /// loop start value, loop end value, and induction value. Based upon + /// these operands, the function attempts to compute the trip count. + /// If the trip count is not directly available (as an immediate value, + /// or a register), the function will attempt to insert computation of it + /// to the loop's preheader. + CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start, + const MachineOperand *End, unsigned IVReg, + int64_t IVBump, Comparison::Kind Cmp) const; + + /// \brief Return true if the instruction is not valid within a hardware + /// loop. + bool isInvalidLoopOperation(const MachineInstr *MI, + bool IsInnerHWLoop) const; + + /// \brief Return true if the loop contains an instruction that inhibits + /// using the hardware loop. + bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const; + + /// \brief Given a loop, check if we can convert it to a hardware loop. + /// If so, then perform the conversion and return true. + bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used); + + /// \brief Return true if the instruction is now dead. + bool isDead(const MachineInstr *MI, + SmallVectorImpl<MachineInstr *> &DeadPhis) const; + + /// \brief Remove the instruction if it is now dead. + void removeIfDead(MachineInstr *MI); + + /// \brief Make sure that the "bump" instruction executes before the + /// compare. We need that for the IV fixup, so that the compare + /// instruction would not use a bumped value that has not yet been + /// defined. If the instructions are out of order, try to reorder them. + bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI); + + /// \brief Return true if MO and MI pair is visited only once. If visited + /// more than once, this indicates there is recursion. In such a case, + /// return false. + bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI, + const MachineOperand *MO, + LoopFeederMap &LoopFeederPhi) const; + + /// \brief Return true if the Phi may generate a value that may underflow, + /// or may wrap. + bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal, + MachineBasicBlock *MBB, MachineLoop *L, + LoopFeederMap &LoopFeederPhi) const; + + /// \brief Return true if the induction variable may underflow an unsigned + /// value in the first iteration. 
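+    /// For example, an unsigned value that starts at 0 and is decremented
+    /// before the exit compare of a do-while loop.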
+ bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal, + const MachineOperand *EndVal, + MachineBasicBlock *MBB, MachineLoop *L, + LoopFeederMap &LoopFeederPhi) const; + + /// \brief Check if the given operand has a compile-time known constant + /// value. Return true if yes, and false otherwise. When returning true, set + /// Val to the corresponding constant value. + bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const; + + /// \brief Check if the operand has a compile-time known constant value. + bool isImmediate(const MachineOperand &MO) const { + int64_t V; + return checkForImmediate(MO, V); + } + + /// \brief Return the immediate for the specified operand. + int64_t getImmediate(const MachineOperand &MO) const { + int64_t V; + if (!checkForImmediate(MO, V)) + llvm_unreachable("Invalid operand"); + return V; + } + + /// \brief Reset the given machine operand to now refer to a new immediate + /// value. Assumes that the operand was already referencing an immediate + /// value, either directly, or via a register. + void setImmediate(MachineOperand &MO, int64_t Val); + + /// \brief Fix the data flow of the induction varible. + /// The desired flow is: phi ---> bump -+-> comparison-in-latch. + /// | + /// +-> back to phi + /// where "bump" is the increment of the induction variable: + /// iv = iv + #const. + /// Due to some prior code transformations, the actual flow may look + /// like this: + /// phi -+-> bump ---> back to phi + /// | + /// +-> comparison-in-latch (against upper_bound-bump), + /// i.e. the comparison that controls the loop execution may be using + /// the value of the induction variable from before the increment. + /// + /// Return true if the loop's flow is the desired one (i.e. it's + /// either been fixed, or no fixing was necessary). + /// Otherwise, return false. This can happen if the induction variable + /// couldn't be identified, or if the value in the latch's comparison + /// cannot be adjusted to reflect the post-bump value. + bool fixupInductionVariable(MachineLoop *L); + + /// \brief Given a loop, if it does not have a preheader, create one. + /// Return the block that is the preheader. + MachineBasicBlock *createPreheaderForLoop(MachineLoop *L); + }; + + char HexagonHardwareLoops::ID = 0; +#ifndef NDEBUG + int HexagonHardwareLoops::Counter = 0; +#endif + + /// \brief Abstraction for a trip count of a loop. A smaller version + /// of the MachineOperand class without the concerns of changing the + /// operand representation. 
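+  /// A CountValue is either a (virtual register, subregister) pair or an
+  /// immediate trip count.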
+ class CountValue { + public: + enum CountValueType { + CV_Register, + CV_Immediate + }; + private: + CountValueType Kind; + union Values { + struct { + unsigned Reg; + unsigned Sub; + } R; + unsigned ImmVal; + } Contents; + + public: + explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) { + Kind = t; + if (Kind == CV_Register) { + Contents.R.Reg = v; + Contents.R.Sub = u; + } else { + Contents.ImmVal = v; + } + } + bool isReg() const { return Kind == CV_Register; } + bool isImm() const { return Kind == CV_Immediate; } + + unsigned getReg() const { + assert(isReg() && "Wrong CountValue accessor"); + return Contents.R.Reg; + } + unsigned getSubReg() const { + assert(isReg() && "Wrong CountValue accessor"); + return Contents.R.Sub; + } + unsigned getImm() const { + assert(isImm() && "Wrong CountValue accessor"); + return Contents.ImmVal; + } + + void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const { + if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); } + if (isImm()) { OS << Contents.ImmVal; } + } + }; +} // end anonymous namespace + + +INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops", + "Hexagon Hardware Loops", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops", + "Hexagon Hardware Loops", false, false) + +FunctionPass *llvm::createHexagonHardwareLoops() { + return new HexagonHardwareLoops(); +} + +bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n"); + + bool Changed = false; + + MLI = &getAnalysis<MachineLoopInfo>(); + MRI = &MF.getRegInfo(); + MDT = &getAnalysis<MachineDominatorTree>(); + TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + + for (auto &L : *MLI) + if (!L->getParentLoop()) { + bool L0Used = false; + bool L1Used = false; + Changed |= convertToHardwareLoop(L, L0Used, L1Used); + } + + return Changed; +} + +/// \brief Return the latch block if it's one of the exiting blocks. Otherwise, +/// return the exiting block. Return 'null' when multiple exiting blocks are +/// present. +static MachineBasicBlock* getExitingBlock(MachineLoop *L) { + if (MachineBasicBlock *Latch = L->getLoopLatch()) { + if (L->isLoopExiting(Latch)) + return Latch; + else + return L->getExitingBlock(); + } + return nullptr; +} + +bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, + unsigned &Reg, + int64_t &IVBump, + MachineInstr *&IVOp + ) const { + MachineBasicBlock *Header = L->getHeader(); + MachineBasicBlock *Preheader = L->getLoopPreheader(); + MachineBasicBlock *Latch = L->getLoopLatch(); + MachineBasicBlock *ExitingBlock = getExitingBlock(L); + if (!Header || !Preheader || !Latch || !ExitingBlock) + return false; + + // This pair represents an induction register together with an immediate + // value that will be added to it in each loop iteration. + typedef std::pair<unsigned,int64_t> RegisterBump; + + // Mapping: R.next -> (R, bump), where R, R.next and bump are derived + // from an induction operation + // R.next = R + bump + // where bump is an immediate value. + typedef std::map<unsigned,RegisterBump> InductionMap; + + InductionMap IndMap; + + typedef MachineBasicBlock::instr_iterator instr_iterator; + for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *Phi = &*I; + + // Have a PHI instruction. 
Get the operand that corresponds to the + // latch block, and see if is a result of an addition of form "reg+imm", + // where the "reg" is defined by the PHI node we are looking at. + for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { + if (Phi->getOperand(i+1).getMBB() != Latch) + continue; + + unsigned PhiOpReg = Phi->getOperand(i).getReg(); + MachineInstr *DI = MRI->getVRegDef(PhiOpReg); + unsigned UpdOpc = DI->getOpcode(); + bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp); + + if (isAdd) { + // If the register operand to the add is the PHI we're looking at, this + // meets the induction pattern. + unsigned IndReg = DI->getOperand(1).getReg(); + MachineOperand &Opnd2 = DI->getOperand(2); + int64_t V; + if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { + unsigned UpdReg = DI->getOperand(0).getReg(); + IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); + } + } + } // for (i) + } // for (instr) + + SmallVector<MachineOperand,2> Cond; + MachineBasicBlock *TB = nullptr, *FB = nullptr; + bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false); + if (NotAnalyzed) + return false; + + unsigned PredR, PredPos, PredRegFlags; + if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags)) + return false; + + MachineInstr *PredI = MRI->getVRegDef(PredR); + if (!PredI->isCompare()) + return false; + + unsigned CmpReg1 = 0, CmpReg2 = 0; + int CmpImm = 0, CmpMask = 0; + bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2, + CmpMask, CmpImm); + // Fail if the compare was not analyzed, or it's not comparing a register + // with an immediate value. Not checking the mask here, since we handle + // the individual compare opcodes (including A4_cmpb*) later on. + if (!CmpAnalyzed) + return false; + + // Exactly one of the input registers to the comparison should be among + // the induction registers. + InductionMap::iterator IndMapEnd = IndMap.end(); + InductionMap::iterator F = IndMapEnd; + if (CmpReg1 != 0) { + InductionMap::iterator F1 = IndMap.find(CmpReg1); + if (F1 != IndMapEnd) + F = F1; + } + if (CmpReg2 != 0) { + InductionMap::iterator F2 = IndMap.find(CmpReg2); + if (F2 != IndMapEnd) { + if (F != IndMapEnd) + return false; + F = F2; + } + } + if (F == IndMapEnd) + return false; + + Reg = F->second.first; + IVBump = F->second.second; + IVOp = MRI->getVRegDef(F->first); + return true; +} + +// Return the comparison kind for the specified opcode. +HexagonHardwareLoops::Comparison::Kind +HexagonHardwareLoops::getComparisonKind(unsigned CondOpc, + MachineOperand *InitialValue, + const MachineOperand *EndValue, + int64_t IVBump) const { + Comparison::Kind Cmp = (Comparison::Kind)0; + switch (CondOpc) { + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqp: + Cmp = Comparison::EQ; + break; + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmpneqi: + Cmp = Comparison::NE; + break; + case Hexagon::C4_cmplte: + Cmp = Comparison::LEs; + break; + case Hexagon::C4_cmplteu: + Cmp = Comparison::LEu; + break; + case Hexagon::C2_cmpgtui: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtup: + Cmp = Comparison::GTu; + break; + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtp: + Cmp = Comparison::GTs; + break; + default: + return (Comparison::Kind)0; + } + return Cmp; +} + +/// \brief Analyze the statements in a loop to determine if the loop has +/// a computable trip count and, if so, return a value that represents +/// the trip count expression. 
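+/// Returns nullptr if no usable trip count can be found.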
+/// +/// This function iterates over the phi nodes in the loop to check for +/// induction variable patterns that are used in the calculation for +/// the number of time the loop is executed. +CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, + SmallVectorImpl<MachineInstr *> &OldInsts) { + MachineBasicBlock *TopMBB = L->getTopBlock(); + MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); + assert(PI != TopMBB->pred_end() && + "Loop must have more than one incoming edge!"); + MachineBasicBlock *Backedge = *PI++; + if (PI == TopMBB->pred_end()) // dead loop? + return nullptr; + MachineBasicBlock *Incoming = *PI++; + if (PI != TopMBB->pred_end()) // multiple backedges? + return nullptr; + + // Make sure there is one incoming and one backedge and determine which + // is which. + if (L->contains(Incoming)) { + if (L->contains(Backedge)) + return nullptr; + std::swap(Incoming, Backedge); + } else if (!L->contains(Backedge)) + return nullptr; + + // Look for the cmp instruction to determine if we can get a useful trip + // count. The trip count can be either a register or an immediate. The + // location of the value depends upon the type (reg or imm). + MachineBasicBlock *ExitingBlock = getExitingBlock(L); + if (!ExitingBlock) + return nullptr; + + unsigned IVReg = 0; + int64_t IVBump = 0; + MachineInstr *IVOp; + bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp); + if (!FoundIV) + return nullptr; + + MachineBasicBlock *Preheader = L->getLoopPreheader(); + + MachineOperand *InitialValue = nullptr; + MachineInstr *IV_Phi = MRI->getVRegDef(IVReg); + MachineBasicBlock *Latch = L->getLoopLatch(); + for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) { + MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB(); + if (MBB == Preheader) + InitialValue = &IV_Phi->getOperand(i); + else if (MBB == Latch) + IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump. + } + if (!InitialValue) + return nullptr; + + SmallVector<MachineOperand,2> Cond; + MachineBasicBlock *TB = nullptr, *FB = nullptr; + bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false); + if (NotAnalyzed) + return nullptr; + + MachineBasicBlock *Header = L->getHeader(); + // TB must be non-null. If FB is also non-null, one of them must be + // the header. Otherwise, branch to TB could be exiting the loop, and + // the fall through can go to the header. + assert (TB && "Exit block without a branch?"); + if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { + MachineBasicBlock *LTB = 0, *LFB = 0; + SmallVector<MachineOperand,2> LCond; + bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false); + if (NotAnalyzed) + return nullptr; + if (TB == Latch) + TB = (LTB == Header) ? LTB : LFB; + else + FB = (LTB == Header) ? LTB: LFB; + } + assert ((!FB || TB == Header || FB == Header) && "Branches not to header?"); + if (!TB || (FB && TB != Header && FB != Header)) + return nullptr; + + // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch + // to put imm(0), followed by P in the vector Cond. + // If TB is not the header, it means that the "not-taken" path must lead + // to the header. 
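+  // The comparison therefore has to be negated when exactly one of these
+  // two conditions holds, which is what the XOR below computes.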
+ bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header); + unsigned PredReg, PredPos, PredRegFlags; + if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags)) + return nullptr; + MachineInstr *CondI = MRI->getVRegDef(PredReg); + unsigned CondOpc = CondI->getOpcode(); + + unsigned CmpReg1 = 0, CmpReg2 = 0; + int Mask = 0, ImmValue = 0; + bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2, + Mask, ImmValue); + if (!AnalyzedCmp) + return nullptr; + + // The comparison operator type determines how we compute the loop + // trip count. + OldInsts.push_back(CondI); + OldInsts.push_back(IVOp); + + // Sadly, the following code gets information based on the position + // of the operands in the compare instruction. This has to be done + // this way, because the comparisons check for a specific relationship + // between the operands (e.g. is-less-than), rather than to find out + // what relationship the operands are in (as on PPC). + Comparison::Kind Cmp; + bool isSwapped = false; + const MachineOperand &Op1 = CondI->getOperand(1); + const MachineOperand &Op2 = CondI->getOperand(2); + const MachineOperand *EndValue = nullptr; + + if (Op1.isReg()) { + if (Op2.isImm() || Op1.getReg() == IVReg) + EndValue = &Op2; + else { + EndValue = &Op1; + isSwapped = true; + } + } + + if (!EndValue) + return nullptr; + + Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump); + if (!Cmp) + return nullptr; + if (Negated) + Cmp = Comparison::getNegatedComparison(Cmp); + if (isSwapped) + Cmp = Comparison::getSwappedComparison(Cmp); + + if (InitialValue->isReg()) { + unsigned R = InitialValue->getReg(); + MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); + if (!MDT->properlyDominates(DefBB, Header)) + return nullptr; + OldInsts.push_back(MRI->getVRegDef(R)); + } + if (EndValue->isReg()) { + unsigned R = EndValue->getReg(); + MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); + if (!MDT->properlyDominates(DefBB, Header)) + return nullptr; + OldInsts.push_back(MRI->getVRegDef(R)); + } + + return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp); +} + +/// \brief Helper function that returns the expression that represents the +/// number of times a loop iterates. The function takes the operands that +/// represent the loop start value, loop end value, and induction value. +/// Based upon these operands, the function attempts to compute the trip count. +CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, + const MachineOperand *Start, + const MachineOperand *End, + unsigned IVReg, + int64_t IVBump, + Comparison::Kind Cmp) const { + // Cannot handle comparison EQ, i.e. while (A == B). + if (Cmp == Comparison::EQ) + return nullptr; + + // Check if either the start or end values are an assignment of an immediate. + // If so, use the immediate value rather than the register. 
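+  // (A2_tfrsi and A2_tfrpi assign an immediate to a register, so their
+  // operand(1) is the constant being assigned.)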
+ if (Start->isReg()) { + const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg()); + if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi || + StartValInstr->getOpcode() == Hexagon::A2_tfrpi)) + Start = &StartValInstr->getOperand(1); + } + if (End->isReg()) { + const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); + if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi || + EndValInstr->getOpcode() == Hexagon::A2_tfrpi)) + End = &EndValInstr->getOperand(1); + } + + if (!Start->isReg() && !Start->isImm()) + return nullptr; + if (!End->isReg() && !End->isImm()) + return nullptr; + + bool CmpLess = Cmp & Comparison::L; + bool CmpGreater = Cmp & Comparison::G; + bool CmpHasEqual = Cmp & Comparison::EQ; + + // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds. + if (CmpLess && IVBump < 0) + // Loop going while iv is "less" with the iv value going down. Must wrap. + return nullptr; + + if (CmpGreater && IVBump > 0) + // Loop going while iv is "greater" with the iv value going up. Must wrap. + return nullptr; + + // Phis that may feed into the loop. + LoopFeederMap LoopFeederPhi; + + // Check if the initial value may be zero and can be decremented in the first + // iteration. If the value is zero, the endloop instruction will not decrement + // the loop counter, so we shouldn't generate a hardware loop in this case. + if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop, + LoopFeederPhi)) + return nullptr; + + if (Start->isImm() && End->isImm()) { + // Both, start and end are immediates. + int64_t StartV = Start->getImm(); + int64_t EndV = End->getImm(); + int64_t Dist = EndV - StartV; + if (Dist == 0) + return nullptr; + + bool Exact = (Dist % IVBump) == 0; + + if (Cmp == Comparison::NE) { + if (!Exact) + return nullptr; + if ((Dist < 0) ^ (IVBump < 0)) + return nullptr; + } + + // For comparisons that include the final value (i.e. include equality + // with the final value), we need to increase the distance by 1. + if (CmpHasEqual) + Dist = Dist > 0 ? Dist+1 : Dist-1; + + // For the loop to iterate, CmpLess should imply Dist > 0. Similarly, + // CmpGreater should imply Dist < 0. These conditions could actually + // fail, for example, in unreachable code (which may still appear to be + // reachable in the CFG). + if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0)) + return nullptr; + + // "Normalized" distance, i.e. with the bump set to +-1. + int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump + : (-Dist + (-IVBump - 1)) / (-IVBump); + assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign."); + + uint64_t Count = Dist1; + + if (Count > 0xFFFFFFFFULL) + return nullptr; + + return new CountValue(CountValue::CV_Immediate, Count); + } + + // A general case: Start and End are some values, but the actual + // iteration count may not be available. If it is not, insert + // a computation of it into the preheader. + + // If the induction variable bump is not a power of 2, quit. + // Othwerise we'd need a general integer division. + if (!isPowerOf2_64(std::abs(IVBump))) + return nullptr; + + MachineBasicBlock *PH = Loop->getLoopPreheader(); + assert (PH && "Should have a preheader by now"); + MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator(); + DebugLoc DL; + if (InsertPos != PH->end()) + DL = InsertPos->getDebugLoc(); + + // If Start is an immediate and End is a register, the trip count + // will be "reg - imm". 
Hexagon's "subtract immediate" instruction + // is actually "reg + -imm". + + // If the loop IV is going downwards, i.e. if the bump is negative, + // then the iteration count (computed as End-Start) will need to be + // negated. To avoid the negation, just swap Start and End. + if (IVBump < 0) { + std::swap(Start, End); + IVBump = -IVBump; + } + // Cmp may now have a wrong direction, e.g. LEs may now be GEs. + // Signedness, and "including equality" are preserved. + + bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm) + bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg) + + int64_t StartV = 0, EndV = 0; + if (Start->isImm()) + StartV = Start->getImm(); + if (End->isImm()) + EndV = End->getImm(); + + int64_t AdjV = 0; + // To compute the iteration count, we would need this computation: + // Count = (End - Start + (IVBump-1)) / IVBump + // or, when CmpHasEqual: + // Count = (End - Start + (IVBump-1)+1) / IVBump + // The "IVBump-1" part is the adjustment (AdjV). We can avoid + // generating an instruction specifically to add it if we can adjust + // the immediate values for Start or End. + + if (CmpHasEqual) { + // Need to add 1 to the total iteration count. + if (Start->isImm()) + StartV--; + else if (End->isImm()) + EndV++; + else + AdjV += 1; + } + + if (Cmp != Comparison::NE) { + if (Start->isImm()) + StartV -= (IVBump-1); + else if (End->isImm()) + EndV += (IVBump-1); + else + AdjV += (IVBump-1); + } + + unsigned R = 0, SR = 0; + if (Start->isReg()) { + R = Start->getReg(); + SR = Start->getSubReg(); + } else { + R = End->getReg(); + SR = End->getSubReg(); + } + const TargetRegisterClass *RC = MRI->getRegClass(R); + // Hardware loops cannot handle 64-bit registers. If it's a double + // register, it has to have a subregister. + if (!SR && RC == &Hexagon::DoubleRegsRegClass) + return nullptr; + const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; + + // Compute DistR (register with the distance between Start and End). + unsigned DistR, DistSR; + + // Avoid special case, where the start value is an imm(0). + if (Start->isImm() && StartV == 0) { + DistR = End->getReg(); + DistSR = End->getSubReg(); + } else { + const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) : + (RegToImm ? TII->get(Hexagon::A2_subri) : + TII->get(Hexagon::A2_addi)); + if (RegToReg || RegToImm) { + unsigned SubR = MRI->createVirtualRegister(IntRC); + MachineInstrBuilder SubIB = + BuildMI(*PH, InsertPos, DL, SubD, SubR); + + if (RegToReg) + SubIB.addReg(End->getReg(), 0, End->getSubReg()) + .addReg(Start->getReg(), 0, Start->getSubReg()); + else + SubIB.addImm(EndV) + .addReg(Start->getReg(), 0, Start->getSubReg()); + DistR = SubR; + } else { + // If the loop has been unrolled, we should use the original loop count + // instead of recalculating the value. This will avoid additional + // 'Add' instruction. + const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); + if (EndValInstr->getOpcode() == Hexagon::A2_addi && + EndValInstr->getOperand(2).getImm() == StartV) { + DistR = EndValInstr->getOperand(1).getReg(); + } else { + unsigned SubR = MRI->createVirtualRegister(IntRC); + MachineInstrBuilder SubIB = + BuildMI(*PH, InsertPos, DL, SubD, SubR); + SubIB.addReg(End->getReg(), 0, End->getSubReg()) + .addImm(-StartV); + DistR = SubR; + } + } + DistSR = 0; + } + + // From DistR, compute AdjR (register with the adjusted distance). 
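+  // For example, for a loop "for (i = 0; i < n; i += 4)" the IVBump-1
+  // adjustment has already been folded into StartV above, so DistR holds
+  // n + 3, AdjV is 0, and the count computed below ends up as (n + 3) >> 2.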
+ unsigned AdjR, AdjSR; + + if (AdjV == 0) { + AdjR = DistR; + AdjSR = DistSR; + } else { + // Generate CountR = ADD DistR, AdjVal + unsigned AddR = MRI->createVirtualRegister(IntRC); + MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi); + BuildMI(*PH, InsertPos, DL, AddD, AddR) + .addReg(DistR, 0, DistSR) + .addImm(AdjV); + + AdjR = AddR; + AdjSR = 0; + } + + // From AdjR, compute CountR (register with the final count). + unsigned CountR, CountSR; + + if (IVBump == 1) { + CountR = AdjR; + CountSR = AdjSR; + } else { + // The IV bump is a power of two. Log_2(IV bump) is the shift amount. + unsigned Shift = Log2_32(IVBump); + + // Generate NormR = LSR DistR, Shift. + unsigned LsrR = MRI->createVirtualRegister(IntRC); + const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r); + BuildMI(*PH, InsertPos, DL, LsrD, LsrR) + .addReg(AdjR, 0, AdjSR) + .addImm(Shift); + + CountR = LsrR; + CountSR = 0; + } + + return new CountValue(CountValue::CV_Register, CountR, CountSR); +} + +/// \brief Return true if the operation is invalid within hardware loop. +bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI, + bool IsInnerHWLoop) const { + + // Call is not allowed because the callee may use a hardware loop except for + // the case when the call never returns. + if (MI->getDesc().isCall() && MI->getOpcode() != Hexagon::CALLv3nr) + return true; + + // Check if the instruction defines a hardware loop register. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned R = MO.getReg(); + if (IsInnerHWLoop && (R == Hexagon::LC0 || R == Hexagon::SA0 || + R == Hexagon::LC1 || R == Hexagon::SA1)) + return true; + if (!IsInnerHWLoop && (R == Hexagon::LC1 || R == Hexagon::SA1)) + return true; + } + return false; +} + +/// \brief Return true if the loop contains an instruction that inhibits +/// the use of the hardware loop instruction. +bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L, + bool IsInnerHWLoop) const { + const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks(); + DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber();); + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + MachineBasicBlock *MBB = Blocks[i]; + for (MachineBasicBlock::iterator + MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { + const MachineInstr *MI = &*MII; + if (isInvalidLoopOperation(MI, IsInnerHWLoop)) { + DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump();); + return true; + } + } + } + return false; +} + +/// \brief Returns true if the instruction is dead. This was essentially +/// copied from DeadMachineInstructionElim::isDead, but with special cases +/// for inline asm, physical registers and instructions with side effects +/// removed. +bool HexagonHardwareLoops::isDead(const MachineInstr *MI, + SmallVectorImpl<MachineInstr *> &DeadPhis) const { + // Examine each operand. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + + unsigned Reg = MO.getReg(); + if (MRI->use_nodbg_empty(Reg)) + continue; + + typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator; + + // This instruction has users, but if the only user is the phi node for the + // parent block, and the only use of that phi node is this instruction, then + // this instruction is dead: both it (and the phi node) can be removed. 
+ use_nodbg_iterator I = MRI->use_nodbg_begin(Reg); + use_nodbg_iterator End = MRI->use_nodbg_end(); + if (std::next(I) != End || !I->getParent()->isPHI()) + return false; + + MachineInstr *OnePhi = I->getParent(); + for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) { + const MachineOperand &OPO = OnePhi->getOperand(j); + if (!OPO.isReg() || !OPO.isDef()) + continue; + + unsigned OPReg = OPO.getReg(); + use_nodbg_iterator nextJ; + for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg); + J != End; J = nextJ) { + nextJ = std::next(J); + MachineOperand &Use = *J; + MachineInstr *UseMI = Use.getParent(); + + // If the phi node has a user that is not MI, bail. + if (MI != UseMI) + return false; + } + } + DeadPhis.push_back(OnePhi); + } + + // If there are no defs with uses, the instruction is dead. + return true; +} + +void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { + // This procedure was essentially copied from DeadMachineInstructionElim. + + SmallVector<MachineInstr*, 1> DeadPhis; + if (isDead(MI, DeadPhis)) { + DEBUG(dbgs() << "HW looping will remove: " << *MI); + + // It is possible that some DBG_VALUE instructions refer to this + // instruction. Examine each def operand for such references; + // if found, mark the DBG_VALUE as undef (but don't delete it). + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + MachineRegisterInfo::use_iterator nextI; + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), + E = MRI->use_end(); I != E; I = nextI) { + nextI = std::next(I); // I is invalidated by the setReg + MachineOperand &Use = *I; + MachineInstr *UseMI = I->getParent(); + if (UseMI == MI) + continue; + if (Use.isDebug()) + UseMI->getOperand(0).setReg(0U); + } + } + + MI->eraseFromParent(); + for (unsigned i = 0; i < DeadPhis.size(); ++i) + DeadPhis[i]->eraseFromParent(); + } +} + +/// \brief Check if the loop is a candidate for converting to a hardware +/// loop. If so, then perform the transformation. +/// +/// This function works on innermost loops first. A loop can be converted +/// if it is a counting loop; either a register value or an immediate. +/// +/// The code makes several assumptions about the representation of the loop +/// in llvm. +bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, + bool &RecL0used, + bool &RecL1used) { + // This is just for sanity. + assert(L->getHeader() && "Loop without a header?"); + + bool Changed = false; + bool L0Used = false; + bool L1Used = false; + + // Process nested loops first. + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) { + Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used); + L0Used |= RecL0used; + L1Used |= RecL1used; + } + + // If a nested loop has been converted, then we can't convert this loop. + if (Changed && L0Used && L1Used) + return Changed; + + unsigned LOOP_i; + unsigned LOOP_r; + unsigned ENDLOOP; + + // Flag used to track loopN instruction: + // 1 - Hardware loop is being generated for the inner most loop. + // 0 - Hardware loop is being generated for the outer loop. + unsigned IsInnerHWLoop = 1; + + if (L0Used) { + LOOP_i = Hexagon::J2_loop1i; + LOOP_r = Hexagon::J2_loop1r; + ENDLOOP = Hexagon::ENDLOOP1; + IsInnerHWLoop = 0; + } else { + LOOP_i = Hexagon::J2_loop0i; + LOOP_r = Hexagon::J2_loop0r; + ENDLOOP = Hexagon::ENDLOOP0; + } + +#ifndef NDEBUG + // Stop trying after reaching the limit (if any). 
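+  // (HWLoopLimit is -1 by default, which means no limit.)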
+ int Limit = HWLoopLimit; + if (Limit >= 0) { + if (Counter >= HWLoopLimit) + return false; + Counter++; + } +#endif + + // Does the loop contain any invalid instructions? + if (containsInvalidInstruction(L, IsInnerHWLoop)) + return false; + + MachineBasicBlock *LastMBB = getExitingBlock(L); + // Don't generate hw loop if the loop has more than one exit. + if (!LastMBB) + return false; + + MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); + if (LastI == LastMBB->end()) + return false; + + // Is the induction variable bump feeding the latch condition? + if (!fixupInductionVariable(L)) + return false; + + // Ensure the loop has a preheader: the loop instruction will be + // placed there. + MachineBasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + Preheader = createPreheaderForLoop(L); + if (!Preheader) + return false; + } + + MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); + + SmallVector<MachineInstr*, 2> OldInsts; + // Are we able to determine the trip count for the loop? + CountValue *TripCount = getLoopTripCount(L, OldInsts); + if (!TripCount) + return false; + + // Is the trip count available in the preheader? + if (TripCount->isReg()) { + // There will be a use of the register inserted into the preheader, + // so make sure that the register is actually defined at that point. + MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg()); + MachineBasicBlock *BBDef = TCDef->getParent(); + if (!MDT->dominates(BBDef, Preheader)) + return false; + } + + // Determine the loop start. + MachineBasicBlock *TopBlock = L->getTopBlock(); + MachineBasicBlock *ExitingBlock = getExitingBlock(L); + MachineBasicBlock *LoopStart = 0; + if (ExitingBlock != L->getLoopLatch()) { + MachineBasicBlock *TB = 0, *FB = 0; + SmallVector<MachineOperand, 2> Cond; + + if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false)) + return false; + + if (L->contains(TB)) + LoopStart = TB; + else if (L->contains(FB)) + LoopStart = FB; + else + return false; + } + else + LoopStart = TopBlock; + + // Convert the loop to a hardware loop. + DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); + DebugLoc DL; + if (InsertPos != Preheader->end()) + DL = InsertPos->getDebugLoc(); + + if (TripCount->isReg()) { + // Create a copy of the loop count register. + unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) + .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); + // Add the Loop instruction to the beginning of the loop. + BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart) + .addReg(CountReg); + } else { + assert(TripCount->isImm() && "Expecting immediate value for trip count"); + // Add the Loop immediate instruction to the beginning of the loop, + // if the immediate fits in the instructions. Otherwise, we need to + // create a new virtual register. + int64_t CountImm = TripCount->getImm(); + if (!TII->isValidOffset(LOOP_i, CountImm)) { + unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg) + .addImm(CountImm); + BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)) + .addMBB(LoopStart).addReg(CountReg); + } else + BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i)) + .addMBB(LoopStart).addImm(CountImm); + } + + // Make sure the loop start always has a reference in the CFG. 
We need
+  // to create a BlockAddress operand to get this mechanism to work; both the
+  // MachineBasicBlock and BasicBlock objects need the flag set.
+  LoopStart->setHasAddressTaken();
+  // This line is needed to set the hasAddressTaken flag on the BasicBlock
+  // object.
+  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
+
+  // Replace the loop branch with an endloop instruction.
+  DebugLoc LastIDL = LastI->getDebugLoc();
+  BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
+
+  // The loop ends with either:
+  //  - a conditional branch followed by an unconditional branch, or
+  //  - a conditional branch to the loop start.
+  if (LastI->getOpcode() == Hexagon::J2_jumpt ||
+      LastI->getOpcode() == Hexagon::J2_jumpf) {
+    // Delete one and change/add an uncond. branch out of the loop.
+    MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
+    LastI = LastMBB->erase(LastI);
+    if (!L->contains(BranchTarget)) {
+      if (LastI != LastMBB->end())
+        LastI = LastMBB->erase(LastI);
+      SmallVector<MachineOperand, 0> Cond;
+      TII->InsertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
+    }
+  } else {
+    // Conditional branch to loop start; just delete it.
+    LastMBB->erase(LastI);
+  }
+  delete TripCount;
+
+  // The induction operation and the comparison may now be
+  // unneeded. If these are unneeded, then remove them.
+  for (unsigned i = 0; i < OldInsts.size(); ++i)
+    removeIfDead(OldInsts[i]);
+
+  ++NumHWLoops;
+
+  // Set RecL1used and RecL0used only after the hardware loop has been
+  // successfully generated. Doing it earlier can cause the wrong loop
+  // instruction to be used.
+  if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
+    RecL1used = true;
+  else
+    RecL0used = true;
+
+  return true;
+}
+
+bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
+                                            MachineInstr *CmpI) {
+  assert (BumpI != CmpI && "Bump and compare in the same instruction?");
+
+  MachineBasicBlock *BB = BumpI->getParent();
+  if (CmpI->getParent() != BB)
+    return false;
+
+  typedef MachineBasicBlock::instr_iterator instr_iterator;
+  // Check if things are in order to begin with.
+  for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
+    if (&*I == CmpI)
+      return true;
+
+  // Out of order.
+  unsigned PredR = CmpI->getOperand(0).getReg();
+  bool FoundBump = false;
+  instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
+  for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
+    MachineInstr *In = &*I;
+    for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
+      MachineOperand &MO = In->getOperand(i);
+      if (MO.isReg() && MO.isUse()) {
+        if (MO.getReg() == PredR) // Found an intervening use of PredR.
+          return false;
+      }
+    }
+
+    if (In == BumpI) {
+      BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
+      FoundBump = true;
+      break;
+    }
+  }
+  assert (FoundBump && "Cannot determine instruction order");
+  return FoundBump;
+}
+
+/// This function is required to break recursion. Visiting phis in a loop may
+/// result in recursion during compilation. We break the recursion by making
+/// sure that we visit a MachineOperand and its definition in a
+/// MachineInstruction only once. If we attempt to visit more than once, then
+/// there is recursion, and we will return false.
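+/// The LoopFeederPhi map also serves as the visited set: each register is
+/// recorded together with its defining instruction on the first visit.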
+bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
+                                        MachineInstr *MI,
+                                        const MachineOperand *MO,
+                                        LoopFeederMap &LoopFeederPhi) const {
+  if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
+    const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
+    DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
+    // Ignore all BBs that form the loop.
+    for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+      MachineBasicBlock *MBB = Blocks[i];
+      if (A == MBB)
+        return false;
+    }
+    MachineInstr *Def = MRI->getVRegDef(MO->getReg());
+    LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
+    return true;
+  } else
+    // Already visited node.
+    return false;
+}
+
+/// Return true if a Phi may generate a value that can underflow.
+/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
+bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
+    MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
+    MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
+  assert(Phi->isPHI() && "Expecting a Phi.");
+  // Walk through each Phi, and its used operands. Make sure that
+  // if there is recursion in Phi, we won't generate hardware loops.
+  for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
+    if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
+      if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
+                                      Phi->getParent(), L, LoopFeederPhi))
+        return true;
+  return false;
+}
+
+/// Return true if the induction variable can underflow in the first iteration.
+/// An example is an initial unsigned value that is 0 and is decremented in the
+/// first iteration of a do-while loop. In this case, we cannot generate a
+/// hardware loop because the endloop instruction does not decrement the loop
+/// counter if it is <= 1. We only need to perform this analysis if the
+/// initial value is a register.
+///
+/// This function assumes the initial value may underflow unless proven
+/// otherwise. If the type is signed, then we don't care because signed
+/// underflow is undefined. We attempt to prove the initial value is not
+/// zero by performing a crude analysis of the loop counter. This function
+/// checks if the initial value is used in any comparison prior to the loop
+/// and, if so, assumes the comparison is a range check. This is inexact,
+/// but will catch the simple cases.
+bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
+    const MachineOperand *InitVal, const MachineOperand *EndVal,
+    MachineBasicBlock *MBB, MachineLoop *L,
+    LoopFeederMap &LoopFeederPhi) const {
+  // Only check register values since they are unknown.
+  if (!InitVal->isReg())
+    return false;
+
+  if (!EndVal->isImm())
+    return false;
+
+  // A register value that is assigned an immediate is a known value, and it
+  // won't underflow in the first iteration.
+  int64_t Imm;
+  if (checkForImmediate(*InitVal, Imm))
+    return (EndVal->getImm() == Imm);
+
+  unsigned Reg = InitVal->getReg();
+
+  // We don't know the value of a physical register.
+  if (!TargetRegisterInfo::isVirtualRegister(Reg))
+    return true;
+
+  MachineInstr *Def = MRI->getVRegDef(Reg);
+  if (!Def)
+    return true;
+
+  // If the initial value is a Phi or copy and the operands may not underflow,
+  // then the definition cannot underflow either.
+ if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(), + L, LoopFeederPhi)) + return false; + if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)), + EndVal, Def->getParent(), + L, LoopFeederPhi)) + return false; + + // Iterate over the uses of the initial value. If the initial value is used + // in a compare, then we assume this is a range check that ensures the loop + // doesn't underflow. This is not an exact test and should be improved. + for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg), + E = MRI->use_instr_nodbg_end(); I != E; ++I) { + MachineInstr *MI = &*I; + unsigned CmpReg1 = 0, CmpReg2 = 0; + int CmpMask = 0, CmpValue = 0; + + if (!TII->analyzeCompare(MI, CmpReg1, CmpReg2, CmpMask, CmpValue)) + continue; + + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 2> Cond; + if (TII->AnalyzeBranch(*MI->getParent(), TBB, FBB, Cond, false)) + continue; + + Comparison::Kind Cmp = getComparisonKind(MI->getOpcode(), 0, 0, 0); + if (Cmp == 0) + continue; + if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB)) + Cmp = Comparison::getNegatedComparison(Cmp); + if (CmpReg2 != 0 && CmpReg2 == Reg) + Cmp = Comparison::getSwappedComparison(Cmp); + + // Signed underflow is undefined. + if (Comparison::isSigned(Cmp)) + return false; + + // Check if there is a comparison of the initial value. If the initial value + // is greater than or not equal to another value, then assume this is a + // range check. + if ((Cmp & Comparison::G) || Cmp == Comparison::NE) + return false; + } + + // OK - this is a hack that needs to be improved. We really need to analyze + // the instructions performed on the initial value. This works on the simplest + // cases only. + if (!Def->isCopy() && !Def->isPHI()) + return false; + + return true; +} + +bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO, + int64_t &Val) const { + if (MO.isImm()) { + Val = MO.getImm(); + return true; + } + if (!MO.isReg()) + return false; + + // MO is a register. Check whether it is defined as an immediate value, + // and if so, get the value of it in TV. That value will then need to be + // processed to handle potential subregisters in MO. + int64_t TV; + + unsigned R = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + return false; + MachineInstr *DI = MRI->getVRegDef(R); + unsigned DOpc = DI->getOpcode(); + switch (DOpc) { + case TargetOpcode::COPY: + case Hexagon::A2_tfrsi: + case Hexagon::A2_tfrpi: + case Hexagon::CONST32_Int_Real: + case Hexagon::CONST64_Int_Real: { + // Call recursively to avoid an extra check whether operand(1) is + // indeed an immediate (it could be a global address, for example), + // plus we can handle COPY at the same time. 
+ if (!checkForImmediate(DI->getOperand(1), TV)) + return false; + break; + } + case Hexagon::A2_combineii: + case Hexagon::A4_combineir: + case Hexagon::A4_combineii: + case Hexagon::A4_combineri: + case Hexagon::A2_combinew: { + const MachineOperand &S1 = DI->getOperand(1); + const MachineOperand &S2 = DI->getOperand(2); + int64_t V1, V2; + if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2)) + return false; + TV = V2 | (V1 << 32); + break; + } + case TargetOpcode::REG_SEQUENCE: { + const MachineOperand &S1 = DI->getOperand(1); + const MachineOperand &S3 = DI->getOperand(3); + int64_t V1, V3; + if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3)) + return false; + unsigned Sub2 = DI->getOperand(2).getImm(); + unsigned Sub4 = DI->getOperand(4).getImm(); + if (Sub2 == Hexagon::subreg_loreg && Sub4 == Hexagon::subreg_hireg) + TV = V1 | (V3 << 32); + else if (Sub2 == Hexagon::subreg_hireg && Sub4 == Hexagon::subreg_loreg) + TV = V3 | (V1 << 32); + else + llvm_unreachable("Unexpected form of REG_SEQUENCE"); + break; + } + + default: + return false; + } + + // By now, we should have successfuly obtained the immediate value defining + // the register referenced in MO. Handle a potential use of a subregister. + switch (MO.getSubReg()) { + case Hexagon::subreg_loreg: + Val = TV & 0xFFFFFFFFULL; + break; + case Hexagon::subreg_hireg: + Val = (TV >> 32) & 0xFFFFFFFFULL; + break; + default: + Val = TV; + break; + } + return true; +} + +void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { + if (MO.isImm()) { + MO.setImm(Val); + return; + } + + assert(MO.isReg()); + unsigned R = MO.getReg(); + MachineInstr *DI = MRI->getVRegDef(R); + + const TargetRegisterClass *RC = MRI->getRegClass(R); + unsigned NewR = MRI->createVirtualRegister(RC); + MachineBasicBlock &B = *DI->getParent(); + DebugLoc DL = DI->getDebugLoc(); + BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val); + MO.setReg(NewR); +} + +static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) { + // These two instructions are not extendable. + if (CmpOpc == Hexagon::A4_cmpbeqi) + return isUInt<8>(Imm); + if (CmpOpc == Hexagon::A4_cmpbgti) + return isInt<8>(Imm); + // The rest of the comparison-with-immediate instructions are extendable. + return true; +} + +bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { + MachineBasicBlock *Header = L->getHeader(); + MachineBasicBlock *Latch = L->getLoopLatch(); + MachineBasicBlock *ExitingBlock = getExitingBlock(L); + + if (!(Header && Latch && ExitingBlock)) + return false; + + // These data structures follow the same concept as the corresponding + // ones in findInductionRegister (where some comments are). + typedef std::pair<unsigned,int64_t> RegisterBump; + typedef std::pair<unsigned,RegisterBump> RegisterInduction; + typedef std::set<RegisterInduction> RegisterInductionSet; + + // Register candidates for induction variables, with their associated bumps. + RegisterInductionSet IndRegs; + + // Look for induction patterns: + // vreg1 = PHI ..., [ latch, vreg2 ] + // vreg2 = ADD vreg1, imm + typedef MachineBasicBlock::instr_iterator instr_iterator; + for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *Phi = &*I; + + // Have a PHI instruction. 
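+    // As in findInductionRegister, find the value coming from the latch and
+    // check that it is defined by an add-immediate of the PHI itself.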
+ for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { + if (Phi->getOperand(i+1).getMBB() != Latch) + continue; + + unsigned PhiReg = Phi->getOperand(i).getReg(); + MachineInstr *DI = MRI->getVRegDef(PhiReg); + unsigned UpdOpc = DI->getOpcode(); + bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp); + + if (isAdd) { + // If the register operand to the add/sub is the PHI we are looking + // at, this meets the induction pattern. + unsigned IndReg = DI->getOperand(1).getReg(); + MachineOperand &Opnd2 = DI->getOperand(2); + int64_t V; + if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { + unsigned UpdReg = DI->getOperand(0).getReg(); + IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); + } + } + } // for (i) + } // for (instr) + + if (IndRegs.empty()) + return false; + + MachineBasicBlock *TB = nullptr, *FB = nullptr; + SmallVector<MachineOperand,2> Cond; + // AnalyzeBranch returns true if it fails to analyze branch. + bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false); + if (NotAnalyzed || Cond.empty()) + return false; + + if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { + MachineBasicBlock *LTB = 0, *LFB = 0; + SmallVector<MachineOperand,2> LCond; + bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false); + if (NotAnalyzed) + return false; + + // Since latch is not the exiting block, the latch branch should be an + // unconditional branch to the loop header. + if (TB == Latch) + TB = (LTB == Header) ? LTB : LFB; + else + FB = (LTB == Header) ? LTB : LFB; + } + if (TB != Header) { + if (FB != Header) { + // The latch/exit block does not go back to the header. + return false; + } + // FB is the header (i.e., uncond. jump to branch header) + // In this case, the LoopBody -> TB should not be a back edge otherwise + // it could result in an infinite loop after conversion to hw_loop. + // This case can happen when the Latch has two jumps like this: + // Jmp_c OuterLoopHeader <-- TB + // Jmp InnerLoopHeader <-- FB + if (MDT->dominates(TB, FB)) + return false; + } + + // Expecting a predicate register as a condition. It won't be a hardware + // predicate register at this point yet, just a vreg. + // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0) + // into Cond, followed by the predicate register. For non-negated branches + // it's just the register. + unsigned CSz = Cond.size(); + if (CSz != 1 && CSz != 2) + return false; + + if (!Cond[CSz-1].isReg()) + return false; + + unsigned P = Cond[CSz-1].getReg(); + MachineInstr *PredDef = MRI->getVRegDef(P); + + if (!PredDef->isCompare()) + return false; + + SmallSet<unsigned,2> CmpRegs; + MachineOperand *CmpImmOp = nullptr; + + // Go over all operands to the compare and look for immediate and register + // operands. Assume that if the compare has a single register use and a + // single immediate operand, then the register is being compared with the + // immediate value. + for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { + MachineOperand &MO = PredDef->getOperand(i); + if (MO.isReg()) { + // Skip all implicit references. In one case there was: + // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use> + if (MO.isImplicit()) + continue; + if (MO.isUse()) { + if (!isImmediate(MO)) { + CmpRegs.insert(MO.getReg()); + continue; + } + // Consider the register to be the "immediate" operand. 
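+          // (i.e. a register whose value checkForImmediate has shown to be a
+          // compile-time constant)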
+ if (CmpImmOp) + return false; + CmpImmOp = &MO; + } + } else if (MO.isImm()) { + if (CmpImmOp) // A second immediate argument? Confusing. Bail out. + return false; + CmpImmOp = &MO; + } + } + + if (CmpRegs.empty()) + return false; + + // Check if the compared register follows the order we want. Fix if needed. + for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end(); + I != E; ++I) { + // This is a success. If the register used in the comparison is one that + // we have identified as a bumped (updated) induction register, there is + // nothing to do. + if (CmpRegs.count(I->first)) + return true; + + // Otherwise, if the register being compared comes out of a PHI node, + // and has been recognized as following the induction pattern, and is + // compared against an immediate, we can fix it. + const RegisterBump &RB = I->second; + if (CmpRegs.count(RB.first)) { + if (!CmpImmOp) { + // If both operands to the compare instruction are registers, see if + // it can be changed to use induction register as one of the operands. + MachineInstr *IndI = nullptr; + MachineInstr *nonIndI = nullptr; + MachineOperand *IndMO = nullptr; + MachineOperand *nonIndMO = nullptr; + + for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) { + MachineOperand &MO = PredDef->getOperand(i); + if (MO.isReg() && MO.getReg() == RB.first) { + DEBUG(dbgs() << "\n DefMI(" << i << ") = " + << *(MRI->getVRegDef(I->first))); + if (IndI) + return false; + + IndI = MRI->getVRegDef(I->first); + IndMO = &MO; + } else if (MO.isReg()) { + DEBUG(dbgs() << "\n DefMI(" << i << ") = " + << *(MRI->getVRegDef(MO.getReg()))); + if (nonIndI) + return false; + + nonIndI = MRI->getVRegDef(MO.getReg()); + nonIndMO = &MO; + } + } + if (IndI && nonIndI && + nonIndI->getOpcode() == Hexagon::A2_addi && + nonIndI->getOperand(2).isImm() && + nonIndI->getOperand(2).getImm() == - RB.second) { + bool Order = orderBumpCompare(IndI, PredDef); + if (Order) { + IndMO->setReg(I->first); + nonIndMO->setReg(nonIndI->getOperand(1).getReg()); + return true; + } + } + return false; + } + + // It is not valid to do this transformation on an unsigned comparison + // because it may underflow. + Comparison::Kind Cmp = getComparisonKind(PredDef->getOpcode(), 0, 0, 0); + if (!Cmp || Comparison::isUnsigned(Cmp)) + return false; + + // If the register is being compared against an immediate, try changing + // the compare instruction to use induction register and adjust the + // immediate operand. + int64_t CmpImm = getImmediate(*CmpImmOp); + int64_t V = RB.second; + // Handle Overflow (64-bit). + if (((V > 0) && (CmpImm > INT64_MAX - V)) || + ((V < 0) && (CmpImm < INT64_MIN - V))) + return false; + CmpImm += V; + // Most comparisons of register against an immediate value allow + // the immediate to be constant-extended. There are some exceptions + // though. Make sure the new combination will work. + if (CmpImmOp->isImm()) + if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm)) + return false; + + // Make sure that the compare happens after the bump. Otherwise, + // after the fixup, the compare would use a yet-undefined register. + MachineInstr *BumpI = MRI->getVRegDef(I->first); + bool Order = orderBumpCompare(BumpI, PredDef); + if (!Order) + return false; + + // Finally, fix the compare instruction. 
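+      // Illustrative sketch of the rewrite (virtual register numbers and
+      // constants are invented): with a bump of +1,
+      //   %vreg2 = A2_addi %vreg1, 1
+      //   %vreg5 = C2_cmpgti %vreg1, 10    ; compares the pre-bump register
+      // becomes, once the bump has been ordered before the compare,
+      //   %vreg5 = C2_cmpgti %vreg2, 11    ; post-bump register, immediate + bump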
+ setImmediate(*CmpImmOp, CmpImm); + for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { + MachineOperand &MO = PredDef->getOperand(i); + if (MO.isReg() && MO.getReg() == RB.first) { + MO.setReg(I->first); + return true; + } + } + } + } + + return false; +} + +/// \brief Create a preheader for a given loop. +MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( + MachineLoop *L) { + if (MachineBasicBlock *TmpPH = L->getLoopPreheader()) + return TmpPH; + + if (!HWCreatePreheader) + return nullptr; + + MachineBasicBlock *Header = L->getHeader(); + MachineBasicBlock *Latch = L->getLoopLatch(); + MachineBasicBlock *ExitingBlock = getExitingBlock(L); + MachineFunction *MF = Header->getParent(); + DebugLoc DL; + +#ifndef NDEBUG + if ((PHFn != "") && (PHFn != MF->getName())) + return nullptr; +#endif + + if (!Latch || !ExitingBlock || Header->hasAddressTaken()) + return nullptr; + + typedef MachineBasicBlock::instr_iterator instr_iterator; + + // Verify that all existing predecessors have analyzable branches + // (or no branches at all). + typedef std::vector<MachineBasicBlock*> MBBVector; + MBBVector Preds(Header->pred_begin(), Header->pred_end()); + SmallVector<MachineOperand,2> Tmp1; + MachineBasicBlock *TB = nullptr, *FB = nullptr; + + if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Tmp1, false)) + return nullptr; + + for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { + MachineBasicBlock *PB = *I; + bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false); + if (NotAnalyzed) + return nullptr; + } + + MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock(); + MF->insert(Header->getIterator(), NewPH); + + if (Header->pred_size() > 2) { + // Ensure that the header has only two predecessors: the preheader and + // the loop latch. Any additional predecessors of the header should + // join at the newly created preheader. Inspect all PHI nodes from the + // header and create appropriate corresponding PHI nodes in the preheader. + + for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *PN = &*I; + + const MCInstrDesc &PD = TII->get(TargetOpcode::PHI); + MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL); + NewPH->insert(NewPH->end(), NewPN); + + unsigned PR = PN->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(PR); + unsigned NewPR = MRI->createVirtualRegister(RC); + NewPN->addOperand(MachineOperand::CreateReg(NewPR, true)); + + // Copy all non-latch operands of a header's PHI node to the newly + // created PHI node in the preheader. + for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { + unsigned PredR = PN->getOperand(i).getReg(); + unsigned PredRSub = PN->getOperand(i).getSubReg(); + MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); + if (PredB == Latch) + continue; + + MachineOperand MO = MachineOperand::CreateReg(PredR, false); + MO.setSubReg(PredRSub); + NewPN->addOperand(MO); + NewPN->addOperand(MachineOperand::CreateMBB(PredB)); + } + + // Remove copied operands from the old PHI node and add the value + // coming from the preheader's PHI. 
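+      // A sketch of the intended PHI rewrite (block and value names invented):
+      //   %v = PHI [ %a, %bb1 ], [ %b, %bb2 ], [ %c, %latch ]
+      // in the header keeps only its latch input, while the other inputs move
+      // into a new PHI in the preheader:
+      //   %p = PHI [ %a, %bb1 ], [ %b, %bb2 ]          ; in the new preheader
+      //   %v = PHI [ %c, %latch ], [ %p, %preheader ]  ; updated header PHI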
+ for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { + MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); + if (PredB != Latch) { + PN->RemoveOperand(i+1); + PN->RemoveOperand(i); + } + } + PN->addOperand(MachineOperand::CreateReg(NewPR, false)); + PN->addOperand(MachineOperand::CreateMBB(NewPH)); + } + + } else { + assert(Header->pred_size() == 2); + + // The header has only two predecessors, but the non-latch predecessor + // is not a preheader (e.g. it has other successors, etc.) + // In such a case we don't need any extra PHI nodes in the new preheader, + // all we need is to adjust existing PHIs in the header to now refer to + // the new preheader. + for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *PN = &*I; + for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { + MachineOperand &MO = PN->getOperand(i+1); + if (MO.getMBB() != Latch) + MO.setMBB(NewPH); + } + } + } + + // "Reroute" the CFG edges to link in the new preheader. + // If any of the predecessors falls through to the header, insert a branch + // to the new preheader in that place. + SmallVector<MachineOperand,1> Tmp2; + SmallVector<MachineOperand,1> EmptyCond; + + TB = FB = nullptr; + + for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { + MachineBasicBlock *PB = *I; + if (PB != Latch) { + Tmp2.clear(); + bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false); + (void)NotAnalyzed; // suppress compiler warning + assert (!NotAnalyzed && "Should be analyzable!"); + if (TB != Header && (Tmp2.empty() || FB != Header)) + TII->InsertBranch(*PB, NewPH, nullptr, EmptyCond, DL); + PB->ReplaceUsesOfBlockWith(Header, NewPH); + } + } + + // It can happen that the latch block will fall through into the header. + // Insert an unconditional branch to the header. + TB = FB = nullptr; + bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false); + (void)LatchNotAnalyzed; // suppress compiler warning + assert (!LatchNotAnalyzed && "Should be analyzable!"); + if (!TB && !FB) + TII->InsertBranch(*Latch, Header, nullptr, EmptyCond, DL); + + // Finally, the branch from the preheader to the header. + TII->InsertBranch(*NewPH, Header, nullptr, EmptyCond, DL); + NewPH->addSuccessor(Header); + + MachineLoop *ParentLoop = L->getParentLoop(); + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase()); + + // Update the dominator information with the new preheader. + if (MDT) { + MachineDomTreeNode *HDom = MDT->getNode(Header); + MDT->addNewBlock(NewPH, HDom->getIDom()->getBlock()); + MDT->changeImmediateDominator(Header, NewPH); + } + + return NewPH; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp new file mode 100644 index 0000000..a0da945 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -0,0 +1,1563 @@ +//===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the Hexagon target. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonISelLowering.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-isel"
+
+static
+cl::opt<unsigned>
+MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
+  cl::Hidden, cl::init(2),
+  cl::desc("Maximum number of uses of a global address such that we still use a "
+  "constant extended instruction"));
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+  void initializeHexagonDAGToDAGISelPass(PassRegistry&);
+}
+
+//===--------------------------------------------------------------------===//
+/// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class HexagonDAGToDAGISel : public SelectionDAGISel {
+  const HexagonTargetMachine& HTM;
+  const HexagonSubtarget *HST;
+  const HexagonInstrInfo *HII;
+  const HexagonRegisterInfo *HRI;
+public:
+  explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
+                               CodeGenOpt::Level OptLevel)
+      : SelectionDAGISel(tm, OptLevel), HTM(tm), HST(nullptr), HII(nullptr),
+        HRI(nullptr) {
+    initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    // Reset the subtarget each time through.
+    HST = &MF.getSubtarget<HexagonSubtarget>();
+    HII = HST->getInstrInfo();
+    HRI = HST->getRegisterInfo();
+    SelectionDAGISel::runOnMachineFunction(MF);
+    return true;
+  }
+
+  virtual void PreprocessISelDAG() override;
+  virtual void EmitFunctionEntryCode() override;
+
+  SDNode *Select(SDNode *N) override;
+
+  // Complex Pattern Selectors.
+  inline bool SelectAddrGA(SDValue &N, SDValue &R);
+  inline bool SelectAddrGP(SDValue &N, SDValue &R);
+  bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP);
+  bool SelectAddrFI(SDValue &N, SDValue &R);
+
+  const char *getPassName() const override {
+    return "Hexagon DAG->DAG Pattern Instruction Selection";
+  }
+
+  SDNode *SelectFrameIndex(SDNode *N);
+  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+  /// inline asm expressions.
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, + unsigned ConstraintID, + std::vector<SDValue> &OutOps) override; + SDNode *SelectLoad(SDNode *N); + SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl); + SDNode *SelectIndexedLoad(LoadSDNode *LD, SDLoc dl); + SDNode *SelectIndexedLoadZeroExtend64(LoadSDNode *LD, unsigned Opcode, + SDLoc dl); + SDNode *SelectIndexedLoadSignExtend64(LoadSDNode *LD, unsigned Opcode, + SDLoc dl); + SDNode *SelectBaseOffsetStore(StoreSDNode *ST, SDLoc dl); + SDNode *SelectIndexedStore(StoreSDNode *ST, SDLoc dl); + SDNode *SelectStore(SDNode *N); + SDNode *SelectSHL(SDNode *N); + SDNode *SelectMul(SDNode *N); + SDNode *SelectZeroExtend(SDNode *N); + SDNode *SelectIntrinsicWChain(SDNode *N); + SDNode *SelectIntrinsicWOChain(SDNode *N); + SDNode *SelectConstant(SDNode *N); + SDNode *SelectConstantFP(SDNode *N); + SDNode *SelectAdd(SDNode *N); + SDNode *SelectBitOp(SDNode *N); + + // XformMskToBitPosU5Imm - Returns the bit position which + // the single bit 32 bit mask represents. + // Used in Clr and Set bit immediate memops. + SDValue XformMskToBitPosU5Imm(uint32_t Imm, SDLoc DL) { + int32_t bitPos; + bitPos = Log2_32(Imm); + assert(bitPos >= 0 && bitPos < 32 && + "Constant out of range for 32 BitPos Memops"); + return CurDAG->getTargetConstant(bitPos, DL, MVT::i32); + } + + // XformMskToBitPosU4Imm - Returns the bit position which the single-bit + // 16 bit mask represents. Used in Clr and Set bit immediate memops. + SDValue XformMskToBitPosU4Imm(uint16_t Imm, SDLoc DL) { + return XformMskToBitPosU5Imm(Imm, DL); + } + + // XformMskToBitPosU3Imm - Returns the bit position which the single-bit + // 8 bit mask represents. Used in Clr and Set bit immediate memops. + SDValue XformMskToBitPosU3Imm(uint8_t Imm, SDLoc DL) { + return XformMskToBitPosU5Imm(Imm, DL); + } + + // Return true if there is exactly one bit set in V, i.e., if V is one of the + // following integers: 2^0, 2^1, ..., 2^31. + bool ImmIsSingleBit(uint32_t v) const { + return isPowerOf2_32(v); + } + + // XformM5ToU5Imm - Return a target constant with the specified value, of + // type i32 where the negative literal is transformed into a positive literal + // for use in -= memops. + inline SDValue XformM5ToU5Imm(signed Imm, SDLoc DL) { + assert((Imm >= -31 && Imm <= -1) && "Constant out of range for Memops"); + return CurDAG->getTargetConstant(-Imm, DL, MVT::i32); + } + + // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range + // [1..128], used in cmpb.gtu instructions. + inline SDValue XformU7ToU7M1Imm(signed Imm, SDLoc DL) { + assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op"); + return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i8); + } + + // XformS8ToS8M1Imm - Return a target constant decremented by 1. + inline SDValue XformSToSM1Imm(signed Imm, SDLoc DL) { + return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32); + } + + // XformU8ToU8M1Imm - Return a target constant decremented by 1. + inline SDValue XformUToUM1Imm(unsigned Imm, SDLoc DL) { + assert((Imm >= 1) && "Cannot decrement unsigned int less than 1"); + return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32); + } + + // XformSToSM2Imm - Return a target constant decremented by 2. + inline SDValue XformSToSM2Imm(unsigned Imm, SDLoc DL) { + return CurDAG->getTargetConstant(Imm - 2, DL, MVT::i32); + } + + // XformSToSM3Imm - Return a target constant decremented by 3. 
+ inline SDValue XformSToSM3Imm(unsigned Imm, SDLoc DL) { + return CurDAG->getTargetConstant(Imm - 3, DL, MVT::i32); + } + + // Include the pieces autogenerated from the target description. + #include "HexagonGenDAGISel.inc" + +private: + bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src); +}; // end HexagonDAGToDAGISel +} // end anonymous namespace + + +/// createHexagonISelDag - This pass converts a legalized DAG into a +/// Hexagon-specific DAG, ready for instruction scheduling. +/// +namespace llvm { +FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new HexagonDAGToDAGISel(TM, OptLevel); +} +} + +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection"; + PassInfo *PI = new PassInfo(Name, "hexagon-isel", + &SelectionDAGISel::ID, nullptr, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + + +// Intrinsics that return a a predicate. +static bool doesIntrinsicReturnPredicate(unsigned ID) { + switch (ID) { + default: + return false; + case Intrinsic::hexagon_C2_cmpeq: + case Intrinsic::hexagon_C2_cmpgt: + case Intrinsic::hexagon_C2_cmpgtu: + case Intrinsic::hexagon_C2_cmpgtup: + case Intrinsic::hexagon_C2_cmpgtp: + case Intrinsic::hexagon_C2_cmpeqp: + case Intrinsic::hexagon_C2_bitsset: + case Intrinsic::hexagon_C2_bitsclr: + case Intrinsic::hexagon_C2_cmpeqi: + case Intrinsic::hexagon_C2_cmpgti: + case Intrinsic::hexagon_C2_cmpgtui: + case Intrinsic::hexagon_C2_cmpgei: + case Intrinsic::hexagon_C2_cmpgeui: + case Intrinsic::hexagon_C2_cmplt: + case Intrinsic::hexagon_C2_cmpltu: + case Intrinsic::hexagon_C2_bitsclri: + case Intrinsic::hexagon_C2_and: + case Intrinsic::hexagon_C2_or: + case Intrinsic::hexagon_C2_xor: + case Intrinsic::hexagon_C2_andn: + case Intrinsic::hexagon_C2_not: + case Intrinsic::hexagon_C2_orn: + case Intrinsic::hexagon_C2_pxfer_map: + case Intrinsic::hexagon_C2_any8: + case Intrinsic::hexagon_C2_all8: + case Intrinsic::hexagon_A2_vcmpbeq: + case Intrinsic::hexagon_A2_vcmpbgtu: + case Intrinsic::hexagon_A2_vcmpheq: + case Intrinsic::hexagon_A2_vcmphgt: + case Intrinsic::hexagon_A2_vcmphgtu: + case Intrinsic::hexagon_A2_vcmpweq: + case Intrinsic::hexagon_A2_vcmpwgt: + case Intrinsic::hexagon_A2_vcmpwgtu: + case Intrinsic::hexagon_C2_tfrrp: + case Intrinsic::hexagon_S2_tstbit_i: + case Intrinsic::hexagon_S2_tstbit_r: + return true; + } +} + +SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD, + unsigned Opcode, + SDLoc dl) { + SDValue Chain = LD->getChain(); + EVT LoadedVT = LD->getMemoryVT(); + SDValue Base = LD->getBasePtr(); + SDValue Offset = LD->getOffset(); + SDNode *OffsetNode = Offset.getNode(); + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + + if (HII->isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConst = CurDAG->getTargetConstant(Val, dl, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, + MVT::Other, Base, TargetConst, + Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, + SDValue(Result_1, 0)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) }; + const SDValue Tos[] = { 
SDValue(Result_2, 0), + SDValue(Result_1, 1), + SDValue(Result_1, 2) }; + ReplaceUses(Froms, Tos, 3); + return Result_2; + } + + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other, + Base, TargetConst0, Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, + SDValue(Result_1, 0)); + SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) }; + const SDValue Tos[] = { SDValue(Result_2, 0), + SDValue(Result_3, 0), + SDValue(Result_1, 1) }; + ReplaceUses(Froms, Tos, 3); + return Result_2; +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD, + unsigned Opcode, + SDLoc dl) { + SDValue Chain = LD->getChain(); + EVT LoadedVT = LD->getMemoryVT(); + SDValue Base = LD->getBasePtr(); + SDValue Offset = LD->getOffset(); + SDNode *OffsetNode = Offset.getNode(); + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + + if (HII->isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::i32, MVT::Other, Base, + TargetConstVal, Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl, + MVT::i64, MVT::Other, + TargetConst0, + SDValue(Result_1,0)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) }; + const SDValue Tos[] = { SDValue(Result_2, 0), + SDValue(Result_1, 1), + SDValue(Result_1, 2) }; + ReplaceUses(Froms, Tos, 3); + return Result_2; + } + + // Generate an indirect load. + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::Other, Base, TargetConst0, + Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl, + MVT::i64, MVT::Other, + TargetConst0, + SDValue(Result_1,0)); + // Add offset to base. + SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) }; + const SDValue Tos[] = { SDValue(Result_2, 0), // Load value. + SDValue(Result_3, 0), // New address. + SDValue(Result_1, 1) }; + ReplaceUses(Froms, Tos, 3); + return Result_2; +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Offset = LD->getOffset(); + SDNode *OffsetNode = Offset.getNode(); + // Get the constant value. 
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + EVT LoadedVT = LD->getMemoryVT(); + unsigned Opcode = 0; + + // Check for zero extended loads. Treat any-extend loads as zero extended + // loads. + ISD::LoadExtType ExtType = LD->getExtensionType(); + bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD); + bool HasVecOffset = false; + + // Figure out the opcode. + if (LoadedVT == MVT::i64) { + if (HII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = Hexagon::L2_loadrd_pi; + else + Opcode = Hexagon::L2_loadrd_io; + } else if (LoadedVT == MVT::i32) { + if (HII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = Hexagon::L2_loadri_pi; + else + Opcode = Hexagon::L2_loadri_io; + } else if (LoadedVT == MVT::i16) { + if (HII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = IsZeroExt ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi; + else + Opcode = IsZeroExt ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io; + } else if (LoadedVT == MVT::i8) { + if (HII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = IsZeroExt ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi; + else + Opcode = IsZeroExt ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io; + } else if (LoadedVT == MVT::v16i32 || LoadedVT == MVT::v8i64 || + LoadedVT == MVT::v32i16 || LoadedVT == MVT::v64i8) { + HasVecOffset = true; + if (HII->isValidAutoIncImm(LoadedVT, Val)) { + Opcode = Hexagon::V6_vL32b_pi; + } + else + Opcode = Hexagon::V6_vL32b_ai; + // 128B + } else if (LoadedVT == MVT::v32i32 || LoadedVT == MVT::v16i64 || + LoadedVT == MVT::v64i16 || LoadedVT == MVT::v128i8) { + HasVecOffset = true; + if (HII->isValidAutoIncImm(LoadedVT, Val)) { + Opcode = Hexagon::V6_vL32b_pi_128B; + } + else + Opcode = Hexagon::V6_vL32b_ai_128B; + } else + llvm_unreachable("unknown memory type"); + + // For zero extended i64 loads, we need to add combine instructions. + if (LD->getValueType(0) == MVT::i64 && IsZeroExt) + return SelectIndexedLoadZeroExtend64(LD, Opcode, dl); + // Handle sign extended i64 loads. 
+ if (LD->getValueType(0) == MVT::i64 && ExtType == ISD::SEXTLOAD) + return SelectIndexedLoadSignExtend64(LD, Opcode, dl); + + if (HII->isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); + SDNode* Result = CurDAG->getMachineNode(Opcode, dl, + LD->getValueType(0), + MVT::i32, MVT::Other, Base, + TargetConstVal, Chain); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); + if (HasVecOffset) { + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Result, 2) + }; + ReplaceUses(Froms, Tos, 2); + } else { + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Result, 1), + SDValue(Result, 2) + }; + ReplaceUses(Froms, Tos, 3); + } + return Result; + } else { + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); + SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, + LD->getValueType(0), + MVT::Other, Base, TargetConst0, + Chain); + SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_1, 0), + SDValue(Result_2, 0), + SDValue(Result_1, 1) + }; + ReplaceUses(Froms, Tos, 3); + return Result_1; + } +} + + +SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) { + SDNode *result; + SDLoc dl(N); + LoadSDNode *LD = cast<LoadSDNode>(N); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + + // Handle indexed loads. + if (AM != ISD::UNINDEXED) { + result = SelectIndexedLoad(LD, dl); + } else { + result = SelectCode(LD); + } + + return result; +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { + SDValue Chain = ST->getChain(); + SDValue Base = ST->getBasePtr(); + SDValue Offset = ST->getOffset(); + SDValue Value = ST->getValue(); + SDNode *OffsetNode = Offset.getNode(); + // Get the constant value. + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + EVT StoredVT = ST->getMemoryVT(); + EVT ValueVT = Value.getValueType(); + + // Offset value must be within representable range + // and must have correct alignment properties. + if (HII->isValidAutoIncImm(StoredVT, Val)) { + unsigned Opcode = 0; + + // Figure out the post inc version of opcode. 
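+    // A rough sketch with invented operands: when the offset is a valid
+    // auto-increment immediate, a word store becomes
+    //   memw(r0++#4) = r1        // S2_storeri_pi, base updated in place
+    // Otherwise the code after this block emits the base+offset form plus a
+    // separate A2_addi of the base:
+    //   memw(r0+#0) = r1
+    //   r0 = add(r0, #4)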
+    if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_pi;
+    else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_pi;
+    else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_pi;
+    else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi;
+    else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 ||
+             StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8) {
+      Opcode = Hexagon::V6_vS32b_pi;
+    }
+    // 128B
+    else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 ||
+             StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8) {
+      Opcode = Hexagon::V6_vS32b_pi_128B;
+    } else llvm_unreachable("unknown memory type");
+
+    if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
+      assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
+      Value = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg,
+                                             dl, MVT::i32, Value);
+    }
+    SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, dl, MVT::i32), Value,
+                     Chain};
+    // Build post increment store.
+    SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+                                            MVT::Other, Ops);
+    MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+    MemOp[0] = ST->getMemOperand();
+    cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+
+    ReplaceUses(ST, Result);
+    ReplaceUses(SDValue(ST,1), SDValue(Result,1));
+    return Result;
+  }
+
+  // Note: Order of operands matches the def of instruction:
+  // def S2_storerd_io
+  //   : STInst<(outs), (ins IntRegs:$base, imm:$offset, DoubleRegs:$src1), ...
+  // and it differs for POST_ST* for instance.
+  SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, dl, MVT::i32), Value,
+                    Chain};
+  unsigned Opcode = 0;
+
+  // Figure out the opcode.
+  if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_io;
+  else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io;
+  else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io;
+  else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io;
+  else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 ||
+           StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8)
+    Opcode = Hexagon::V6_vS32b_ai;
+  // 128B
+  else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 ||
+           StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8)
+    Opcode = Hexagon::V6_vS32b_ai_128B;
+  else llvm_unreachable("unknown memory type");
+
+  // Build regular store.
+  SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
+  SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
+  // Build the split increment instruction.
+  SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
+                                            Base,
+                                            TargetConstVal,
+                                            SDValue(Result_1, 0));
+  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+  MemOp[0] = ST->getMemOperand();
+  cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+
+  ReplaceUses(SDValue(ST,0), SDValue(Result_2,0));
+  ReplaceUses(SDValue(ST,1), SDValue(Result_1,0));
+  return Result_2;
+}
+
+SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
+  SDLoc dl(N);
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  ISD::MemIndexedMode AM = ST->getAddressingMode();
+
+  // Handle indexed stores.
+ if (AM != ISD::UNINDEXED) { + return SelectIndexedStore(ST, dl); + } + + return SelectCode(ST); +} + +SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { + SDLoc dl(N); + + // + // %conv.i = sext i32 %tmp1 to i64 + // %conv2.i = sext i32 %add to i64 + // %mul.i = mul nsw i64 %conv2.i, %conv.i + // + // --- match with the following --- + // + // %mul.i = mpy (%tmp1, %add) + // + + if (N->getValueType(0) == MVT::i64) { + // Shifting a i64 signed multiply. + SDValue MulOp0 = N->getOperand(0); + SDValue MulOp1 = N->getOperand(1); + + SDValue OP0; + SDValue OP1; + + // Handle sign_extend and sextload. + if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) { + SDValue Sext0 = MulOp0.getOperand(0); + if (Sext0.getNode()->getValueType(0) != MVT::i32) { + return SelectCode(N); + } + + OP0 = Sext0; + } else if (MulOp0.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode()); + if (LD->getMemoryVT() != MVT::i32 || + LD->getExtensionType() != ISD::SEXTLOAD || + LD->getAddressingMode() != ISD::UNINDEXED) { + return SelectCode(N); + } + + SDValue Chain = LD->getChain(); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); + OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, + MVT::Other, + LD->getBasePtr(), TargetConst0, + Chain), 0); + } else { + return SelectCode(N); + } + + // Same goes for the second operand. + if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) { + SDValue Sext1 = MulOp1.getOperand(0); + if (Sext1.getNode()->getValueType(0) != MVT::i32) { + return SelectCode(N); + } + + OP1 = Sext1; + } else if (MulOp1.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode()); + if (LD->getMemoryVT() != MVT::i32 || + LD->getExtensionType() != ISD::SEXTLOAD || + LD->getAddressingMode() != ISD::UNINDEXED) { + return SelectCode(N); + } + + SDValue Chain = LD->getChain(); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); + OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, + MVT::Other, + LD->getBasePtr(), TargetConst0, + Chain), 0); + } else { + return SelectCode(N); + } + + // Generate a mpy instruction. + SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_dpmpyss_s0, dl, MVT::i64, + OP0, OP1); + ReplaceUses(N, Result); + return Result; + } + + return SelectCode(N); +} + +SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) { + SDLoc dl(N); + if (N->getValueType(0) == MVT::i32) { + SDValue Shl_0 = N->getOperand(0); + SDValue Shl_1 = N->getOperand(1); + // RHS is const. + if (Shl_1.getOpcode() == ISD::Constant) { + if (Shl_0.getOpcode() == ISD::MUL) { + SDValue Mul_0 = Shl_0.getOperand(0); // Val + SDValue Mul_1 = Shl_0.getOperand(1); // Const + // RHS of mul is const. 
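+        // For example (constants invented), (shl (mul %x, 3), 2) can fold to
+        // M2_mpysmi(%x, 12), as long as the scaled constant still fits the
+        // signed 9-bit immediate of M2_mpysmi; the (sub 0, (shl ...)) case
+        // below is handled the same way with a negated power-of-two constant.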
+        if (Mul_1.getOpcode() == ISD::Constant) {
+          int32_t ShlConst =
+            cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+          int32_t MulConst =
+            cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue();
+          int32_t ValConst = MulConst << ShlConst;
+          SDValue Val = CurDAG->getTargetConstant(ValConst, dl,
+                                                  MVT::i32);
+          if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode()))
+            if (isInt<9>(CN->getSExtValue())) {
+              SDNode* Result =
+                CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
+                                       MVT::i32, Mul_0, Val);
+              ReplaceUses(N, Result);
+              return Result;
+            }
+
+        }
+      } else if (Shl_0.getOpcode() == ISD::SUB) {
+        SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
+        SDValue Sub_1 = Shl_0.getOperand(1); // Val
+        if (Sub_0.getOpcode() == ISD::Constant) {
+          int32_t SubConst =
+            cast<ConstantSDNode>(Sub_0.getNode())->getSExtValue();
+          if (SubConst == 0) {
+            if (Sub_1.getOpcode() == ISD::SHL) {
+              SDValue Shl2_0 = Sub_1.getOperand(0); // Val
+              SDValue Shl2_1 = Sub_1.getOperand(1); // Const
+              if (Shl2_1.getOpcode() == ISD::Constant) {
+                int32_t ShlConst =
+                  cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+                int32_t Shl2Const =
+                  cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue();
+                int32_t ValConst = 1 << (ShlConst+Shl2Const);
+                SDValue Val = CurDAG->getTargetConstant(-ValConst, dl,
+                                                        MVT::i32);
+                if (ConstantSDNode *CN =
+                    dyn_cast<ConstantSDNode>(Val.getNode()))
+                  if (isInt<9>(CN->getSExtValue())) {
+                    SDNode* Result =
+                      CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, MVT::i32,
+                                             Shl2_0, Val);
+                    ReplaceUses(N, Result);
+                    return Result;
+                  }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return SelectCode(N);
+}
+
+
+//
+// If there is a zero_extend following an intrinsic in the DAG (meaning the
+// result of the intrinsic is a predicate), convert the zero_extend to a
+// transfer instruction.
+//
+// Zero extend -> transfer is lowered here. Otherwise, zero_extend will be
+// converted into a MUX, as predicate registers are defined as 1 bit in the
+// compiler. The architecture defines them as 8-bit registers.
+// We want to preserve all of the lower 8 bits, not just the LSB.
+//
+SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
+  SDLoc dl(N);
+
+  SDValue Op0 = N->getOperand(0);
+  EVT OpVT = Op0.getValueType();
+  unsigned OpBW = OpVT.getSizeInBits();
+
+  // Special handling for zero-extending a vector of booleans.
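+  // A sketch of the idea: zero-extending v4i1 to v4i8, C2_mask expands each
+  // true lane to 0xff within a 64-bit register; ANDing with the generated
+  // constant 0x01010101 then leaves a 0 or 1 per 8-bit lane, and the low
+  // 32-bit subregister is extracted as the final v4i8 value.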
+  if (OpVT.isVector() && OpVT.getVectorElementType() == MVT::i1 && OpBW <= 64) {
+    SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0);
+    unsigned NE = OpVT.getVectorNumElements();
+    EVT ExVT = N->getValueType(0);
+    unsigned ES = ExVT.getVectorElementType().getSizeInBits();
+    uint64_t MV = 0, Bit = 1;
+    for (unsigned i = 0; i < NE; ++i) {
+      MV |= Bit;
+      Bit <<= ES;
+    }
+    SDValue Ones = CurDAG->getTargetConstant(MV, dl, MVT::i64);
+    SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64_Int_Real, dl,
+                                             MVT::i64, Ones);
+    if (ExVT.getSizeInBits() == 32) {
+      SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64,
+                                           SDValue(Mask,0), SDValue(OnesReg,0));
+      SDValue SubR = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl,
+                                               MVT::i32);
+      return CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT,
+                                    SDValue(And,0), SubR);
+    }
+    return CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT,
+                                  SDValue(Mask,0), SDValue(OnesReg,0));
+  }
+
+  SDNode *IsIntrinsic = N->getOperand(0).getNode();
+  if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
+    unsigned ID =
+      cast<ConstantSDNode>(IsIntrinsic->getOperand(0))->getZExtValue();
+    if (doesIntrinsicReturnPredicate(ID)) {
+      // Now we need to differentiate target data types.
+      if (N->getValueType(0) == MVT::i64) {
+        // Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs).
+        SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
+        SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
+                                                  MVT::i32,
+                                                  SDValue(IsIntrinsic, 0));
+        SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl,
+                                                  MVT::i32,
+                                                  TargetConst0);
+        SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
+                                                  MVT::i64, MVT::Other,
+                                                  SDValue(Result_2, 0),
+                                                  SDValue(Result_1, 0));
+        ReplaceUses(N, Result_3);
+        return Result_3;
+      }
+      if (N->getValueType(0) == MVT::i32) {
+        // Convert the zero_extend to Rs = Pd
+        SDNode* RsPd = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
+                                              MVT::i32,
+                                              SDValue(IsIntrinsic, 0));
+        ReplaceUses(N, RsPd);
+        return RsPd;
+      }
+      llvm_unreachable("Unexpected value type");
+    }
+  }
+  return SelectCode(N);
+}
+
+//
+// Checking for circular load/store and bitreverse load/store intrinsics in
+// order to select the correct lowered operation.
+//
+SDNode *HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
+  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+  if (IntNo == Intrinsic::hexagon_circ_ldd ||
+      IntNo == Intrinsic::hexagon_circ_ldw ||
+      IntNo == Intrinsic::hexagon_circ_lduh ||
+      IntNo == Intrinsic::hexagon_circ_ldh ||
+      IntNo == Intrinsic::hexagon_circ_ldub ||
+      IntNo == Intrinsic::hexagon_circ_ldb) {
+    SDLoc dl(N);
+    SDValue Chain = N->getOperand(0);
+    SDValue Base = N->getOperand(2);
+    SDValue Load = N->getOperand(3);
+    SDValue ModifierExpr = N->getOperand(4);
+    SDValue Offset = N->getOperand(5);
+
+    // We need to add the return type for the load. This intrinsic has
+    // two return types, one for the load and one for the post-increment.
+    // Only the *_ld instructions push the extra return type, and bump the
+    // result node operand number correspondingly.
+ std::vector<EVT> ResTys; + unsigned opc; + unsigned memsize, align; + MVT MvtSize = MVT::i32; + + if (IntNo == Intrinsic::hexagon_circ_ldd) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i64); + opc = Hexagon::L2_loadrd_pci_pseudo; + memsize = 8; + align = 8; + } else if (IntNo == Intrinsic::hexagon_circ_ldw) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadri_pci_pseudo; + memsize = 4; + align = 4; + } else if (IntNo == Intrinsic::hexagon_circ_ldh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrh_pci_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_circ_lduh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadruh_pci_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_circ_ldb) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrb_pci_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else if (IntNo == Intrinsic::hexagon_circ_ldub) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrub_pci_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else + llvm_unreachable("no opc"); + + ResTys.push_back(MVT::Other); + + // Copy over the arguments, which are the same mostly. + SmallVector<SDValue, 5> Ops; + Ops.push_back(Base); + Ops.push_back(Load); + Ops.push_back(ModifierExpr); + int32_t Val = cast<ConstantSDNode>(Offset.getNode())->getSExtValue(); + Ops.push_back(CurDAG->getTargetConstant(Val, dl, MVT::i32)); + Ops.push_back(Chain); + SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops); + + SDValue ST; + MachineMemOperand *Mem = + MF->getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, memsize, align); + if (MvtSize != MVT::i32) + ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load, + MvtSize, Mem); + else + ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem); + + SDNode* Store = SelectStore(ST.getNode()); + + const SDValue Froms[] = { SDValue(N, 0), + SDValue(N, 1) }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Store, 0) }; + ReplaceUses(Froms, Tos, 2); + return Result; + } + + if (IntNo == Intrinsic::hexagon_brev_ldd || + IntNo == Intrinsic::hexagon_brev_ldw || + IntNo == Intrinsic::hexagon_brev_ldh || + IntNo == Intrinsic::hexagon_brev_lduh || + IntNo == Intrinsic::hexagon_brev_ldb || + IntNo == Intrinsic::hexagon_brev_ldub) { + SDLoc dl(N); + SDValue Chain = N->getOperand(0); + SDValue Base = N->getOperand(2); + SDValue Load = N->getOperand(3); + SDValue ModifierExpr = N->getOperand(4); + + // We need to add the rerurn type for the load. This intrinsic has + // two return types, one for the load and one for the post-increment. 
+ std::vector<EVT> ResTys; + unsigned opc; + unsigned memsize, align; + MVT MvtSize = MVT::i32; + + if (IntNo == Intrinsic::hexagon_brev_ldd) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i64); + opc = Hexagon::L2_loadrd_pbr_pseudo; + memsize = 8; + align = 8; + } else if (IntNo == Intrinsic::hexagon_brev_ldw) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadri_pbr_pseudo; + memsize = 4; + align = 4; + } else if (IntNo == Intrinsic::hexagon_brev_ldh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrh_pbr_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_brev_lduh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadruh_pbr_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_brev_ldb) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrb_pbr_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else if (IntNo == Intrinsic::hexagon_brev_ldub) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrub_pbr_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else + llvm_unreachable("no opc"); + + ResTys.push_back(MVT::Other); + + // Copy over the arguments, which are the same mostly. + SmallVector<SDValue, 4> Ops; + Ops.push_back(Base); + Ops.push_back(Load); + Ops.push_back(ModifierExpr); + Ops.push_back(Chain); + SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops); + SDValue ST; + MachineMemOperand *Mem = + MF->getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, memsize, align); + if (MvtSize != MVT::i32) + ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load, + MvtSize, Mem); + else + ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem); + + SDNode* Store = SelectStore(ST.getNode()); + + const SDValue Froms[] = { SDValue(N, 0), + SDValue(N, 1) }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Store, 0) }; + ReplaceUses(Froms, Tos, 2); + return Result; + } + + return SelectCode(N); +} + +// +// Checking for intrinsics which have predicate registers as operand(s) +// and lowering to the actual intrinsic. +// +SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { + unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned Bits; + switch (IID) { + case Intrinsic::hexagon_S2_vsplatrb: + Bits = 8; + break; + case Intrinsic::hexagon_S2_vsplatrh: + Bits = 16; + break; + default: + return SelectCode(N); + } + + SDValue const &V = N->getOperand(1); + SDValue U; + if (isValueExtension(V, Bits, U)) { + SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + N->getOperand(0), U); + return SelectCode(R.getNode()); + } + return SelectCode(N); +} + +// +// Map floating point constant values. 
+//
+SDNode *HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
+  SDLoc dl(N);
+  ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+  APFloat APF = CN->getValueAPF();
+  if (N->getValueType(0) == MVT::f32) {
+    return CurDAG->getMachineNode(Hexagon::TFRI_f, dl, MVT::f32,
+              CurDAG->getTargetConstantFP(APF.convertToFloat(), dl, MVT::f32));
+  }
+  else if (N->getValueType(0) == MVT::f64) {
+    return CurDAG->getMachineNode(Hexagon::CONST64_Float_Real, dl, MVT::f64,
+             CurDAG->getTargetConstantFP(APF.convertToDouble(), dl, MVT::f64));
+  }
+
+  return SelectCode(N);
+}
+
+//
+// Map predicate true (encoded as -1 in LLVM) to a XOR.
+//
+SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
+  SDLoc dl(N);
+  if (N->getValueType(0) == MVT::i1) {
+    SDNode* Result = 0;
+    int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+    if (Val == -1) {
+      Result = CurDAG->getMachineNode(Hexagon::TFR_PdTrue, dl, MVT::i1);
+    } else if (Val == 0) {
+      Result = CurDAG->getMachineNode(Hexagon::TFR_PdFalse, dl, MVT::i1);
+    }
+    if (Result) {
+      ReplaceUses(N, Result);
+      return Result;
+    }
+  }
+
+  return SelectCode(N);
+}
+
+
+//
+// Map add followed by an asr -> asr +=.
+//
+SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
+  SDLoc dl(N);
+  if (N->getValueType(0) != MVT::i32) {
+    return SelectCode(N);
+  }
+  // Identify nodes of the form: add(asr(...)).
+  SDNode* Src1 = N->getOperand(0).getNode();
+  if (Src1->getOpcode() != ISD::SRA || !Src1->hasOneUse()
+      || Src1->getValueType(0) != MVT::i32) {
+    return SelectCode(N);
+  }
+
+  // Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that
+  // Rd and Rd' are assigned to the same register.
+  SDNode* Result = CurDAG->getMachineNode(Hexagon::S2_asr_r_r_acc, dl, MVT::i32,
+                                          N->getOperand(1),
+                                          Src1->getOperand(0),
+                                          Src1->getOperand(1));
+  ReplaceUses(N, Result);
+
+  return Result;
+}
+
+//
+// Map the following, where possible.
+// AND/FABS -> clrbit
+// OR -> setbit
+// XOR/FNEG -> toggle_bit.
+//
+SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
+  SDLoc dl(N);
+  EVT ValueVT = N->getValueType(0);
+
+  // We handle only 32- and 64-bit bit ops.
+  if (!(ValueVT == MVT::i32 || ValueVT == MVT::i64 ||
+        ValueVT == MVT::f32 || ValueVT == MVT::f64))
+    return SelectCode(N);
+
+  // We handle only fabs and fneg for V5.
+  unsigned Opc = N->getOpcode();
+  if ((Opc == ISD::FABS || Opc == ISD::FNEG) && !HST->hasV5TOps())
+    return SelectCode(N);
+
+  int64_t Val = 0;
+  if (Opc != ISD::FABS && Opc != ISD::FNEG) {
+    if (N->getOperand(1).getOpcode() == ISD::Constant)
+      Val = cast<ConstantSDNode>((N)->getOperand(1))->getSExtValue();
+    else
+      return SelectCode(N);
+  }
+
+  if (Opc == ISD::AND) {
+    // Check if this is a bit-clearing AND; if not, select code the usual way.
+    if ((ValueVT == MVT::i32 && isPowerOf2_32(~Val)) ||
+        (ValueVT == MVT::i64 && isPowerOf2_64(~Val)))
+      Val = ~Val;
+    else
+      return SelectCode(N);
+  }
+
+  // If OR or AND is being fed by shl, srl or sra, don't do this change,
+  // because Hexagon provides |= and &= forms of shl, srl, and sra.
+  // Traverse the DAG to see if there is a shl, srl or sra.
+  if (Opc == ISD::OR || Opc == ISD::AND) {
+    switch (N->getOperand(0)->getOpcode()) {
+      default:
+        break;
+      case ISD::SRA:
+      case ISD::SRL:
+      case ISD::SHL:
+        return SelectCode(N);
+    }
+  }
+
+  // Make sure it's a power of 2.
+  unsigned BitPos = 0;
+  if (Opc != ISD::FABS && Opc != ISD::FNEG) {
+    if ((ValueVT == MVT::i32 && !isPowerOf2_32(Val)) ||
+        (ValueVT == MVT::i64 && !isPowerOf2_64(Val)))
+      return SelectCode(N);
+
+    // Get the bit position.
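+    // For example (operand invented), clearing bit 5 via (and %x, 0xffffffdf)
+    // reaches this point with Val already inverted to 0x20, so BitPos becomes
+    // 5 and the AND is selected as S2_clrbit_i(%x, 5) below.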
+ BitPos = countTrailingZeros(uint64_t(Val)); + } else { + // For fabs and fneg, it's always the 31st bit. + BitPos = 31; + } + + unsigned BitOpc = 0; + // Set the right opcode for bitwise operations. + switch (Opc) { + default: + llvm_unreachable("Only bit-wise/abs/neg operations are allowed."); + case ISD::AND: + case ISD::FABS: + BitOpc = Hexagon::S2_clrbit_i; + break; + case ISD::OR: + BitOpc = Hexagon::S2_setbit_i; + break; + case ISD::XOR: + case ISD::FNEG: + BitOpc = Hexagon::S2_togglebit_i; + break; + } + + SDNode *Result; + // Get the right SDVal for the opcode. + SDValue SDVal = CurDAG->getTargetConstant(BitPos, dl, MVT::i32); + + if (ValueVT == MVT::i32 || ValueVT == MVT::f32) { + Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT, + N->getOperand(0), SDVal); + } else { + // 64-bit gymnastic to use REG_SEQUENCE. But it's worth it. + EVT SubValueVT; + if (ValueVT == MVT::i64) + SubValueVT = MVT::i32; + else + SubValueVT = MVT::f32; + + SDNode *Reg = N->getOperand(0).getNode(); + SDValue RegClass = CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, + dl, MVT::i64); + + SDValue SubregHiIdx = CurDAG->getTargetConstant(Hexagon::subreg_hireg, dl, + MVT::i32); + SDValue SubregLoIdx = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl, + MVT::i32); + + SDValue SubregHI = CurDAG->getTargetExtractSubreg(Hexagon::subreg_hireg, dl, + MVT::i32, SDValue(Reg, 0)); + + SDValue SubregLO = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, dl, + MVT::i32, SDValue(Reg, 0)); + + // Clear/set/toggle hi or lo registers depending on the bit position. + if (SubValueVT != MVT::f32 && BitPos < 32) { + SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT, + SubregLO, SDVal); + const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx, + SDValue(Result0, 0), SubregLoIdx }; + Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, + dl, ValueVT, Ops); + } else { + if (Opc != ISD::FABS && Opc != ISD::FNEG) + SDVal = CurDAG->getTargetConstant(BitPos-32, dl, MVT::i32); + SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT, + SubregHI, SDVal); + const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx, + SubregLO, SubregLoIdx }; + Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, + dl, ValueVT, Ops); + } + } + + ReplaceUses(N, Result); + return Result; +} + + +SDNode *HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) { + MachineFrameInfo *MFI = MF->getFrameInfo(); + const HexagonFrameLowering *HFI = HST->getFrameLowering(); + int FX = cast<FrameIndexSDNode>(N)->getIndex(); + unsigned StkA = HFI->getStackAlignment(); + unsigned MaxA = MFI->getMaxAlignment(); + SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32); + SDLoc DL(N); + SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); + SDNode *R = 0; + + // Use TFR_FI when: + // - the object is fixed, or + // - there are no objects with higher-than-default alignment, or + // - there are no dynamically allocated objects. + // Otherwise, use TFR_FIA. 
+ if (FX < 0 || MaxA <= StkA || !MFI->hasVarSizedObjects()) { + R = CurDAG->getMachineNode(Hexagon::TFR_FI, DL, MVT::i32, FI, Zero); + } else { + auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>(); + unsigned AR = HMFI.getStackAlignBaseVReg(); + SDValue CH = CurDAG->getEntryNode(); + SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero }; + R = CurDAG->getMachineNode(Hexagon::TFR_FIA, DL, MVT::i32, Ops); + } + + if (N->getHasDebugValue()) + CurDAG->TransferDbgValues(SDValue(N, 0), SDValue(R, 0)); + return R; +} + + +SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { + if (N->isMachineOpcode()) { + N->setNodeId(-1); + return nullptr; // Already selected. + } + + switch (N->getOpcode()) { + case ISD::Constant: + return SelectConstant(N); + + case ISD::ConstantFP: + return SelectConstantFP(N); + + case ISD::FrameIndex: + return SelectFrameIndex(N); + + case ISD::ADD: + return SelectAdd(N); + + case ISD::SHL: + return SelectSHL(N); + + case ISD::LOAD: + return SelectLoad(N); + + case ISD::STORE: + return SelectStore(N); + + case ISD::MUL: + return SelectMul(N); + + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::FABS: + case ISD::FNEG: + return SelectBitOp(N); + + case ISD::ZERO_EXTEND: + return SelectZeroExtend(N); + + case ISD::INTRINSIC_W_CHAIN: + return SelectIntrinsicWChain(N); + + case ISD::INTRINSIC_WO_CHAIN: + return SelectIntrinsicWOChain(N); + } + + return SelectCode(N); +} + +bool HexagonDAGToDAGISel:: +SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, + std::vector<SDValue> &OutOps) { + SDValue Inp = Op, Res; + + switch (ConstraintID) { + default: + return true; + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_o: // Offsetable. + case InlineAsm::Constraint_v: // Not offsetable. + case InlineAsm::Constraint_m: // Memory. + if (SelectAddrFI(Inp, Res)) + OutOps.push_back(Res); + else + OutOps.push_back(Inp); + break; + } + + OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); + return false; +} + + +void HexagonDAGToDAGISel::PreprocessISelDAG() { + SelectionDAG &DAG = *CurDAG; + std::vector<SDNode*> Nodes; + for (SDNode &Node : DAG.allnodes()) + Nodes.push_back(&Node); + + // Simplify: (or (select c x 0) z) -> (select c (or x z) z) + // (or (select c 0 y) z) -> (select c z (or y z)) + // This may not be the right thing for all targets, so do it here. + for (auto I: Nodes) { + if (I->getOpcode() != ISD::OR) + continue; + + auto IsZero = [] (const SDValue &V) -> bool { + if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode())) + return SC->isNullValue(); + return false; + }; + auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool { + if (Op.getOpcode() != ISD::SELECT) + return false; + return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2)); + }; + + SDValue N0 = I->getOperand(0), N1 = I->getOperand(1); + EVT VT = I->getValueType(0); + bool SelN0 = IsSelect0(N0); + SDValue SOp = SelN0 ? N0 : N1; + SDValue VOp = SelN0 ? 
N1 : N0; + + if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) { + SDValue SC = SOp.getOperand(0); + SDValue SX = SOp.getOperand(1); + SDValue SY = SOp.getOperand(2); + SDLoc DLS = SOp; + if (IsZero(SY)) { + SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp); + SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp); + DAG.ReplaceAllUsesWith(I, NewSel.getNode()); + } else if (IsZero(SX)) { + SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp); + SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr); + DAG.ReplaceAllUsesWith(I, NewSel.getNode()); + } + } + } +} + +void HexagonDAGToDAGISel::EmitFunctionEntryCode() { + auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget()); + auto &HFI = *HST.getFrameLowering(); + if (!HFI.needsAligna(*MF)) + return; + + MachineFrameInfo *MFI = MF->getFrameInfo(); + MachineBasicBlock *EntryBB = &MF->front(); + unsigned AR = FuncInfo->CreateReg(MVT::i32); + unsigned MaxA = MFI->getMaxAlignment(); + BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::ALIGNA), AR) + .addImm(MaxA); + MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR); +} + +// Match a frame index that can be used in an addressing mode. +bool HexagonDAGToDAGISel::SelectAddrFI(SDValue& N, SDValue &R) { + if (N.getOpcode() != ISD::FrameIndex) + return false; + auto &HFI = *HST->getFrameLowering(); + MachineFrameInfo *MFI = MF->getFrameInfo(); + int FX = cast<FrameIndexSDNode>(N)->getIndex(); + if (!MFI->isFixedObjectIndex(FX) && HFI.needsAligna(*MF)) + return false; + R = CurDAG->getTargetFrameIndex(FX, MVT::i32); + return true; +} + +inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) { + return SelectGlobalAddress(N, R, false); +} + +inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) { + return SelectGlobalAddress(N, R, true); +} + +bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R, + bool UseGP) { + switch (N.getOpcode()) { + case ISD::ADD: { + SDValue N0 = N.getOperand(0); + SDValue N1 = N.getOperand(1); + unsigned GAOpc = N0.getOpcode(); + if (UseGP && GAOpc != HexagonISD::CONST32_GP) + return false; + if (!UseGP && GAOpc != HexagonISD::CONST32) + return false; + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) { + SDValue Addr = N0.getOperand(0); + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) { + if (GA->getOpcode() == ISD::TargetGlobalAddress) { + uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue(); + R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const), + N.getValueType(), NewOff); + return true; + } + } + } + break; + } + case HexagonISD::CONST32: + // The operand(0) of CONST32 is TargetGlobalAddress, which is what we + // want in the instruction. 
+ if (!UseGP) + R = N.getOperand(0); + return !UseGP; + case HexagonISD::CONST32_GP: + if (UseGP) + R = N.getOperand(0); + return UseGP; + default: + return false; + } + + return false; +} + +bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val, + unsigned FromBits, SDValue &Src) { + unsigned Opc = Val.getOpcode(); + switch (Opc) { + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: { + SDValue const &Op0 = Val.getOperand(0); + EVT T = Op0.getValueType(); + if (T.isInteger() && T.getSizeInBits() == FromBits) { + Src = Op0; + return true; + } + break; + } + case ISD::SIGN_EXTEND_INREG: + case ISD::AssertSext: + case ISD::AssertZext: + if (Val.getOperand(0).getValueType().isInteger()) { + VTSDNode *T = cast<VTSDNode>(Val.getOperand(1)); + if (T->getVT().getSizeInBits() == FromBits) { + Src = Val.getOperand(0); + return true; + } + } + break; + case ISD::AND: { + // Check if this is an AND with "FromBits" of lower bits set to 1. + uint64_t FromMask = (1 << FromBits) - 1; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) { + if (C->getZExtValue() == FromMask) { + Src = Val.getOperand(1); + return true; + } + } + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) { + if (C->getZExtValue() == FromMask) { + Src = Val.getOperand(0); + return true; + } + } + break; + } + case ISD::OR: + case ISD::XOR: { + // OR/XOR with the lower "FromBits" bits set to 0. + uint64_t FromMask = (1 << FromBits) - 1; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) { + if ((C->getZExtValue() & FromMask) == 0) { + Src = Val.getOperand(1); + return true; + } + } + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) { + if ((C->getZExtValue() & FromMask) == 0) { + Src = Val.getOperand(0); + return true; + } + } + } + default: + break; + } + return false; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp new file mode 100644 index 0000000..0167090 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -0,0 +1,2894 @@ +//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the interfaces that Hexagon uses to lower LLVM code +// into a selection DAG. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonISelLowering.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonTargetObjectFile.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-lowering" + +static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables", + cl::init(true), cl::Hidden, + cl::desc("Control jump table emission on Hexagon target")); + +static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Hexagon SDNode scheduling")); + +static cl::opt<bool> EnableFastMath("ffast-math", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Fast Math processing")); + +static cl::opt<int> MinimumJumpTables("minimum-jump-tables", + cl::Hidden, cl::ZeroOrMore, cl::init(5), + cl::desc("Set minimum jump tables")); + +static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy", + cl::Hidden, cl::ZeroOrMore, cl::init(6), + cl::desc("Max #stores to inline memcpy")); + +static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os", + cl::Hidden, cl::ZeroOrMore, cl::init(4), + cl::desc("Max #stores to inline memcpy")); + +static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove", + cl::Hidden, cl::ZeroOrMore, cl::init(6), + cl::desc("Max #stores to inline memmove")); + +static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os", + cl::Hidden, cl::ZeroOrMore, cl::init(4), + cl::desc("Max #stores to inline memmove")); + +static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset", + cl::Hidden, cl::ZeroOrMore, cl::init(8), + cl::desc("Max #stores to inline memset")); + +static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os", + cl::Hidden, cl::ZeroOrMore, cl::init(4), + cl::desc("Max #stores to inline memset")); + + +namespace { +class HexagonCCState : public CCState { + unsigned NumNamedVarArgParams; + +public: + HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, + SmallVectorImpl<CCValAssign> &locs, LLVMContext &C, + int NumNamedVarArgParams) + : CCState(CC, isVarArg, MF, locs, C), + NumNamedVarArgParams(NumNamedVarArgParams) {} + + unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; } +}; +} + +// Implement calling convention for Hexagon. 
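+// Each CC_* helper below follows the CCAssignFn convention: it returns false
+// once the value has been given a register or stack location, and true to let
+// the caller try the next rule. CC_Hexagon dispatches on the value type to the
+// 32-bit, 64-bit and HVX vector variants.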
+ +static bool IsHvxVectorType(MVT ty); + +static bool +CC_Hexagon(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +CC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +CC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +CC_HexagonVector(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +RetCC_Hexagon(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +RetCC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +RetCC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +RetCC_HexagonVector(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +static bool +CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + HexagonCCState &HState = static_cast<HexagonCCState &>(State); + + if (ValNo < HState.getNumNamedVarArgParams()) { + // Deal with named arguments. + return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State); + } + + // Deal with un-named arguments. + unsigned ofst; + if (ArgFlags.isByVal()) { + // If pass-by-value, the size allocated on stack is decided + // by ArgFlags.getByValSize(), not by the size of LocVT. 
+ ofst = State.AllocateStack(ArgFlags.getByValSize(), + ArgFlags.getByValAlign()); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) { + LocVT = MVT::i32; + ValVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } + if (LocVT == MVT::i32 || LocVT == MVT::f32) { + ofst = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::i64 || LocVT == MVT::f64) { + ofst = State.AllocateStack(8, 8); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::v2i64 || LocVT == MVT::v4i32 || LocVT == MVT::v8i16 || + LocVT == MVT::v16i8) { + ofst = State.AllocateStack(16, 16); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::v4i64 || LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || + LocVT == MVT::v32i8) { + ofst = State.AllocateStack(32, 32); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 || + LocVT == MVT::v64i8 || LocVT == MVT::v512i1) { + ofst = State.AllocateStack(64, 64); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || + LocVT == MVT::v128i8 || LocVT == MVT::v1024i1) { + ofst = State.AllocateStack(128, 128); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 || + LocVT == MVT::v256i8) { + ofst = State.AllocateStack(256, 256); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + + llvm_unreachable(nullptr); +} + + +static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { + if (ArgFlags.isByVal()) { + // Passed on stack. 
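+ // Reserve a stack slot using the size and alignment recorded in the byval
+ // attributes, and record the resulting memory location.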
+ unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), + ArgFlags.getByValAlign()); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + + if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) { + LocVT = MVT::i32; + ValVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) { + LocVT = MVT::i32; + LocInfo = CCValAssign::BCvt; + } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) { + LocVT = MVT::i64; + LocInfo = CCValAssign::BCvt; + } + + if (LocVT == MVT::i32 || LocVT == MVT::f32) { + if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + if (LocVT == MVT::i64 || LocVT == MVT::f64) { + if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + if (LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || LocVT == MVT::v32i8) { + unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 32); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + + if (IsHvxVectorType(LocVT)) { + if (!CC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + return true; // CC didn't match. +} + + +static bool CC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + static const MCPhysReg RegList[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5 + }; + if (unsigned Reg = State.AllocateReg(RegList)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + + unsigned Offset = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool CC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + if (unsigned Reg = State.AllocateReg(Hexagon::D0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + + static const MCPhysReg RegList1[] = { + Hexagon::D1, Hexagon::D2 + }; + static const MCPhysReg RegList2[] = { + Hexagon::R1, Hexagon::R3 + }; + if (unsigned Reg = State.AllocateReg(RegList1, RegList2)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + + unsigned Offset = State.AllocateStack(8, 8, Hexagon::D2); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool CC_HexagonVector(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + static const MCPhysReg VecLstS[] = { Hexagon::V0, Hexagon::V1, + Hexagon::V2, Hexagon::V3, + Hexagon::V4, Hexagon::V5, + Hexagon::V6, Hexagon::V7, + Hexagon::V8, Hexagon::V9, + Hexagon::V10, Hexagon::V11, + Hexagon::V12, Hexagon::V13, + Hexagon::V14, Hexagon::V15}; + static const MCPhysReg VecLstD[] = { Hexagon::W0, Hexagon::W1, + Hexagon::W2, Hexagon::W3, + Hexagon::W4, Hexagon::W5, + Hexagon::W6, Hexagon::W7}; + auto &MF = State.getMachineFunction(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + bool UseHVX = HST.useHVXOps(); + bool UseHVXDbl = HST.useHVXDblOps(); + + if ((UseHVX && !UseHVXDbl) && + (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 || + LocVT == MVT::v64i8 
|| LocVT == MVT::v512i1)) { + if (unsigned Reg = State.AllocateReg(VecLstS)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + unsigned Offset = State.AllocateStack(64, 64); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + if ((UseHVX && !UseHVXDbl) && + (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || + LocVT == MVT::v128i8)) { + if (unsigned Reg = State.AllocateReg(VecLstD)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + unsigned Offset = State.AllocateStack(128, 128); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + // 128B Mode + if ((UseHVX && UseHVXDbl) && + (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 || + LocVT == MVT::v256i8)) { + if (unsigned Reg = State.AllocateReg(VecLstD)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + unsigned Offset = State.AllocateStack(256, 256); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + if ((UseHVX && UseHVXDbl) && + (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || + LocVT == MVT::v128i8 || LocVT == MVT::v1024i1)) { + if (unsigned Reg = State.AllocateReg(VecLstS)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + unsigned Offset = State.AllocateStack(128, 128); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + return true; +} + +static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + auto &MF = State.getMachineFunction(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + bool UseHVX = HST.useHVXOps(); + bool UseHVXDbl = HST.useHVXDblOps(); + + if (LocVT == MVT::i1 || + LocVT == MVT::i8 || + LocVT == MVT::i16) { + LocVT = MVT::i32; + ValVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) { + LocVT = MVT::i32; + LocInfo = CCValAssign::BCvt; + } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) { + LocVT = MVT::i64; + LocInfo = CCValAssign::BCvt; + } else if (LocVT == MVT::v64i8 || LocVT == MVT::v32i16 || + LocVT == MVT::v16i32 || LocVT == MVT::v8i64 || + LocVT == MVT::v512i1) { + LocVT = MVT::v16i32; + ValVT = MVT::v16i32; + LocInfo = CCValAssign::Full; + } else if (LocVT == MVT::v128i8 || LocVT == MVT::v64i16 || + LocVT == MVT::v32i32 || LocVT == MVT::v16i64 || + (LocVT == MVT::v1024i1 && UseHVX && UseHVXDbl)) { + LocVT = MVT::v32i32; + ValVT = MVT::v32i32; + LocInfo = CCValAssign::Full; + } else if (LocVT == MVT::v256i8 || LocVT == MVT::v128i16 || + LocVT == MVT::v64i32 || LocVT == MVT::v32i64) { + LocVT = MVT::v64i32; + ValVT = MVT::v64i32; + LocInfo = CCValAssign::Full; + } + if (LocVT == MVT::i32 || LocVT == MVT::f32) { + if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + if (LocVT == MVT::i64 || LocVT == MVT::f64) { + if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + if (LocVT == MVT::v16i32 || LocVT == MVT::v32i32 || LocVT == MVT::v64i32) { + if (!RetCC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + 
} + return true; // CC didn't match. +} + +static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + if (LocVT == MVT::i32 || LocVT == MVT::f32) { + if (unsigned Reg = State.AllocateReg(Hexagon::R0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + unsigned Offset = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + if (LocVT == MVT::i64 || LocVT == MVT::f64) { + if (unsigned Reg = State.AllocateReg(Hexagon::D0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + unsigned Offset = State.AllocateStack(8, 8); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + auto &MF = State.getMachineFunction(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + bool UseHVX = HST.useHVXOps(); + bool UseHVXDbl = HST.useHVXDblOps(); + + unsigned OffSiz = 64; + if (LocVT == MVT::v16i32) { + if (unsigned Reg = State.AllocateReg(Hexagon::V0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } else if (LocVT == MVT::v32i32) { + unsigned Req = (UseHVX && UseHVXDbl) ? Hexagon::V0 : Hexagon::W0; + if (unsigned Reg = State.AllocateReg(Req)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + OffSiz = 128; + } else if (LocVT == MVT::v64i32) { + if (unsigned Reg = State.AllocateReg(Hexagon::W0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + OffSiz = 256; + } + + unsigned Offset = State.AllocateStack(OffSiz, OffSiz); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) { + if (VT != PromotedLdStVT) { + setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); + AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), + PromotedLdStVT.getSimpleVT()); + + setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); + AddPromotedToType(ISD::STORE, VT.getSimpleVT(), + PromotedLdStVT.getSimpleVT()); + } +} + +SDValue +HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) +const { + return SDValue(); +} + +/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified +/// by "Src" to address "Dst" of size "Size". Alignment information is +/// specified by the specific parameter attribute. The copy will be passed as +/// a byval function parameter. Sometimes what we are copying is the end of a +/// larger object, the part that does not fit in registers. 
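+/// The copy itself is emitted as an ordinary memcpy node on the given chain.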
+static SDValue +CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, + ISD::ArgFlagsTy Flags, SelectionDAG &DAG, + SDLoc dl) { + + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), + /*isVolatile=*/false, /*AlwaysInline=*/false, + /*isTailCall=*/false, + MachinePointerInfo(), MachinePointerInfo()); +} + +static bool IsHvxVectorType(MVT ty) { + return (ty == MVT::v8i64 || ty == MVT::v16i32 || ty == MVT::v32i16 || + ty == MVT::v64i8 || + ty == MVT::v16i64 || ty == MVT::v32i32 || ty == MVT::v64i16 || + ty == MVT::v128i8 || + ty == MVT::v32i64 || ty == MVT::v64i32 || ty == MVT::v128i16 || + ty == MVT::v256i8 || + ty == MVT::v512i1 || ty == MVT::v1024i1); +} + +// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is +// passed by value, the function prototype is modified to return void and +// the value is stored in memory pointed by a pointer passed by caller. +SDValue +HexagonTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + SDLoc dl, SelectionDAG &DAG) const { + + // CCValAssign - represent the assignment of the return value to locations. + SmallVector<CCValAssign, 16> RVLocs; + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + + // Analyze return values of ISD::RET + CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon); + + SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); + + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); + + // Guarantee that all emitted copies are stuck together with flags. + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. + if (Flag.getNode()) + RetOps.push_back(Flag); + + return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps); +} + +bool HexagonTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { + // If either no tail call or told not to tail call at all, don't. + auto Attr = + CI->getParent()->getParent()->getFnAttribute("disable-tail-calls"); + if (!CI->isTailCall() || Attr.getValueAsString() == "true") + return false; + + return true; +} + +/// LowerCallResult - Lower the result values of an ISD::CALL into the +/// appropriate copies out of appropriate physical registers. This assumes that +/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call +/// being lowered. Returns a SDNode with the same number of values as the +/// ISD::CALL. +SDValue +HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const + SmallVectorImpl<ISD::InputArg> &Ins, + SDLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals, + const SmallVectorImpl<SDValue> &OutVals, + SDValue Callee) const { + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + + CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon); + + // Copy all of the result registers out of their specified physreg. 
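+ // Each CopyFromReg is threaded onto the chain, and its glue output feeds the
+ // next copy so the copies stay glued to the call.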
+ for (unsigned i = 0; i != RVLocs.size(); ++i) { + Chain = DAG.getCopyFromReg(Chain, dl, + RVLocs[i].getLocReg(), + RVLocs[i].getValVT(), InFlag).getValue(1); + InFlag = Chain.getValue(2); + InVals.push_back(Chain.getValue(0)); + } + + return Chain; +} + +/// LowerCall - Functions arguments are copied from virtual regs to +/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. +SDValue +HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc &dl = CLI.DL; + SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; + SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool isVarArg = CLI.IsVarArg; + bool doesNotReturn = CLI.DoesNotReturn; + + bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); + MachineFunction &MF = DAG.getMachineFunction(); + auto PtrVT = getPointerTy(MF.getDataLayout()); + + // Check for varargs. + int NumNamedVarArgParams = -1; + if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = GAN->getGlobal(); + Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); + if (const Function* F = dyn_cast<Function>(GV)) { + // If a function has zero args and is a vararg function, that's + // disallowed so it must be an undeclared function. Do not assume + // varargs if the callee is undefined. + if (F->isVarArg() && F->getFunctionType()->getNumParams() != 0) + NumNamedVarArgParams = F->getFunctionType()->getNumParams(); + } + } + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + HexagonCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext(), NumNamedVarArgParams); + + if (isVarArg) + CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg); + else + CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon); + + auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); + if (Attr.getValueAsString() == "true") + isTailCall = false; + + if (isTailCall) { + bool StructAttrFlag = MF.getFunction()->hasStructRetAttr(); + isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, + isVarArg, IsStructRet, + StructAttrFlag, + Outs, OutVals, Ins, DAG); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isMemLoc()) { + isTailCall = false; + break; + } + } + DEBUG(dbgs() << (isTailCall ? "Eligible for Tail Call\n" + : "Argument must be passed on stack. " + "Not eligible for Tail Call\n")); + } + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = CCInfo.getNextStackOffset(); + SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; + SmallVector<SDValue, 8> MemOpChains; + + auto &HRI = *Subtarget.getRegisterInfo(); + SDValue StackPtr = + DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT); + + bool NeedsArgAlign = false; + unsigned LargestAlignSeen = 0; + // Walk the register/memloc assignments, inserting copies/loads. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = OutVals[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + // Record if we need > 8 byte alignment on an argument. + bool ArgAlign = IsHvxVectorType(VA.getValVT()); + NeedsArgAlign |= ArgAlign; + + // Promote the value if needed. 
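+ // Sub-word arguments are widened to the 32-bit location type using the
+ // extension kind chosen by the calling convention; Full and BCvt locations
+ // are passed through unchanged.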
+ switch (VA.getLocInfo()) {
+ default:
+ // Loc info must be one of Full, SExt, ZExt, or AExt.
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::BCvt:
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isMemLoc()) {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
+ StackPtr.getValueType());
+ MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
+ if (ArgAlign)
+ LargestAlignSeen = std::max(LargestAlignSeen,
+ VA.getLocVT().getStoreSizeInBits() >> 3);
+ if (Flags.isByVal()) {
+ // The argument is a struct passed by value. According to LLVM, "Arg"
+ // is a pointer.
+ MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
+ Flags, DAG, dl));
+ } else {
+ MachinePointerInfo LocPI = MachinePointerInfo::getStack(
+ DAG.getMachineFunction(), LocMemOffset);
+ SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI, false,
+ false, 0);
+ MemOpChains.push_back(S);
+ }
+ continue;
+ }
+
+ // Arguments that can be passed in a register must be kept in the RegsToPass
+ // vector.
+ if (VA.isRegLoc())
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ }
+
+ if (NeedsArgAlign && Subtarget.hasV60TOps()) {
+ DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
+ MachineFrameInfo* MFI = DAG.getMachineFunction().getFrameInfo();
+ // V6 vectors passed by value have 64 or 128 byte alignment depending
+ // on whether we are in 64-byte or 128-byte vector mode.
+ bool UseHVXDbl = Subtarget.useHVXDblOps();
+ assert(Subtarget.useHVXOps());
+ const unsigned ObjAlign = UseHVXDbl ? 128 : 64;
+ LargestAlignSeen = std::max(LargestAlignSeen, ObjAlign);
+ MFI->ensureMaxAlignment(LargestAlignSeen);
+ }
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+
+ if (!isTailCall) {
+ SDValue C = DAG.getConstant(NumBytes, dl, PtrVT, true);
+ Chain = DAG.getCALLSEQ_START(Chain, C, dl);
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ if (!isTailCall) {
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ } else {
+ // For tail calls lower the arguments to the 'real' stack slot.
+ //
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+ //
+ // Do not flag preceding copytoreg stuff together with the following stuff.
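+ // Start with no glue so the copies below form their own glue sequence.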
+ InFlag = SDValue(); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + InFlag = SDValue(); + } + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT); + } else if (ExternalSymbolSDNode *S = + dyn_cast<ExternalSymbolSDNode>(Callee)) { + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT); + } + + // Returns a chain & a flag for retval copy to use. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + } + + if (InFlag.getNode()) + Ops.push_back(InFlag); + + if (isTailCall) { + MF.getFrameInfo()->setHasTailCall(); + return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops); + } + + int OpCode = doesNotReturn ? HexagonISD::CALLv3nr : HexagonISD::CALLv3; + Chain = DAG.getNode(OpCode, dl, NodeTys, Ops); + InFlag = Chain.getValue(1); + + // Create the CALLSEQ_END node. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), + DAG.getIntPtrConstant(0, dl, true), InFlag, dl); + InFlag = Chain.getValue(1); + + // Handle result values, copying them out of physregs into vregs that we + // return. + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, + InVals, OutVals, Callee); +} + +static bool getIndexedAddressParts(SDNode *Ptr, EVT VT, + bool isSEXTLoad, SDValue &Base, + SDValue &Offset, bool &isInc, + SelectionDAG &DAG) { + if (Ptr->getOpcode() != ISD::ADD) + return false; + + auto &HST = static_cast<const HexagonSubtarget&>(DAG.getSubtarget()); + bool UseHVX = HST.useHVXOps(); + bool UseHVXDbl = HST.useHVXDblOps(); + + bool ValidHVXDblType = + (UseHVX && UseHVXDbl) && (VT == MVT::v32i32 || VT == MVT::v16i64 || + VT == MVT::v64i16 || VT == MVT::v128i8); + bool ValidHVXType = + UseHVX && !UseHVXDbl && (VT == MVT::v16i32 || VT == MVT::v8i64 || + VT == MVT::v32i16 || VT == MVT::v64i8); + + if (ValidHVXDblType || ValidHVXType || + VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) { + isInc = (Ptr->getOpcode() == ISD::ADD); + Base = Ptr->getOperand(0); + Offset = Ptr->getOperand(1); + // Ensure that Offset is a constant. + return (isa<ConstantSDNode>(Offset)); + } + + return false; +} + +/// getPostIndexedAddressParts - returns true by value, base pointer and +/// offset pointer and addressing mode by reference if this node can be +/// combined with a load / store to form a post-indexed load / store. 
+bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const +{ + EVT VT; + SDValue Ptr; + bool isSEXTLoad = false; + + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + VT = LD->getMemoryVT(); + isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + VT = ST->getMemoryVT(); + if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) { + return false; + } + } else { + return false; + } + + bool isInc = false; + bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + isInc, DAG); + if (isLegal) { + auto &HII = *Subtarget.getInstrInfo(); + int32_t OffsetVal = cast<ConstantSDNode>(Offset.getNode())->getSExtValue(); + if (HII.isValidAutoIncImm(VT, OffsetVal)) { + AM = isInc ? ISD::POST_INC : ISD::POST_DEC; + return true; + } + } + + return false; +} + +SDValue +HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + MachineFunction &MF = DAG.getMachineFunction(); + auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>(); + switch (Node->getOpcode()) { + case ISD::INLINEASM: { + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the flag operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + if (FuncInfo.hasClobberLR()) + break; + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + ++i; // Skip the ID value. + + switch (InlineAsm::getKind(Flags)) { + default: llvm_unreachable("Bad flags!"); + case InlineAsm::Kind_RegDef: + case InlineAsm::Kind_RegUse: + case InlineAsm::Kind_Imm: + case InlineAsm::Kind_Clobber: + case InlineAsm::Kind_Mem: { + for (; NumVals; --NumVals, ++i) {} + break; + } + case InlineAsm::Kind_RegDefEarlyClobber: { + for (; NumVals; --NumVals, ++i) { + unsigned Reg = + cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + + // Check it to be lr + const HexagonRegisterInfo *QRI = Subtarget.getRegisterInfo(); + if (Reg == QRI->getRARegister()) { + FuncInfo.setHasClobberLR(true); + break; + } + } + break; + } + } + } + } + } // Node->getOpcode + return Op; +} + +SDValue +HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + SDValue Align = Op.getOperand(2); + SDLoc dl(Op); + + ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align); + assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC"); + + unsigned A = AlignConst->getSExtValue(); + auto &HFI = *Subtarget.getFrameLowering(); + // "Zero" means natural stack alignment. 
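+ // In that case use the default stack alignment from the frame lowering.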
+ if (A == 0)
+ A = HFI.getStackAlignment();
+
+ DEBUG({
+ dbgs () << LLVM_FUNCTION_NAME << " Align: " << A << " Size: ";
+ Size.getNode()->dump(&DAG);
+ dbgs() << "\n";
+ });
+
+ SDValue AC = DAG.getConstant(A, dl, MVT::i32);
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+ SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
+ if (Op.getNode()->getHasDebugValue())
+ DAG.TransferDbgValues(Op, AA);
+ return AA;
+}
+
+SDValue
+HexagonTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const
+ SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
+
+ // For LLVM, in the case when returning a struct by value (>8byte),
+ // the first argument is a pointer that points to the location on caller's
+ // stack where the return value will be stored. For Hexagon, the location on
+ // caller's stack is passed only when the struct size is smaller than (or
+ // equal to) 8 bytes. If not, no address will be passed into the callee and
+ // the callee returns the result directly through R0/R1.
+
+ SmallVector<SDValue, 8> MemOps;
+ bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps();
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ unsigned ObjSize;
+ unsigned StackLocation;
+ int FI;
+
+ if ( (VA.isRegLoc() && !Flags.isByVal())
+ || (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() > 8)) {
+ // Arguments passed in registers:
+ // 1. int, long long, ptr args that get allocated in a register.
+ // 2. Large struct that gets a register to put its address in.
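+ // Copy the incoming physical register into a fresh virtual register of the
+ // matching register class and use that copy as the argument value.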
+ EVT RegVT = VA.getLocVT();
+ if (RegVT == MVT::i8 || RegVT == MVT::i16 ||
+ RegVT == MVT::i32 || RegVT == MVT::f32) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (RegVT == MVT::i64 || RegVT == MVT::f64) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+
+ // Single Vector
+ } else if ((RegVT == MVT::v8i64 || RegVT == MVT::v16i32 ||
+ RegVT == MVT::v32i16 || RegVT == MVT::v64i8)) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VectorRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (UseHVX && UseHVXDbl &&
+ ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 ||
+ RegVT == MVT::v64i16 || RegVT == MVT::v128i8))) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VectorRegs128BRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+
+ // Double Vector
+ } else if ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 ||
+ RegVT == MVT::v64i16 || RegVT == MVT::v128i8)) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VecDblRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (UseHVX && UseHVXDbl &&
+ ((RegVT == MVT::v32i64 || RegVT == MVT::v64i32 ||
+ RegVT == MVT::v128i16 || RegVT == MVT::v256i8))) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VecDblRegs128BRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (RegVT == MVT::v512i1 || RegVT == MVT::v1024i1) {
+ assert(0 && "need to support VecPred regs");
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VecPredRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else {
+ assert (0);
+ }
+ } else if (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() <= 8) {
+ assert (0 && "ByValSize must be bigger than 8 bytes");
+ } else {
+ // Sanity check.
+ assert(VA.isMemLoc());
+
+ if (Flags.isByVal()) {
+ // If it's a byval parameter, then we need to compute the
+ // "real" size, not the size of the pointer.
+ ObjSize = Flags.getByValSize();
+ } else {
+ ObjSize = VA.getLocVT().getStoreSizeInBits() >> 3;
+ }
+
+ StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
+ // Create the frame index object for this incoming parameter...
+ FI = MFI->CreateFixedObject(ObjSize, StackLocation, true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+
+ if (Flags.isByVal()) {
+ // If it's a pass-by-value aggregate, then do not dereference the stack
+ // location. Instead, we should generate a reference to the stack
+ // location.
+ InVals.push_back(FIN);
+ } else {
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo(), false, false,
+ false, 0));
+ }
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+
+ if (isVarArg) {
+ // This will point to the next argument passed via stack.
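+ // Record the fixed object so LowerVASTART can materialize its address.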
+ int FrameIndex = MFI->CreateFixedObject(Hexagon_PointerSize, + HEXAGON_LRFP_SIZE + + CCInfo.getNextStackOffset(), + true); + FuncInfo.setVarArgsFrameIndex(FrameIndex); + } + + return Chain; +} + +SDValue +HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { + // VASTART stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + MachineFunction &MF = DAG.getMachineFunction(); + HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>(); + SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, + Op.getOperand(1), MachinePointerInfo(SV), false, + false, 0); +} + +// Creates a SPLAT instruction for a constant value VAL. +static SDValue createSplat(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue Val) { + if (VT.getSimpleVT() == MVT::v4i8) + return DAG.getNode(HexagonISD::VSPLATB, dl, VT, Val); + + if (VT.getSimpleVT() == MVT::v4i16) + return DAG.getNode(HexagonISD::VSPLATH, dl, VT, Val); + + return SDValue(); +} + +static bool isSExtFree(SDValue N) { + // A sign-extend of a truncate of a sign-extend is free. + if (N.getOpcode() == ISD::TRUNCATE && + N.getOperand(0).getOpcode() == ISD::AssertSext) + return true; + // We have sign-extended loads. + if (N.getOpcode() == ISD::LOAD) + return true; + return false; +} + +SDValue HexagonTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue InpVal = Op.getOperand(0); + if (isa<ConstantSDNode>(InpVal)) { + uint64_t V = cast<ConstantSDNode>(InpVal)->getZExtValue(); + return DAG.getTargetConstant(countPopulation(V), dl, MVT::i64); + } + SDValue PopOut = DAG.getNode(HexagonISD::POPCOUNT, dl, MVT::i32, InpVal); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, PopOut); +} + +SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue Cmp = Op.getOperand(2); + ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get(); + + EVT VT = Op.getValueType(); + EVT LHSVT = LHS.getValueType(); + EVT RHSVT = RHS.getValueType(); + + if (LHSVT == MVT::v2i16) { + assert(ISD::isSignedIntSetCC(CC) || ISD::isUnsignedIntSetCC(CC)); + unsigned ExtOpc = ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; + SDValue LX = DAG.getNode(ExtOpc, dl, MVT::v2i32, LHS); + SDValue RX = DAG.getNode(ExtOpc, dl, MVT::v2i32, RHS); + SDValue SC = DAG.getNode(ISD::SETCC, dl, MVT::v2i1, LX, RX, Cmp); + return SC; + } + + // Treat all other vector types as legal. + if (VT.isVector()) + return Op; + + // Equals and not equals should use sign-extend, not zero-extend, since + // we can represent small negative values in the compare instructions. + // The LLVM default is to use zero-extend arbitrarily in these cases. 
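+ // Only rewrite the compare when the constant RHS is negative or when a
+ // sign-extension of either operand is known to be free.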
+ if ((CC == ISD::SETEQ || CC == ISD::SETNE) && + (RHSVT == MVT::i8 || RHSVT == MVT::i16) && + (LHSVT == MVT::i8 || LHSVT == MVT::i16)) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS); + if (C && C->getAPIntValue().isNegative()) { + LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS); + RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS); + return DAG.getNode(ISD::SETCC, dl, Op.getValueType(), + LHS, RHS, Op.getOperand(2)); + } + if (isSExtFree(LHS) || isSExtFree(RHS)) { + LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS); + RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS); + return DAG.getNode(ISD::SETCC, dl, Op.getValueType(), + LHS, RHS, Op.getOperand(2)); + } + } + return SDValue(); +} + +SDValue +HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { + SDValue PredOp = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2); + EVT OpVT = Op1.getValueType(); + SDLoc DL(Op); + + if (OpVT == MVT::v2i16) { + SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1); + SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2); + SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2); + SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL); + return TR; + } + + return SDValue(); +} + +// Handle only specific vector loads. +SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + LoadSDNode *LoadNode = cast<LoadSDNode>(Op); + SDValue Chain = LoadNode->getChain(); + SDValue Ptr = Op.getOperand(1); + SDValue LoweredLoad; + SDValue Result; + SDValue Base = LoadNode->getBasePtr(); + ISD::LoadExtType Ext = LoadNode->getExtensionType(); + unsigned Alignment = LoadNode->getAlignment(); + SDValue LoadChain; + + if(Ext == ISD::NON_EXTLOAD) + Ext = ISD::ZEXTLOAD; + + if (VT == MVT::v4i16) { + if (Alignment == 2) { + SDValue Loads[4]; + // Base load. + Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // Base+2 load. + SDValue Increment = DAG.getConstant(2, DL, MVT::i32); + Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); + Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // SHL 16, then OR base and base+2. + SDValue ShiftAmount = DAG.getConstant(16, DL, MVT::i32); + SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount); + SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]); + // Base + 4. + Increment = DAG.getConstant(4, DL, MVT::i32); + Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); + Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // Base + 6. + Increment = DAG.getConstant(6, DL, MVT::i32); + Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); + Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // SHL 16, then OR base+4 and base+6. 
+ Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount); + SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]); + // Combine to i64. This could be optimised out later if we can + // affect reg allocation of this code. + Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2); + LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + Loads[0].getValue(1), Loads[1].getValue(1), + Loads[2].getValue(1), Loads[3].getValue(1)); + } else { + // Perform default type expansion. + Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(), + LoadNode->isVolatile(), LoadNode->isNonTemporal(), + LoadNode->isInvariant(), LoadNode->getAlignment()); + LoadChain = Result.getValue(1); + } + } else + llvm_unreachable("Custom lowering unsupported load"); + + Result = DAG.getNode(ISD::BITCAST, DL, VT, Result); + // Since we pretend to lower a load, we need the original chain + // info attached to the result. + SDValue Ops[] = { Result, LoadChain }; + + return DAG.getMergeValues(Ops, DL); +} + + +SDValue +HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { + EVT ValTy = Op.getValueType(); + ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op); + unsigned Align = CPN->getAlignment(); + Reloc::Model RM = HTM.getRelocationModel(); + unsigned char TF = (RM == Reloc::PIC_) ? HexagonII::MO_PCREL : 0; + + SDValue T; + if (CPN->isMachineConstantPoolEntry()) + T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, TF); + else + T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, TF); + if (RM == Reloc::PIC_) + return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T); + return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T); +} + +SDValue +HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + int Idx = cast<JumpTableSDNode>(Op)->getIndex(); + Reloc::Model RM = HTM.getRelocationModel(); + if (RM == Reloc::PIC_) { + SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T); + } + + SDValue T = DAG.getTargetJumpTable(Idx, VT); + return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T); +} + +SDValue +HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + MFI.setReturnAddressIsTaken(true); + + if (verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + + EVT VT = Op.getValueType(); + SDLoc dl(Op); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + if (Depth) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = DAG.getConstant(4, dl, MVT::i32); + return DAG.getLoad(VT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), + MachinePointerInfo(), false, false, false, 0); + } + + // Return LR, which contains the return address. Mark it an implicit live-in. 
+ unsigned Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32)); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); +} + +SDValue +HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); + MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo(); + MFI.setFrameAddressIsTaken(true); + + EVT VT = Op.getValueType(); + SDLoc dl(Op); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, + HRI.getFrameRegister(), VT); + while (Depth--) + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, + MachinePointerInfo(), + false, false, false, 0); + return FrameAddr; +} + +SDValue +HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const { + SDLoc dl(Op); + return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0)); +} + + +SDValue +HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + auto *GAN = cast<GlobalAddressSDNode>(Op); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + auto *GV = GAN->getGlobal(); + int64_t Offset = GAN->getOffset(); + + auto &HLOF = *HTM.getObjFileLowering(); + Reloc::Model RM = HTM.getRelocationModel(); + + if (RM == Reloc::Static) { + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset); + if (HLOF.IsGlobalInSmallSection(GV, HTM)) + return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA); + return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA); + } + + bool UsePCRel = GV->hasInternalLinkage() || GV->hasHiddenVisibility() || + (GV->hasLocalLinkage() && !isa<Function>(GV)); + if (UsePCRel) { + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset, + HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA); + } + + // Use GOT index. + SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT); + SDValue Off = DAG.getConstant(Offset, dl, MVT::i32); + return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off); +} + +// Specifies that for loads and stores VT can be promoted to PromotedLdStVT. 
+SDValue +HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + SDLoc dl(Op); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + Reloc::Model RM = HTM.getRelocationModel(); + if (RM == Reloc::Static) { + SDValue A = DAG.getTargetBlockAddress(BA, PtrVT); + return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A); + } + + SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A); +} + +SDValue +HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) + const { + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT, + HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym); +} + +//===----------------------------------------------------------------------===// +// TargetLowering Implementation +//===----------------------------------------------------------------------===// + +HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, + const HexagonSubtarget &ST) + : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)), + Subtarget(ST) { + bool IsV4 = !Subtarget.hasV5TOps(); + auto &HRI = *Subtarget.getRegisterInfo(); + bool UseHVX = Subtarget.useHVXOps(); + bool UseHVXSgl = Subtarget.useHVXSglOps(); + bool UseHVXDbl = Subtarget.useHVXDblOps(); + + setPrefLoopAlignment(4); + setPrefFunctionAlignment(4); + setMinFunctionAlignment(2); + setInsertFencesForAtomic(false); + setStackPointerRegisterToSaveRestore(HRI.getStackRegister()); + + if (EnableHexSDNodeSched) + setSchedulingPreference(Sched::VLIW); + else + setSchedulingPreference(Sched::Source); + + // Limits for inline expansion of memcpy/memmove + MaxStoresPerMemcpy = MaxStoresPerMemcpyCL; + MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL; + MaxStoresPerMemmove = MaxStoresPerMemmoveCL; + MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL; + MaxStoresPerMemset = MaxStoresPerMemsetCL; + MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL; + + // + // Set up register classes. 
+ // + + addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass); + addRegisterClass(MVT::v2i1, &Hexagon::PredRegsRegClass); // bbbbaaaa + addRegisterClass(MVT::v4i1, &Hexagon::PredRegsRegClass); // ddccbbaa + addRegisterClass(MVT::v8i1, &Hexagon::PredRegsRegClass); // hgfedcba + addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v8i8, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass); + + if (Subtarget.hasV5TOps()) { + addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass); + } + + if (Subtarget.hasV60TOps()) { + if (Subtarget.useHVXSglOps()) { + addRegisterClass(MVT::v64i8, &Hexagon::VectorRegsRegClass); + addRegisterClass(MVT::v32i16, &Hexagon::VectorRegsRegClass); + addRegisterClass(MVT::v16i32, &Hexagon::VectorRegsRegClass); + addRegisterClass(MVT::v8i64, &Hexagon::VectorRegsRegClass); + addRegisterClass(MVT::v128i8, &Hexagon::VecDblRegsRegClass); + addRegisterClass(MVT::v64i16, &Hexagon::VecDblRegsRegClass); + addRegisterClass(MVT::v32i32, &Hexagon::VecDblRegsRegClass); + addRegisterClass(MVT::v16i64, &Hexagon::VecDblRegsRegClass); + addRegisterClass(MVT::v512i1, &Hexagon::VecPredRegsRegClass); + } else if (Subtarget.useHVXDblOps()) { + addRegisterClass(MVT::v128i8, &Hexagon::VectorRegs128BRegClass); + addRegisterClass(MVT::v64i16, &Hexagon::VectorRegs128BRegClass); + addRegisterClass(MVT::v32i32, &Hexagon::VectorRegs128BRegClass); + addRegisterClass(MVT::v16i64, &Hexagon::VectorRegs128BRegClass); + addRegisterClass(MVT::v256i8, &Hexagon::VecDblRegs128BRegClass); + addRegisterClass(MVT::v128i16, &Hexagon::VecDblRegs128BRegClass); + addRegisterClass(MVT::v64i32, &Hexagon::VecDblRegs128BRegClass); + addRegisterClass(MVT::v32i64, &Hexagon::VecDblRegs128BRegClass); + addRegisterClass(MVT::v1024i1, &Hexagon::VecPredRegs128BRegClass); + } + + } + + // + // Handling of scalar operations. + // + // All operations default to "legal", except: + // - indexed loads and stores (pre-/post-incremented), + // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS, + // ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN, + // FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP, + // FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG, + // which default to "expand" for at least one type. + + // Misc operations. + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); // Default: expand + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand + + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction(ISD::INLINEASM, MVT::Other, Custom); + setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); + setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + + // Custom legalize GlobalAddress nodes into CONST32. 
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+
+ // Hexagon needs to optimize cases with negative constants.
+ setOperationAction(ISD::SETCC, MVT::i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::i16, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+
+ if (EmitJumpTables)
+ setMinimumJumpTableEntries(2);
+ else
+ setMinimumJumpTableEntries(MinimumJumpTables);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ // Hexagon has instructions for add/sub with carry. The problem with
+ // modeling these instructions is that they produce 2 results: Rdd and Px.
+ // To model the update of Px, we will have to use Defs[p0..p3] which will
+ // cause any predicate live range to spill. So, we pretend we don't have
+ // these instructions.
+ setOperationAction(ISD::ADDE, MVT::i8, Expand);
+ setOperationAction(ISD::ADDE, MVT::i16, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i64, Expand);
+ setOperationAction(ISD::SUBE, MVT::i8, Expand);
+ setOperationAction(ISD::SUBE, MVT::i16, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i64, Expand);
+ setOperationAction(ISD::ADDC, MVT::i8, Expand);
+ setOperationAction(ISD::ADDC, MVT::i16, Expand);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDC, MVT::i64, Expand);
+ setOperationAction(ISD::SUBC, MVT::i8, Expand);
+ setOperationAction(ISD::SUBC, MVT::i16, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i64, Expand);
+
+ // The only add/sub operations that detect overflow are the saturating ones.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::UADDO, VT, Expand);
+ setOperationAction(ISD::SADDO, VT, Expand);
+ setOperationAction(ISD::USUBO, VT, Expand);
+ setOperationAction(ISD::SSUBO, VT, Expand);
+ }
+
+ setOperationAction(ISD::CTLZ, MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ, MVT::i16, Promote);
+ setOperationAction(ISD::CTTZ, MVT::i8, Promote);
+ setOperationAction(ISD::CTTZ, MVT::i16, Promote);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Promote);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote);
+
+ // In V5, popcount can count # of 1s in i64 but returns i32.
+ // On V4 it will be expanded (set later).
+ setOperationAction(ISD::CTPOP, MVT::i8, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i16, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i32, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+
+ // We custom lower i64-to-i64 mul so that it is not considered a legal
+ // operation. There is a pattern that will match i64 mul and transform it
+ // to a series of instructions.
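+ // (Roughly: for a*b split into 32-bit halves, the result is
+ //  lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32), built from 32x32
+ //  multiply and accumulate steps.)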
+ setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); + + for (unsigned IntExpOp : + { ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, + ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR, + ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS, + ISD::SMUL_LOHI, ISD::UMUL_LOHI }) { + setOperationAction(IntExpOp, MVT::i32, Expand); + setOperationAction(IntExpOp, MVT::i64, Expand); + } + + for (unsigned FPExpOp : + {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS, + ISD::FPOW, ISD::FCOPYSIGN}) { + setOperationAction(FPExpOp, MVT::f32, Expand); + setOperationAction(FPExpOp, MVT::f64, Expand); + } + + // No extending loads from i32. + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand); + } + // Turn FP truncstore into trunc + store. + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + // Turn FP extload into load/fextend. + for (MVT VT : MVT::fp_valuetypes()) + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); + + // Expand BR_CC and SELECT_CC for all integer and fp types. + for (MVT VT : MVT::integer_valuetypes()) { + setOperationAction(ISD::BR_CC, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); + } + for (MVT VT : MVT::fp_valuetypes()) { + setOperationAction(ISD::BR_CC, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); + } + setOperationAction(ISD::BR_CC, MVT::Other, Expand); + + // + // Handling of vector operations. + // + + // Custom lower v4i16 load only. Let v4i16 store to be + // promoted for now. + promoteLdStType(MVT::v4i8, MVT::i32); + promoteLdStType(MVT::v2i16, MVT::i32); + promoteLdStType(MVT::v8i8, MVT::i64); + promoteLdStType(MVT::v2i32, MVT::i64); + + setOperationAction(ISD::LOAD, MVT::v4i16, Custom); + setOperationAction(ISD::STORE, MVT::v4i16, Promote); + AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::i64); + AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64); + + // Set the action for vector operations to "expand", then override it with + // either "custom" or "legal" for specific cases. 
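+ // (Every opcode in VectExpOps below starts out expanded for every vector
+ //  type; the "Types natively supported" loop further down then re-marks the
+ //  operations Hexagon handles directly in registers.)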
+ static const unsigned VectExpOps[] = { + // Integer arithmetic: + ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV, + ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::ADDC, + ISD::SUBC, ISD::SADDO, ISD::UADDO, ISD::SSUBO, ISD::USUBO, + ISD::SMUL_LOHI, ISD::UMUL_LOHI, + // Logical/bit: + ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR, + ISD::CTPOP, ISD::CTLZ, ISD::CTTZ, ISD::CTLZ_ZERO_UNDEF, + ISD::CTTZ_ZERO_UNDEF, + // Floating point arithmetic/math functions: + ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV, + ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN, + ISD::FCOS, ISD::FPOWI, ISD::FPOW, ISD::FLOG, ISD::FLOG2, + ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC, + ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR, + ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS, + // Misc: + ISD::SELECT, ISD::ConstantPool, + // Vector: + ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR, + ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT, + ISD::EXTRACT_SUBVECTOR, ISD::INSERT_SUBVECTOR, + ISD::CONCAT_VECTORS, ISD::VECTOR_SHUFFLE + }; + + for (MVT VT : MVT::vector_valuetypes()) { + for (unsigned VectExpOp : VectExpOps) + setOperationAction(VectExpOp, VT, Expand); + + // Expand all extended loads and truncating stores: + for (MVT TargetVT : MVT::vector_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand); + setTruncStoreAction(VT, TargetVT, Expand); + } + + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); + } + + // Types natively supported: + for (MVT NativeVT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v32i1, MVT::v64i1, + MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v1i32, + MVT::v2i32, MVT::v1i64}) { + setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, NativeVT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, NativeVT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, NativeVT, Custom); + + setOperationAction(ISD::ADD, NativeVT, Legal); + setOperationAction(ISD::SUB, NativeVT, Legal); + setOperationAction(ISD::MUL, NativeVT, Legal); + setOperationAction(ISD::AND, NativeVT, Legal); + setOperationAction(ISD::OR, NativeVT, Legal); + setOperationAction(ISD::XOR, NativeVT, Legal); + } + + setOperationAction(ISD::SETCC, MVT::v2i16, Custom); + setOperationAction(ISD::VSELECT, MVT::v2i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); + if (UseHVX) { + if (UseHVXSgl) { + setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom); + } else if (UseHVXDbl) { + setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom); + } else { + llvm_unreachable("Unrecognized HVX mode"); + } + } + // Subtarget-specific operation actions. 
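+ // (V5 adds hardware floating point; on V4 every FP operation is routed
+ //  through the __hexagon_* runtime calls registered further down.)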
+ // + if (Subtarget.hasV5TOps()) { + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FADD, MVT::f64, Expand); + setOperationAction(ISD::FSUB, MVT::f64, Expand); + setOperationAction(ISD::FMUL, MVT::f64, Expand); + + setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); + + } else { // V4 + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f64, Expand); + setCondCodeAction(ISD::SETUNE, MVT::f64, Expand); + + setOperationAction(ISD::CTPOP, MVT::i8, Expand); + setOperationAction(ISD::CTPOP, MVT::i16, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + + // Expand these operations for both f32 and f64: + for (unsigned FPExpOpV4 : + {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA}) { + setOperationAction(FPExpOpV4, MVT::f32, Expand); + setOperationAction(FPExpOpV4, MVT::f64, Expand); + } + + for (ISD::CondCode FPExpCCV4 : + {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE, + ISD::SETUO, ISD::SETO}) { + setCondCodeAction(FPExpCCV4, MVT::f32, Expand); + setCondCodeAction(FPExpCCV4, MVT::f64, Expand); + } + } + + // Handling of indexed loads/stores: default is "expand". 
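+ // (Only the post-increment forms are marked legal below; pre-increment
+ //  addressing stays expanded.)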
+ // + for (MVT LSXTy : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { + setIndexedLoadAction(ISD::POST_INC, LSXTy, Legal); + setIndexedStoreAction(ISD::POST_INC, LSXTy, Legal); + } + + if (UseHVXDbl) { + for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) { + setIndexedLoadAction(ISD::POST_INC, VT, Legal); + setIndexedStoreAction(ISD::POST_INC, VT, Legal); + } + } + + computeRegisterProperties(&HRI); + + // + // Library calls for unsupported operations + // + bool FastMath = EnableFastMath; + + setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3"); + setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3"); + setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3"); + setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3"); + setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3"); + setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3"); + setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3"); + setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3"); + + setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf"); + setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf"); + setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti"); + setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti"); + setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti"); + setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti"); + + if (IsV4) { + // Handle single-precision floating point operations on V4. + if (FastMath) { + setLibcallName(RTLIB::ADD_F32, "__hexagon_fast_addsf3"); + setLibcallName(RTLIB::SUB_F32, "__hexagon_fast_subsf3"); + setLibcallName(RTLIB::MUL_F32, "__hexagon_fast_mulsf3"); + setLibcallName(RTLIB::OGT_F32, "__hexagon_fast_gtsf2"); + setLibcallName(RTLIB::OLT_F32, "__hexagon_fast_ltsf2"); + // Double-precision compares. + setLibcallName(RTLIB::OGT_F64, "__hexagon_fast_gtdf2"); + setLibcallName(RTLIB::OLT_F64, "__hexagon_fast_ltdf2"); + } else { + setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3"); + setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3"); + setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3"); + setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2"); + setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2"); + // Double-precision compares. + setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2"); + setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2"); + } + } + + // This is the only fast library function for sqrtd. + if (FastMath) + setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2"); + + // Prefix is: nothing for "slow-math", + // "fast2_" for V4 fast-math and V5+ fast-math double-precision + // (actually, keep fast-math and fast-math2 separate for now) + if (FastMath) { + setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3"); + setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3"); + setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3"); + setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3"); + // Calling __hexagon_fast2_divsf3 with fast-math on V5 (ok). 
+ setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3"); + } else { + setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3"); + setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3"); + setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3"); + setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3"); + setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3"); + } + + if (Subtarget.hasV5TOps()) { + if (FastMath) + setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf"); + else + setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf"); + } else { + // V4 + setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf"); + setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf"); + setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf"); + setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf"); + setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf"); + setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf"); + setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf"); + setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf"); + setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi"); + setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi"); + setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi"); + setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi"); + setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi"); + setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi"); + setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi"); + setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi"); + setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2"); + setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2"); + setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2"); + setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2"); + setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2"); + setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2"); + setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2"); + setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2"); + setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2"); + setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2"); + setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2"); + setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2"); + setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2"); + setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2"); + } + + // These cause problems when the shift amount is non-constant. 
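+ // (Setting the names to nullptr keeps the compiler from emitting calls to
+ //  the i128 shift helpers at all.)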
+ setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); +} + + +const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch ((HexagonISD::NodeType)Opcode) { + case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA"; + case HexagonISD::ARGEXTEND: return "HexagonISD::ARGEXTEND"; + case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT"; + case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL"; + case HexagonISD::BARRIER: return "HexagonISD::BARRIER"; + case HexagonISD::CALLR: return "HexagonISD::CALLR"; + case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr"; + case HexagonISD::CALLv3: return "HexagonISD::CALLv3"; + case HexagonISD::COMBINE: return "HexagonISD::COMBINE"; + case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP"; + case HexagonISD::CONST32: return "HexagonISD::CONST32"; + case HexagonISD::CP: return "HexagonISD::CP"; + case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH"; + case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN"; + case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU"; + case HexagonISD::EXTRACTURP: return "HexagonISD::EXTRACTURP"; + case HexagonISD::FCONST32: return "HexagonISD::FCONST32"; + case HexagonISD::INSERT: return "HexagonISD::INSERT"; + case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP"; + case HexagonISD::JT: return "HexagonISD::JT"; + case HexagonISD::PACKHL: return "HexagonISD::PACKHL"; + case HexagonISD::POPCOUNT: return "HexagonISD::POPCOUNT"; + case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; + case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB"; + case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH"; + case HexagonISD::SHUFFOB: return "HexagonISD::SHUFFOB"; + case HexagonISD::SHUFFOH: return "HexagonISD::SHUFFOH"; + case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; + case HexagonISD::VCMPBEQ: return "HexagonISD::VCMPBEQ"; + case HexagonISD::VCMPBGT: return "HexagonISD::VCMPBGT"; + case HexagonISD::VCMPBGTU: return "HexagonISD::VCMPBGTU"; + case HexagonISD::VCMPHEQ: return "HexagonISD::VCMPHEQ"; + case HexagonISD::VCMPHGT: return "HexagonISD::VCMPHGT"; + case HexagonISD::VCMPHGTU: return "HexagonISD::VCMPHGTU"; + case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ"; + case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT"; + case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU"; + case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE"; + case HexagonISD::VSHLH: return "HexagonISD::VSHLH"; + case HexagonISD::VSHLW: return "HexagonISD::VSHLW"; + case HexagonISD::VSPLATB: return "HexagonISD::VSPLTB"; + case HexagonISD::VSPLATH: return "HexagonISD::VSPLATH"; + case HexagonISD::VSRAH: return "HexagonISD::VSRAH"; + case HexagonISD::VSRAW: return "HexagonISD::VSRAW"; + case HexagonISD::VSRLH: return "HexagonISD::VSRLH"; + case HexagonISD::VSRLW: return "HexagonISD::VSRLW"; + case HexagonISD::VSXTBH: return "HexagonISD::VSXTBH"; + case HexagonISD::VSXTBW: return "HexagonISD::VSXTBW"; + case HexagonISD::OP_END: break; + } + return nullptr; +} + +bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { + EVT MTy1 = EVT::getEVT(Ty1); + EVT MTy2 = EVT::getEVT(Ty2); + if (!MTy1.isSimple() || !MTy2.isSimple()) + return false; + return (MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32); +} + +bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { + if (!VT1.isSimple() || !VT2.isSimple()) + return false; + return (VT1.getSimpleVT() == MVT::i64) && 
(VT2.getSimpleVT() == MVT::i32); +} + +// shouldExpandBuildVectorWithShuffles +// Should we expand the build vector with shuffles? +bool +HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT, + unsigned DefinedValues) const { + + // Hexagon vector shuffle operates on element sizes of bytes or halfwords + EVT EltVT = VT.getVectorElementType(); + int EltBits = EltVT.getSizeInBits(); + if ((EltBits != 8) && (EltBits != 16)) + return false; + + return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); +} + +// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3). V1 and +// V2 are the two vectors to select data from, V3 is the permutation. +static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + if (V2.getOpcode() == ISD::UNDEF) + V2 = V1; + + if (SVN->isSplat()) { + int Lane = SVN->getSplatIndex(); + if (Lane == -1) Lane = 0; + + // Test if V1 is a SCALAR_TO_VECTOR. + if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) + return createSplat(DAG, dl, VT, V1.getOperand(0)); + + // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR + // (and probably will turn into a SCALAR_TO_VECTOR once legalization + // reaches it). + if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && + !isa<ConstantSDNode>(V1.getOperand(0))) { + bool IsScalarToVector = true; + for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) + if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { + IsScalarToVector = false; + break; + } + if (IsScalarToVector) + return createSplat(DAG, dl, VT, V1.getOperand(0)); + } + return createSplat(DAG, dl, VT, DAG.getConstant(Lane, dl, MVT::i32)); + } + + // FIXME: We need to support more general vector shuffles. See + // below the comment from the ARM backend that deals in the general + // case with the vector shuffles. For now, let expand handle these. + return SDValue(); + + // If the shuffle is not directly supported and it has 4 elements, use + // the PerfectShuffle-generated table to synthesize it from other shuffles. +} + +// If BUILD_VECTOR has same base element repeated several times, +// report true. +static bool isCommonSplatElement(BuildVectorSDNode *BVN) { + unsigned NElts = BVN->getNumOperands(); + SDValue V0 = BVN->getOperand(0); + + for (unsigned i = 1, e = NElts; i != e; ++i) { + if (BVN->getOperand(i) != V0) + return false; + } + return true; +} + +// LowerVECTOR_SHIFT - Lower a vector shift. Try to convert +// <VT> = SHL/SRA/SRL <VT> by <VT> to Hexagon specific +// <VT> = SHL/SRA/SRL <VT> by <IT/i32>. 
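+// For example, (v4i16 shl V, (build_vector 3,3,3,3)) becomes VSHLH V, 3; the
+// rewrite only applies when the shift-amount vector is a splat of one value.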
+static SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) { + BuildVectorSDNode *BVN = 0; + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + SDValue V3; + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + if ((BVN = dyn_cast<BuildVectorSDNode>(V1.getNode())) && + isCommonSplatElement(BVN)) + V3 = V2; + else if ((BVN = dyn_cast<BuildVectorSDNode>(V2.getNode())) && + isCommonSplatElement(BVN)) + V3 = V1; + else + return SDValue(); + + SDValue CommonSplat = BVN->getOperand(0); + SDValue Result; + + if (VT.getSimpleVT() == MVT::v4i16) { + switch (Op.getOpcode()) { + case ISD::SRA: + Result = DAG.getNode(HexagonISD::VSRAH, dl, VT, V3, CommonSplat); + break; + case ISD::SHL: + Result = DAG.getNode(HexagonISD::VSHLH, dl, VT, V3, CommonSplat); + break; + case ISD::SRL: + Result = DAG.getNode(HexagonISD::VSRLH, dl, VT, V3, CommonSplat); + break; + default: + return SDValue(); + } + } else if (VT.getSimpleVT() == MVT::v2i32) { + switch (Op.getOpcode()) { + case ISD::SRA: + Result = DAG.getNode(HexagonISD::VSRAW, dl, VT, V3, CommonSplat); + break; + case ISD::SHL: + Result = DAG.getNode(HexagonISD::VSHLW, dl, VT, V3, CommonSplat); + break; + case ISD::SRL: + Result = DAG.getNode(HexagonISD::VSRLW, dl, VT, V3, CommonSplat); + break; + default: + return SDValue(); + } + } else { + return SDValue(); + } + + return DAG.getNode(ISD::BITCAST, dl, VT, Result); +} + +SDValue +HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { + BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + unsigned Size = VT.getSizeInBits(); + + // Only handle vectors of 64 bits or shorter. + if (Size > 64) + return SDValue(); + + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + unsigned NElts = BVN->getNumOperands(); + + // Try to generate a SPLAT instruction. + if ((VT.getSimpleVT() == MVT::v4i8 || VT.getSimpleVT() == MVT::v4i16) && + (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, 0, true) && SplatBitSize <= 16)) { + unsigned SplatBits = APSplatBits.getZExtValue(); + int32_t SextVal = ((int32_t) (SplatBits << (32 - SplatBitSize)) >> + (32 - SplatBitSize)); + return createSplat(DAG, dl, VT, DAG.getConstant(SextVal, dl, MVT::i32)); + } + + // Try to generate COMBINE to build v2i32 vectors. + if (VT.getSimpleVT() == MVT::v2i32) { + SDValue V0 = BVN->getOperand(0); + SDValue V1 = BVN->getOperand(1); + + if (V0.getOpcode() == ISD::UNDEF) + V0 = DAG.getConstant(0, dl, MVT::i32); + if (V1.getOpcode() == ISD::UNDEF) + V1 = DAG.getConstant(0, dl, MVT::i32); + + ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(V0); + ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(V1); + // If the element isn't a constant, it is in a register: + // generate a COMBINE Register Register instruction. + if (!C0 || !C1) + return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0); + + // If one of the operands is an 8 bit integer constant, generate + // a COMBINE Immediate Immediate instruction. + if (isInt<8>(C0->getSExtValue()) || + isInt<8>(C1->getSExtValue())) + return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0); + } + + // Try to generate a S2_packhl to build v2i16 vectors. + if (VT.getSimpleVT() == MVT::v2i16) { + for (unsigned i = 0, e = NElts; i != e; ++i) { + if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) + continue; + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(BVN->getOperand(i)); + // If the element isn't a constant, it is in a register: + // generate a S2_packhl instruction. 
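+ // (The PACKHL result is v4i16; the low 32-bit subregister extracted below
+ //  holds the requested v2i16 value.)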
+ if (!Cst) { + SDValue pack = DAG.getNode(HexagonISD::PACKHL, dl, MVT::v4i16, + BVN->getOperand(1), BVN->getOperand(0)); + + return DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::v2i16, + pack); + } + } + } + + // In the general case, generate a CONST32 or a CONST64 for constant vectors, + // and insert_vector_elt for all the other cases. + uint64_t Res = 0; + unsigned EltSize = Size / NElts; + SDValue ConstVal; + uint64_t Mask = ~uint64_t(0ULL) >> (64 - EltSize); + bool HasNonConstantElements = false; + + for (unsigned i = 0, e = NElts; i != e; ++i) { + // LLVM's BUILD_VECTOR operands are in Little Endian mode, whereas Hexagon's + // combine, const64, etc. are Big Endian. + unsigned OpIdx = NElts - i - 1; + SDValue Operand = BVN->getOperand(OpIdx); + if (Operand.getOpcode() == ISD::UNDEF) + continue; + + int64_t Val = 0; + if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Operand)) + Val = Cst->getSExtValue(); + else + HasNonConstantElements = true; + + Val &= Mask; + Res = (Res << EltSize) | Val; + } + + if (Size == 64) + ConstVal = DAG.getConstant(Res, dl, MVT::i64); + else + ConstVal = DAG.getConstant(Res, dl, MVT::i32); + + // When there are non constant operands, add them with INSERT_VECTOR_ELT to + // ConstVal, the constant part of the vector. + if (HasNonConstantElements) { + EVT EltVT = VT.getVectorElementType(); + SDValue Width = DAG.getConstant(EltVT.getSizeInBits(), dl, MVT::i64); + SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, + DAG.getConstant(32, dl, MVT::i64)); + + for (unsigned i = 0, e = NElts; i != e; ++i) { + // LLVM's BUILD_VECTOR operands are in Little Endian mode, whereas Hexagon + // is Big Endian. + unsigned OpIdx = NElts - i - 1; + SDValue Operand = BVN->getOperand(OpIdx); + if (isa<ConstantSDNode>(Operand)) + // This operand is already in ConstVal. + continue; + + if (VT.getSizeInBits() == 64 && + Operand.getValueType().getSizeInBits() == 32) { + SDValue C = DAG.getConstant(0, dl, MVT::i32); + Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand); + } + + SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64); + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width); + SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); + const SDValue Ops[] = {ConstVal, Operand, Combined}; + + if (VT.getSizeInBits() == 32) + ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops); + else + ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops); + } + } + + return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal); +} + +SDValue +HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + bool UseHVX = Subtarget.useHVXOps(); + EVT VT = Op.getValueType(); + unsigned NElts = Op.getNumOperands(); + SDValue Vec0 = Op.getOperand(0); + EVT VecVT = Vec0.getValueType(); + unsigned Width = VecVT.getSizeInBits(); + + if (NElts == 2) { + MVT ST = VecVT.getSimpleVT(); + // We are trying to concat two v2i16 to a single v4i16, or two v4i8 + // into a single v8i8. + if (ST == MVT::v2i16 || ST == MVT::v4i8) + return DAG.getNode(HexagonISD::COMBINE, dl, VT, Op.getOperand(1), Vec0); + + if (UseHVX) { + assert((Width == 64*8 && Subtarget.useHVXSglOps()) || + (Width == 128*8 && Subtarget.useHVXDblOps())); + SDValue Vec1 = Op.getOperand(1); + MVT OpTy = Subtarget.useHVXSglOps() ? MVT::v16i32 : MVT::v32i32; + MVT ReTy = Subtarget.useHVXSglOps() ? 
MVT::v32i32 : MVT::v64i32; + SDValue B0 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec0); + SDValue B1 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec1); + SDValue VC = DAG.getNode(HexagonISD::VCOMBINE, dl, ReTy, B1, B0); + return DAG.getNode(ISD::BITCAST, dl, VT, VC); + } + } + + if (VT.getSizeInBits() != 32 && VT.getSizeInBits() != 64) + return SDValue(); + + SDValue C0 = DAG.getConstant(0, dl, MVT::i64); + SDValue C32 = DAG.getConstant(32, dl, MVT::i64); + SDValue W = DAG.getConstant(Width, dl, MVT::i64); + // Create the "width" part of the argument to insert_rp/insertp_rp. + SDValue S = DAG.getNode(ISD::SHL, dl, MVT::i64, W, C32); + SDValue V = C0; + + for (unsigned i = 0, e = NElts; i != e; ++i) { + unsigned N = NElts-i-1; + SDValue OpN = Op.getOperand(N); + + if (VT.getSizeInBits() == 64 && OpN.getValueType().getSizeInBits() == 32) { + SDValue C = DAG.getConstant(0, dl, MVT::i32); + OpN = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, OpN); + } + SDValue Idx = DAG.getConstant(N, dl, MVT::i64); + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, W); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset); + if (VT.getSizeInBits() == 32) + V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or}); + else + V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or}); + } + + return DAG.getNode(ISD::BITCAST, dl, VT, V); +} + +SDValue +HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + int VTN = VT.isVector() ? VT.getVectorNumElements() : 1; + SDLoc dl(Op); + SDValue Idx = Op.getOperand(1); + SDValue Vec = Op.getOperand(0); + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + int EltSize = EltVT.getSizeInBits(); + SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT ? + EltSize : VTN * EltSize, dl, MVT::i64); + + // Constant element number. + if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Idx)) { + uint64_t X = CI->getZExtValue(); + SDValue Offset = DAG.getConstant(X * EltSize, dl, MVT::i32); + const SDValue Ops[] = {Vec, Width, Offset}; + + ConstantSDNode *CW = dyn_cast<ConstantSDNode>(Width); + assert(CW && "Non constant width in LowerEXTRACT_VECTOR"); + + SDValue N; + MVT SVT = VecVT.getSimpleVT(); + uint64_t W = CW->getZExtValue(); + + if (W == 32) { + // Translate this node into EXTRACT_SUBREG. + unsigned Subreg = (X == 0) ? Hexagon::subreg_loreg : 0; + + if (X == 0) + Subreg = Hexagon::subreg_loreg; + else if (SVT == MVT::v2i32 && X == 1) + Subreg = Hexagon::subreg_hireg; + else if (SVT == MVT::v4i16 && X == 2) + Subreg = Hexagon::subreg_hireg; + else if (SVT == MVT::v8i8 && X == 4) + Subreg = Hexagon::subreg_hireg; + else + llvm_unreachable("Bad offset"); + N = DAG.getTargetExtractSubreg(Subreg, dl, MVT::i32, Vec); + + } else if (VecVT.getSizeInBits() == 32) { + N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i32, Ops); + } else { + N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i64, Ops); + if (VT.getSizeInBits() == 32) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N); + } + + return DAG.getNode(ISD::BITCAST, dl, VT, N); + } + + // Variable element number. 
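+ // The EXTRACTURP operand packs the element width into the upper 32 bits and
+ // the bit offset (index * element size) into the lower 32 bits; e.g. for
+ // element 3 of a v4i16 this is (16 << 32) | 48 = 0x1000000030.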
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx,
+ DAG.getConstant(EltSize, dl, MVT::i32));
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, dl, MVT::i64));
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+
+ const SDValue Ops[] = {Vec, Combined};
+
+ SDValue N;
+ if (VecVT.getSizeInBits() == 32) {
+ N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i32, Ops);
+ } else {
+ N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i64, Ops);
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+}
+
+SDValue
+HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int VTN = VT.isVector() ? VT.getVectorNumElements() : 1;
+ SDLoc dl(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue Val = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ int EltSize = EltVT.getSizeInBits();
+ SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::INSERT_VECTOR_ELT ?
+ EltSize : VTN * EltSize, dl, MVT::i64);
+
+ // Use dyn_cast here: a variable index must fall through to the code below
+ // (cast<> would assert on a non-constant index).
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Idx)) {
+ SDValue Offset = DAG.getConstant(C->getSExtValue() * EltSize, dl, MVT::i32);
+ const SDValue Ops[] = {Vec, Val, Width, Offset};
+
+ SDValue N;
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, Ops);
+ else
+ N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, Ops);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+ }
+
+ // Variable element number.
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx,
+ DAG.getConstant(EltSize, dl, MVT::i32));
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, dl, MVT::i64));
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+
+ if (VT.getSizeInBits() == 64 &&
+ Val.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, dl, MVT::i32);
+ Val = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Val);
+ }
+
+ const SDValue Ops[] = {Vec, Val, Combined};
+
+ SDValue N;
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
+ else
+ N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+}
+
+bool
+HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
+ // Assuming the caller does not have either a signext or zeroext modifier, and
+ // only one value is accepted, any reasonable truncation is allowed.
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+
+ // FIXME: in principle up to 64-bit could be made safe, but it would be very
+ // fragile at the moment: any support for multiple value returns would be
+ // liable to disallow tail calls involving i64 -> iN truncation in many cases.
+ return Ty1->getPrimitiveSizeInBits() <= 32;
+}
+
+SDValue
+HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ SDLoc dl(Op);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+ // Mark function as containing a call to EH_RETURN.
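+ // (R30 is the frame pointer; the handler address overwrites the return
+ //  address that allocframe saved at FP+4, and the stack adjustment is
+ //  passed to the unwind path in R28.)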
+ HexagonMachineFunctionInfo *FuncInfo =
+ DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
+ FuncInfo->setHasEHReturn();
+
+ unsigned OffsetReg = Hexagon::R28;
+
+ SDValue StoreAddr =
+ DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
+ DAG.getIntPtrConstant(4, dl));
+ Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
+ false, false, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
+
+ // Not needed; we already use the offset register as an explicit input to
+ // EH_RETURN.
+ // MF.getRegInfo().addLiveOut(OffsetReg);
+
+ return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
+}
+
+SDValue
+HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ default:
+#ifndef NDEBUG
+ Op.getNode()->dumpr(&DAG);
+ if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
+ errs() << "Check for a non-legal type in this operation\n";
+#endif
+ llvm_unreachable("Should not custom lower this!");
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::INSERT_SUBVECTOR: return LowerINSERT_VECTOR(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_VECTOR(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR(Op, DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SRA:
+ case ISD::SHL:
+ case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
+ // Frame & Return address. Currently unimplemented.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
+ case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ // Custom lower some vector loads.
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VSELECT: return LowerVSELECT(Op, DAG);
+ case ISD::CTPOP: return LowerCTPOP(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
+ }
+}
+
+/// Returns relocation base for the given PIC jumptable.
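+/// (The table address is formed PC-relative via AT_PCREL, so PIC jump tables
+/// need no load-time relocation.)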
+SDValue +HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const { + int Idx = cast<JumpTableSDNode>(Table)->getIndex(); + EVT VT = Table.getValueType(); + SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T); +} + +MachineBasicBlock * +HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) + const { + switch (MI->getOpcode()) { + case Hexagon::ALLOCA: { + MachineFunction *MF = BB->getParent(); + auto *FuncInfo = MF->getInfo<HexagonMachineFunctionInfo>(); + FuncInfo->addAllocaAdjustInst(MI); + return BB; + } + default: llvm_unreachable("Unexpected instr type to insert"); + } // switch +} + +//===----------------------------------------------------------------------===// +// Inline Assembly Support +//===----------------------------------------------------------------------===// + +std::pair<unsigned, const TargetRegisterClass *> +HexagonTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { + bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps(); + + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': // R0-R31 + switch (VT.SimpleTy) { + default: + llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); + case MVT::i32: + case MVT::i16: + case MVT::i8: + case MVT::f32: + return std::make_pair(0U, &Hexagon::IntRegsRegClass); + case MVT::i64: + case MVT::f64: + return std::make_pair(0U, &Hexagon::DoubleRegsRegClass); + } + case 'q': // q0-q3 + switch (VT.SimpleTy) { + default: + llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); + case MVT::v1024i1: + case MVT::v512i1: + case MVT::v32i16: + case MVT::v16i32: + case MVT::v64i8: + case MVT::v8i64: + return std::make_pair(0U, &Hexagon::VecPredRegsRegClass); + } + case 'v': // V0-V31 + switch (VT.SimpleTy) { + default: + llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); + case MVT::v16i32: + case MVT::v32i16: + case MVT::v64i8: + case MVT::v8i64: + return std::make_pair(0U, &Hexagon::VectorRegsRegClass); + case MVT::v32i32: + case MVT::v64i16: + case MVT::v16i64: + case MVT::v128i8: + if (Subtarget.hasV60TOps() && UseHVX && UseHVXDbl) + return std::make_pair(0U, &Hexagon::VectorRegs128BRegClass); + else + return std::make_pair(0U, &Hexagon::VecDblRegsRegClass); + case MVT::v256i8: + case MVT::v128i16: + case MVT::v64i32: + case MVT::v32i64: + return std::make_pair(0U, &Hexagon::VecDblRegs128BRegClass); + } + + default: + llvm_unreachable("Unknown asm register class"); + } + } + + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} + +/// isFPImmLegal - Returns true if the target can instruction select the +/// specified FP immediate natively. If false, the legalizer will +/// materialize the FP immediate as a load from a constant pool. +bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + return Subtarget.hasV5TOps(); +} + +/// isLegalAddressingMode - Return true if the addressing mode represented by +/// AM is legal for this target, for a load/store of the specified type. +bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, + unsigned AS) const { + // Allows a signed-extended 11-bit immediate field. + if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) + return false; + + // No global is ever allowed as a base. 
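+ // For example, "r1 + #8" is accepted, while "&global + #8" or a scaled form
+ // such as "r1 + r2<<#2" is rejected.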
+ if (AM.BaseGV)
+ return false;
+
+ int Scale = AM.Scale;
+ if (Scale < 0) Scale = -Scale;
+ switch (Scale) {
+ case 0: // No scale reg, "r+i", "r", or just "i".
+ break;
+ default: // No scaled addressing mode.
+ return false;
+ }
+ return true;
+}
+
+/// Return true if folding a constant offset with the given GlobalAddress is
+/// legal. It is frequently not legal in PIC relocation models.
+bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
+ const {
+ return HTM.getRelocationModel() == Reloc::Static;
+}
+
+
+/// isLegalICmpImmediate - Return true if the specified immediate is a legal
+/// icmp immediate, that is, the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
+bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ return Imm >= -512 && Imm <= 511;
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
+ SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // ***************************************************************************
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes.
+ // ***************************************************************************
+
+ // If this is a tail call via a function pointer, then don't do it!
+ if (!(isa<GlobalAddressSDNode>(Callee)) &&
+ !(isa<ExternalSymbolSDNode>(Callee))) {
+ return false;
+ }
+
+ // Do not optimize if the calling conventions do not match.
+ if (!CCMatch)
+ return false;
+
+ // Do not tail call optimize vararg calls.
+ if (isVarArg)
+ return false;
+
+ // Also avoid tail call optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // In addition to the cases above, tail call optimization should also be
+ // disabled when the calling convention requires at least one outgoing
+ // argument to be passed on the stack. We cannot check that here because at
+ // this point that information is not available.
+ return true;
+}
+
+// Return true when the given node fits in a positive half word.
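+// (That is, a constant in the range [1, 32767], or a sign_extend_inreg node.)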
+bool llvm::isPositiveHalfWord(SDNode *N) { + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (CN && CN->getSExtValue() > 0 && isInt<16>(CN->getSExtValue())) + return true; + + switch (N->getOpcode()) { + default: + return false; + case ISD::SIGN_EXTEND_INREG: + return true; + } +} + +std::pair<const TargetRegisterClass*, uint8_t> +HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, + MVT VT) const { + const TargetRegisterClass *RRC = nullptr; + + uint8_t Cost = 1; + switch (VT.SimpleTy) { + default: + return TargetLowering::findRepresentativeClass(TRI, VT); + case MVT::v64i8: + case MVT::v32i16: + case MVT::v16i32: + case MVT::v8i64: + RRC = &Hexagon::VectorRegsRegClass; + break; + case MVT::v128i8: + case MVT::v64i16: + case MVT::v32i32: + case MVT::v16i64: + if (Subtarget.hasV60TOps() && Subtarget.useHVXOps() && + Subtarget.useHVXDblOps()) + RRC = &Hexagon::VectorRegs128BRegClass; + else + RRC = &Hexagon::VecDblRegsRegClass; + break; + case MVT::v256i8: + case MVT::v128i16: + case MVT::v64i32: + case MVT::v32i64: + RRC = &Hexagon::VecDblRegs128BRegClass; + break; + } + return std::make_pair(RRC, Cost); +} + +Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const { + BasicBlock *BB = Builder.GetInsertBlock(); + Module *M = BB->getParent()->getParent(); + Type *Ty = cast<PointerType>(Addr->getType())->getElementType(); + unsigned SZ = Ty->getPrimitiveSizeInBits(); + assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported"); + Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked + : Intrinsic::hexagon_L4_loadd_locked; + Value *Fn = Intrinsic::getDeclaration(M, IntID); + return Builder.CreateCall(Fn, Addr, "larx"); +} + +/// Perform a store-conditional operation to Addr. Return the status of the +/// store. This should be 0 if the store succeeded, non-zero otherwise. +Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder, + Value *Val, Value *Addr, AtomicOrdering Ord) const { + BasicBlock *BB = Builder.GetInsertBlock(); + Module *M = BB->getParent()->getParent(); + Type *Ty = Val->getType(); + unsigned SZ = Ty->getPrimitiveSizeInBits(); + assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported"); + Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked + : Intrinsic::hexagon_S4_stored_locked; + Value *Fn = Intrinsic::getDeclaration(M, IntID); + Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx"); + Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), ""); + Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext())); + return Ext; +} + +TargetLowering::AtomicExpansionKind +HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { + // Do not expand loads and stores that don't exceed 64 bits. + return LI->getType()->getPrimitiveSizeInBits() > 64 + ? AtomicExpansionKind::LLOnly + : AtomicExpansionKind::None; +} + +bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { + // Do not expand loads and stores that don't exceed 64 bits. 
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h new file mode 100644 index 0000000..bf378b9 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -0,0 +1,252 @@ +//===-- HexagonISelLowering.h - Hexagon DAG Lowering Interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that Hexagon uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H + +#include "Hexagon.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +// Return true when the given node fits in a positive half word. +bool isPositiveHalfWord(SDNode *N); + + namespace HexagonISD { + enum NodeType : unsigned { + OP_BEGIN = ISD::BUILTIN_OP_END, + + CONST32 = OP_BEGIN, + CONST32_GP, // For marking data present in GP. + FCONST32, + ALLOCA, + ARGEXTEND, + + AT_GOT, // Index in GOT. + AT_PCREL, // Offset relative to PC. + + CALLv3, // A V3+ call instruction. + CALLv3nr, // A V3+ call instruction that doesn't return. + CALLR, + + RET_FLAG, // Return with a flag operand. + BARRIER, // Memory barrier. + JT, // Jump table. + CP, // Constant pool. + + POPCOUNT, + COMBINE, + PACKHL, + VSPLATB, + VSPLATH, + SHUFFEB, + SHUFFEH, + SHUFFOB, + SHUFFOH, + VSXTBH, + VSXTBW, + VSRAW, + VSRAH, + VSRLW, + VSRLH, + VSHLW, + VSHLH, + VCMPBEQ, + VCMPBGT, + VCMPBGTU, + VCMPHEQ, + VCMPHGT, + VCMPHGTU, + VCMPWEQ, + VCMPWGT, + VCMPWGTU, + + INSERT, + INSERTRP, + EXTRACTU, + EXTRACTURP, + VCOMBINE, + TC_RETURN, + EH_RETURN, + DCFETCH, + + OP_END + }; + } + + class HexagonSubtarget; + + class HexagonTargetLowering : public TargetLowering { + int VarArgsFrameOffset; // Frame offset to start of varargs area. + + bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize) + const; + void promoteLdStType(EVT VT, EVT PromotedLdStVT); + const HexagonTargetMachine &HTM; + const HexagonSubtarget &Subtarget; + + public: + explicit HexagonTargetLowering(const TargetMachine &TM, + const HexagonSubtarget &ST); + + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. + bool IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet, + bool isCallerStructRet, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const; + + bool isTruncateFree(Type *Ty1, Type *Ty2) const override; + bool isTruncateFree(EVT VT1, EVT VT2) const override; + + bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; + + // Should we expand the build vector with shuffles? 
+ bool shouldExpandBuildVectorWithShuffles(EVT VT, + unsigned DefinedValues) const override; + + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + const char *getTargetNodeName(unsigned Opcode) const override; + SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override; + SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const override; + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, + const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const; + + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, SDLoc dl, + SelectionDAG &DAG) const override; + + bool mayBeEmittedAsTailCall(CallInst *CI) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const override; + + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return Hexagon::R0; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. 
+ unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return Hexagon::R1; + } + + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + EVT getSetCCResultType(const DataLayout &, LLVMContext &C, + EVT VT) const override { + if (!VT.isVector()) + return MVT::i1; + else + return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); + } + + bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const override; + + std::pair<unsigned, const TargetRegisterClass *> + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { + if (ConstraintCode == "o") + return InlineAsm::Constraint_o; + else if (ConstraintCode == "v") + return InlineAsm::Constraint_v; + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + + // Intrinsics + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + /// isLegalAddressingMode - Return true if the addressing mode represented + /// by AM is legal for this target, for a load/store of the specified type. + /// The type may be VoidTy, in which case only return true if the addressing + /// mode is legal for a load/store of any legal type. + /// TODO: Handle pre/postinc as well. + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS) const override; + /// Return true if folding a constant offset with the given GlobalAddress + /// is legal. It is frequently not legal in PIC relocation models. + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + + bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; + + /// isLegalICmpImmediate - Return true if the specified immediate is legal + /// icmp immediate, that is the target has icmp instructions which can + /// compare a register against the immediate without having to materialize + /// the immediate into a register. + bool isLegalICmpImmediate(int64_t Imm) const override; + + /// Returns relocation base for the given PIC jumptable. + SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) + const override; + + // Handling of atomic RMW instructions. 
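+ // (RMW operations are expanded to LL/SC loops built from the load-locked /
+ //  store-conditional intrinsics produced by emitLoadLinked and
+ //  emitStoreConditional; see shouldExpandAtomicRMWInIR below.)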
+ Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const override; + Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, AtomicOrdering Ord) const override; + AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; + bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; + AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override { + return AtomicExpansionKind::LLSC; + } + + protected: + std::pair<const TargetRegisterClass*, uint8_t> + findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) + const override; + }; +} // end namespace llvm + +#endif // Hexagon_ISELLOWERING_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td new file mode 100644 index 0000000..5a1a69b --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td @@ -0,0 +1,462 @@ +//==- HexagonInstrAlias.td - Hexagon Instruction Aliases ---*- tablegen -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Hexagon Instruction Mappings +//===----------------------------------------------------------------------===// + + +def : InstAlias<"memb({GP}+#$addr) = $Nt.new", + (S2_storerbnewgp u16_0Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memh({GP}+#$addr) = $Nt.new", + (S2_storerhnewgp u16_1Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memw({GP}+#$addr) = $Nt.new", + (S2_storerinewgp u16_2Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memb({GP}+#$addr) = $Nt", + (S2_storerbgp u16_0Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memh({GP}+#$addr) = $Nt", + (S2_storerhgp u16_1Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memh({GP}+#$addr) = $Nt.h", + (S2_storerfgp u16_1Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memw({GP}+#$addr) = $Nt", + (S2_storerigp u16_2Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memd({GP}+#$addr) = $Nt", + (S2_storerdgp u16_3Imm:$addr, DoubleRegs:$Nt)>; + +def : InstAlias<"$Nt = memb({GP}+#$addr)", + (L2_loadrbgp IntRegs:$Nt, u16_0Imm:$addr)>; +def : InstAlias<"$Nt = memub({GP}+#$addr)", + (L2_loadrubgp IntRegs:$Nt, u16_0Imm:$addr)>; +def : InstAlias<"$Nt = memh({GP}+#$addr)", + (L2_loadrhgp IntRegs:$Nt, u16_1Imm:$addr)>; +def : InstAlias<"$Nt = memuh({GP}+#$addr)", + (L2_loadruhgp IntRegs:$Nt, u16_1Imm:$addr)>; +def : InstAlias<"$Nt = memw({GP}+#$addr)", + (L2_loadrigp IntRegs:$Nt, u16_2Imm:$addr)>; +def : InstAlias<"$Nt = memd({GP}+#$addr)", + (L2_loadrdgp DoubleRegs:$Nt, u16_3Imm:$addr)>; + +// Alias of: memXX($Rs+#XX) = $Rt to memXX($Rs) = $Rt +def : InstAlias<"memb($Rs) = $Rt", + (S2_storerb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memh($Rs) = $Rt", + (S2_storerh_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memh($Rs) = $Rt.h", + (S2_storerf_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memw($Rs) = $Rt", + (S2_storeri_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memb($Rs) = $Rt.new", + (S2_storerbnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memh($Rs) = $Rt.new", + (S2_storerhnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memw($Rs) = $Rt.new", + (S2_storerinew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memb($Rs) = #$S8", + (S4_storeirb_io IntRegs:$Rs, 0, s8Ext:$S8), 0>; + +def : InstAlias<"memh($Rs) = #$S8", + (S4_storeirh_io 
IntRegs:$Rs, 0, s8Ext:$S8), 0>; + +def : InstAlias<"memw($Rs) = #$S8", + (S4_storeiri_io IntRegs:$Rs, 0, s8Ext:$S8), 0>; + +def : InstAlias<"memd($Rs) = $Rtt", + (S2_storerd_io IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"memb($Rs) = setbit(#$U5)", + (L4_ior_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memh($Rs) = setbit(#$U5)", + (L4_ior_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memw($Rs) = setbit(#$U5)", + (L4_ior_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memb($Rs) = clrbit(#$U5)", + (L4_iand_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memh($Rs) = clrbit(#$U5)", + (L4_iand_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memw($Rs) = clrbit(#$U5)", + (L4_iand_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +// Alias of: $Rd = memXX($Rs+#XX) to $Rd = memXX($Rs) +def : InstAlias<"$Rd = memb($Rs)", + (L2_loadrb_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memub($Rs)", + (L2_loadrub_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memh($Rs)", + (L2_loadrh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memuh($Rs)", + (L2_loadruh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memw($Rs)", + (L2_loadri_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memd($Rs)", + (L2_loadrd_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memubh($Rs)", + (L2_loadbzw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memubh($Rs)", + (L2_loadbzw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = membh($Rs)", + (L2_loadbsw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = membh($Rs)", + (L2_loadbsw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memb_fifo($Rs)", + (L2_loadalignb_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memh_fifo($Rs)", + (L2_loadalignh_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +// Alias of: if ($Pt) $Rd = memXX($Rs + #$u6_X) +// to: if ($Pt) $Rd = memXX($Rs) +def : InstAlias<"if ($Pt) $Rd = memb($Rs)", + (L2_ploadrbt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memub($Rs)", + (L2_ploadrubt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memh($Rs)", + (L2_ploadrht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memuh($Rs)", + (L2_ploadruht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memw($Rs)", + (L2_ploadrit_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rdd = memd($Rs)", + (L2_ploadrdt_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +// Alias of: if ($Pt) memXX($Rs + #$u6_X) = $Rt +// to: if ($Pt) memXX($Rs) = $Rt +def : InstAlias<"if ($Pt) memb($Rs) = $Rt", + (S2_pstorerbt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = $Rt", + (S2_pstorerht_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = $Rt.h", + (S2_pstorerft_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memw($Rs) = $Rt", + (S2_pstorerit_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memd($Rs) = $Rtt", + (S2_pstorerdt_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"if ($Pt) memb($Rs) = $Rt.new", + (S2_pstorerbnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : 
InstAlias<"if ($Pt) memh($Rs) = $Rt.new", + (S2_pstorerhnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memw($Rs) = $Rt.new", + (S2_pstorerinewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt.new) memb($Rs) = $Rt.new", + (S4_pstorerbnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt.new) memh($Rs) = $Rt.new", + (S4_pstorerhnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt.new) memw($Rs) = $Rt.new", + (S4_pstorerinewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + + +// Alias of: if (!$Pt) $Rd = memXX($Rs + #$u6_X) +// to: if (!$Pt) $Rd = memXX($Rs) +def : InstAlias<"if (!$Pt) $Rd = memb($Rs)", + (L2_ploadrbf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memub($Rs)", + (L2_ploadrubf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memh($Rs)", + (L2_ploadrhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memuh($Rs)", + (L2_ploadruhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memw($Rs)", + (L2_ploadrif_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rdd = memd($Rs)", + (L2_ploadrdf_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +// Alias of: if (!$Pt) memXX($Rs + #$u6_X) = $Rt +// to: if (!$Pt) memXX($Rs) = $Rt +def : InstAlias<"if (!$Pt) memb($Rs) = $Rt", + (S2_pstorerbf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = $Rt", + (S2_pstorerhf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.h", + (S2_pstorerff_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memw($Rs) = $Rt", + (S2_pstorerif_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memd($Rs) = $Rtt", + (S2_pstorerdf_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"if (!$Pt) memb($Rs) = $Rt.new", + (S2_pstorerbnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.new", + (S2_pstorerhnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memw($Rs) = $Rt.new", + (S2_pstorerinewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt.new) memb($Rs) = $Rt.new", + (S4_pstorerbnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt.new) memh($Rs) = $Rt.new", + (S4_pstorerhnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt.new) memw($Rs) = $Rt.new", + (S4_pstorerinewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memb($Rs) = #$S6", + (S4_storeirbt_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = #$S6", + (S4_storeirht_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt) memw($Rs) = #$S6", + (S4_storeirit_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt.new) memb($Rs) = #$S6", + (S4_storeirbtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt.new) memh($Rs) = #$S6", + (S4_storeirhtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt.new) memw($Rs) = #$S6", + (S4_storeiritnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt) 
memb($Rs) = #$S6", + (S4_storeirbf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = #$S6", + (S4_storeirhf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt) memw($Rs) = #$S6", + (S4_storeirif_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt.new) memb($Rs) = #$S6", + (S4_storeirbfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt.new) memh($Rs) = #$S6", + (S4_storeirhfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt.new) memw($Rs) = #$S6", + (S4_storeirifnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +// Alias of: memXX($Rs + $u6_X) |= $Rt, also &=, +=, -= +// to: memXX($Rs) |= $Rt +def : InstAlias<"memb($Rs) &= $Rt", + (L4_and_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) |= $Rt", + (L4_or_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) += $Rt", + (L4_add_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) -= $Rt", + (L4_sub_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) += #$U5", + (L4_iadd_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) -= #$U5", + (L4_isub_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) &= $Rt", + (L4_and_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) |= $Rt", + (L4_or_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) += $Rt", + (L4_add_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) -= $Rt", + (L4_sub_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) += #$U5", + (L4_iadd_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) -= #$U5", + (L4_isub_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) &= $Rt", + (L4_and_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) |= $Rt", + (L4_or_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) += $Rt", + (L4_add_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) -= $Rt", + (L4_sub_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) += #$U5", + (L4_iadd_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) -= #$U5", + (L4_isub_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +// +// Alias of: if ($Pv.new) memX($Rs) = $Rt +// to: if (p3.new) memX(r17 + #0) = $Rt +def : InstAlias<"if ($Pv.new) memb($Rs) = $Rt", + (S4_pstorerbtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt", + (S4_pstorerhtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt.h", + (S4_pstorerftnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memw($Rs) = $Rt", + (S4_pstoreritnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memd($Rs) = $Rtt", + (S4_pstorerdtnew_io + PredRegs:$Pv, IntRegs:$Rs, 0, 
DoubleRegs:$Rtt), 0>; + +def : InstAlias<"if (!$Pv.new) memb($Rs) = $Rt", + (S4_pstorerbfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt", + (S4_pstorerhfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt.h", + (S4_pstorerffnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memw($Rs) = $Rt", + (S4_pstorerifnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memd($Rs) = $Rtt", + (S4_pstorerdfnew_io + PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +// +// Alias of: if ($Pt.new) $Rd = memub($Rs) -- And if (!$Pt.new) ... +// to: if ($Pt.new) $Rd = memub($Rs + #$u6_0) +def : InstAlias<"if ($Pt.new) $Rd = memub($Rs)", + (L2_ploadrubtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memb($Rs)", + (L2_ploadrbtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memh($Rs)", + (L2_ploadrhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memuh($Rs)", + (L2_ploadruhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memw($Rs)", + (L2_ploadritnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rdd = memd($Rs)", + (L2_ploadrdtnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memub($Rs)", + (L2_ploadrubfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memb($Rs)", + (L2_ploadrbfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memh($Rs)", + (L2_ploadrhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memuh($Rs)", + (L2_ploadruhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memw($Rs)", + (L2_ploadrifnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rdd = memd($Rs)", + (L2_ploadrdfnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"dcfetch($Rs)", + (Y2_dcfetchbo IntRegs:$Rs, 0), 0>; + +// Alias of some insn mappings, others must be handled by the parser +def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)", + (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; +def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)", + (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; + +// Rd=neg(Rs) is aliased to Rd=sub(#0,Rs) +def : InstAlias<"$Rd = neg($Rs)", + (A2_subri IntRegs:$Rd, 0, IntRegs:$Rs), 0>; + +def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>; +def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>; +def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>; +def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>; + +def : InstAlias<"$Pd = $Ps", + (C2_or PredRegs:$Pd, PredRegs:$Ps, PredRegs:$Ps), 0>; + +def : InstAlias<"$Rdd = vaddb($Rss, $Rtt)", + (A2_vaddub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 1>; + +def : InstAlias<"$Rdd = vsubb($Rss,$Rtt)", + (A2_vsubub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"$Rd = mpyui($Rs,$Rt)", + (M2_mpyi IntRegs:$Rd, IntRegs:$Rs, IntRegs:$Rt), 0>; + +// Assembler mapped insns: cmp.lt(a,b) -> cmp.gt(b,a) +def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)", + (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; +def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)", + 
(C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td new file mode 100644 index 0000000..280832f --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td @@ -0,0 +1,1019 @@ +class Enc_COPROC_VX_3op_v<bits<15> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { opc{14-4}, src2}; + let Inst{13-0} = { opc{3}, src1, opc{2-0}, dst}; +} + +class V6_vtmpyb_enc : Enc_COPROC_VX_3op_v<0b000110010000000>; +class V6_vtmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000001>; +class V6_vdmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110010000010>; +class V6_vrmpyub_enc : Enc_COPROC_VX_3op_v<0b000110010000011>; +class V6_vrmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000100>; +class V6_vdsaduh_enc : Enc_COPROC_VX_3op_v<0b000110010000101>; +class V6_vdmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000110>; +class V6_vdmpybus_dv_enc : Enc_COPROC_VX_3op_v<0b000110010000111>; +class V6_vtmpyb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001000>; +class V6_vtmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001001>; +class V6_vtmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001010>; +class V6_vdmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001011>; +class V6_vrmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001100>; +class V6_vrmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001101>; +class V6_vdmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001110>; +class V6_vdmpybus_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001111>; +class V6_vdmpyhsusat_enc : Enc_COPROC_VX_3op_v<0b000110010010000>; +class V6_vdmpyhsuisat_enc : Enc_COPROC_VX_3op_v<0b000110010010001>; +class V6_vdmpyhsat_enc : Enc_COPROC_VX_3op_v<0b000110010010010>; +class V6_vdmpyhisat_enc : Enc_COPROC_VX_3op_v<0b000110010010011>; +class V6_vdmpyhb_dv_enc : Enc_COPROC_VX_3op_v<0b000110010010100>; +class V6_vmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010010101>; +class V6_vmpabus_enc : Enc_COPROC_VX_3op_v<0b000110010010110>; +class V6_vmpahb_enc : Enc_COPROC_VX_3op_v<0b000110010010111>; +class V6_vdmpyhsusat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011000>; +class V6_vdmpyhsuisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011001>; +class V6_vdmpyhisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011010>; +class V6_vdmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011011>; +class V6_vdmpyhb_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011100>; +class V6_vmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011101>; +class V6_vmpabus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011110>; +class V6_vmpahb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011111>; +class V6_vmpyh_enc : Enc_COPROC_VX_3op_v<0b000110010100000>; +class V6_vmpyhss_enc : Enc_COPROC_VX_3op_v<0b000110010100001>; +class V6_vmpyhsrs_enc : Enc_COPROC_VX_3op_v<0b000110010100010>; +class V6_vmpyuh_enc : Enc_COPROC_VX_3op_v<0b000110010100011>; +class V6_vmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101000>; +class V6_vmpyuh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101001>; +class V6_vmpyiwb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101010>; +class V6_vmpyiwh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101011>; +class V6_vmpyihb_enc : Enc_COPROC_VX_3op_v<0b000110010110000>; +class V6_vror_enc : Enc_COPROC_VX_3op_v<0b000110010110001>; +class V6_vasrw_enc : Enc_COPROC_VX_3op_v<0b000110010110101>; +class V6_vasrh_enc : Enc_COPROC_VX_3op_v<0b000110010110110>; +class V6_vaslw_enc : Enc_COPROC_VX_3op_v<0b000110010110111>; +class V6_vdsaduh_acc_enc : 
Enc_COPROC_VX_3op_v<0b000110010111000>; +class V6_vmpyihb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111001>; +class V6_vaslw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111010>; +class V6_vasrw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111101>; +class V6_vaslh_enc : Enc_COPROC_VX_3op_v<0b000110011000000>; +class V6_vlsrw_enc : Enc_COPROC_VX_3op_v<0b000110011000001>; +class V6_vlsrh_enc : Enc_COPROC_VX_3op_v<0b000110011000010>; +class V6_vmpyiwh_enc : Enc_COPROC_VX_3op_v<0b000110011000111>; +class V6_vmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110011001000>; +class V6_vmpyiwb_enc : Enc_COPROC_VX_3op_v<0b000110011010000>; +class V6_vtmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110011010100>; +class V6_vmpyub_enc : Enc_COPROC_VX_3op_v<0b000110011100000>; +class V6_vrmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000000>; +class V6_vrmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000001>; +class V6_vrmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000010>; +class V6_vdmpyhvsat_enc : Enc_COPROC_VX_3op_v<0b000111000000011>; +class V6_vmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000100>; +class V6_vmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000101>; +class V6_vmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000110>; +class V6_vmpyhv_enc : Enc_COPROC_VX_3op_v<0b000111000000111>; +class V6_vrmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001000>; +class V6_vrmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001001>; +class V6_vrmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001010>; +class V6_vdmpyhvsat_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001011>; +class V6_vmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001100>; +class V6_vmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001101>; +class V6_vmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001110>; +class V6_vmpyhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001111>; +class V6_vmpyuhv_enc : Enc_COPROC_VX_3op_v<0b000111000010000>; +class V6_vmpyhvsrs_enc : Enc_COPROC_VX_3op_v<0b000111000010001>; +class V6_vmpyhus_enc : Enc_COPROC_VX_3op_v<0b000111000010010>; +class V6_vmpabusv_enc : Enc_COPROC_VX_3op_v<0b000111000010011>; +class V6_vmpyih_enc : Enc_COPROC_VX_3op_v<0b000111000010100>; +class V6_vand_enc : Enc_COPROC_VX_3op_v<0b000111000010101>; +class V6_vor_enc : Enc_COPROC_VX_3op_v<0b000111000010110>; +class V6_vxor_enc : Enc_COPROC_VX_3op_v<0b000111000010111>; +class V6_vmpyuhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011000>; +class V6_vmpyhus_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011001>; +class V6_vmpyih_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011100>; +class V6_vmpyiewuh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011101>; +class V6_vmpyowh_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011110>; +class V6_vmpyowh_rnd_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011111>; +class V6_vaddw_enc : Enc_COPROC_VX_3op_v<0b000111000100000>; +class V6_vaddubsat_enc : Enc_COPROC_VX_3op_v<0b000111000100001>; +class V6_vadduhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100010>; +class V6_vaddhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100011>; +class V6_vaddwsat_enc : Enc_COPROC_VX_3op_v<0b000111000100100>; +class V6_vsubb_enc : Enc_COPROC_VX_3op_v<0b000111000100101>; +class V6_vsubh_enc : Enc_COPROC_VX_3op_v<0b000111000100110>; +class V6_vsubw_enc : Enc_COPROC_VX_3op_v<0b000111000100111>; +class V6_vmpyiewh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000101000>; +class V6_vsububsat_enc : Enc_COPROC_VX_3op_v<0b000111000110000>; +class V6_vsubuhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110001>; +class V6_vsubhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110010>; +class V6_vsubwsat_enc : 
Enc_COPROC_VX_3op_v<0b000111000110011>; +class V6_vaddb_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110100>; +class V6_vaddh_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110101>; +class V6_vaddw_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110110>; +class V6_vaddubsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110111>; +class V6_vadduhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000000>; +class V6_vaddhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000001>; +class V6_vaddwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000010>; +class V6_vsubb_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000011>; +class V6_vsubh_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000100>; +class V6_vsubw_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000101>; +class V6_vsububsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000110>; +class V6_vsubuhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000111>; +class V6_vsubhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010000>; +class V6_vsubwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010001>; +class V6_vaddubh_enc : Enc_COPROC_VX_3op_v<0b000111001010010>; +class V6_vadduhw_enc : Enc_COPROC_VX_3op_v<0b000111001010011>; +class V6_vaddhw_enc : Enc_COPROC_VX_3op_v<0b000111001010100>; +class V6_vsububh_enc : Enc_COPROC_VX_3op_v<0b000111001010101>; +class V6_vsubuhw_enc : Enc_COPROC_VX_3op_v<0b000111001010110>; +class V6_vsubhw_enc : Enc_COPROC_VX_3op_v<0b000111001010111>; +class V6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b000111001100000>; +class V6_vabsdiffh_enc : Enc_COPROC_VX_3op_v<0b000111001100001>; +class V6_vabsdiffuh_enc : Enc_COPROC_VX_3op_v<0b000111001100010>; +class V6_vabsdiffw_enc : Enc_COPROC_VX_3op_v<0b000111001100011>; +class V6_vavgub_enc : Enc_COPROC_VX_3op_v<0b000111001100100>; +class V6_vavguh_enc : Enc_COPROC_VX_3op_v<0b000111001100101>; +class V6_vavgh_enc : Enc_COPROC_VX_3op_v<0b000111001100110>; +class V6_vavgw_enc : Enc_COPROC_VX_3op_v<0b000111001100111>; +class V6_vnavgub_enc : Enc_COPROC_VX_3op_v<0b000111001110000>; +class V6_vnavgh_enc : Enc_COPROC_VX_3op_v<0b000111001110001>; +class V6_vnavgw_enc : Enc_COPROC_VX_3op_v<0b000111001110010>; +class V6_vavgubrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110011>; +class V6_vavguhrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110100>; +class V6_vavghrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110101>; +class V6_vavgwrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110110>; +class V6_vmpabuuv_enc : Enc_COPROC_VX_3op_v<0b000111001110111>; +class V6_vminub_enc : Enc_COPROC_VX_3op_v<0b000111110000001>; +class V6_vminuh_enc : Enc_COPROC_VX_3op_v<0b000111110000010>; +class V6_vminh_enc : Enc_COPROC_VX_3op_v<0b000111110000011>; +class V6_vminw_enc : Enc_COPROC_VX_3op_v<0b000111110000100>; +class V6_vmaxub_enc : Enc_COPROC_VX_3op_v<0b000111110000101>; +class V6_vmaxuh_enc : Enc_COPROC_VX_3op_v<0b000111110000110>; +class V6_vmaxh_enc : Enc_COPROC_VX_3op_v<0b000111110000111>; +class V6_vmaxw_enc : Enc_COPROC_VX_3op_v<0b000111110010000>; +class V6_vdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010001>; +class V6_vrdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010011>; +class V6_vdealb4w_enc : Enc_COPROC_VX_3op_v<0b000111110010111>; +class V6_vmpyowh_rnd_enc : Enc_COPROC_VX_3op_v<0b000111110100000>; +class V6_vshuffeb_enc : Enc_COPROC_VX_3op_v<0b000111110100001>; +class V6_vshuffob_enc : Enc_COPROC_VX_3op_v<0b000111110100010>; +class V6_vshufeh_enc : Enc_COPROC_VX_3op_v<0b000111110100011>; +class V6_vshufoh_enc : Enc_COPROC_VX_3op_v<0b000111110100100>; +class V6_vshufoeh_enc : Enc_COPROC_VX_3op_v<0b000111110100101>; +class V6_vshufoeb_enc : Enc_COPROC_VX_3op_v<0b000111110100110>; +class 
V6_vcombine_enc : Enc_COPROC_VX_3op_v<0b000111110100111>; +class V6_vmpyieoh_enc : Enc_COPROC_VX_3op_v<0b000111110110000>; +class V6_vsathub_enc : Enc_COPROC_VX_3op_v<0b000111110110010>; +class V6_vsatwh_enc : Enc_COPROC_VX_3op_v<0b000111110110011>; +class V6_vroundwh_enc : Enc_COPROC_VX_3op_v<0b000111110110100>; +class V6_vroundwuh_enc : Enc_COPROC_VX_3op_v<0b000111110110101>; +class V6_vroundhb_enc : Enc_COPROC_VX_3op_v<0b000111110110110>; +class V6_vroundhub_enc : Enc_COPROC_VX_3op_v<0b000111110110111>; +class V6_vasrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010000>; +class V6_vlsrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010001>; +class V6_vlsrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010010>; +class V6_vasrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010011>; +class V6_vaslwv_enc : Enc_COPROC_VX_3op_v<0b000111111010100>; +class V6_vaslhv_enc : Enc_COPROC_VX_3op_v<0b000111111010101>; +class V6_vaddb_enc : Enc_COPROC_VX_3op_v<0b000111111010110>; +class V6_vaddh_enc : Enc_COPROC_VX_3op_v<0b000111111010111>; +class V6_vmpyiewuh_enc : Enc_COPROC_VX_3op_v<0b000111111100000>; +class V6_vmpyiowh_enc : Enc_COPROC_VX_3op_v<0b000111111100001>; +class V6_vpackeb_enc : Enc_COPROC_VX_3op_v<0b000111111100010>; +class V6_vpackeh_enc : Enc_COPROC_VX_3op_v<0b000111111100011>; +class V6_vpackhub_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100101>; +class V6_vpackhb_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100110>; +class V6_vpackwuh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100111>; +class V6_vpackwh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111110000>; +class V6_vpackob_enc : Enc_COPROC_VX_3op_v<0b000111111110001>; +class V6_vpackoh_enc : Enc_COPROC_VX_3op_v<0b000111111110010>; +class V6_vmpyewuh_enc : Enc_COPROC_VX_3op_v<0b000111111110101>; +class V6_vmpyowh_enc : Enc_COPROC_VX_3op_v<0b000111111110111>; +class V6_extractw_enc : Enc_COPROC_VX_3op_v<0b100100100000001>; +class M6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b111010001010000>; +class M6_vabsdiffb_enc : Enc_COPROC_VX_3op_v<0b111010001110000>; + +class Enc_COPROC_VX_cmp<bits<13> opc> : OpcodeHexagon { + bits<2> dst; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { 0b00011, opc{12-7}, src2{4-0} }; + let Inst{13-0} = { opc{6}, src1{4-0}, opc{5-0}, dst{1-0} }; +} + +class V6_vandvrt_acc_enc : Enc_COPROC_VX_cmp<0b0010111100000>; +class V6_vandvrt_enc : Enc_COPROC_VX_cmp<0b0011010010010>; +class V6_veqb_and_enc : Enc_COPROC_VX_cmp<0b1001001000000>; +class V6_veqh_and_enc : Enc_COPROC_VX_cmp<0b1001001000001>; +class V6_veqw_and_enc : Enc_COPROC_VX_cmp<0b1001001000010>; +class V6_vgtb_and_enc : Enc_COPROC_VX_cmp<0b1001001000100>; +class V6_vgth_and_enc : Enc_COPROC_VX_cmp<0b1001001000101>; +class V6_vgtw_and_enc : Enc_COPROC_VX_cmp<0b1001001000110>; +class V6_vgtub_and_enc : Enc_COPROC_VX_cmp<0b1001001001000>; +class V6_vgtuh_and_enc : Enc_COPROC_VX_cmp<0b1001001001001>; +class V6_vgtuw_and_enc : Enc_COPROC_VX_cmp<0b1001001001010>; +class V6_veqb_or_enc : Enc_COPROC_VX_cmp<0b1001001010000>; +class V6_veqh_or_enc : Enc_COPROC_VX_cmp<0b1001001010001>; +class V6_veqw_or_enc : Enc_COPROC_VX_cmp<0b1001001010010>; +class V6_vgtb_or_enc : Enc_COPROC_VX_cmp<0b1001001010100>; +class V6_vgth_or_enc : Enc_COPROC_VX_cmp<0b1001001010101>; +class V6_vgtw_or_enc : Enc_COPROC_VX_cmp<0b1001001010110>; +class V6_vgtub_or_enc : Enc_COPROC_VX_cmp<0b1001001011000>; +class V6_vgtuh_or_enc : Enc_COPROC_VX_cmp<0b1001001011001>; +class V6_vgtuw_or_enc : Enc_COPROC_VX_cmp<0b1001001011010>; +class V6_veqb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100000>; +class V6_veqh_xor_enc : 
Enc_COPROC_VX_cmp<0b1001001100001>; +class V6_veqw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100010>; +class V6_vgtb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100100>; +class V6_vgth_xor_enc : Enc_COPROC_VX_cmp<0b1001001100101>; +class V6_vgtw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100110>; +class V6_vgtub_xor_enc : Enc_COPROC_VX_cmp<0b1001001101000>; +class V6_vgtuh_xor_enc : Enc_COPROC_VX_cmp<0b1001001101001>; +class V6_vgtuw_xor_enc : Enc_COPROC_VX_cmp<0b1001001101010>; +class V6_veqb_enc : Enc_COPROC_VX_cmp<0b1111000000000>; +class V6_veqh_enc : Enc_COPROC_VX_cmp<0b1111000000001>; +class V6_veqw_enc : Enc_COPROC_VX_cmp<0b1111000000010>; +class V6_vgtb_enc : Enc_COPROC_VX_cmp<0b1111000000100>; +class V6_vgth_enc : Enc_COPROC_VX_cmp<0b1111000000101>; +class V6_vgtw_enc : Enc_COPROC_VX_cmp<0b1111000000110>; +class V6_vgtub_enc : Enc_COPROC_VX_cmp<0b1111000001000>; +class V6_vgtuh_enc : Enc_COPROC_VX_cmp<0b1111000001001>; +class V6_vgtuw_enc : Enc_COPROC_VX_cmp<0b1111000001010>; + +class Enc_COPROC_VX_p2op<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> dst; + bits<5> src2; + + let Inst{31-16} = { 0b00011110, src1{1-0}, 0b0000, opc{4-3} }; + let Inst{13-0} = { 1, src2{4-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vaddbq_enc : Enc_COPROC_VX_p2op<0b01000>; +class V6_vaddhq_enc : Enc_COPROC_VX_p2op<0b01001>; +class V6_vaddwq_enc : Enc_COPROC_VX_p2op<0b01010>; +class V6_vaddbnq_enc : Enc_COPROC_VX_p2op<0b01011>; +class V6_vaddhnq_enc : Enc_COPROC_VX_p2op<0b01100>; +class V6_vaddwnq_enc : Enc_COPROC_VX_p2op<0b01101>; +class V6_vsubbq_enc : Enc_COPROC_VX_p2op<0b01110>; +class V6_vsubhq_enc : Enc_COPROC_VX_p2op<0b01111>; +class V6_vsubwq_enc : Enc_COPROC_VX_p2op<0b10000>; +class V6_vsubbnq_enc : Enc_COPROC_VX_p2op<0b10001>; +class V6_vsubhnq_enc : Enc_COPROC_VX_p2op<0b10010>; +class V6_vsubwnq_enc : Enc_COPROC_VX_p2op<0b10011>; + +class Enc_COPROC_VX_2op<bits<6> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + + let Inst{31-16} = { 0b00011110000000, opc{5-4} }; + let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vabsh_enc : Enc_COPROC_VX_2op<0b000000>; +class V6_vabsh_sat_enc : Enc_COPROC_VX_2op<0b000001>; +class V6_vabsw_enc : Enc_COPROC_VX_2op<0b000010>; +class V6_vabsw_sat_enc : Enc_COPROC_VX_2op<0b000011>; +class V6_vnot_enc : Enc_COPROC_VX_2op<0b000100>; +class V6_vdealh_enc : Enc_COPROC_VX_2op<0b000110>; +class V6_vdealb_enc : Enc_COPROC_VX_2op<0b000111>; +class V6_vunpackob_enc : Enc_COPROC_VX_2op<0b001000>; +class V6_vunpackoh_enc : Enc_COPROC_VX_2op<0b001001>; +class V6_vunpackub_enc : Enc_COPROC_VX_2op<0b010000>; +class V6_vunpackuh_enc : Enc_COPROC_VX_2op<0b010001>; +class V6_vunpackb_enc : Enc_COPROC_VX_2op<0b010010>; +class V6_vunpackh_enc : Enc_COPROC_VX_2op<0b010011>; +class V6_vshuffh_enc : Enc_COPROC_VX_2op<0b010111>; +class V6_vshuffb_enc : Enc_COPROC_VX_2op<0b100000>; +class V6_vzb_enc : Enc_COPROC_VX_2op<0b100001>; +class V6_vzh_enc : Enc_COPROC_VX_2op<0b100010>; +class V6_vsb_enc : Enc_COPROC_VX_2op<0b100011>; +class V6_vsh_enc : Enc_COPROC_VX_2op<0b100100>; +class V6_vcl0w_enc : Enc_COPROC_VX_2op<0b100101>; +class V6_vpopcounth_enc : Enc_COPROC_VX_2op<0b100110>; +class V6_vcl0h_enc : Enc_COPROC_VX_2op<0b100111>; +class V6_vnormamtw_enc : Enc_COPROC_VX_2op<0b110100>; +class V6_vnormamth_enc : Enc_COPROC_VX_2op<0b110101>; +class V6_vassign_enc : Enc_COPROC_VX_2op<0b111111>; + +class Enc_COPROC_VMEM_vL32_b_ai<bits<4> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<10> src2; + bits<4> src2_vector; + + let src2_vector = src2{9-6}; + let 
Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0000>; +class V6_vL32b_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0001>; +class V6_vL32b_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0010>; +class V6_vL32Ub_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0111>; +class V6_vL32b_nt_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1000>; +class V6_vL32b_nt_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1001>; +class V6_vL32b_nt_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1010>; + +class Enc_COPROC_VMEM_vL32_b_ai_128B<bits<4> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<11> src2; + bits<4> src2_vector; + + let src2_vector = src2{10-7}; + let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0000>; +class V6_vL32b_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0001>; +class V6_vL32b_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0010>; +class V6_vL32Ub_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0111>; +class V6_vL32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1000>; +class V6_vL32b_nt_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1001>; +class V6_vL32b_nt_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1010>; + +class Enc_COPROC_VMEM_vS32_b_ai_64B<bits<4> opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<4> src2_vector; + bits<5> src3; + + let src2_vector = src2{9-6}; + let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class Enc_COPROC_VMEM_vS32_b_ai_128B<bits<4> opc> : OpcodeHexagon { + bits<5> src1; + bits<11> src2; + bits<4> src2_vector; + bits<5> src3; + + let src2_vector = src2{10-7}; + let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0000>; +class V6_vS32Ub_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0111>; +class V6_vS32b_nt_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b1000>; + +class V6_vS32b_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0000>; +class V6_vS32Ub_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0111>; +class V6_vS32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b1000>; + +class Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<bits<1> opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<4> src2_vector; + bits<3> src3; + + let src2_vector = src2{9-6}; + let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<0>; +class V6_vS32b_nt_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<1>; + +class Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<bits<1> opc> : OpcodeHexagon { + bits<5> src1; + bits<11> src2; + bits<4> src2_vector; + bits<3> src3; + + let src2_vector = src2{10-7}; + let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<0>; +class V6_vS32b_nt_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<1>; + +class Enc_COPROC_VMEM_vS32_b_pred_ai<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<4> 
src3_vector; + bits<5> src4; + + let src3_vector = src3{9-6}; + let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class Enc_COPROC_VMEM_vS32_b_pred_ai_128B<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<11> src3; + bits<4> src3_vector; + bits<5> src4; + + let src3_vector = src3{10-7}; + let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00000>; +class V6_vS32b_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00001>; +class V6_vS32b_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01000>; +class V6_vS32b_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01001>; +class V6_vS32Ub_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01110>; +class V6_vS32Ub_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01111>; +class V6_vS32b_nt_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10000>; +class V6_vS32b_nt_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10001>; +class V6_vS32b_nt_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11000>; +class V6_vS32b_nt_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11001>; + +class V6_vS32b_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00000>; +class V6_vS32b_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00001>; +class V6_vS32b_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01000>; +class V6_vS32b_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01001>; +class V6_vS32Ub_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01110>; +class V6_vS32Ub_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01111>; +class V6_vS32b_nt_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10000>; +class V6_vS32b_nt_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10001>; +class V6_vS32b_nt_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11000>; +class V6_vS32b_nt_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11001>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<bits<4> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<4> src3_vector; + bits<3> src4; + + let src3_vector = src3{9-6}; + let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0000>; +class V6_vS32b_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0101>; +class V6_vS32b_nt_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1010>; +class V6_vS32b_nt_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1111>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<bits<4> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<11> src3; + bits<4> src3_vector; + bits<3> src4; + + let src3_vector = src3{10-7}; + let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0000>; +class V6_vS32b_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0101>; +class V6_vS32b_nt_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1010>; +class V6_vS32b_nt_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1111>; + +// TODO: Change script to 
generate dst, src1, src2 instead of +// dst, dst2, src1. +class Enc_COPROC_VMEM_vL32_b_pi<bits<4> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<9> src2; + bits<3> src2_vector; + + let src2_vector = src2{8-6}; + let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0000>; +class V6_vL32b_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0001>; +class V6_vL32b_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0010>; +class V6_vL32Ub_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0111>; +class V6_vL32b_nt_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1000>; +class V6_vL32b_nt_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1001>; +class V6_vL32b_nt_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1010>; + +class Enc_COPROC_VMEM_vL32_b_pi_128B<bits<4> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<10> src2; + bits<3> src2_vector; + + let src2_vector = src2{9-7}; + let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0000>; +class V6_vL32b_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0001>; +class V6_vL32b_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0010>; +class V6_vL32Ub_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0111>; +class V6_vL32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1000>; +class V6_vL32b_nt_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1001>; +class V6_vL32b_nt_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1010>; + + +// TODO: Change script to generate src1, src2 and src3 instead of +// dst, src1, src2. +class Enc_COPROC_VMEM_vS32_b_pi<bits<4> opc> : OpcodeHexagon { + bits<5> src1; + bits<9> src2; + bits<3> src2_vector; + bits<5> src3; + + let src2_vector = src2{8-6}; + let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} }; + let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0000>; +class V6_vS32Ub_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0111>; +class V6_vS32b_nt_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b1000>; + +class Enc_COPROC_VMEM_vS32_b_pi_128B<bits<4> opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<3> src2_vector; + bits<5> src3; + + let src2_vector = src2{9-7}; + let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} }; + let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0000>; +class V6_vS32Ub_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0111>; +class V6_vS32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b1000>; + +// TODO: Change script to generate src1, src2 and src3 instead of +// dst, src1, src2. 
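A concrete reading of the 64-byte post-increment store encoding above (Enc_COPROC_VMEM_vS32_b_pi, used by V6_vS32b_pi_enc): only bits 8-6 of the 9-bit offset field are encoded, so the byte increment is presumably a multiple of the 64-byte vector length, and it lands in Inst{10-8} next to the low opc bits and the source register. The sketch below simply packs the fields the way the TableGen lets describe; the function name is made up, and bits the class leaves unset are filled with zero here.

    #include <cstdint>

    // Illustrative encoder for V6_vS32b_pi_enc (opc = 0b0000), following
    // Enc_COPROC_VMEM_vS32_b_pi above. ByteOffset must be a multiple of 64.
    uint32_t encode_vS32b_pi(unsigned RxBase, int ByteOffset, unsigned VsSrc) {
      uint32_t OffVec = (uint32_t(ByteOffset) >> 6) & 0x7; // src2{8-6}: offset in vectors
      uint32_t Insn = 0;
      Insn |= 0x052u << 23;           // Inst{31-23} = 0b001010010
      Insn |= 0u << 22;               // opc{3} = 0
      Insn |= 1u << 21;               // fixed 1 in the store forms
      Insn |= (RxBase & 0x1F) << 16;  // src1{4-0}: base/post-increment register
      Insn |= OffVec << 8;            // src2_vector{2-0} -> Inst{10-8}
      Insn |= 0u << 5;                // opc{2-0} = 0b000 -> Inst{7-5}
      Insn |= VsSrc & 0x1F;           // src3{4-0}: vector register being stored
      return Insn;
    }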
+class Enc_COPROC_VMEM_vS32b_n_ew_pi<bits<1> opc> : OpcodeHexagon { + bits<5> src1; + bits<9> src2; + bits<3> src2_vector; + bits<3> src3; + + let src2_vector = src2{8-6}; + let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<0>; +class V6_vS32b_nt_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<1>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<bits<1> opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<3> src2_vector; + bits<3> src3; + + let src2_vector = src2{9-7}; + let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<0>; +class V6_vS32b_nt_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<1>; + +// TODO: Change script to generate src1, src2,src3 and src4 instead of +// dst, src1, src2, src3. +class Enc_COPROC_VMEM_vS32_b_pred_pi<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<9> src3; + bits<3> src3_vector; + bits<5> src4; + + let src3_vector = src3{8-6}; + let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00000>; +class V6_vS32b_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00001>; +class V6_vS32b_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01000>; +class V6_vS32b_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01001>; +class V6_vS32Ub_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01110>; +class V6_vS32Ub_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01111>; +class V6_vS32b_nt_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10000>; +class V6_vS32b_nt_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10001>; +class V6_vS32b_nt_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11000>; +class V6_vS32b_nt_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11001>; + +// TODO: Change script to generate src1, src2,src3 and src4 instead of +// dst, src1, src2, src3. 
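In the same spirit, the predicated post-increment store class just defined (Enc_COPROC_VMEM_vS32_b_pred_pi) adds a two-bit predicate register at Inst{12-11} and spreads its five opc bits across Inst{22-21} and Inst{7-5} to select the qpred/npred, unaligned, and non-temporal variants. Below is a sketch for the plain "if (Pv)" form, V6_vS32b_pred_pi_enc (opc = 0b01000); the function name is made up and unset bits are left zero.

    #include <cstdint>

    // Illustrative encoder for V6_vS32b_pred_pi_enc, following
    // Enc_COPROC_VMEM_vS32_b_pred_pi above. ByteOffset must be a multiple of 64.
    uint32_t encode_vS32b_pred_pi(unsigned Pv, unsigned RxBase, int ByteOffset,
                                  unsigned VsSrc) {
      const uint32_t Opc = 0x08;                            // 0b01000: "if (Pv) vS32b"
      uint32_t OffVec = (uint32_t(ByteOffset) >> 6) & 0x7;  // src3{8-6}: offset in vectors
      uint32_t Insn = 0;
      Insn |= 0x053u << 23;             // Inst{31-23} = 0b001010011
      Insn |= ((Opc >> 3) & 0x3) << 21; // opc{4-3} -> Inst{22-21}
      Insn |= (RxBase & 0x1F) << 16;    // src2{4-0}: base/post-increment register
      Insn |= (Pv & 0x3) << 11;         // src1{1-0}: predicate register -> Inst{12-11}
      Insn |= OffVec << 8;              // src3_vector{2-0} -> Inst{10-8}
      Insn |= (Opc & 0x7) << 5;         // opc{2-0} -> Inst{7-5}
      Insn |= VsSrc & 0x1F;             // src4{4-0}: vector register being stored
      return Insn;
    }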
+class Enc_COPROC_VMEM_vS32_b_pred_pi_128B<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<3> src3_vector; + bits<5> src4; + + let src3_vector = src3{9-7}; + let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00000>; +class V6_vS32b_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00001>; +class V6_vS32b_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01000>; +class V6_vS32b_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01001>; +class V6_vS32Ub_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01110>; +class V6_vS32Ub_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01111>; +class V6_vS32b_nt_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10000>; +class V6_vS32b_nt_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10001>; +class V6_vS32b_nt_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11000>; +class V6_vS32b_nt_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11001>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<bits<4> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<9> src3; + bits<3> src3_vector; + bits<3> src4; + + let src3_vector = src3{8-6}; + let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0000>; +class V6_vS32b_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0101>; +class V6_vS32b_nt_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1010>; +class V6_vS32b_nt_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1111>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<bits<4> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<3> src3_vector; + bits<3> src4; + + let src3_vector = src3{9-7}; + let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0000>; +class V6_vS32b_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0101>; +class V6_vS32b_nt_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1010>; +class V6_vS32b_nt_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1111>; + +class Enc_LD_load_m<bits<13> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<1> src2; + + let Inst{31-16} = { opc{12}, 0, opc{11-10}, 1, opc{9-4}, src1{4-0} }; + let Inst{13-0} = { src2{0}, 0b000, opc{3}, 0, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_ppu_enc : Enc_LD_load_m<0b0100110000000>; +class V6_vL32b_cur_ppu_enc : Enc_LD_load_m<0b0100110000001>; +class V6_vL32b_tmp_ppu_enc : Enc_LD_load_m<0b0100110000010>; +class V6_vL32Ub_ppu_enc : Enc_LD_load_m<0b0100110000111>; +class V6_vL32b_nt_ppu_enc : Enc_LD_load_m<0b0100110100000>; +class V6_vL32b_nt_cur_ppu_enc : Enc_LD_load_m<0b0100110100001>; +class V6_vL32b_nt_tmp_ppu_enc : Enc_LD_load_m<0b0100110100010>; + +class Enc_COPROC_VMEM_vS32_b_ppu<bits<4> opc> : OpcodeHexagon { + bits<5> src1; + bits<1> src2; + bits<5> src3; + + let Inst{31-16} = { 0b001010110, opc{3}, 1, src1{4-0} }; + let Inst{13-0} = { src2{0}, 0b00000, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0000>; +class V6_vS32Ub_ppu_enc : 
Enc_COPROC_VMEM_vS32_b_ppu<0b0111>; +class V6_vS32b_nt_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b1000>; + +class Enc_COPROC_VMEM_vS32b_new_ppu<bits<1> opc> : OpcodeHexagon { + bits<5> src1; + bits<1> src2; + bits<3> src3; + + let Inst{31-16} = { 0b001010110, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { src2{0}, 0b0000000100, src3{2-0} }; +} + +class V6_vS32b_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<0>; +class V6_vS32b_nt_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<1>; + +class Enc_COPROC_VMEM_vS32_b_pred_ppu<bits<5> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<1> src3; + bits<5> src4; + + let Inst{31-16} = { 0b001010111, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { src3{0}, src1{1-0}, 0b000, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00000>; +class V6_vS32b_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00001>; +class V6_vS32b_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01000>; +class V6_vS32b_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01001>; +class V6_vS32Ub_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01110>; +class V6_vS32Ub_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01111>; +class V6_vS32b_nt_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10000>; +class V6_vS32b_nt_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10001>; +class V6_vS32b_nt_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11000>; +class V6_vS32b_nt_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11001>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<bits<4> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<1> src3; + bits<3> src4; + + let Inst{31-16} = { 0b001010111, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { src3{0}, src1{1-0}, 0b00001, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0000>; +class V6_vS32b_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0101>; +class V6_vS32b_nt_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1010>; +class V6_vS32b_nt_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1111>; + + +class Enc_COPROC_VX_4op_i<bits<5> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<1> src3; + + let Inst{31-16} = { 0b00011001, opc{4-2}, src2{4-0} }; + let Inst{13-0} = { opc{1}, src1{4-0}, 1, opc{0}, src3{0}, dst{4-0} }; +} + +class V6_vrmpybusi_enc : Enc_COPROC_VX_4op_i<0b01000>; +class V6_vrsadubi_enc : Enc_COPROC_VX_4op_i<0b01001>; +class V6_vrmpybusi_acc_enc : Enc_COPROC_VX_4op_i<0b01010>; +class V6_vrsadubi_acc_enc : Enc_COPROC_VX_4op_i<0b01011>; +class V6_vrmpyubi_acc_enc : Enc_COPROC_VX_4op_i<0b01111>; +class V6_vrmpyubi_enc : Enc_COPROC_VX_4op_i<0b10101>; + +class Enc_COPROC_VX_vandqrt<bits<5> opc> : OpcodeHexagon { + bits<5> dst; + bits<2> src1; + bits<5> src2; + + let Inst{31-16} = { 0b00011001, opc{4-3}, 1, src2{4-0} }; + let Inst{13-0} = { opc{2}, 0b000, src1{1-0}, opc{1-0}, 1, dst{4-0} }; +} + +class V6_vandqrt_acc_enc : Enc_COPROC_VX_vandqrt<0b01101>; +class V6_vandqrt_enc : Enc_COPROC_VX_vandqrt<0b10010>; + +class Enc_COPROC_VX_cards<bits<2> opc> : OpcodeHexagon { + bits<5> src1; + bits<5> src2; + bits<5> src3; + + let Inst{31-16} = { 0b00011001111, src3{4-0} }; + let Inst{13-0} = { 1, src1{4-0}, 0, opc{1-0}, src2{4-0} }; +} + +class V6_vshuff_enc : Enc_COPROC_VX_cards<0b01>; +class V6_vdeal_enc : Enc_COPROC_VX_cards<0b10>; + + +class Enc_COPROC_VX_v_cmov<bits<1> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> dst; + bits<5> src2; + + let Inst{31-16} = { 0b0001101000, opc{0}, 
0b00000 }; + let Inst{13-0} = { 0, src2{4-0}, 0, src1{1-0}, dst{4-0} }; +} + +class V6_vcmov_enc : Enc_COPROC_VX_v_cmov<0>; +class V6_vncmov_enc : Enc_COPROC_VX_v_cmov<1>; + +class Enc_X_p3op<bits<8> opc> : OpcodeHexagon { + bits<2> src1; + bits<5> dst; + bits<5> src2; + bits<5> src3; + + let Inst{31-16} = { opc{7-5}, 0b1101, opc{4}, 0, opc{3-2}, src3{4-0} }; + let Inst{13-0} = { opc{1}, src2{4-0}, opc{0}, src1{1-0}, dst{4-0} }; +} + +class V6_vnccombine_enc : Enc_X_p3op<0b00001000>; +class V6_vccombine_enc : Enc_X_p3op<0b00001100>; + +class Enc_COPROC_VX_4op_r<bits<4> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<3> src3; + + let Inst{31-16} = { 0b00011011, src2{4-0}, src3{2-0} }; + let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} }; +} + +class V6_valignb_enc : Enc_COPROC_VX_4op_r<0b0000>; +class V6_vlalignb_enc : Enc_COPROC_VX_4op_r<0b0001>; +class V6_vasrwh_enc : Enc_COPROC_VX_4op_r<0b0010>; +class V6_vasrwhsat_enc : Enc_COPROC_VX_4op_r<0b0011>; +class V6_vasrwhrndsat_enc : Enc_COPROC_VX_4op_r<0b0100>; +class V6_vasrwuhsat_enc : Enc_COPROC_VX_4op_r<0b0101>; +class V6_vasrhubsat_enc : Enc_COPROC_VX_4op_r<0b0110>; +class V6_vasrhubrndsat_enc : Enc_COPROC_VX_4op_r<0b0111>; +class V6_vasrhbrndsat_enc : Enc_COPROC_VX_4op_r<0b1000>; +class V6_vlutvvb_enc : Enc_COPROC_VX_4op_r<0b1001>; +class V6_vshuffvdd_enc : Enc_COPROC_VX_4op_r<0b1011>; +class V6_vdealvdd_enc : Enc_COPROC_VX_4op_r<0b1100>; +class V6_vlutvvb_oracc_enc : Enc_COPROC_VX_4op_r<0b1101>; +class V6_vlutvwh_enc : Enc_COPROC_VX_4op_r<0b1110>; +class V6_vlutvwh_oracc_enc : Enc_COPROC_VX_4op_r<0b1111>; + +class Enc_S_3op_valign_i<bits<9> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<3> src3; + + let Inst{31-16} = { opc{8-7}, 0, opc{6-3}, 0b00, opc{2-1}, src2{4-0} }; + let Inst{13-0} = { opc{0}, src1{4-0}, src3{2-0}, dst{4-0} }; +} + +class V6_vlutb_enc : Enc_S_3op_valign_i<0b001100000>; +class V6_vlutb_dv_enc : Enc_S_3op_valign_i<0b001100010>; +class V6_vlutb_acc_enc : Enc_S_3op_valign_i<0b001100100>; +class V6_vlutb_dv_acc_enc : Enc_S_3op_valign_i<0b001100110>; +class V6_valignbi_enc : Enc_S_3op_valign_i<0b001111011>; +class V6_vlalignbi_enc : Enc_S_3op_valign_i<0b001111111>; +class S2_valignib_enc : Enc_S_3op_valign_i<0b110000000>; +class S2_addasl_rrri_enc : Enc_S_3op_valign_i<0b110010000>; + +class Enc_COPROC_VX_3op_q<bits<3> opc> : OpcodeHexagon { + bits<2> dst; + bits<2> src1; + bits<2> src2; + + let Inst{31-16} = { 0b00011110, src2{1-0}, 0b000011 }; + let Inst{13-0} = { 0b0000, src1{1-0}, 0b000, opc{2-0}, dst{1-0} }; +} + +class V6_pred_and_enc : Enc_COPROC_VX_3op_q<0b000>; +class V6_pred_or_enc : Enc_COPROC_VX_3op_q<0b001>; +class V6_pred_xor_enc : Enc_COPROC_VX_3op_q<0b011>; +class V6_pred_or_n_enc : Enc_COPROC_VX_3op_q<0b100>; +class V6_pred_and_n_enc : Enc_COPROC_VX_3op_q<0b101>; + +class V6_pred_not_enc : OpcodeHexagon { + bits<2> dst; + bits<2> src1; + + let Inst{31-16} = { 0b0001111000000011 }; + let Inst{13-0} = { 0b0000, src1{1-0}, 0b000010, dst{1-0} }; +} + +class Enc_COPROC_VX_4op_q<bits<1> opc> : OpcodeHexagon { + bits<5> dst; + bits<2> src1; + bits<5> src2; + bits<5> src3; + + let Inst{31-16} = { 0b000111101, opc{0}, 1, src3{4-0} }; + let Inst{13-0} = { 1, src2{4-0}, 0, src1{1-0}, dst{4-0} }; +} + +class V6_vswap_enc : Enc_COPROC_VX_4op_q<0>; +class V6_vmux_enc : Enc_COPROC_VX_4op_q<1>; + +class Enc_X_2op<bits<16> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + + let Inst{31-16} = { opc{15-5}, src1{4-0} }; + let Inst{13-0} = { opc{4-3}, 
0b0000, opc{2-0}, dst{4-0} }; +} + +class V6_lvsplatw_enc : Enc_X_2op<0b0001100110100001>; +class V6_vinsertwr_enc : Enc_X_2op<0b0001100110110001>; +class S6_vsplatrbp_enc : Enc_X_2op<0b1000010001000100>; + + +class Enc_CR_2op_r<bits<12> opc> : OpcodeHexagon { + bits<2> dst; + bits<5> src1; + + let Inst{31-16} = { opc{11}, 0, opc{10-7}, 0, opc{6-3}, src1{4-0} }; + let Inst{13-0} = { opc{2}, 0b000000, opc{1}, 0b000, opc{0}, dst{1-0} }; +} + +class V6_pred_scalar2_enc : Enc_CR_2op_r<0b001101101011>; +class Y5_l2locka_enc : Enc_CR_2op_r<0b110000111100>; + +class Enc_S_3op_i6<bits<9> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<6> src2; + + let Inst{31-16} = { 0b1000, opc{8-6}, 0, opc{5-3}, src1{4-0} }; + let Inst{13-0} = { src2{5-0}, opc{2-0}, dst{4-0} }; +} + +class S6_rol_i_p_enc : Enc_S_3op_i6<0b000000011>; +class S6_rol_i_p_nac_enc : Enc_S_3op_i6<0b001000011>; +class S6_rol_i_p_acc_enc : Enc_S_3op_i6<0b001000111>; +class S6_rol_i_p_and_enc : Enc_S_3op_i6<0b001010011>; +class S6_rol_i_p_or_enc : Enc_S_3op_i6<0b001010111>; +class S6_rol_i_p_xacc_enc : Enc_S_3op_i6<0b001100011>; + +class Enc_X_3op_r<bits<15> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { opc{14-4}, src1{4-0} }; + let Inst{13-0} = { opc{3}, src2{4-0}, opc{2-0}, dst{4-0} }; +} + +class S6_rol_i_r_enc : Enc_X_3op_r<0b100011000000011>; +class S6_rol_i_r_nac_enc : Enc_X_3op_r<0b100011100000011>; +class S6_rol_i_r_acc_enc : Enc_X_3op_r<0b100011100000111>; +class S6_rol_i_r_and_enc : Enc_X_3op_r<0b100011100100011>; +class S6_rol_i_r_or_enc : Enc_X_3op_r<0b100011100100111>; +class S6_rol_i_r_xacc_enc : Enc_X_3op_r<0b100011101000011>; +class S6_vtrunehb_ppp_enc : Enc_X_3op_r<0b110000011000011>; +class S6_vtrunohb_ppp_enc : Enc_X_3op_r<0b110000011000101>; + +class Enc_no_operands<bits<25> opc> : OpcodeHexagon { + + let Inst{31-16} = { opc{24-10}, 0 }; + let Inst{13-0} = { opc{9-7}, 0b000, opc{6-0}, 0 }; +} + +class Y5_l2gunlock_enc : Enc_no_operands<0b1010100000100000010000000>; +class Y5_l2gclean_enc : Enc_no_operands<0b1010100000100000100000000>; +class Y5_l2gcleaninv_enc : Enc_no_operands<0b1010100000100000110000000>; +class V6_vhist_enc : Enc_no_operands<0b0001111000000001001000000>; + +class Enc_J_jumpr<bits<13> opc> : OpcodeHexagon { + bits<5> src1; + + let Inst{31-16} = { opc{12-6}, 0, opc{5-3}, src1{4-0} }; + let Inst{13-0} = { 0b00, opc{2}, 0b0000, opc{1-0}, 0b00000 }; +} + +class Y5_l2unlocka_enc : Enc_J_jumpr<0b1010011011000>; +class Y2_l2cleaninvidx_enc : Enc_J_jumpr<0b1010100011000>; + +class Enc_ST_l2gclean_pa<bits<2> opc> : OpcodeHexagon { + bits<5> src1; + + let Inst{31-16} = { 0b101001101, opc{1-0}, 0b00000 }; + let Inst{13-0} = { 0, src1{4-0}, 0b00000000 }; +} + +class Y6_l2gcleanpa_enc : Enc_ST_l2gclean_pa<0b01>; +class Y6_l2gcleaninvpa_enc : Enc_ST_l2gclean_pa<0b10>; + +class A5_ACS_enc : OpcodeHexagon { + bits<5> dst1; + bits<2> dst2; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { 0b11101010101, src1{4-0} }; + let Inst{13-0} = { 0, src2{4-0}, 0, dst2{1-0}, dst1{4-0} }; +} + +class Enc_X_4op_r<bits<8> opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<2> src3; + + let Inst{31-16} = { 0b11, opc{7}, 0, opc{6-5}, 1, opc{4-1}, src1{4-0} }; + let Inst{13-0} = { 0, src2{4-0}, opc{0}, src3{1-0}, dst{4-0} }; +} + +class S2_vsplicerb_enc : Enc_X_4op_r<0b00001000>; +class S2_cabacencbin_enc : Enc_X_4op_r<0b00001010>; +class F2_sffma_sc_enc : Enc_X_4op_r<0b11110111>; + +class V6_vhistq_enc : OpcodeHexagon { + bits<2> src1; + + 
let Inst{31-16} = { 0b00011110, src1{1-0}, 0b000010 }; + let Inst{13-0} = { 0b10000010000000 }; +} + +// TODO: Change script to generate dst1 instead of dst. +class A6_vminub_RdP_enc : OpcodeHexagon { + bits<5> dst1; + bits<2> dst2; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { 0b11101010111, src2{4-0} }; + let Inst{13-0} = { 0, src1{4-0}, 0, dst2{1-0}, dst1{4-0} }; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td new file mode 100644 index 0000000..3c5ec17 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td @@ -0,0 +1,448 @@ +//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Hexagon Instruction Flags + +// +// *** Must match HexagonBaseInfo.h *** +//===----------------------------------------------------------------------===// + +class IType<bits<5> t> { + bits<5> Value = t; +} +def TypePSEUDO : IType<0>; +def TypeALU32 : IType<1>; +def TypeCR : IType<2>; +def TypeJR : IType<3>; +def TypeJ : IType<4>; +def TypeLD : IType<5>; +def TypeST : IType<6>; +def TypeSYSTEM : IType<7>; +def TypeXTYPE : IType<8>; +def TypeENDLOOP: IType<31>; + +// Maintain list of valid subtargets for each instruction. +class SubTarget<bits<6> value> { + bits<6> Value = value; +} + +def HasAnySubT : SubTarget<0x3f>; // 111111 +def HasV5SubT : SubTarget<0x3e>; // 111110 +def HasV55SubT : SubTarget<0x3c>; // 111100 +def HasV60SubT : SubTarget<0x38>; // 111000 + +// Addressing modes for load/store instructions +class AddrModeType<bits<3> value> { + bits<3> Value = value; +} + +def NoAddrMode : AddrModeType<0>; // No addressing mode +def Absolute : AddrModeType<1>; // Absolute addressing mode +def AbsoluteSet : AddrModeType<2>; // Absolute set addressing mode +def BaseImmOffset : AddrModeType<3>; // Indirect with offset +def BaseLongOffset : AddrModeType<4>; // Indirect with long offset +def BaseRegOffset : AddrModeType<5>; // Indirect with register offset +def PostInc : AddrModeType<6>; // Post increment addressing mode + +class MemAccessSize<bits<4> value> { + bits<4> Value = value; +} + +def NoMemAccess : MemAccessSize<0>;// Not a memory acces instruction. +def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb). +def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh). +def WordAccess : MemAccessSize<3>;// Word access instruction (memw). +def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd) +def Vector64Access : MemAccessSize<7>;// Vector access instruction (memv) +def Vector128Access : MemAccessSize<8>;// Vector access instruction (memv) + + +//===----------------------------------------------------------------------===// +// Instruction Class Declaration + +//===----------------------------------------------------------------------===// + +class OpcodeHexagon { + field bits<32> Inst = ?; // Default to an invalid insn. 
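+  // Every encoded Hexagon instruction carries its 4-bit instruction class
+  // (ICLASS) in the top nibble, Inst{31-28}; the IClass field below is wired
+  // into those bits.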
+ bits<4> IClass = 0; // ICLASS + + let Inst{31-28} = IClass; + + bits<1> zero = 0; +} + +class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, + string cstr, InstrItinClass itin, IType type> + : Instruction { + let Namespace = "Hexagon"; + + dag OutOperandList = outs; + dag InOperandList = ins; + let AsmString = asmstr; + let Pattern = pattern; + let Constraints = cstr; + let Itinerary = itin; + let Size = 4; + + // SoftFail is a field the disassembler can use to provide a way for + // instructions to not match without killing the whole decode process. It is + // mainly used for ARM, but Tablegen expects this field to exist or it fails + // to build the decode table. + field bits<32> SoftFail = 0; + + // *** Must match MCTargetDesc/HexagonBaseInfo.h *** + + // Instruction type according to the ISA. + IType Type = type; + let TSFlags{4-0} = Type.Value; + + // Solo instructions, i.e., those that cannot be in a packet with others. + bits<1> isSolo = 0; + let TSFlags{5} = isSolo; + // Packed only with A or X-type instructions. + bits<1> isSoloAX = 0; + let TSFlags{6} = isSoloAX; + // Only A-type instruction in first slot or nothing. + bits<1> isSoloAin1 = 0; + let TSFlags{7} = isSoloAin1; + + // Predicated instructions. + bits<1> isPredicated = 0; + let TSFlags{8} = isPredicated; + bits<1> isPredicatedFalse = 0; + let TSFlags{9} = isPredicatedFalse; + bits<1> isPredicatedNew = 0; + let TSFlags{10} = isPredicatedNew; + bits<1> isPredicateLate = 0; + let TSFlags{11} = isPredicateLate; // Late predicate producer insn. + + // New-value insn helper fields. + bits<1> isNewValue = 0; + let TSFlags{12} = isNewValue; // New-value consumer insn. + bits<1> hasNewValue = 0; + let TSFlags{13} = hasNewValue; // New-value producer insn. + bits<3> opNewValue = 0; + let TSFlags{16-14} = opNewValue; // New-value produced operand. + bits<1> isNVStorable = 0; + let TSFlags{17} = isNVStorable; // Store that can become new-value store. + bits<1> isNVStore = 0; + let TSFlags{18} = isNVStore; // New-value store insn. + bits<1> isCVLoadable = 0; + let TSFlags{19} = isCVLoadable; // Load that can become cur-value load. + bits<1> isCVLoad = 0; + let TSFlags{20} = isCVLoad; // Cur-value load insn. + + // Immediate extender helper fields. + bits<1> isExtendable = 0; + let TSFlags{21} = isExtendable; // Insn may be extended. + bits<1> isExtended = 0; + let TSFlags{22} = isExtended; // Insn must be extended. + bits<3> opExtendable = 0; + let TSFlags{25-23} = opExtendable; // Which operand may be extended. + bits<1> isExtentSigned = 0; + let TSFlags{26} = isExtentSigned; // Signed or unsigned range. + bits<5> opExtentBits = 0; + let TSFlags{31-27} = opExtentBits; //Number of bits of range before extending. + bits<2> opExtentAlign = 0; + let TSFlags{33-32} = opExtentAlign; // Alignment exponent before extending. + + // If an instruction is valid on a subtarget, set the corresponding + // bit from validSubTargets. + // By default, instruction is valid on all subtargets. + SubTarget validSubTargets = HasAnySubT; + let TSFlags{39-34} = validSubTargets.Value; + + // Addressing mode for load/store instructions. + AddrModeType addrMode = NoAddrMode; + let TSFlags{42-40} = addrMode.Value; + + // Memory access size for mem access instructions (load/store) + MemAccessSize accessSize = NoMemAccess; + let TSFlags{46-43} = accessSize.Value; + + bits<1> isTaken = 0; + let TSFlags {47} = isTaken; // Branch prediction. + + bits<1> isFP = 0; + let TSFlags {48} = isFP; // Floating-point. 
+ + bits<1> hasNewValue2 = 0; + let TSFlags{50} = hasNewValue2; // Second New-value producer insn. + bits<3> opNewValue2 = 0; + let TSFlags{53-51} = opNewValue2; // Second New-value produced operand. + + bits<1> isAccumulator = 0; + let TSFlags{54} = isAccumulator; + + // Fields used for relation models. + bit isNonTemporal = 0; + string isNT = ""; // set to "true" for non-temporal vector stores. + string BaseOpcode = ""; + string CextOpcode = ""; + string PredSense = ""; + string PNewValue = ""; + string NValueST = ""; // Set to "true" for new-value stores. + string InputType = ""; // Input is "imm" or "reg" type. + string isFloat = "false"; // Set to "true" for the floating-point load/store. + string isBrTaken = !if(isTaken, "true", "false"); // Set to "true"/"false" for jump instructions + + let PredSense = !if(isPredicated, !if(isPredicatedFalse, "false", "true"), + ""); + let PNewValue = !if(isPredicatedNew, "new", ""); + let NValueST = !if(isNVStore, "true", "false"); + let isNT = !if(isNonTemporal, "true", "false"); + + // *** Must match MCTargetDesc/HexagonBaseInfo.h *** +} + +//===----------------------------------------------------------------------===// +// Instruction Classes Definitions + +//===----------------------------------------------------------------------===// + +// LD Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +let mayLoad = 1 in +class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon; + +let mayLoad = 1 in +class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : LDInst<outs, ins, asmstr, pattern, cstr>; + +class CONSTLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : LDInst<outs, ins, asmstr, pattern, cstr>; + +// LD Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : LDInst<outs, ins, asmstr, pattern, cstr>; + +let mayLoad = 1 in +class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon; + +let mayLoad = 1 in +class LD1Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>; + +// ST Instruction Class in V2/V3 can take SLOT0 only. +// ST Instruction Class in V4 can take SLOT0 & SLOT1. +// Definition of the instruction class CHANGED from V2/V3 to V4. +let mayStore = 1 in +class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon; + +class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : STInst<outs, ins, asmstr, pattern, cstr>; + +let mayStore = 1 in +class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon; + +// Same as ST0Inst but doesn't derive from OpcodeHexagon. 
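+// (Presumably so that an Enc_* encoding class, which already derives from
+// OpcodeHexagon, can be mixed in later without defining Inst twice.)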
+let mayStore = 1 in +class ST1Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>; + +// ST Instruction Class in V2/V3 can take SLOT0 only. +// ST Instruction Class in V4 can take SLOT0 & SLOT1. +// Definition of the instruction class CHANGED from V2/V3 to V4. +class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01> + : STInst<outs, ins, asmstr, pattern, cstr, itin>; + +// SYSTEM Instruction Class in V4 can take SLOT0 only +// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1. +class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ST_tc_3stall_SLOT0> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>, + OpcodeHexagon; + +// ALU32 Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>, OpcodeHexagon; + +// ALU64 Instruction Class in V2/V3. +// XTYPE Instruction Class in V4. +// Definition of the instruction class NOT CHANGED. +// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4. +class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, + OpcodeHexagon; + +class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23> + : ALU64Inst<outs, ins, asmstr, pattern, cstr, itin>; + + +// M Instruction Class in V2/V3. +// XTYPE Instruction Class in V4. +// Definition of the instruction class NOT CHANGED. +// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. +class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, + OpcodeHexagon; + +// Same as above but doesn't derive from OpcodeHexagon +class MInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>; + +// M Instruction Class in V2/V3. +// XTYPE Instruction Class in V4. +// Definition of the instruction class NOT CHANGED. +// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. +class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = M_tc_2_SLOT23> + : MInst<outs, ins, asmstr, pattern, cstr, itin>; + +// S Instruction Class in V2/V3. +// XTYPE Instruction Class in V4. +// Definition of the instruction class NOT CHANGED. +// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. 
+class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, + OpcodeHexagon; + +class SInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>; + +// S Instruction Class in V2/V3. +// XTYPE Instruction Class in V4. +// Definition of the instruction class NOT CHANGED. +// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. +class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = S_3op_tc_1_SLOT23> + : SInst<outs, ins, asmstr, pattern, cstr, itin>; + +// J Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = J_tc_2early_SLOT23> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>, OpcodeHexagon; + +// JR Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = J_tc_2early_SLOT2> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>, OpcodeHexagon; + +// CR Instruction Class in V2/V3/V4. +// Definition of the instruction class NOT CHANGED. +class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = CR_tc_2early_SLOT3> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>, OpcodeHexagon; + +let isCodeGenOnly = 1, isPseudo = 1 in +class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = J_tc_2early_SLOT0123> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>, + OpcodeHexagon; + +let isCodeGenOnly = 1, isPseudo = 1 in +class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>, + OpcodeHexagon; + +let isCodeGenOnly = 1, isPseudo = 1 in +class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr=""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>, + OpcodeHexagon; + +//===----------------------------------------------------------------------===// +// Instruction Classes Definitions - +//===----------------------------------------------------------------------===// + + +// +// ALU32 patterns +//. +class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> + : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>; + +class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> + : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>; + +class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> + : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>; + +class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> + : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>; + +// +// ALU64 patterns. 
+// +class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU64_tc_1_SLOT23> + : ALU64Inst<outs, ins, asmstr, pattern, cstr, itin>; + +class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ALU64_tc_1_SLOT23> + : ALU64Inst<outs, ins, asmstr, pattern, cstr, itin>; + +// Post increment ST Instruction. +class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : STInst<outs, ins, asmstr, pattern, cstr>; + +let mayStore = 1 in +class STInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : STInst<outs, ins, asmstr, pattern, cstr>; + +// Post increment LD Instruction. +class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : LDInst<outs, ins, asmstr, pattern, cstr>; + +let mayLoad = 1 in +class LDInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : LDInst<outs, ins, asmstr, pattern, cstr>; + +//===----------------------------------------------------------------------===// +// V4 Instruction Format Definitions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrFormatsV4.td" + +//===----------------------------------------------------------------------===// +// V4 Instruction Format Definitions + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// V60 Instruction Format Definitions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrFormatsV60.td" + +//===----------------------------------------------------------------------===// +// V60 Instruction Format Definitions + +//===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td new file mode 100644 index 0000000..2d1dea5 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -0,0 +1,155 @@ +//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V4 instruction classes in TableGen format. +// +//===----------------------------------------------------------------------===// + +//----------------------------------------------------------------------------// +// Hexagon Instruction Flags +// +// *** Must match BaseInfo.h *** +//----------------------------------------------------------------------------// + +def TypeMEMOP : IType<9>; +def TypeNV : IType<10>; +def TypeDUPLEX : IType<11>; +def TypeCOMPOUND : IType<12>; +def TypePREFIX : IType<30>; + +// Duplex Instruction Class Declaration +//===----------------------------------------------------------------------===// + +class OpcodeDuplex { + field bits<32> Inst = ?; // Default to an invalid insn. 
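+  // A duplex packs two 13-bit sub-instructions into one 32-bit word; the let
+  // statements below place IClass{0} in Inst{13} and IClass{3-1} in
+  // Inst{31-29}, with the sub-instructions in Inst{28-16} and Inst{12-0}.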
+ bits<4> IClass = 0; // ICLASS + bits<13> ISubHi = 0; // Low sub-insn + bits<13> ISubLo = 0; // High sub-insn + + let Inst{31-29} = IClass{3-1}; + let Inst{13} = IClass{0}; + let Inst{15-14} = 0; + let Inst{28-16} = ISubHi; + let Inst{12-0} = ISubLo; +} + +class InstDuplex<bits<4> iClass, list<dag> pattern = [], + string cstr = ""> + : Instruction, OpcodeDuplex { + let Namespace = "Hexagon"; + IType Type = TypeDUPLEX; // uses slot 0,1 + let isCodeGenOnly = 1; + let hasSideEffects = 0; + dag OutOperandList = (outs); + dag InOperandList = (ins); + let IClass = iClass; + let Constraints = cstr; + let Itinerary = DUPLEX; + let Size = 4; + + // SoftFail is a field the disassembler can use to provide a way for + // instructions to not match without killing the whole decode process. It is + // mainly used for ARM, but Tablegen expects this field to exist or it fails + // to build the decode table. + field bits<32> SoftFail = 0; + + // *** Must match MCTargetDesc/HexagonBaseInfo.h *** + + let TSFlags{4-0} = Type.Value; + + // Predicated instructions. + bits<1> isPredicated = 0; + let TSFlags{6} = isPredicated; + bits<1> isPredicatedFalse = 0; + let TSFlags{7} = isPredicatedFalse; + bits<1> isPredicatedNew = 0; + let TSFlags{8} = isPredicatedNew; + + // New-value insn helper fields. + bits<1> isNewValue = 0; + let TSFlags{9} = isNewValue; // New-value consumer insn. + bits<1> hasNewValue = 0; + let TSFlags{10} = hasNewValue; // New-value producer insn. + bits<3> opNewValue = 0; + let TSFlags{13-11} = opNewValue; // New-value produced operand. + bits<1> isNVStorable = 0; + let TSFlags{14} = isNVStorable; // Store that can become new-value store. + bits<1> isNVStore = 0; + let TSFlags{15} = isNVStore; // New-value store insn. + + // Immediate extender helper fields. + bits<1> isExtendable = 0; + let TSFlags{16} = isExtendable; // Insn may be extended. + bits<1> isExtended = 0; + let TSFlags{17} = isExtended; // Insn must be extended. + bits<3> opExtendable = 0; + let TSFlags{20-18} = opExtendable; // Which operand may be extended. + bits<1> isExtentSigned = 0; + let TSFlags{21} = isExtentSigned; // Signed or unsigned range. + bits<5> opExtentBits = 0; + let TSFlags{26-22} = opExtentBits; //Number of bits of range before extending. + bits<2> opExtentAlign = 0; + let TSFlags{28-27} = opExtentAlign; // Alignment exponent before extending. +} + +//----------------------------------------------------------------------------// +// Instruction Classes Definitions +//----------------------------------------------------------------------------// + +// +// NV type instructions. +// +class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>, OpcodeHexagon; + +class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0> + : NVInst<outs, ins, asmstr, pattern, cstr, itin>; + +// Definition of Post increment new value store. +class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0> + : NVInst<outs, ins, asmstr, pattern, cstr, itin>; + +// Post increment ST Instruction. +let mayStore = 1 in +class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0> + : NVInst<outs, ins, asmstr, pattern, cstr, itin>; + +// New-value conditional branch. 
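+// (Illustrative Hexagon syntax: "if (cmp.eq(r0.new, r1)) jump:t .Ltarget",
+// where the compare consumes a value produced earlier in the same packet.)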
+class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : NVInst<outs, ins, asmstr, pattern, cstr>; + +let mayLoad = 1, mayStore = 1 in +class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>, + OpcodeHexagon; + +class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0> + : MEMInst<outs, ins, asmstr, pattern, cstr, itin>; + +let isCodeGenOnly = 1 in +class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []> + : InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123, + TypePREFIX>, OpcodeHexagon; + +class SUBInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypeDUPLEX>, + OpcodeHexagon; + +class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>, + OpcodeHexagon; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td new file mode 100644 index 0000000..f3d43de --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td @@ -0,0 +1,238 @@ +//==- HexagonInstrFormatsV60.td - Hexagon Instruction Formats -*- tablegen -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V60 instruction classes in TableGen format. 
+// +//===----------------------------------------------------------------------===// + +//----------------------------------------------------------------------------// +// Hexagon Intruction Flags + +// +// *** Must match BaseInfo.h *** +//----------------------------------------------------------------------------// + +def TypeCVI_VA : IType<13>; +def TypeCVI_VA_DV : IType<14>; +def TypeCVI_VX : IType<15>; +def TypeCVI_VX_DV : IType<16>; +def TypeCVI_VP : IType<17>; +def TypeCVI_VP_VS : IType<18>; +def TypeCVI_VS : IType<19>; +def TypeCVI_VINLANESAT : IType<20>; +def TypeCVI_VM_LD : IType<21>; +def TypeCVI_VM_TMP_LD : IType<22>; +def TypeCVI_VM_CUR_LD : IType<23>; +def TypeCVI_VM_VP_LDU : IType<24>; +def TypeCVI_VM_ST : IType<25>; +def TypeCVI_VM_NEW_ST : IType<26>; +def TypeCVI_VM_STU : IType<27>; +def TypeCVI_HIST : IType<28>; +//----------------------------------------------------------------------------// +// Intruction Classes Definitions + +//----------------------------------------------------------------------------// + +let validSubTargets = HasV60SubT in +{ +class CVI_VA_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VA> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VA_DV_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VA_DV> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA_DV>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VX_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VX_LONG> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VX_Resource_late<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VX_LATE> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>, + Requires<[HasV60T, UseHVX]>; + +class CVI_VX_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VX> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VX_DV_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VX_DV> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VX_DV_Slot2_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VX_DV_SLOT2> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VX_DV_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VX_DV_LONG> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VP_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VP_LONG> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VP_VS_Resource_early<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = 
CVI_VP_VS_EARLY> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VP_VS_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VP_VS_LONG> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VP_VS_Resource_long_early<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VP_VS_LONG_EARLY> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VS_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VS> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VS>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VINLANESAT_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VINLANESAT> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VINLANESAT>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VS_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VS> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VS>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_LD_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_LD>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_LD_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_LD>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_TMP_LD_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_TMP_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_TMP_LD_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_TMP_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_CUR_LD_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_CUR_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_CUR_LD>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_VP_LDU_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_VP_LDU> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_VP_LDU>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_VP_LDU_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_VP_LDU> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_VP_LDU>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_ST_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_ST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, 
TypeCVI_VM_ST>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_ST_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_ST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_ST>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_NEW_ST_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_NEW_ST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_NEW_ST>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_NEW_ST_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_NEW_ST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_NEW_ST>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_STU_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_STU> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_STU>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_VM_STU_Resource_long<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VM_STU> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_STU>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + +class CVI_HIST_Resource<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_HIST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>, + OpcodeHexagon, Requires<[HasV60T, UseHVX]>; +} + +let validSubTargets = HasV60SubT in +{ +class CVI_VA_Resource1<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VA> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>, + Requires<[HasV60T, UseHVX]>; + +class CVI_VX_DV_Resource1<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_VX_DV> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>, + Requires<[HasV60T, UseHVX]>; + +class CVI_HIST_Resource1<dag outs, dag ins, string asmstr, + list<dag> pattern = [], string cstr = "", + InstrItinClass itin = CVI_HIST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>, + Requires<[HasV60T, UseHVX]>; +} + + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp new file mode 100644 index 0000000..eb3590c --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -0,0 +1,3828 @@ +//===-- HexagonInstrInfo.cpp - Hexagon Instruction Information ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonInstrInfo.h" +#include "Hexagon.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <cctype> + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-instrinfo" + +#define GET_INSTRINFO_CTOR_DTOR +#define GET_INSTRMAP_INFO +#include "HexagonGenInstrInfo.inc" +#include "HexagonGenDFAPacketizer.inc" + +using namespace llvm; + +cl::opt<bool> ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden, + cl::init(false), cl::desc("Do not consider inline-asm a scheduling/" + "packetization boundary.")); + +static cl::opt<bool> EnableBranchPrediction("hexagon-enable-branch-prediction", + cl::Hidden, cl::init(true), cl::desc("Enable branch prediction")); + +static cl::opt<bool> DisableNVSchedule("disable-hexagon-nv-schedule", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable schedule adjustment for new value stores.")); + +static cl::opt<bool> EnableTimingClassLatency( + "enable-timing-class-latency", cl::Hidden, cl::init(false), + cl::desc("Enable timing class latency")); + +static cl::opt<bool> EnableALUForwarding( + "enable-alu-forwarding", cl::Hidden, cl::init(true), + cl::desc("Enable vec alu forwarding")); + +static cl::opt<bool> EnableACCForwarding( + "enable-acc-forwarding", cl::Hidden, cl::init(true), + cl::desc("Enable vec acc forwarding")); + +static cl::opt<bool> BranchRelaxAsmLarge("branch-relax-asm-large", + cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("branch relax asm")); + +/// +/// Constants for Hexagon instructions. +/// +const int Hexagon_MEMV_OFFSET_MAX_128B = 2047; // #s7 +const int Hexagon_MEMV_OFFSET_MIN_128B = -2048; // #s7 +const int Hexagon_MEMV_OFFSET_MAX = 1023; // #s6 +const int Hexagon_MEMV_OFFSET_MIN = -1024; // #s6 +const int Hexagon_MEMW_OFFSET_MAX = 4095; +const int Hexagon_MEMW_OFFSET_MIN = -4096; +const int Hexagon_MEMD_OFFSET_MAX = 8191; +const int Hexagon_MEMD_OFFSET_MIN = -8192; +const int Hexagon_MEMH_OFFSET_MAX = 2047; +const int Hexagon_MEMH_OFFSET_MIN = -2048; +const int Hexagon_MEMB_OFFSET_MAX = 1023; +const int Hexagon_MEMB_OFFSET_MIN = -1024; +const int Hexagon_ADDI_OFFSET_MAX = 32767; +const int Hexagon_ADDI_OFFSET_MIN = -32768; +const int Hexagon_MEMD_AUTOINC_MAX = 56; +const int Hexagon_MEMD_AUTOINC_MIN = -64; +const int Hexagon_MEMW_AUTOINC_MAX = 28; +const int Hexagon_MEMW_AUTOINC_MIN = -32; +const int Hexagon_MEMH_AUTOINC_MAX = 14; +const int Hexagon_MEMH_AUTOINC_MIN = -16; +const int Hexagon_MEMB_AUTOINC_MAX = 7; +const int Hexagon_MEMB_AUTOINC_MIN = -8; +const int Hexagon_MEMV_AUTOINC_MAX = 192; +const int Hexagon_MEMV_AUTOINC_MIN = -256; +const int Hexagon_MEMV_AUTOINC_MAX_128B = 384; +const int Hexagon_MEMV_AUTOINC_MIN_128B = -512; + +// Pin the vtable to this file. 
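+// (Defining one out-of-line virtual member function gives the compiler a
+// single translation unit in which to emit the class's vtable.)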
+void HexagonInstrInfo::anchor() {} + +HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST) + : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), + RI() {} + + +static bool isIntRegForSubInst(unsigned Reg) { + return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) || + (Reg >= Hexagon::R16 && Reg <= Hexagon::R23); +} + + +static bool isDblRegForSubInst(unsigned Reg, const HexagonRegisterInfo &HRI) { + return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_loreg)) && + isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_hireg)); +} + + +/// Calculate number of instructions excluding the debug instructions. +static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB, + MachineBasicBlock::const_instr_iterator MIE) { + unsigned Count = 0; + for (; MIB != MIE; ++MIB) { + if (!MIB->isDebugValue()) + ++Count; + } + return Count; +} + + +/// Find the hardware loop instruction used to set-up the specified loop. +/// On Hexagon, we have two instructions used to set-up the hardware loop +/// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions +/// to indicate the end of a loop. +static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp, + SmallPtrSet<MachineBasicBlock *, 8> &Visited) { + int LOOPi; + int LOOPr; + if (EndLoopOp == Hexagon::ENDLOOP0) { + LOOPi = Hexagon::J2_loop0i; + LOOPr = Hexagon::J2_loop0r; + } else { // EndLoopOp == Hexagon::EndLOOP1 + LOOPi = Hexagon::J2_loop1i; + LOOPr = Hexagon::J2_loop1r; + } + + // The loop set-up instruction will be in a predecessor block + for (MachineBasicBlock::pred_iterator PB = BB->pred_begin(), + PE = BB->pred_end(); PB != PE; ++PB) { + // If this has been visited, already skip it. + if (!Visited.insert(*PB).second) + continue; + if (*PB == BB) + continue; + for (MachineBasicBlock::reverse_instr_iterator I = (*PB)->instr_rbegin(), + E = (*PB)->instr_rend(); I != E; ++I) { + int Opc = I->getOpcode(); + if (Opc == LOOPi || Opc == LOOPr) + return &*I; + // We've reached a different loop, which means the loop0 has been removed. + if (Opc == EndLoopOp) + return 0; + } + // Check the predecessors for the LOOP instruction. + MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited); + if (loop) + return loop; + } + return 0; +} + + +/// Gather register def/uses from MI. +/// This treats possible (predicated) defs as actually happening ones +/// (conservatively). +static inline void parseOperands(const MachineInstr *MI, + SmallVector<unsigned, 4> &Defs, SmallVector<unsigned, 8> &Uses) { + Defs.clear(); + Uses.clear(); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + if (!MO.isReg()) + continue; + + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (MO.isUse()) + Uses.push_back(MO.getReg()); + + if (MO.isDef()) + Defs.push_back(MO.getReg()); + } +} + + +// Position dependent, so check twice for swap. 
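+// E.g. with the table below, isDuplexPairMatch(HSIG_L2, HSIG_A) is true but
+// isDuplexPairMatch(HSIG_A, HSIG_L2) is false, so callers holding an
+// unordered pair of groups should test both orders.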
+static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) { + switch (Ga) { + case HexagonII::HSIG_None: + default: + return false; + case HexagonII::HSIG_L1: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_L2: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_S1: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_S2: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_S2 || + Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_A: + return (Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_Compound: + return (Gb == HexagonII::HSIG_Compound); + } + return false; +} + + + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case Hexagon::L2_loadri_io: + case Hexagon::L2_loadrd_io: + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadrb_io: + case Hexagon::L2_loadrub_io: + if (MI->getOperand(2).isFI() && + MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. +unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerd_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerb_io: + if (MI->getOperand(2).isFI() && + MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) { + FrameIndex = MI->getOperand(0).getIndex(); + return MI->getOperand(2).getReg(); + } + break; + } + return 0; +} + + +/// This function can analyze one/two way branching only and should (mostly) be +/// called by target independent side. +/// First entry is always the opcode of the branching instruction, except when +/// the Cond vector is supposed to be empty, e.g., when AnalyzeBranch fails, a +/// BB with only unconditional jump. Subsequent entries depend upon the opcode, +/// e.g. Jump_c p will have +/// Cond[0] = Jump_c +/// Cond[1] = p +/// HW-loop ENDLOOP: +/// Cond[0] = ENDLOOP +/// Cond[1] = MBB +/// New value jump: +/// Cond[0] = Hexagon::CMPEQri_f_Jumpnv_t_V4 -- specific opcode +/// Cond[1] = R +/// Cond[2] = Imm +/// +bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + TBB = nullptr; + FBB = nullptr; + Cond.clear(); + + // If the block has no terminators, it just falls into the block after it. 
+ MachineBasicBlock::instr_iterator I = MBB.instr_end(); + if (I == MBB.instr_begin()) + return false; + + // A basic block may looks like this: + // + // [ insn + // EH_LABEL + // insn + // insn + // insn + // EH_LABEL + // insn ] + // + // It has two succs but does not have a terminator + // Don't know how to handle it. + do { + --I; + if (I->isEHLabel()) + // Don't analyze EH branches. + return true; + } while (I != MBB.instr_begin()); + + I = MBB.instr_end(); + --I; + + while (I->isDebugValue()) { + if (I == MBB.instr_begin()) + return false; + --I; + } + + bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump && + I->getOperand(0).isMBB(); + // Delete the J2_jump if it's equivalent to a fall-through. + if (AllowModify && JumpToBlock && + MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { + DEBUG(dbgs()<< "\nErasing the jump to successor block\n";); + I->eraseFromParent(); + I = MBB.instr_end(); + if (I == MBB.instr_begin()) + return false; + --I; + } + if (!isUnpredicatedTerminator(&*I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = &*I; + MachineInstr *SecondLastInst = nullptr; + // Find one more terminator if present. + for (;;) { + if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) { + if (!SecondLastInst) + SecondLastInst = &*I; + else + // This is a third branch. + return true; + } + if (I == MBB.instr_begin()) + break; + --I; + } + + int LastOpcode = LastInst->getOpcode(); + int SecLastOpcode = SecondLastInst ? SecondLastInst->getOpcode() : 0; + // If the branch target is not a basic block, it could be a tail call. + // (It is, if the target is a function.) + if (LastOpcode == Hexagon::J2_jump && !LastInst->getOperand(0).isMBB()) + return true; + if (SecLastOpcode == Hexagon::J2_jump && + !SecondLastInst->getOperand(0).isMBB()) + return true; + + bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode); + bool LastOpcodeHasNVJump = isNewValueJump(LastInst); + + // If there is only one terminator instruction, process it. + if (LastInst && !SecondLastInst) { + if (LastOpcode == Hexagon::J2_jump) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + if (isEndLoopN(LastOpcode)) { + TBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); + Cond.push_back(LastInst->getOperand(0)); + return false; + } + if (LastOpcodeHasJMP_c) { + TBB = LastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); + Cond.push_back(LastInst->getOperand(0)); + return false; + } + // Only supporting rr/ri versions of new-value jumps. + if (LastOpcodeHasNVJump && (LastInst->getNumExplicitOperands() == 3)) { + TBB = LastInst->getOperand(2).getMBB(); + Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); + Cond.push_back(LastInst->getOperand(0)); + Cond.push_back(LastInst->getOperand(1)); + return false; + } + DEBUG(dbgs() << "\nCant analyze BB#" << MBB.getNumber() + << " with one jump\n";); + // Otherwise, don't know what this is. 
+ return true; + } + + bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode); + bool SecLastOpcodeHasNVJump = isNewValueJump(SecondLastInst); + if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) { + TBB = SecondLastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // Only supporting rr/ri versions of new-value jumps. + if (SecLastOpcodeHasNVJump && + (SecondLastInst->getNumExplicitOperands() == 3) && + (LastOpcode == Hexagon::J2_jump)) { + TBB = SecondLastInst->getOperand(2).getMBB(); + Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); + Cond.push_back(SecondLastInst->getOperand(0)); + Cond.push_back(SecondLastInst->getOperand(1)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two Hexagon:JMPs, handle it. The second one is not + // executed, so remove it. + if (SecLastOpcode == Hexagon::J2_jump && LastOpcode == Hexagon::J2_jump) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst->getIterator(); + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // If the block ends with an ENDLOOP, and J2_jump, handle it. + if (isEndLoopN(SecLastOpcode) && LastOpcode == Hexagon::J2_jump) { + TBB = SecondLastInst->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + DEBUG(dbgs() << "\nCant analyze BB#" << MBB.getNumber() + << " with two jumps";); + // Otherwise, can't handle this. + return true; +} + + +unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber()); + MachineBasicBlock::iterator I = MBB.end(); + unsigned Count = 0; + while (I != MBB.begin()) { + --I; + if (I->isDebugValue()) + continue; + // Only removing branches from end of MBB. + if (!I->isBranch()) + return Count; + if (Count && (I->getOpcode() == Hexagon::J2_jump)) + llvm_unreachable("Malformed basic block: unconditional branch not last"); + MBB.erase(&MBB.back()); + I = MBB.end(); + ++Count; + } + return Count; +} + + +unsigned HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, MachineBasicBlock *FBB, + ArrayRef<MachineOperand> Cond, DebugLoc DL) const { + unsigned BOpc = Hexagon::J2_jump; + unsigned BccOpc = Hexagon::J2_jumpt; + assert(validateBranchCond(Cond) && "Invalid branching condition"); + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + + // Check if ReverseBranchCondition has asked to reverse this branch + // If we want to reverse the branch an odd number of times, we want + // J2_jumpf. + if (!Cond.empty() && Cond[0].isImm()) + BccOpc = Cond[0].getImm(); + + if (!FBB) { + if (Cond.empty()) { + // Due to a bug in TailMerging/CFG Optimization, we need to add a + // special case handling of a predicated jump followed by an + // unconditional jump. If not, Tail Merging and CFG Optimization go + // into an infinite loop. 
+ MachineBasicBlock *NewTBB, *NewFBB; + SmallVector<MachineOperand, 4> Cond; + MachineInstr *Term = MBB.getFirstTerminator(); + if (Term != MBB.end() && isPredicated(Term) && + !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) { + MachineBasicBlock *NextBB = &*++MBB.getIterator(); + if (NewTBB == NextBB) { + ReverseBranchCondition(Cond); + RemoveBranch(MBB); + return InsertBranch(MBB, TBB, nullptr, Cond, DL); + } + } + BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); + } else if (isEndLoopN(Cond[0].getImm())) { + int EndLoopOp = Cond[0].getImm(); + assert(Cond[1].isMBB()); + // Since we're adding an ENDLOOP, there better be a LOOP instruction. + // Check for it, and change the BB target if needed. + SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs; + MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs); + assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP"); + Loop->getOperand(0).setMBB(TBB); + // Add the ENDLOOP after the finding the LOOP0. + BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB); + } else if (isNewValueJump(Cond[0].getImm())) { + assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump"); + // New value jump + // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset) + // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset) + unsigned Flags1 = getUndefRegState(Cond[1].isUndef()); + DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber();); + if (Cond[2].isReg()) { + unsigned Flags2 = getUndefRegState(Cond[2].isUndef()); + BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1). + addReg(Cond[2].getReg(), Flags2).addMBB(TBB); + } else if(Cond[2].isImm()) { + BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1). + addImm(Cond[2].getImm()).addMBB(TBB); + } else + llvm_unreachable("Invalid condition for branching"); + } else { + assert((Cond.size() == 2) && "Malformed cond vector"); + const MachineOperand &RO = Cond[1]; + unsigned Flags = getUndefRegState(RO.isUndef()); + BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB); + } + return 1; + } + assert((!Cond.empty()) && + "Cond. cannot be empty when multiple branchings are required"); + assert((!isNewValueJump(Cond[0].getImm())) && + "NV-jump cannot be inserted with another branch"); + // Special case for hardware loops. The condition is a basic block. + if (isEndLoopN(Cond[0].getImm())) { + int EndLoopOp = Cond[0].getImm(); + assert(Cond[1].isMBB()); + // Since we're adding an ENDLOOP, there better be a LOOP instruction. + // Check for it, and change the BB target if needed. + SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs; + MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs); + assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP"); + Loop->getOperand(0).setMBB(TBB); + // Add the ENDLOOP after the finding the LOOP0. 
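+    // (Both the LOOPn set-up retargeted above and the new ENDLOOP now refer
+    // to TBB.)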
+ BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB); + } else { + const MachineOperand &RO = Cond[1]; + unsigned Flags = getUndefRegState(RO.isUndef()); + BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB); + } + BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); + + return 2; +} + + +bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, unsigned ExtraPredCycles, + BranchProbability Probability) const { + return nonDbgBBSize(&MBB) <= 3; +} + + +bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB, + unsigned NumFCycles, unsigned ExtraFCycles, BranchProbability Probability) + const { + return nonDbgBBSize(&TMBB) <= 3 && nonDbgBBSize(&FMBB) <= 3; +} + + +bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, + unsigned NumInstrs, BranchProbability Probability) const { + return NumInstrs <= 4; +} + + +void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc) const { + auto &HRI = getRegisterInfo(); + if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), DestReg).addReg(SrcReg); + return; + } + if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::A2_tfrp), DestReg).addReg(SrcReg); + return; + } + if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) { + // Map Pd = Ps to Pd = or(Ps, Ps). + BuildMI(MBB, I, DL, get(Hexagon::C2_or), + DestReg).addReg(SrcReg).addReg(SrcReg); + return; + } + if (Hexagon::DoubleRegsRegClass.contains(DestReg) && + Hexagon::IntRegsRegClass.contains(SrcReg)) { + // We can have an overlap between single and double reg: r1:0 = r0. + if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) { + // r1:0 = r0 + BuildMI(MBB, I, DL, get(Hexagon::A2_tfrsi), (RI.getSubReg(DestReg, + Hexagon::subreg_hireg))).addImm(0); + } else { + // r1:0 = r1 or no overlap. + BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), (RI.getSubReg(DestReg, + Hexagon::subreg_loreg))).addReg(SrcReg); + BuildMI(MBB, I, DL, get(Hexagon::A2_tfrsi), (RI.getSubReg(DestReg, + Hexagon::subreg_hireg))).addImm(0); + } + return; + } + if (Hexagon::CtrRegsRegClass.contains(DestReg) && + Hexagon::IntRegsRegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(Hexagon::A2_tfrrcr), DestReg).addReg(SrcReg); + return; + } + if (Hexagon::PredRegsRegClass.contains(SrcReg) && + Hexagon::IntRegsRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (Hexagon::IntRegsRegClass.contains(SrcReg) && + Hexagon::PredRegsRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::C2_tfrrp), DestReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (Hexagon::PredRegsRegClass.contains(SrcReg) && + Hexagon::IntRegsRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (Hexagon::VectorRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::V6_vassign), DestReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (Hexagon::VecDblRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::V6_vcombine), DestReg). + addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg), + getKillRegState(KillSrc)). 
+ addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg), + getKillRegState(KillSrc)); + return; + } + if (Hexagon::VecPredRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), DestReg). + addReg(SrcReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (Hexagon::VecPredRegsRegClass.contains(SrcReg) && + Hexagon::VectorRegsRegClass.contains(DestReg)) { + llvm_unreachable("Unimplemented pred to vec"); + return; + } + if (Hexagon::VecPredRegsRegClass.contains(DestReg) && + Hexagon::VectorRegsRegClass.contains(SrcReg)) { + llvm_unreachable("Unimplemented vec to pred"); + return; + } + if (Hexagon::VecPredRegs128BRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), + HRI.getSubReg(DestReg, Hexagon::subreg_hireg)). + addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg), + getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), + HRI.getSubReg(DestReg, Hexagon::subreg_loreg)). + addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg), + getKillRegState(KillSrc)); + return; + } + +#ifndef NDEBUG + // Show the invalid registers to ease debugging. + dbgs() << "Invalid registers for copy in BB#" << MBB.getNumber() + << ": " << PrintReg(DestReg, &HRI) + << " = " << PrintReg(SrcReg, &HRI) << '\n'; +#endif + llvm_unreachable("Unimplemented"); +} + + +void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBB.findDebugLoc(I); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FI); + + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, + MFI.getObjectSize(FI), Align); + + if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::S2_storeri_io)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + } else if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::S2_storerd_io)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + } else if (Hexagon::PredRegsRegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::STriw_pred)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + } else { + llvm_unreachable("Unimplemented"); + } +} + + +void HexagonInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, unsigned DestReg, int FI, + const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBB.findDebugLoc(I); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FI); + + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), Align); + if (RC == &Hexagon::IntRegsRegClass) { + BuildMI(MBB, I, DL, get(Hexagon::L2_loadri_io), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else if (RC == &Hexagon::DoubleRegsRegClass) { + BuildMI(MBB, I, DL, get(Hexagon::L2_loadrd_io), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else if (RC == &Hexagon::PredRegsRegClass) { + BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg) + 
.addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else { + llvm_unreachable("Can't store this register to stack slot"); + } +} + + +/// expandPostRAPseudo - This function is called for all pseudo instructions +/// that remain after register allocation. Many pseudo instructions are +/// created to help register allocation. This is the place to convert them +/// into real instructions. The target can edit MI in place, or it can insert +/// new instructions and erase MI. The function should return true if +/// anything was changed. +bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) + const { + const HexagonRegisterInfo &HRI = getRegisterInfo(); + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Opc = MI->getOpcode(); + const unsigned VecOffset = 1; + bool Is128B = false; + + switch (Opc) { + case Hexagon::ALIGNA: + BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI->getOperand(0).getReg()) + .addReg(HRI.getFrameRegister()) + .addImm(-MI->getOperand(1).getImm()); + MBB.erase(MI); + return true; + case Hexagon::HEXAGON_V6_vassignp_128B: + case Hexagon::HEXAGON_V6_vassignp: { + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + if (SrcReg != DstReg) + copyPhysReg(MBB, MI, DL, DstReg, SrcReg, MI->getOperand(1).isKill()); + MBB.erase(MI); + return true; + } + case Hexagon::HEXAGON_V6_lo_128B: + case Hexagon::HEXAGON_V6_lo: { + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg); + copyPhysReg(MBB, MI, DL, DstReg, SrcSubLo, MI->getOperand(1).isKill()); + MBB.erase(MI); + MRI.clearKillFlags(SrcSubLo); + return true; + } + case Hexagon::HEXAGON_V6_hi_128B: + case Hexagon::HEXAGON_V6_hi: { + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg); + copyPhysReg(MBB, MI, DL, DstReg, SrcSubHi, MI->getOperand(1).isKill()); + MBB.erase(MI); + MRI.clearKillFlags(SrcSubHi); + return true; + } + case Hexagon::STrivv_indexed_128B: + Is128B = true; + case Hexagon::STrivv_indexed: { + unsigned SrcReg = MI->getOperand(2).getReg(); + unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg); + unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg); + unsigned NewOpcd = Is128B ? Hexagon::V6_vS32b_ai_128B + : Hexagon::V6_vS32b_ai; + unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6; + MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpcd)) + .addOperand(MI->getOperand(0)) + .addImm(MI->getOperand(1).getImm()) + .addReg(SrcSubLo) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MI1New->getOperand(0).setIsKill(false); + BuildMI(MBB, MI, DL, get(NewOpcd)) + .addOperand(MI->getOperand(0)) + // The Vectors are indexed in multiples of vector size. + .addImm(MI->getOperand(1).getImm()+Offset) + .addReg(SrcSubHi) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MBB.erase(MI); + return true; + } + case Hexagon::LDrivv_pseudo_V6_128B: + case Hexagon::LDrivv_indexed_128B: + Is128B = true; + case Hexagon::LDrivv_pseudo_V6: + case Hexagon::LDrivv_indexed: { + unsigned NewOpcd = Is128B ? Hexagon::V6_vL32b_ai_128B + : Hexagon::V6_vL32b_ai; + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned Offset = Is128B ? 
VecOffset << 7 : VecOffset << 6; + MachineInstr *MI1New = + BuildMI(MBB, MI, DL, get(NewOpcd), + HRI.getSubReg(DstReg, Hexagon::subreg_loreg)) + .addOperand(MI->getOperand(1)) + .addImm(MI->getOperand(2).getImm()); + MI1New->getOperand(1).setIsKill(false); + BuildMI(MBB, MI, DL, get(NewOpcd), + HRI.getSubReg(DstReg, Hexagon::subreg_hireg)) + .addOperand(MI->getOperand(1)) + // The Vectors are indexed in multiples of vector size. + .addImm(MI->getOperand(2).getImm() + Offset) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MBB.erase(MI); + return true; + } + case Hexagon::LDriv_pseudo_V6_128B: + Is128B = true; + case Hexagon::LDriv_pseudo_V6: { + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned NewOpc = Is128B ? Hexagon::V6_vL32b_ai_128B + : Hexagon::V6_vL32b_ai; + int32_t Off = MI->getOperand(2).getImm(); + int32_t Idx = Off; + BuildMI(MBB, MI, DL, get(NewOpc), DstReg) + .addOperand(MI->getOperand(1)) + .addImm(Idx) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MBB.erase(MI); + return true; + } + case Hexagon::STriv_pseudo_V6_128B: + Is128B = true; + case Hexagon::STriv_pseudo_V6: { + unsigned NewOpc = Is128B ? Hexagon::V6_vS32b_ai_128B + : Hexagon::V6_vS32b_ai; + int32_t Off = MI->getOperand(1).getImm(); + int32_t Idx = Is128B ? (Off >> 7) : (Off >> 6); + BuildMI(MBB, MI, DL, get(NewOpc)) + .addOperand(MI->getOperand(0)) + .addImm(Idx) + .addOperand(MI->getOperand(2)) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MBB.erase(MI); + return true; + } + case Hexagon::TFR_PdTrue: { + unsigned Reg = MI->getOperand(0).getReg(); + BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + MBB.erase(MI); + return true; + } + case Hexagon::TFR_PdFalse: { + unsigned Reg = MI->getOperand(0).getReg(); + BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + MBB.erase(MI); + return true; + } + case Hexagon::VMULW: { + // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies. + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned Src1Reg = MI->getOperand(1).getReg(); + unsigned Src2Reg = MI->getOperand(2).getReg(); + unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); + unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); + unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); + unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi), + HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) + .addReg(Src2SubHi); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi), + HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) + .addReg(Src2SubLo); + MBB.erase(MI); + MRI.clearKillFlags(Src1SubHi); + MRI.clearKillFlags(Src1SubLo); + MRI.clearKillFlags(Src2SubHi); + MRI.clearKillFlags(Src2SubLo); + return true; + } + case Hexagon::VMULW_ACC: { + // Expand 64-bit vector multiply with addition into 2 scalar multiplies. 
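In scalar terms, the VMULW expansion above and the VMULW_ACC expansion below each work on the two independent 32-bit lanes of a 64-bit register pair; roughly, per lane (variable names are hypothetical):

  uint32_t mul_lane = a * b;       // VMULW lane:     one M2_mpyi per half
  uint32_t mac_lane = acc + a * b; // VMULW_ACC lane: one M2_maci per half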
+ unsigned DstReg = MI->getOperand(0).getReg(); + unsigned Src1Reg = MI->getOperand(1).getReg(); + unsigned Src2Reg = MI->getOperand(2).getReg(); + unsigned Src3Reg = MI->getOperand(3).getReg(); + unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); + unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); + unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); + unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); + unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::subreg_hireg); + unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::subreg_loreg); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), + HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) + .addReg(Src2SubHi).addReg(Src3SubHi); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), + HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) + .addReg(Src2SubLo).addReg(Src3SubLo); + MBB.erase(MI); + MRI.clearKillFlags(Src1SubHi); + MRI.clearKillFlags(Src1SubLo); + MRI.clearKillFlags(Src2SubHi); + MRI.clearKillFlags(Src2SubLo); + MRI.clearKillFlags(Src3SubHi); + MRI.clearKillFlags(Src3SubLo); + return true; + } + case Hexagon::MUX64_rr: { + const MachineOperand &Op0 = MI->getOperand(0); + const MachineOperand &Op1 = MI->getOperand(1); + const MachineOperand &Op2 = MI->getOperand(2); + const MachineOperand &Op3 = MI->getOperand(3); + unsigned Rd = Op0.getReg(); + unsigned Pu = Op1.getReg(); + unsigned Rs = Op2.getReg(); + unsigned Rt = Op3.getReg(); + DebugLoc DL = MI->getDebugLoc(); + unsigned K1 = getKillRegState(Op1.isKill()); + unsigned K2 = getKillRegState(Op2.isKill()); + unsigned K3 = getKillRegState(Op3.isKill()); + if (Rd != Rs) + BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpt), Rd) + .addReg(Pu, (Rd == Rt) ? K1 : 0) + .addReg(Rs, K2); + if (Rd != Rt) + BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpf), Rd) + .addReg(Pu, K1) + .addReg(Rt, K3); + MBB.erase(MI); + return true; + } + case Hexagon::TCRETURNi: + MI->setDesc(get(Hexagon::J2_jump)); + return true; + case Hexagon::TCRETURNr: + MI->setDesc(get(Hexagon::J2_jumpr)); + return true; + case Hexagon::TFRI_f: + case Hexagon::TFRI_cPt_f: + case Hexagon::TFRI_cNotPt_f: { + unsigned Opx = (Opc == Hexagon::TFRI_f) ? 1 : 2; + APFloat FVal = MI->getOperand(Opx).getFPImm()->getValueAPF(); + APInt IVal = FVal.bitcastToAPInt(); + MI->RemoveOperand(Opx); + unsigned NewOpc = (Opc == Hexagon::TFRI_f) ? Hexagon::A2_tfrsi : + (Opc == Hexagon::TFRI_cPt_f) ? Hexagon::C2_cmoveit : + Hexagon::C2_cmoveif; + MI->setDesc(get(NewOpc)); + MI->addOperand(MachineOperand::CreateImm(IVal.getZExtValue())); + return true; + } + } + + return false; +} + + +// We indicate that we want to reverse the branch by +// inserting the reversed branching opcode. +bool HexagonInstrInfo::ReverseBranchCondition( + SmallVectorImpl<MachineOperand> &Cond) const { + if (Cond.empty()) + return true; + assert(Cond[0].isImm() && "First entry in the cond vector not imm-val"); + unsigned opcode = Cond[0].getImm(); + //unsigned temp; + assert(get(opcode).isBranch() && "Should be a branching condition."); + if (isEndLoopN(opcode)) + return true; + unsigned NewOpcode = getInvertedPredicatedOpcode(opcode); + Cond[0].setImm(NewOpcode); + return false; +} + + +void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + DebugLoc DL; + BuildMI(MBB, MI, DL, get(Hexagon::A2_nop)); +} + + +// Returns true if an instruction is predicated irrespective of the predicate +// sense. 
For example, all of the following will return true. +// if (p0) R1 = add(R2, R3) +// if (!p0) R1 = add(R2, R3) +// if (p0.new) R1 = add(R2, R3) +// if (!p0.new) R1 = add(R2, R3) +// Note: New-value stores are not included here as in the current +// implementation, we don't need to check their predicate sense. +bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; +} + + +bool HexagonInstrInfo::PredicateInstruction(MachineInstr *MI, + ArrayRef<MachineOperand> Cond) const { + if (Cond.empty() || isNewValueJump(Cond[0].getImm()) || + isEndLoopN(Cond[0].getImm())) { + DEBUG(dbgs() << "\nCannot predicate:"; MI->dump();); + return false; + } + int Opc = MI->getOpcode(); + assert (isPredicable(MI) && "Expected predicable instruction"); + bool invertJump = predOpcodeHasNot(Cond); + + // We have to predicate MI "in place", i.e. after this function returns, + // MI will need to be transformed into a predicated form. To avoid com- + // plicated manipulations with the operands (handling tied operands, + // etc.), build a new temporary instruction, then overwrite MI with it. + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned PredOpc = getCondOpcode(Opc, invertJump); + MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc)); + unsigned NOp = 0, NumOps = MI->getNumOperands(); + while (NOp < NumOps) { + MachineOperand &Op = MI->getOperand(NOp); + if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) + break; + T.addOperand(Op); + NOp++; + } + + unsigned PredReg, PredRegPos, PredRegFlags; + bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags); + (void)GotPredReg; + assert(GotPredReg); + T.addReg(PredReg, PredRegFlags); + while (NOp < NumOps) + T.addOperand(MI->getOperand(NOp++)); + + MI->setDesc(get(PredOpc)); + while (unsigned n = MI->getNumOperands()) + MI->RemoveOperand(n-1); + for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i) + MI->addOperand(T->getOperand(i)); + + MachineBasicBlock::instr_iterator TI = T->getIterator(); + B.erase(TI); + + MachineRegisterInfo &MRI = B.getParent()->getRegInfo(); + MRI.clearKillFlags(PredReg); + return true; +} + + +bool HexagonInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, + ArrayRef<MachineOperand> Pred2) const { + // TODO: Fix this + return false; +} + + +bool HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + auto &HRI = getRegisterInfo(); + for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) { + MachineOperand MO = MI->getOperand(oper); + if (MO.isReg() && MO.isDef()) { + const TargetRegisterClass* RC = HRI.getMinimalPhysRegClass(MO.getReg()); + if (RC == &Hexagon::PredRegsRegClass) { + Pred.push_back(MO); + return true; + } + } + } + return false; +} + +bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { + bool isPred = MI->getDesc().isPredicable(); + + if (!isPred) + return false; + + const int Opc = MI->getOpcode(); + int NumOperands = MI->getNumOperands(); + + // Keep a flag for upto 4 operands in the instructions, to indicate if + // that operand has been constant extended. 
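The per-opcode immediate checks in the switch that follows appear to exist because the predicated form of each instruction has a smaller immediate field than the unpredicated form, so the instruction is only reported predicable when the value already fits. Two illustrative entries (instruction syntax is approximate):

  // S2_storeri_io  memw(Rs+#s11:2) = Rt   predicated form has #u6:2,
  //                hence isShiftedUInt<6,2>: 0 <= imm <= 252 and imm % 4 == 0
  // A2_addi        Rd = add(Rs,#s16)      predicated add has #s8,
  //                hence isInt<8>: -128 <= imm <= 127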
+ bool OpCExtended[4]; + if (NumOperands > 4) + NumOperands = 4; + + for (int i = 0; i < NumOperands; i++) + OpCExtended[i] = (isOperandExtended(MI, i) && isConstExtended(MI)); + + switch(Opc) { + case Hexagon::A2_tfrsi: + return (isOperandExtended(MI, 1) && isConstExtended(MI)) || + isInt<12>(MI->getOperand(1).getImm()); + + case Hexagon::S2_storerd_io: + return isShiftedUInt<6,3>(MI->getOperand(1).getImm()); + + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerinew_io: + return isShiftedUInt<6,2>(MI->getOperand(1).getImm()); + + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerhnew_io: + return isShiftedUInt<6,1>(MI->getOperand(1).getImm()); + + case Hexagon::S2_storerb_io: + case Hexagon::S2_storerbnew_io: + return isUInt<6>(MI->getOperand(1).getImm()); + + case Hexagon::L2_loadrd_io: + return isShiftedUInt<6,3>(MI->getOperand(2).getImm()); + + case Hexagon::L2_loadri_io: + return isShiftedUInt<6,2>(MI->getOperand(2).getImm()); + + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + return isShiftedUInt<6,1>(MI->getOperand(2).getImm()); + + case Hexagon::L2_loadrb_io: + case Hexagon::L2_loadrub_io: + return isUInt<6>(MI->getOperand(2).getImm()); + + case Hexagon::L2_loadrd_pi: + return isShiftedInt<4,3>(MI->getOperand(3).getImm()); + + case Hexagon::L2_loadri_pi: + return isShiftedInt<4,2>(MI->getOperand(3).getImm()); + + case Hexagon::L2_loadrh_pi: + case Hexagon::L2_loadruh_pi: + return isShiftedInt<4,1>(MI->getOperand(3).getImm()); + + case Hexagon::L2_loadrb_pi: + case Hexagon::L2_loadrub_pi: + return isInt<4>(MI->getOperand(3).getImm()); + + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeiri_io: + return (OpCExtended[1] || isUInt<6>(MI->getOperand(1).getImm())) && + (OpCExtended[2] || isInt<6>(MI->getOperand(2).getImm())); + + case Hexagon::A2_addi: + return isInt<8>(MI->getOperand(2).getImm()); + + case Hexagon::A2_aslh: + case Hexagon::A2_asrh: + case Hexagon::A2_sxtb: + case Hexagon::A2_sxth: + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + return true; + } + + return true; +} + + +bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, const MachineFunction &MF) const { + // Debug info is never a scheduling boundary. It's necessary to be explicit + // due to the special treatment of IT instructions below, otherwise a + // dbg_value followed by an IT will result in the IT instruction being + // considered a scheduling hazard, which is wrong. It should be the actual + // instruction preceding the dbg_value instruction(s), just like it is + // when debug info is not present. + if (MI->isDebugValue()) + return false; + + // Throwing call is a boundary. + if (MI->isCall()) { + // If any of the block's successors is a landing pad, this could be a + // throwing call. + for (auto I : MBB->successors()) + if (I->isEHPad()) + return true; + } + + // Don't mess around with no return calls. + if (MI->getOpcode() == Hexagon::CALLv3nr) + return true; + + // Terminators and labels can't be scheduled around. + if (MI->getDesc().isTerminator() || MI->isPosition()) + return true; + + if (MI->isInlineAsm() && !ScheduleInlineAsm) + return true; + + return false; +} + + +/// Measure the specified inline asm to determine an approximation of its +/// length. +/// Comments (which run till the next SeparatorString or newline) do not +/// count as an instruction. +/// Any other non-whitespace text is considered an instruction, with +/// multiple instructions separated by SeparatorString or newlines. 
+/// Variable-length instructions are not handled here; this function +/// may be overloaded in the target code to do that. +/// Hexagon counts the number of ##'s and adjust for that many +/// constant exenders. +unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const { + StringRef AStr(Str); + // Count the number of instructions in the asm. + bool atInsnStart = true; + unsigned Length = 0; + for (; *Str; ++Str) { + if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(), + strlen(MAI.getSeparatorString())) == 0) + atInsnStart = true; + if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) { + Length += MAI.getMaxInstLength(); + atInsnStart = false; + } + if (atInsnStart && strncmp(Str, MAI.getCommentString(), + strlen(MAI.getCommentString())) == 0) + atInsnStart = false; + } + + // Add to size number of constant extenders seen * 4. + StringRef Occ("##"); + Length += AStr.count(Occ)*4; + return Length; +} + + +ScheduleHazardRecognizer* +HexagonInstrInfo::CreateTargetPostRAHazardRecognizer( + const InstrItineraryData *II, const ScheduleDAG *DAG) const { + return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); +} + + +/// \brief For a comparison instruction, return the source registers in +/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it +/// compares against in CmpValue. Return true if the comparison instruction +/// can be analyzed. +bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, int &Mask, int &Value) const { + unsigned Opc = MI->getOpcode(); + + // Set mask and the first source register. + switch (Opc) { + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqp: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtp: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtup: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtui: + case Hexagon::C4_cmpneqi: + case Hexagon::C4_cmplteui: + case Hexagon::C4_cmpltei: + SrcReg = MI->getOperand(1).getReg(); + Mask = ~0; + break; + case Hexagon::A4_cmpbeq: + case Hexagon::A4_cmpbgt: + case Hexagon::A4_cmpbgtu: + case Hexagon::A4_cmpbeqi: + case Hexagon::A4_cmpbgti: + case Hexagon::A4_cmpbgtui: + SrcReg = MI->getOperand(1).getReg(); + Mask = 0xFF; + break; + case Hexagon::A4_cmpheq: + case Hexagon::A4_cmphgt: + case Hexagon::A4_cmphgtu: + case Hexagon::A4_cmpheqi: + case Hexagon::A4_cmphgti: + case Hexagon::A4_cmphgtui: + SrcReg = MI->getOperand(1).getReg(); + Mask = 0xFFFF; + break; + } + + // Set the value/second source register. 
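Before the second switch fills in the immediate or the second register, a sketch of how a caller typically consumes these outputs (CmpMI and TII are hypothetical handles, not defined in this file):

  unsigned SrcReg = 0, SrcReg2 = 0;
  int Mask = 0, Value = 0;
  if (TII->analyzeCompare(CmpMI, SrcReg, SrcReg2, Mask, Value)) {
    // Mask == ~0     : full-width compare (C2_cmp*)
    // Mask == 0xFF   : byte compare       (A4_cmpb*)
    // Mask == 0xFFFF : halfword compare   (A4_cmph*)
    // SrcReg2 == 0   : immediate form; the constant is in Value.
  }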
+ switch (Opc) {
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtp:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::A4_cmpbeq:
+ case Hexagon::A4_cmpbgt:
+ case Hexagon::A4_cmpbgtu:
+ case Hexagon::A4_cmpheq:
+ case Hexagon::A4_cmphgt:
+ case Hexagon::A4_cmphgtu:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
+ SrcReg2 = MI->getOperand(2).getReg();
+ return true;
+
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C4_cmpneqi:
+ case Hexagon::C4_cmplteui:
+ case Hexagon::C4_cmpltei:
+ case Hexagon::A4_cmpbeqi:
+ case Hexagon::A4_cmpbgti:
+ case Hexagon::A4_cmpbgtui:
+ case Hexagon::A4_cmpheqi:
+ case Hexagon::A4_cmphgti:
+ case Hexagon::A4_cmphgtui:
+ SrcReg2 = 0;
+ Value = MI->getOperand(2).getImm();
+ return true;
+ }
+
+ return false;
+}
+
+
+unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI, unsigned *PredCost) const {
+ return getInstrTimingClassLatency(ItinData, MI);
+}
+
+
+DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
+ const TargetSubtargetInfo &STI) const {
+ const InstrItineraryData *II = STI.getInstrItineraryData();
+ return static_cast<const HexagonSubtarget&>(STI).createDFAPacketizer(II);
+}
+
+
+// Inspired by this pair:
+// %R13<def> = L2_loadri_io %R29, 136; mem:LD4[FixedStack0]
+// S2_storeri_io %R29, 132, %R1<kill>; flags: mem:ST4[FixedStack1]
+// Currently AA considers the addresses in these instructions to be aliasing.
+bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
+ MachineInstr *MIb, AliasAnalysis *AA) const {
+ int OffsetA = 0, OffsetB = 0;
+ unsigned SizeA = 0, SizeB = 0;
+
+ if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
+ MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
+ return false;
+
+ // Instructions that are pure loads, not loads and stores like memops, are
+ // not dependent.
+ if (MIa->mayLoad() && !isMemOp(MIa) && MIb->mayLoad() && !isMemOp(MIb))
+ return true;
+
+ // Get base, offset, and access size in MIa.
+ unsigned BaseRegA = getBaseAndOffset(MIa, OffsetA, SizeA);
+ if (!BaseRegA || !SizeA)
+ return false;
+
+ // Get base, offset, and access size in MIb.
+ unsigned BaseRegB = getBaseAndOffset(MIb, OffsetB, SizeB);
+ if (!BaseRegB || !SizeB)
+ return false;
+
+ if (BaseRegA != BaseRegB)
+ return false;
+
+ // This is a mem access with the same base register and known offsets from it.
+ // Reason about it.
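A worked instance of the check that follows, using the load/store pair quoted in the comment above (both accesses are 4 bytes, based on %R29):

  // OffsetA = 136, SizeA = 4;  OffsetB = 132, SizeB = 4;  same base register.
  // OffsetA > OffsetB and SizeB (4) <= OffsetA - OffsetB (4), so the byte
  // ranges [132, 136) and [136, 140) cannot overlap: the accesses are disjoint.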
+ if (OffsetA > OffsetB) { + uint64_t offDiff = (uint64_t)((int64_t)OffsetA - (int64_t)OffsetB); + return (SizeB <= offDiff); + } else if (OffsetA < OffsetB) { + uint64_t offDiff = (uint64_t)((int64_t)OffsetB - (int64_t)OffsetA); + return (SizeA <= offDiff); + } + + return false; +} + + +unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { + MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetRegisterClass *TRC; + if (VT == MVT::i1) { + TRC = &Hexagon::PredRegsRegClass; + } else if (VT == MVT::i32 || VT == MVT::f32) { + TRC = &Hexagon::IntRegsRegClass; + } else if (VT == MVT::i64 || VT == MVT::f64) { + TRC = &Hexagon::DoubleRegsRegClass; + } else { + llvm_unreachable("Cannot handle this register class"); + } + + unsigned NewReg = MRI.createVirtualRegister(TRC); + return NewReg; +} + + +bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr* MI) const { + return (getAddrMode(MI) == HexagonII::AbsoluteSet); +} + + +bool HexagonInstrInfo::isAccumulator(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask); +} + + +bool HexagonInstrInfo::isComplex(const MachineInstr *MI) const { + const MachineFunction *MF = MI->getParent()->getParent(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + + if (!(isTC1(MI)) + && !(QII->isTC2Early(MI)) + && !(MI->getDesc().mayLoad()) + && !(MI->getDesc().mayStore()) + && (MI->getDesc().getOpcode() != Hexagon::S2_allocframe) + && (MI->getDesc().getOpcode() != Hexagon::L2_deallocframe) + && !(QII->isMemOp(MI)) + && !(MI->isBranch()) + && !(MI->isReturn()) + && !MI->isCall()) + return true; + + return false; +} + + +// Return true if the instruction is a compund branch instruction. +bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr *MI) const { + return (getType(MI) == HexagonII::TypeCOMPOUND && MI->isBranch()); +} + + +bool HexagonInstrInfo::isCondInst(const MachineInstr *MI) const { + return (MI->isBranch() && isPredicated(MI)) || + isConditionalTransfer(MI) || + isConditionalALU32(MI) || + isConditionalLoad(MI) || + // Predicated stores which don't have a .new on any operands. 
+ (MI->mayStore() && isPredicated(MI) && !isNewValueStore(MI) && + !isPredicatedNew(MI)); +} + + +bool HexagonInstrInfo::isConditionalALU32(const MachineInstr* MI) const { + switch (MI->getOpcode()) { + case Hexagon::A2_paddf: + case Hexagon::A2_paddfnew: + case Hexagon::A2_paddif: + case Hexagon::A2_paddifnew: + case Hexagon::A2_paddit: + case Hexagon::A2_padditnew: + case Hexagon::A2_paddt: + case Hexagon::A2_paddtnew: + case Hexagon::A2_pandf: + case Hexagon::A2_pandfnew: + case Hexagon::A2_pandt: + case Hexagon::A2_pandtnew: + case Hexagon::A2_porf: + case Hexagon::A2_porfnew: + case Hexagon::A2_port: + case Hexagon::A2_portnew: + case Hexagon::A2_psubf: + case Hexagon::A2_psubfnew: + case Hexagon::A2_psubt: + case Hexagon::A2_psubtnew: + case Hexagon::A2_pxorf: + case Hexagon::A2_pxorfnew: + case Hexagon::A2_pxort: + case Hexagon::A2_pxortnew: + case Hexagon::A4_paslhf: + case Hexagon::A4_paslhfnew: + case Hexagon::A4_paslht: + case Hexagon::A4_paslhtnew: + case Hexagon::A4_pasrhf: + case Hexagon::A4_pasrhfnew: + case Hexagon::A4_pasrht: + case Hexagon::A4_pasrhtnew: + case Hexagon::A4_psxtbf: + case Hexagon::A4_psxtbfnew: + case Hexagon::A4_psxtbt: + case Hexagon::A4_psxtbtnew: + case Hexagon::A4_psxthf: + case Hexagon::A4_psxthfnew: + case Hexagon::A4_psxtht: + case Hexagon::A4_psxthtnew: + case Hexagon::A4_pzxtbf: + case Hexagon::A4_pzxtbfnew: + case Hexagon::A4_pzxtbt: + case Hexagon::A4_pzxtbtnew: + case Hexagon::A4_pzxthf: + case Hexagon::A4_pzxthfnew: + case Hexagon::A4_pzxtht: + case Hexagon::A4_pzxthtnew: + case Hexagon::C2_ccombinewf: + case Hexagon::C2_ccombinewt: + return true; + } + return false; +} + + +// FIXME - Function name and it's functionality don't match. +// It should be renamed to hasPredNewOpcode() +bool HexagonInstrInfo::isConditionalLoad(const MachineInstr* MI) const { + if (!MI->getDesc().mayLoad() || !isPredicated(MI)) + return false; + + int PNewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode()); + // Instruction with valid predicated-new opcode can be promoted to .new. + return PNewOpcode >= 0; +} + + +// Returns true if an instruction is a conditional store. +// +// Note: It doesn't include conditional new-value stores as they can't be +// converted to .new predicate. +bool HexagonInstrInfo::isConditionalStore(const MachineInstr* MI) const { + switch (MI->getOpcode()) { + default: return false; + case Hexagon::S4_storeirbt_io: + case Hexagon::S4_storeirbf_io: + case Hexagon::S4_pstorerbt_rr: + case Hexagon::S4_pstorerbf_rr: + case Hexagon::S2_pstorerbt_io: + case Hexagon::S2_pstorerbf_io: + case Hexagon::S2_pstorerbt_pi: + case Hexagon::S2_pstorerbf_pi: + case Hexagon::S2_pstorerdt_io: + case Hexagon::S2_pstorerdf_io: + case Hexagon::S4_pstorerdt_rr: + case Hexagon::S4_pstorerdf_rr: + case Hexagon::S2_pstorerdt_pi: + case Hexagon::S2_pstorerdf_pi: + case Hexagon::S2_pstorerht_io: + case Hexagon::S2_pstorerhf_io: + case Hexagon::S4_storeirht_io: + case Hexagon::S4_storeirhf_io: + case Hexagon::S4_pstorerht_rr: + case Hexagon::S4_pstorerhf_rr: + case Hexagon::S2_pstorerht_pi: + case Hexagon::S2_pstorerhf_pi: + case Hexagon::S2_pstorerit_io: + case Hexagon::S2_pstorerif_io: + case Hexagon::S4_storeirit_io: + case Hexagon::S4_storeirif_io: + case Hexagon::S4_pstorerit_rr: + case Hexagon::S4_pstorerif_rr: + case Hexagon::S2_pstorerit_pi: + case Hexagon::S2_pstorerif_pi: + + // V4 global address store before promoting to dot new. 
+ case Hexagon::S4_pstorerdt_abs: + case Hexagon::S4_pstorerdf_abs: + case Hexagon::S4_pstorerbt_abs: + case Hexagon::S4_pstorerbf_abs: + case Hexagon::S4_pstorerht_abs: + case Hexagon::S4_pstorerhf_abs: + case Hexagon::S4_pstorerit_abs: + case Hexagon::S4_pstorerif_abs: + return true; + + // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded + // from the "Conditional Store" list. Because a predicated new value store + // would NOT be promoted to a double dot new store. + // This function returns yes for those stores that are predicated but not + // yet promoted to predicate dot new instructions. + } +} + + +bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::A2_tfrt: + case Hexagon::A2_tfrf: + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmoveif: + case Hexagon::A2_tfrtnew: + case Hexagon::A2_tfrfnew: + case Hexagon::C2_cmovenewit: + case Hexagon::C2_cmovenewif: + case Hexagon::A2_tfrpt: + case Hexagon::A2_tfrpf: + return true; + + default: + return false; + } + return false; +} + + +// TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle +// isFPImm and later getFPImm as well. +bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; + if (isExtended) // Instruction must be extended. + return true; + + unsigned isExtendable = + (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; + if (!isExtendable) + return false; + + if (MI->isCall()) + return false; + + short ExtOpNum = getCExtOpNum(MI); + const MachineOperand &MO = MI->getOperand(ExtOpNum); + // Use MO operand flags to determine if MO + // has the HMOTF_ConstExtended flag set. + if (MO.getTargetFlags() && HexagonII::HMOTF_ConstExtended) + return true; + // If this is a Machine BB address we are talking about, and it is + // not marked as extended, say so. + if (MO.isMBB()) + return false; + + // We could be using an instruction with an extendable immediate and shoehorn + // a global address into it. If it is a global address it will be constant + // extended. We do this for COMBINE. + // We currently only handle isGlobal() because it is the only kind of + // object we are going to end up with here for now. + // In the future we probably should add isSymbol(), etc. + if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() || + MO.isJTI() || MO.isCPI()) + return true; + + // If the extendable operand is not 'Immediate' type, the instruction should + // have 'isExtended' flag set. + assert(MO.isImm() && "Extendable operand must be Immediate type"); + + int MinValue = getMinValue(MI); + int MaxValue = getMaxValue(MI); + int ImmValue = MO.getImm(); + + return (ImmValue < MinValue || ImmValue > MaxValue); +} + + +bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::L4_return : + case Hexagon::L4_return_t : + case Hexagon::L4_return_f : + case Hexagon::L4_return_tnew_pnt : + case Hexagon::L4_return_fnew_pnt : + case Hexagon::L4_return_tnew_pt : + case Hexagon::L4_return_fnew_pt : + return true; + } + return false; +} + + +// Return true when ConsMI uses a register defined by ProdMI. 
+bool HexagonInstrInfo::isDependent(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const { + const MCInstrDesc &ProdMCID = ProdMI->getDesc(); + if (!ProdMCID.getNumDefs()) + return false; + + auto &HRI = getRegisterInfo(); + + SmallVector<unsigned, 4> DefsA; + SmallVector<unsigned, 4> DefsB; + SmallVector<unsigned, 8> UsesA; + SmallVector<unsigned, 8> UsesB; + + parseOperands(ProdMI, DefsA, UsesA); + parseOperands(ConsMI, DefsB, UsesB); + + for (auto &RegA : DefsA) + for (auto &RegB : UsesB) { + // True data dependency. + if (RegA == RegB) + return true; + + if (Hexagon::DoubleRegsRegClass.contains(RegA)) + for (MCSubRegIterator SubRegs(RegA, &HRI); SubRegs.isValid(); ++SubRegs) + if (RegB == *SubRegs) + return true; + + if (Hexagon::DoubleRegsRegClass.contains(RegB)) + for (MCSubRegIterator SubRegs(RegB, &HRI); SubRegs.isValid(); ++SubRegs) + if (RegA == *SubRegs) + return true; + } + + return false; +} + + +// Returns true if the instruction is alread a .cur. +bool HexagonInstrInfo::isDotCurInst(const MachineInstr* MI) const { + switch (MI->getOpcode()) { + case Hexagon::V6_vL32b_cur_pi: + case Hexagon::V6_vL32b_cur_ai: + case Hexagon::V6_vL32b_cur_pi_128B: + case Hexagon::V6_vL32b_cur_ai_128B: + return true; + } + return false; +} + + +// Returns true, if any one of the operands is a dot new +// insn, whether it is predicated dot new or register dot new. +bool HexagonInstrInfo::isDotNewInst(const MachineInstr* MI) const { + if (isNewValueInst(MI) || + (isPredicated(MI) && isPredicatedNew(MI))) + return true; + + return false; +} + + +/// Symmetrical. See if these two instructions are fit for duplex pair. +bool HexagonInstrInfo::isDuplexPair(const MachineInstr *MIa, + const MachineInstr *MIb) const { + HexagonII::SubInstructionGroup MIaG = getDuplexCandidateGroup(MIa); + HexagonII::SubInstructionGroup MIbG = getDuplexCandidateGroup(MIb); + return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG)); +} + + +bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr *MI) const { + if (!MI) + return false; + + if (MI->mayLoad() || MI->mayStore() || MI->isCompare()) + return true; + + // Multiply + unsigned SchedClass = MI->getDesc().getSchedClass(); + if (SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23) + return true; + return false; +} + + +bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const { + return (Opcode == Hexagon::ENDLOOP0 || + Opcode == Hexagon::ENDLOOP1); +} + + +bool HexagonInstrInfo::isExpr(unsigned OpType) const { + switch(OpType) { + case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + return true; + default: + return false; + } +} + + +bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const { + const MCInstrDesc &MID = MI->getDesc(); + const uint64_t F = MID.TSFlags; + if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask) + return true; + + // TODO: This is largely obsolete now. Will need to be removed + // in consecutive patches. + switch(MI->getOpcode()) { + // TFR_FI Remains a special case. + case Hexagon::TFR_FI: + return true; + default: + return false; + } + return false; +} + + +// This returns true in two cases: +// - The OP code itself indicates that this is an extended instruction. +// - One of MOs has been marked with HMOTF_ConstExtended flag. 
+bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
+ // First check if this is permanently extended op code.
+ const uint64_t F = MI->getDesc().TSFlags;
+ if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask)
+ return true;
+ // Use MO operand flags to determine if one of MI's operands
+ // has the HMOTF_ConstExtended flag set.
+ for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+ E = MI->operands_end(); I != E; ++I) {
+ if (I->getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::isFloat(const MachineInstr *MI) const {
+ unsigned Opcode = MI->getOpcode();
+ const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::FPPos) & HexagonII::FPMask;
+}
+
+
+// No V60 HVX VMEM with A_INDIRECT.
+bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr *I,
+ const MachineInstr *J) const {
+ if (!isV60VectorInstruction(I))
+ return false;
+ if (!I->mayLoad() && !I->mayStore())
+ return false;
+ return J->isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J);
+}
+
+
+bool HexagonInstrInfo::isIndirectCall(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::J2_callr :
+ case Hexagon::J2_callrf :
+ case Hexagon::J2_callrt :
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::L4_return :
+ case Hexagon::L4_return_t :
+ case Hexagon::L4_return_f :
+ case Hexagon::L4_return_fnew_pnt :
+ case Hexagon::L4_return_fnew_pt :
+ case Hexagon::L4_return_tnew_pnt :
+ case Hexagon::L4_return_tnew_pt :
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::isJumpR(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::J2_jumpr :
+ case Hexagon::J2_jumprt :
+ case Hexagon::J2_jumprf :
+ case Hexagon::J2_jumprtnewpt :
+ case Hexagon::J2_jumprfnewpt :
+ case Hexagon::J2_jumprtnew :
+ case Hexagon::J2_jumprfnew :
+ return true;
+ }
+ return false;
+}
+
+
+// Return true if a given MI can accommodate the given offset.
+// Use an absolute estimate as opposed to the exact number.
+// TODO: This will need to be changed to use MC level
+// definition of instruction extendable field size.
+bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr *MI,
+ unsigned offset) const {
+ // This selection of jump instructions matches what AnalyzeBranch can
+ // parse, plus NVJ.
+ if (isNewValueJump(MI)) // r9:2
+ return isInt<11>(offset);
+
+ switch (MI->getOpcode()) {
+ // Still missing Jump to address condition on register value.
+ default:
+ return false;
+ case Hexagon::J2_jump: // bits<24> dst; // r22:2
+ case Hexagon::J2_call:
+ case Hexagon::CALLv3nr:
+ return isInt<24>(offset);
+ case Hexagon::J2_jumpt: //bits<17> dst; // r15:2
+ case Hexagon::J2_jumpf:
+ case Hexagon::J2_jumptnew:
+ case Hexagon::J2_jumptnewpt:
+ case Hexagon::J2_jumpfnew:
+ case Hexagon::J2_jumpfnewpt:
+ case Hexagon::J2_callt:
+ case Hexagon::J2_callf:
+ return isInt<17>(offset);
+ case Hexagon::J2_loop0i:
+ case Hexagon::J2_loop0iext:
+ case Hexagon::J2_loop0r:
+ case Hexagon::J2_loop0rext:
+ case Hexagon::J2_loop1i:
+ case Hexagon::J2_loop1iext:
+ case Hexagon::J2_loop1r:
+ case Hexagon::J2_loop1rext:
+ return isInt<9>(offset);
+ // TODO: Add all the compound branches here. Can we do this in Relation model?
+ case Hexagon::J4_cmpeqi_tp0_jump_nt: + case Hexagon::J4_cmpeqi_tp1_jump_nt: + return isInt<11>(offset); + } +} + + +bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI, + const MachineInstr *ESMI) const { + if (!LRMI || !ESMI) + return false; + + bool isLate = isLateResultInstr(LRMI); + bool isEarly = isEarlySourceInstr(ESMI); + + DEBUG(dbgs() << "V60" << (isLate ? "-LR " : " -- ")); + DEBUG(LRMI->dump()); + DEBUG(dbgs() << "V60" << (isEarly ? "-ES " : " -- ")); + DEBUG(ESMI->dump()); + + if (isLate && isEarly) { + DEBUG(dbgs() << "++Is Late Result feeding Early Source\n"); + return true; + } + + return false; +} + + +bool HexagonInstrInfo::isLateResultInstr(const MachineInstr *MI) const { + if (!MI) + return false; + + switch (MI->getOpcode()) { + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + case TargetOpcode::PHI: + return false; + default: + break; + } + + unsigned SchedClass = MI->getDesc().getSchedClass(); + + switch (SchedClass) { + case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123: + case Hexagon::Sched::ALU64_tc_1_SLOT23: + case Hexagon::Sched::EXTENDER_tc_1_SLOT0123: + case Hexagon::Sched::S_2op_tc_1_SLOT23: + case Hexagon::Sched::S_3op_tc_1_SLOT23: + case Hexagon::Sched::V2LDST_tc_ld_SLOT01: + case Hexagon::Sched::V2LDST_tc_st_SLOT0: + case Hexagon::Sched::V2LDST_tc_st_SLOT01: + case Hexagon::Sched::V4LDST_tc_ld_SLOT01: + case Hexagon::Sched::V4LDST_tc_st_SLOT0: + case Hexagon::Sched::V4LDST_tc_st_SLOT01: + return false; + } + return true; +} + + +bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr *MI) const { + if (!MI) + return false; + + // Instructions with iclass A_CVI_VX and attribute A_CVI_LATE uses a multiply + // resource, but all operands can be received late like an ALU instruction. 
+ return MI->getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE; +} + + +bool HexagonInstrInfo::isLoopN(const MachineInstr *MI) const { + unsigned Opcode = MI->getOpcode(); + return Opcode == Hexagon::J2_loop0i || + Opcode == Hexagon::J2_loop0r || + Opcode == Hexagon::J2_loop0iext || + Opcode == Hexagon::J2_loop0rext || + Opcode == Hexagon::J2_loop1i || + Opcode == Hexagon::J2_loop1r || + Opcode == Hexagon::J2_loop1iext || + Opcode == Hexagon::J2_loop1rext; +} + + +bool HexagonInstrInfo::isMemOp(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: return false; + case Hexagon::L4_iadd_memopw_io : + case Hexagon::L4_isub_memopw_io : + case Hexagon::L4_add_memopw_io : + case Hexagon::L4_sub_memopw_io : + case Hexagon::L4_and_memopw_io : + case Hexagon::L4_or_memopw_io : + case Hexagon::L4_iadd_memoph_io : + case Hexagon::L4_isub_memoph_io : + case Hexagon::L4_add_memoph_io : + case Hexagon::L4_sub_memoph_io : + case Hexagon::L4_and_memoph_io : + case Hexagon::L4_or_memoph_io : + case Hexagon::L4_iadd_memopb_io : + case Hexagon::L4_isub_memopb_io : + case Hexagon::L4_add_memopb_io : + case Hexagon::L4_sub_memopb_io : + case Hexagon::L4_and_memopb_io : + case Hexagon::L4_or_memopb_io : + case Hexagon::L4_ior_memopb_io: + case Hexagon::L4_ior_memoph_io: + case Hexagon::L4_ior_memopw_io: + case Hexagon::L4_iand_memopb_io: + case Hexagon::L4_iand_memoph_io: + case Hexagon::L4_iand_memopw_io: + return true; + } + return false; +} + + +bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; +} + + +bool HexagonInstrInfo::isNewValue(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; +} + + +bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const { + return isNewValueJump(MI) || isNewValueStore(MI); +} + + +bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { + return isNewValue(MI) && MI->isBranch(); +} + + +bool HexagonInstrInfo::isNewValueJump(unsigned Opcode) const { + return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode); +} + + +bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; +} + + +bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; +} + + +// Returns true if a particular operand is extendable for an instruction. 
+bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI, + unsigned OperandNum) const { + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) + == OperandNum; +} + + +bool HexagonInstrInfo::isPostIncrement(const MachineInstr* MI) const { + return getAddrMode(MI) == HexagonII::PostInc; +} + + +bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + assert(isPredicated(MI)); + return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; +} + + +bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + assert(isPredicated(Opcode)); + return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; +} + + +bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return !((F >> HexagonII::PredicatedFalsePos) & + HexagonII::PredicatedFalseMask); +} + + +bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + // Make sure that the instruction is predicated. + assert((F>> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); + return !((F >> HexagonII::PredicatedFalsePos) & + HexagonII::PredicatedFalseMask); +} + + +bool HexagonInstrInfo::isPredicated(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; +} + + +bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask; +} + + +bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + assert(get(Opcode).isBranch() && + (isPredicatedNew(Opcode) || isNewValue(Opcode))); + return (F >> HexagonII::TakenPos) & HexagonII::TakenMask; +} + + +bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const { + return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 || + MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT; +} + + +bool HexagonInstrInfo::isSolo(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::SoloPos) & HexagonII::SoloMask; +} + + +bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::STriw_pred : + case Hexagon::LDriw_pred : + return true; + default: + return false; + } +} + + +// Returns true when SU has a timing class TC1. 
+bool HexagonInstrInfo::isTC1(const MachineInstr *MI) const { + unsigned SchedClass = MI->getDesc().getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123: + case Hexagon::Sched::ALU64_tc_1_SLOT23: + case Hexagon::Sched::EXTENDER_tc_1_SLOT0123: + //case Hexagon::Sched::M_tc_1_SLOT23: + case Hexagon::Sched::S_2op_tc_1_SLOT23: + case Hexagon::Sched::S_3op_tc_1_SLOT23: + return true; + + default: + return false; + } +} + + +bool HexagonInstrInfo::isTC2(const MachineInstr *MI) const { + unsigned SchedClass = MI->getDesc().getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123: + case Hexagon::Sched::ALU64_tc_2_SLOT23: + case Hexagon::Sched::CR_tc_2_SLOT3: + case Hexagon::Sched::M_tc_2_SLOT23: + case Hexagon::Sched::S_2op_tc_2_SLOT23: + case Hexagon::Sched::S_3op_tc_2_SLOT23: + return true; + + default: + return false; + } +} + + +bool HexagonInstrInfo::isTC2Early(const MachineInstr *MI) const { + unsigned SchedClass = MI->getDesc().getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_2op_tc_2early_SLOT0123: + case Hexagon::Sched::ALU32_3op_tc_2early_SLOT0123: + case Hexagon::Sched::ALU64_tc_2early_SLOT23: + case Hexagon::Sched::CR_tc_2early_SLOT23: + case Hexagon::Sched::CR_tc_2early_SLOT3: + case Hexagon::Sched::J_tc_2early_SLOT0123: + case Hexagon::Sched::J_tc_2early_SLOT2: + case Hexagon::Sched::J_tc_2early_SLOT23: + case Hexagon::Sched::S_2op_tc_2early_SLOT23: + case Hexagon::Sched::S_3op_tc_2early_SLOT23: + return true; + + default: + return false; + } +} + + +bool HexagonInstrInfo::isTC4x(const MachineInstr *MI) const { + if (!MI) + return false; + + unsigned SchedClass = MI->getDesc().getSchedClass(); + return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23; +} + + +bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr *MI) const { + if (!MI) + return false; + + const uint64_t V = getType(MI); + return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST; +} + + +// Check if the Offset is a valid auto-inc imm by Load/Store Type. +// +bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const { + if (VT == MVT::v16i32 || VT == MVT::v8i64 || + VT == MVT::v32i16 || VT == MVT::v64i8) { + return (Offset >= Hexagon_MEMV_AUTOINC_MIN && + Offset <= Hexagon_MEMV_AUTOINC_MAX && + (Offset & 0x3f) == 0); + } + // 128B + if (VT == MVT::v32i32 || VT == MVT::v16i64 || + VT == MVT::v64i16 || VT == MVT::v128i8) { + return (Offset >= Hexagon_MEMV_AUTOINC_MIN_128B && + Offset <= Hexagon_MEMV_AUTOINC_MAX_128B && + (Offset & 0x7f) == 0); + } + if (VT == MVT::i64) { + return (Offset >= Hexagon_MEMD_AUTOINC_MIN && + Offset <= Hexagon_MEMD_AUTOINC_MAX && + (Offset & 0x7) == 0); + } + if (VT == MVT::i32) { + return (Offset >= Hexagon_MEMW_AUTOINC_MIN && + Offset <= Hexagon_MEMW_AUTOINC_MAX && + (Offset & 0x3) == 0); + } + if (VT == MVT::i16) { + return (Offset >= Hexagon_MEMH_AUTOINC_MIN && + Offset <= Hexagon_MEMH_AUTOINC_MAX && + (Offset & 0x1) == 0); + } + if (VT == MVT::i8) { + return (Offset >= Hexagon_MEMB_AUTOINC_MIN && + Offset <= Hexagon_MEMB_AUTOINC_MAX); + } + llvm_unreachable("Not an auto-inc opc!"); +} + + +bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, + bool Extend) const { + // This function is to check whether the "Offset" is in the correct range of + // the given "Opcode". 
If "Offset" is not in the correct range, "A2_addi" is + // inserted to calculate the final address. Due to this reason, the function + // assumes that the "Offset" has correct alignment. + // We used to assert if the offset was not properly aligned, however, + // there are cases where a misaligned pointer recast can cause this + // problem, and we need to allow for it. The front end warns of such + // misaligns with respect to load size. + + switch (Opcode) { + case Hexagon::STriq_pred_V6: + case Hexagon::STriq_pred_vec_V6: + case Hexagon::STriv_pseudo_V6: + case Hexagon::STrivv_pseudo_V6: + case Hexagon::LDriq_pred_V6: + case Hexagon::LDriq_pred_vec_V6: + case Hexagon::LDriv_pseudo_V6: + case Hexagon::LDrivv_pseudo_V6: + case Hexagon::LDrivv_indexed: + case Hexagon::STrivv_indexed: + case Hexagon::V6_vL32b_ai: + case Hexagon::V6_vS32b_ai: + case Hexagon::V6_vL32Ub_ai: + case Hexagon::V6_vS32Ub_ai: + return (Offset >= Hexagon_MEMV_OFFSET_MIN) && + (Offset <= Hexagon_MEMV_OFFSET_MAX); + + case Hexagon::STriq_pred_V6_128B: + case Hexagon::STriq_pred_vec_V6_128B: + case Hexagon::STriv_pseudo_V6_128B: + case Hexagon::STrivv_pseudo_V6_128B: + case Hexagon::LDriq_pred_V6_128B: + case Hexagon::LDriq_pred_vec_V6_128B: + case Hexagon::LDriv_pseudo_V6_128B: + case Hexagon::LDrivv_pseudo_V6_128B: + case Hexagon::LDrivv_indexed_128B: + case Hexagon::STrivv_indexed_128B: + case Hexagon::V6_vL32b_ai_128B: + case Hexagon::V6_vS32b_ai_128B: + case Hexagon::V6_vL32Ub_ai_128B: + case Hexagon::V6_vS32Ub_ai_128B: + return (Offset >= Hexagon_MEMV_OFFSET_MIN_128B) && + (Offset <= Hexagon_MEMV_OFFSET_MAX_128B); + + case Hexagon::J2_loop0i: + case Hexagon::J2_loop1i: + return isUInt<10>(Offset); + } + + if (Extend) + return true; + + switch (Opcode) { + case Hexagon::L2_loadri_io: + case Hexagon::S2_storeri_io: + return (Offset >= Hexagon_MEMW_OFFSET_MIN) && + (Offset <= Hexagon_MEMW_OFFSET_MAX); + + case Hexagon::L2_loadrd_io: + case Hexagon::S2_storerd_io: + return (Offset >= Hexagon_MEMD_OFFSET_MIN) && + (Offset <= Hexagon_MEMD_OFFSET_MAX); + + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + case Hexagon::S2_storerh_io: + return (Offset >= Hexagon_MEMH_OFFSET_MIN) && + (Offset <= Hexagon_MEMH_OFFSET_MAX); + + case Hexagon::L2_loadrb_io: + case Hexagon::L2_loadrub_io: + case Hexagon::S2_storerb_io: + return (Offset >= Hexagon_MEMB_OFFSET_MIN) && + (Offset <= Hexagon_MEMB_OFFSET_MAX); + + case Hexagon::A2_addi: + return (Offset >= Hexagon_ADDI_OFFSET_MIN) && + (Offset <= Hexagon_ADDI_OFFSET_MAX); + + case Hexagon::L4_iadd_memopw_io : + case Hexagon::L4_isub_memopw_io : + case Hexagon::L4_add_memopw_io : + case Hexagon::L4_sub_memopw_io : + case Hexagon::L4_and_memopw_io : + case Hexagon::L4_or_memopw_io : + return (0 <= Offset && Offset <= 255); + + case Hexagon::L4_iadd_memoph_io : + case Hexagon::L4_isub_memoph_io : + case Hexagon::L4_add_memoph_io : + case Hexagon::L4_sub_memoph_io : + case Hexagon::L4_and_memoph_io : + case Hexagon::L4_or_memoph_io : + return (0 <= Offset && Offset <= 127); + + case Hexagon::L4_iadd_memopb_io : + case Hexagon::L4_isub_memopb_io : + case Hexagon::L4_add_memopb_io : + case Hexagon::L4_sub_memopb_io : + case Hexagon::L4_and_memopb_io : + case Hexagon::L4_or_memopb_io : + return (0 <= Offset && Offset <= 63); + + // LDri_pred and STriw_pred are pseudo operations, so it has to take offset of + // any size. Later pass knows how to handle it. 
+ case Hexagon::STriw_pred: + case Hexagon::LDriw_pred: + return true; + + case Hexagon::TFR_FI: + case Hexagon::TFR_FIA: + case Hexagon::INLINEASM: + return true; + + case Hexagon::L2_ploadrbt_io: + case Hexagon::L2_ploadrbf_io: + case Hexagon::L2_ploadrubt_io: + case Hexagon::L2_ploadrubf_io: + case Hexagon::S2_pstorerbt_io: + case Hexagon::S2_pstorerbf_io: + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirbt_io: + case Hexagon::S4_storeirbf_io: + return isUInt<6>(Offset); + + case Hexagon::L2_ploadrht_io: + case Hexagon::L2_ploadrhf_io: + case Hexagon::L2_ploadruht_io: + case Hexagon::L2_ploadruhf_io: + case Hexagon::S2_pstorerht_io: + case Hexagon::S2_pstorerhf_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeirht_io: + case Hexagon::S4_storeirhf_io: + return isShiftedUInt<6,1>(Offset); + + case Hexagon::L2_ploadrit_io: + case Hexagon::L2_ploadrif_io: + case Hexagon::S2_pstorerit_io: + case Hexagon::S2_pstorerif_io: + case Hexagon::S4_storeiri_io: + case Hexagon::S4_storeirit_io: + case Hexagon::S4_storeirif_io: + return isShiftedUInt<6,2>(Offset); + + case Hexagon::L2_ploadrdt_io: + case Hexagon::L2_ploadrdf_io: + case Hexagon::S2_pstorerdt_io: + case Hexagon::S2_pstorerdf_io: + return isShiftedUInt<6,3>(Offset); + } // switch + + llvm_unreachable("No offset range is defined for this opcode. " + "Please define it in the above switch statement!"); +} + + +bool HexagonInstrInfo::isVecAcc(const MachineInstr *MI) const { + return MI && isV60VectorInstruction(MI) && isAccumulator(MI); +} + + +bool HexagonInstrInfo::isVecALU(const MachineInstr *MI) const { + if (!MI) + return false; + const uint64_t F = get(MI->getOpcode()).TSFlags; + const uint64_t V = ((F >> HexagonII::TypePos) & HexagonII::TypeMask); + return + V == HexagonII::TypeCVI_VA || + V == HexagonII::TypeCVI_VA_DV; +} + + +bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const { + if (EnableACCForwarding && isVecAcc(ProdMI) && isVecAcc(ConsMI)) + return true; + + if (EnableALUForwarding && (isVecALU(ConsMI) || isLateSourceInstr(ConsMI))) + return true; + + if (mayBeNewStore(ConsMI)) + return true; + + return false; +} + + +/// \brief Can these instructions execute at the same time in a bundle. +bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr *First, + const MachineInstr *Second) const { + if (DisableNVSchedule) + return false; + if (mayBeNewStore(Second)) { + // Make sure the definition of the first instruction is the value being + // stored. + const MachineOperand &Stored = + Second->getOperand(Second->getNumOperands() - 1); + if (!Stored.isReg()) + return false; + for (unsigned i = 0, e = First->getNumOperands(); i < e; ++i) { + const MachineOperand &Op = First->getOperand(i); + if (Op.isReg() && Op.isDef() && Op.getReg() == Stored.getReg()) + return true; + } + } + return false; +} + + +bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const { + for (auto &I : *B) + if (I.isEHLabel()) + return true; + return false; +} + + +// Returns true if an instruction can be converted into a non-extended +// equivalent instruction. +bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr *MI) const { + short NonExtOpcode; + // Check if the instruction has a register form that uses register in place + // of the extended operand, if so return that as the non-extended form. 
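The addressing-mode conversions selected by the switch below can be pictured on a concrete pair; this is only an illustration of the intent, since the actual rewrite is performed by a later pass:

  // Extended absolute form (constant extender on this instruction): r0 = memw(##some_global)
  // Non-extended equivalent, once the address is in a register:     r0 = memw(r1+#0)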
+ if (Hexagon::getRegForm(MI->getOpcode()) >= 0) + return true; + + if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) { + // Check addressing mode and retrieve non-ext equivalent instruction. + + switch (getAddrMode(MI)) { + case HexagonII::Absolute : + // Load/store with absolute addressing mode can be converted into + // base+offset mode. + NonExtOpcode = Hexagon::getBaseWithImmOffset(MI->getOpcode()); + break; + case HexagonII::BaseImmOffset : + // Load/store with base+offset addressing mode can be converted into + // base+register offset addressing mode. However left shift operand should + // be set to 0. + NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode()); + break; + case HexagonII::BaseLongOffset: + NonExtOpcode = Hexagon::getRegShlForm(MI->getOpcode()); + break; + default: + return false; + } + if (NonExtOpcode < 0) + return false; + return true; + } + return false; +} + + +bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr *MI) const { + return Hexagon::getRealHWInstr(MI->getOpcode(), + Hexagon::InstrType_Pseudo) >= 0; +} + + +bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B) + const { + MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); + while (I != E) { + if (I->isBarrier()) + return true; + ++I; + } + return false; +} + + +// Returns true, if a LD insn can be promoted to a cur load. +bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr *MI) const { + auto &HST = MI->getParent()->getParent()->getSubtarget<HexagonSubtarget>(); + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::mayCVLoadPos) & HexagonII::mayCVLoadMask) && + HST.hasV60TOps(); +} + + +// Returns true, if a ST insn can be promoted to a new-value store. +bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask; +} + + +bool HexagonInstrInfo::producesStall(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const { + // There is no stall when ProdMI is not a V60 vector. + if (!isV60VectorInstruction(ProdMI)) + return false; + + // There is no stall when ProdMI and ConsMI are not dependent. + if (!isDependent(ProdMI, ConsMI)) + return false; + + // When Forward Scheduling is enabled, there is no stall if ProdMI and ConsMI + // are scheduled in consecutive packets. + if (isVecUsableNextPacket(ProdMI, ConsMI)) + return false; + + return true; +} + + +bool HexagonInstrInfo::producesStall(const MachineInstr *MI, + MachineBasicBlock::const_instr_iterator BII) const { + // There is no stall when I is not a V60 vector. + if (!isV60VectorInstruction(MI)) + return false; + + MachineBasicBlock::const_instr_iterator MII = BII; + MachineBasicBlock::const_instr_iterator MIE = MII->getParent()->instr_end(); + + if (!(*MII).isBundle()) { + const MachineInstr *J = &*MII; + if (!isV60VectorInstruction(J)) + return false; + else if (isVecUsableNextPacket(J, MI)) + return false; + return true; + } + + for (++MII; MII != MIE && MII->isInsideBundle(); ++MII) { + const MachineInstr *J = &*MII; + if (producesStall(J, MI)) + return true; + } + return false; +} + + +bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr *MI, + unsigned PredReg) const { + for (unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.isReg() && MO.isDef() && MO.isImplicit() && (MO.getReg() == PredReg)) + return false; // Predicate register must be explicitly defined. 
+ } + + // Hexagon Programmer's Reference says that decbin, memw_locked, and + // memd_locked cannot be used as .new as well, + // but we don't seem to have these instructions defined. + return MI->getOpcode() != Hexagon::A4_tlbmatch; +} + + +bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { + return (Opcode == Hexagon::J2_jumpt) || + (Opcode == Hexagon::J2_jumpf) || + (Opcode == Hexagon::J2_jumptnew) || + (Opcode == Hexagon::J2_jumpfnew) || + (Opcode == Hexagon::J2_jumptnewpt) || + (Opcode == Hexagon::J2_jumpfnewpt); +} + + +bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const { + if (Cond.empty() || !isPredicated(Cond[0].getImm())) + return false; + return !isPredicatedTrue(Cond[0].getImm()); +} + + +unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask; +} + + +// Returns the base register in a memory access (load/store). The offset is +// returned in Offset and the access size is returned in AccessSize. +unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr *MI, + int &Offset, unsigned &AccessSize) const { + // Return if it is not a base+offset type instruction or a MemOp. + if (getAddrMode(MI) != HexagonII::BaseImmOffset && + getAddrMode(MI) != HexagonII::BaseLongOffset && + !isMemOp(MI) && !isPostIncrement(MI)) + return 0; + + // Since it is a memory access instruction, getMemAccessSize() should never + // return 0. + assert (getMemAccessSize(MI) && + "BaseImmOffset or BaseLongOffset or MemOp without accessSize"); + + // Return Values of getMemAccessSize() are + // 0 - Checked in the assert above. + // 1, 2, 3, 4 & 7, 8 - The statement below is correct for all these. + // MemAccessSize is represented as 1+log2(N) where N is size in bits. + AccessSize = (1U << (getMemAccessSize(MI) - 1)); + + unsigned basePos = 0, offsetPos = 0; + if (!getBaseAndOffsetPosition(MI, basePos, offsetPos)) + return 0; + + // Post increment updates its EA after the mem access, + // so we need to treat its offset as zero. + if (isPostIncrement(MI)) + Offset = 0; + else { + Offset = MI->getOperand(offsetPos).getImm(); + } + + return MI->getOperand(basePos).getReg(); +} + + +/// Return the position of the base and offset operands for this instruction. +bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr *MI, + unsigned &BasePos, unsigned &OffsetPos) const { + // Deal with memops first. + if (isMemOp(MI)) { + assert (MI->getOperand(0).isReg() && MI->getOperand(1).isImm() && + "Bad Memop."); + BasePos = 0; + OffsetPos = 1; + } else if (MI->mayStore()) { + BasePos = 0; + OffsetPos = 1; + } else if (MI->mayLoad()) { + BasePos = 1; + OffsetPos = 2; + } else + return false; + + if (isPredicated(MI)) { + BasePos++; + OffsetPos++; + } + if (isPostIncrement(MI)) { + BasePos++; + OffsetPos++; + } + + if (!MI->getOperand(BasePos).isReg() || !MI->getOperand(OffsetPos).isImm()) + return false; + + return true; +} + + +// Inserts branching instructions in reverse order of their occurence. +// e.g. jump_t t1 (i1) +// jump t2 (i2) +// Jumpers = {i2, i1} +SmallVector<MachineInstr*, 2> HexagonInstrInfo::getBranchingInstrs( + MachineBasicBlock& MBB) const { + SmallVector<MachineInstr*, 2> Jumpers; + // If the block has no terminators, it just falls into the block after it. 
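+  // In that case (and for an empty block) the returned list stays empty.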
+ MachineBasicBlock::instr_iterator I = MBB.instr_end(); + if (I == MBB.instr_begin()) + return Jumpers; + + // A basic block may looks like this: + // + // [ insn + // EH_LABEL + // insn + // insn + // insn + // EH_LABEL + // insn ] + // + // It has two succs but does not have a terminator + // Don't know how to handle it. + do { + --I; + if (I->isEHLabel()) + return Jumpers; + } while (I != MBB.instr_begin()); + + I = MBB.instr_end(); + --I; + + while (I->isDebugValue()) { + if (I == MBB.instr_begin()) + return Jumpers; + --I; + } + if (!isUnpredicatedTerminator(&*I)) + return Jumpers; + + // Get the last instruction in the block. + MachineInstr *LastInst = &*I; + Jumpers.push_back(LastInst); + MachineInstr *SecondLastInst = nullptr; + // Find one more terminator if present. + do { + if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) { + if (!SecondLastInst) { + SecondLastInst = &*I; + Jumpers.push_back(SecondLastInst); + } else // This is a third branch. + return Jumpers; + } + if (I == MBB.instr_begin()) + break; + --I; + } while (true); + return Jumpers; +} + + +// Returns Operand Index for the constant extended instruction. +unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask; +} + +// See if instruction could potentially be a duplex candidate. +// If so, return its group. Zero otherwise. +HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup( + const MachineInstr *MI) const { + unsigned DstReg, SrcReg, Src1Reg, Src2Reg; + + switch (MI->getOpcode()) { + default: + return HexagonII::HCG_None; + // + // Compound pairs. + // "p0=cmp.eq(Rs16,Rt16); if (p0.new) jump:nt #r9:2" + // "Rd16=#U6 ; jump #r9:2" + // "Rd16=Rs16 ; jump #r9:2" + // + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtu: + DstReg = MI->getOperand(0).getReg(); + Src1Reg = MI->getOperand(1).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (Hexagon::PredRegsRegClass.contains(DstReg) && + (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg)) + return HexagonII::HCG_A; + break; + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtui: + // P0 = cmp.eq(Rs,#u2) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (Hexagon::PredRegsRegClass.contains(DstReg) && + (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() && + ((isUInt<5>(MI->getOperand(2).getImm())) || + (MI->getOperand(2).getImm() == -1))) + return HexagonII::HCG_A; + break; + case Hexagon::A2_tfr: + // Rd = Rs + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg)) + return HexagonII::HCG_A; + break; + case Hexagon::A2_tfrsi: + // Rd = #u6 + // Do not test for #u6 size since the const is getting extended + // regardless and compound could be formed. 
+ DstReg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(DstReg)) + return HexagonII::HCG_A; + break; + case Hexagon::S2_tstbit_i: + DstReg = MI->getOperand(0).getReg(); + Src1Reg = MI->getOperand(1).getReg(); + if (Hexagon::PredRegsRegClass.contains(DstReg) && + (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + MI->getOperand(2).isImm() && + isIntRegForSubInst(Src1Reg) && (MI->getOperand(2).getImm() == 0)) + return HexagonII::HCG_A; + break; + // The fact that .new form is used pretty much guarantees + // that predicate register will match. Nevertheless, + // there could be some false positives without additional + // checking. + case Hexagon::J2_jumptnew: + case Hexagon::J2_jumpfnew: + case Hexagon::J2_jumptnewpt: + case Hexagon::J2_jumpfnewpt: + Src1Reg = MI->getOperand(0).getReg(); + if (Hexagon::PredRegsRegClass.contains(Src1Reg) && + (Hexagon::P0 == Src1Reg || Hexagon::P1 == Src1Reg)) + return HexagonII::HCG_B; + break; + // Transfer and jump: + // Rd=#U6 ; jump #r9:2 + // Rd=Rs ; jump #r9:2 + // Do not test for jump range here. + case Hexagon::J2_jump: + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + return HexagonII::HCG_C; + break; + } + + return HexagonII::HCG_None; +} + + +// Returns -1 when there is no opcode found. +unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr *GA, + const MachineInstr *GB) const { + assert(getCompoundCandidateGroup(GA) == HexagonII::HCG_A); + assert(getCompoundCandidateGroup(GB) == HexagonII::HCG_B); + if ((GA->getOpcode() != Hexagon::C2_cmpeqi) || + (GB->getOpcode() != Hexagon::J2_jumptnew)) + return -1; + unsigned DestReg = GA->getOperand(0).getReg(); + if (!GB->readsRegister(DestReg)) + return -1; + if (DestReg == Hexagon::P0) + return Hexagon::J4_cmpeqi_tp0_jump_nt; + if (DestReg == Hexagon::P1) + return Hexagon::J4_cmpeqi_tp1_jump_nt; + return -1; +} + + +int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { + enum Hexagon::PredSense inPredSense; + inPredSense = invertPredicate ? Hexagon::PredSense_false : + Hexagon::PredSense_true; + int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense); + if (CondOpcode >= 0) // Valid Conditional opcode/instruction + return CondOpcode; + + // This switch case will be removed once all the instructions have been + // modified to use relation maps. + switch(Opc) { + case Hexagon::TFRI_f: + return !invertPredicate ? Hexagon::TFRI_cPt_f : + Hexagon::TFRI_cNotPt_f; + } + + llvm_unreachable("Unexpected predicable instruction"); +} + + +// Return the cur value instruction for a given store. +int HexagonInstrInfo::getDotCurOp(const MachineInstr* MI) const { + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown .cur type"); + case Hexagon::V6_vL32b_pi: + return Hexagon::V6_vL32b_cur_pi; + case Hexagon::V6_vL32b_ai: + return Hexagon::V6_vL32b_cur_ai; + //128B + case Hexagon::V6_vL32b_pi_128B: + return Hexagon::V6_vL32b_cur_pi_128B; + case Hexagon::V6_vL32b_ai_128B: + return Hexagon::V6_vL32b_cur_ai_128B; + } + return 0; +} + + + +// The diagram below shows the steps involved in the conversion of a predicated +// store instruction to its .new predicated new-value form. +// +// p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ] +// ^ ^ +// / \ (not OK. it will cause new-value store to be +// / X conditional on p0.new while R2 producer is +// / \ on p0) +// / \. 
+// p.new store p.old NV store +// [if(p0.new)memw(R0+#0)=R2] [if(p0)memw(R0+#0)=R2.new] +// ^ ^ +// \ / +// \ / +// \ / +// p.old store +// [if (p0)memw(R0+#0)=R2] +// +// +// The following set of instructions further explains the scenario where +// conditional new-value store becomes invalid when promoted to .new predicate +// form. +// +// { 1) if (p0) r0 = add(r1, r2) +// 2) p0 = cmp.eq(r3, #0) } +// +// 3) if (p0) memb(r1+#0) = r0 --> this instruction can't be grouped with +// the first two instructions because in instr 1, r0 is conditional on old value +// of p0 but its use in instr 3 is conditional on p0 modified by instr 2 which +// is not valid for new-value stores. +// Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded +// from the "Conditional Store" list. Because a predicated new value store +// would NOT be promoted to a double dot new store. See diagram below: +// This function returns yes for those stores that are predicated but not +// yet promoted to predicate dot new instructions. +// +// +---------------------+ +// /-----| if (p0) memw(..)=r0 |---------\~ +// || +---------------------+ || +// promote || /\ /\ || promote +// || /||\ /||\ || +// \||/ demote || \||/ +// \/ || || \/ +// +-------------------------+ || +-------------------------+ +// | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new | +// +-------------------------+ || +-------------------------+ +// || || || +// || demote \||/ +// promote || \/ NOT possible +// || || /\~ +// \||/ || /||\~ +// \/ || || +// +-----------------------------+ +// | if (p0.new) memw(..)=r0.new | +// +-----------------------------+ +// Double Dot New Store +// +// Returns the most basic instruction for the .new predicated instructions and +// new-value stores. +// For example, all of the following instructions will be converted back to the +// same instruction: +// 1) if (p0.new) memw(R0+#0) = R1.new ---> +// 2) if (p0) memw(R0+#0)= R1.new -------> if (p0) memw(R0+#0) = R1 +// 3) if (p0.new) memw(R0+#0) = R1 ---> +// +// To understand the translation of instruction 1 to its original form, consider +// a packet with 3 instructions. +// { p0 = cmp.eq(R0,R1) +// if (p0.new) R2 = add(R3, R4) +// R5 = add (R3, R1) +// } +// if (p0) memw(R5+#0) = R2 <--- trying to include it in the previous packet +// +// This instruction can be part of the previous packet only if both p0 and R2 +// are promoted to .new values. This promotion happens in steps, first +// predicate register is promoted to .new and in the next iteration R2 is +// promoted. Therefore, in case of dependence check failure (due to R5) during +// next iteration, it should be converted back to its most basic form. + + +// Return the new value instruction for a given store. +int HexagonInstrInfo::getDotNewOp(const MachineInstr* MI) const { + int NVOpcode = Hexagon::getNewValueOpcode(MI->getOpcode()); + if (NVOpcode >= 0) // Valid new-value store instruction. 
+ return NVOpcode; + + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown .new type"); + case Hexagon::S4_storerb_ur: + return Hexagon::S4_storerbnew_ur; + + case Hexagon::S2_storerb_pci: + return Hexagon::S2_storerb_pci; + + case Hexagon::S2_storeri_pci: + return Hexagon::S2_storeri_pci; + + case Hexagon::S2_storerh_pci: + return Hexagon::S2_storerh_pci; + + case Hexagon::S2_storerd_pci: + return Hexagon::S2_storerd_pci; + + case Hexagon::S2_storerf_pci: + return Hexagon::S2_storerf_pci; + + case Hexagon::V6_vS32b_ai: + return Hexagon::V6_vS32b_new_ai; + + case Hexagon::V6_vS32b_pi: + return Hexagon::V6_vS32b_new_pi; + + // 128B + case Hexagon::V6_vS32b_ai_128B: + return Hexagon::V6_vS32b_new_ai_128B; + + case Hexagon::V6_vS32b_pi_128B: + return Hexagon::V6_vS32b_new_pi_128B; + } + return 0; +} + +// Returns the opcode to use when converting MI, which is a conditional jump, +// into a conditional instruction which uses the .new value of the predicate. +// We also use branch probabilities to add a hint to the jump. +int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const { + // We assume that block can have at most two successors. + bool taken = false; + const MachineBasicBlock *Src = MI->getParent(); + const MachineOperand *BrTarget = &MI->getOperand(1); + const MachineBasicBlock *Dst = BrTarget->getMBB(); + + const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst); + if (Prediction >= BranchProbability(1,2)) + taken = true; + + switch (MI->getOpcode()) { + case Hexagon::J2_jumpt: + return taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew; + case Hexagon::J2_jumpf: + return taken ? Hexagon::J2_jumpfnewpt : Hexagon::J2_jumpfnew; + + default: + llvm_unreachable("Unexpected jump instruction."); + } +} + + +// Return .new predicate version for an instruction. +int HexagonInstrInfo::getDotNewPredOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const { + int NewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode()); + if (NewOpcode >= 0) // Valid predicate new instruction + return NewOpcode; + + switch (MI->getOpcode()) { + // Condtional Jumps + case Hexagon::J2_jumpt: + case Hexagon::J2_jumpf: + return getDotNewPredJumpOp(MI, MBPI); + + default: + assert(0 && "Unknown .new type"); + } + return 0; +} + + +int HexagonInstrInfo::getDotOldOp(const int opc) const { + int NewOp = opc; + if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form + NewOp = Hexagon::getPredOldOpcode(NewOp); + assert(NewOp >= 0 && + "Couldn't change predicate new instruction to its old form."); + } + + if (isNewValueStore(NewOp)) { // Convert into non-new-value format + NewOp = Hexagon::getNonNVStore(NewOp); + assert(NewOp >= 0 && "Couldn't change new-value store to its old form."); + } + return NewOp; +} + + +// See if instruction could potentially be a duplex candidate. +// If so, return its group. Zero otherwise. +HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( + const MachineInstr *MI) const { + unsigned DstReg, SrcReg, Src1Reg, Src2Reg; + auto &HRI = getRegisterInfo(); + + switch (MI->getOpcode()) { + default: + return HexagonII::HSIG_None; + // + // Group L1: + // + // Rd = memw(Rs+#u4:2) + // Rd = memub(Rs+#u4:0) + case Hexagon::L2_loadri_io: + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + // Special case this one from Group L2. 
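+    // Word loads relative to the stack pointer get the wider #u5:2 offset and
+    // therefore belong to group L2 rather than L1.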
+ // Rd = memw(r29+#u5:2) + if (isIntRegForSubInst(DstReg)) { + if (Hexagon::IntRegsRegClass.contains(SrcReg) && + HRI.getStackRegister() == SrcReg && + MI->getOperand(2).isImm() && + isShiftedUInt<5,2>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + // Rd = memw(Rs+#u4:2) + if (isIntRegForSubInst(SrcReg) && + (MI->getOperand(2).isImm() && + isShiftedUInt<4,2>(MI->getOperand(2).getImm()))) + return HexagonII::HSIG_L1; + } + break; + case Hexagon::L2_loadrub_io: + // Rd = memub(Rs+#u4:0) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && isUInt<4>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L1; + break; + // + // Group L2: + // + // Rd = memh/memuh(Rs+#u3:1) + // Rd = memb(Rs+#u3:0) + // Rd = memw(r29+#u5:2) - Handled above. + // Rdd = memd(r29+#u5:3) + // deallocframe + // [if ([!]p0[.new])] dealloc_return + // [if ([!]p0[.new])] jumpr r31 + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + // Rd = memh/memuh(Rs+#u3:1) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && + isShiftedUInt<3,1>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + break; + case Hexagon::L2_loadrb_io: + // Rd = memb(Rs+#u3:0) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && + isUInt<3>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + break; + case Hexagon::L2_loadrd_io: + // Rdd = memd(r29+#u5:3) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && + Hexagon::IntRegsRegClass.contains(SrcReg) && + HRI.getStackRegister() == SrcReg && + MI->getOperand(2).isImm() && + isShiftedUInt<5,3>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + break; + // dealloc_return is not documented in Hexagon Manual, but marked + // with A_SUBINSN attribute in iset_v4classic.py. + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + case Hexagon::L4_return: + case Hexagon::L2_deallocframe: + return HexagonII::HSIG_L2; + case Hexagon::EH_RETURN_JMPR: + case Hexagon::JMPret : + // jumpr r31 + // Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>. 
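+    // Only a plain return through r31 qualifies as an L2 sub-instruction.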
+ DstReg = MI->getOperand(0).getReg(); + if (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg)) + return HexagonII::HSIG_L2; + break; + case Hexagon::JMPrett: + case Hexagon::JMPretf: + case Hexagon::JMPrettnewpt: + case Hexagon::JMPretfnewpt : + case Hexagon::JMPrettnew : + case Hexagon::JMPretfnew : + DstReg = MI->getOperand(1).getReg(); + SrcReg = MI->getOperand(0).getReg(); + // [if ([!]p0[.new])] jumpr r31 + if ((Hexagon::PredRegsRegClass.contains(SrcReg) && + (Hexagon::P0 == SrcReg)) && + (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg))) + return HexagonII::HSIG_L2; + break; + case Hexagon::L4_return_t : + case Hexagon::L4_return_f : + case Hexagon::L4_return_tnew_pnt : + case Hexagon::L4_return_fnew_pnt : + case Hexagon::L4_return_tnew_pt : + case Hexagon::L4_return_fnew_pt : + // [if ([!]p0[.new])] dealloc_return + SrcReg = MI->getOperand(0).getReg(); + if (Hexagon::PredRegsRegClass.contains(SrcReg) && (Hexagon::P0 == SrcReg)) + return HexagonII::HSIG_L2; + break; + // + // Group S1: + // + // memw(Rs+#u4:2) = Rt + // memb(Rs+#u4:0) = Rt + case Hexagon::S2_storeri_io: + // Special case this one from Group S2. + // memw(r29+#u5:2) = Rt + Src1Reg = MI->getOperand(0).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (Hexagon::IntRegsRegClass.contains(Src1Reg) && + isIntRegForSubInst(Src2Reg) && + HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() && + isShiftedUInt<5,2>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S2; + // memw(Rs+#u4:2) = Rt + if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) && + MI->getOperand(1).isImm() && + isShiftedUInt<4,2>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S1; + break; + case Hexagon::S2_storerb_io: + // memb(Rs+#u4:0) = Rt + Src1Reg = MI->getOperand(0).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) && + MI->getOperand(1).isImm() && isUInt<4>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S1; + break; + // + // Group S2: + // + // memh(Rs+#u3:1) = Rt + // memw(r29+#u5:2) = Rt + // memd(r29+#s6:3) = Rtt + // memw(Rs+#u4:2) = #U1 + // memb(Rs+#u4) = #U1 + // allocframe(#u5:3) + case Hexagon::S2_storerh_io: + // memh(Rs+#u3:1) = Rt + Src1Reg = MI->getOperand(0).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) && + MI->getOperand(1).isImm() && + isShiftedUInt<3,1>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S1; + break; + case Hexagon::S2_storerd_io: + // memd(r29+#s6:3) = Rtt + Src1Reg = MI->getOperand(0).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (isDblRegForSubInst(Src2Reg, HRI) && + Hexagon::IntRegsRegClass.contains(Src1Reg) && + HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() && + isShiftedInt<6,3>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S2; + break; + case Hexagon::S4_storeiri_io: + // memw(Rs+#u4:2) = #U1 + Src1Reg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() && + isShiftedUInt<4,2>(MI->getOperand(1).getImm()) && + MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_S2; + break; + case Hexagon::S4_storeirb_io: + // memb(Rs+#u4) = #U1 + Src1Reg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() && + isUInt<4>(MI->getOperand(1).getImm()) && MI->getOperand(2).isImm() && + MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm())) + 
return HexagonII::HSIG_S2; + break; + case Hexagon::S2_allocframe: + if (MI->getOperand(0).isImm() && + isShiftedUInt<5,3>(MI->getOperand(0).getImm())) + return HexagonII::HSIG_S1; + break; + // + // Group A: + // + // Rx = add(Rx,#s7) + // Rd = Rs + // Rd = #u6 + // Rd = #-1 + // if ([!]P0[.new]) Rd = #0 + // Rd = add(r29,#u6:2) + // Rx = add(Rx,Rs) + // P0 = cmp.eq(Rs,#u2) + // Rdd = combine(#0,Rs) + // Rdd = combine(Rs,#0) + // Rdd = combine(#u2,#U2) + // Rd = add(Rs,#1) + // Rd = add(Rs,#-1) + // Rd = sxth/sxtb/zxtb/zxth(Rs) + // Rd = and(Rs,#1) + case Hexagon::A2_addi: + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg)) { + // Rd = add(r29,#u6:2) + if (Hexagon::IntRegsRegClass.contains(SrcReg) && + HRI.getStackRegister() == SrcReg && MI->getOperand(2).isImm() && + isShiftedUInt<6,2>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_A; + // Rx = add(Rx,#s7) + if ((DstReg == SrcReg) && MI->getOperand(2).isImm() && + isInt<7>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_A; + // Rd = add(Rs,#1) + // Rd = add(Rs,#-1) + if (isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() && + ((MI->getOperand(2).getImm() == 1) || + (MI->getOperand(2).getImm() == -1))) + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_add: + // Rx = add(Rx,Rs) + DstReg = MI->getOperand(0).getReg(); + Src1Reg = MI->getOperand(1).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (isIntRegForSubInst(DstReg) && (DstReg == Src1Reg) && + isIntRegForSubInst(Src2Reg)) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_andir: + // Same as zxtb. + // Rd16=and(Rs16,#255) + // Rd16=and(Rs16,#1) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && + ((MI->getOperand(2).getImm() == 1) || + (MI->getOperand(2).getImm() == 255))) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_tfr: + // Rd = Rs + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg)) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_tfrsi: + // Rd = #u6 + // Do not test for #u6 size since the const is getting extended + // regardless and compound could be formed. 
+ // Rd = #-1 + DstReg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(DstReg)) + return HexagonII::HSIG_A; + break; + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmovenewit: + case Hexagon::C2_cmoveif: + case Hexagon::C2_cmovenewif: + // if ([!]P0[.new]) Rd = #0 + // Actual form: + // %R16<def> = C2_cmovenewit %P0<internal>, 0, %R16<imp-use,undef>; + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && + Hexagon::PredRegsRegClass.contains(SrcReg) && Hexagon::P0 == SrcReg && + MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) + return HexagonII::HSIG_A; + break; + case Hexagon::C2_cmpeqi: + // P0 = cmp.eq(Rs,#u2) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (Hexagon::PredRegsRegClass.contains(DstReg) && + Hexagon::P0 == DstReg && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + // Rdd = combine(#u2,#U2) + DstReg = MI->getOperand(0).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && + ((MI->getOperand(1).isImm() && isUInt<2>(MI->getOperand(1).getImm())) || + (MI->getOperand(1).isGlobal() && + isUInt<2>(MI->getOperand(1).getOffset()))) && + ((MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm())) || + (MI->getOperand(2).isGlobal() && + isUInt<2>(MI->getOperand(2).getOffset())))) + return HexagonII::HSIG_A; + break; + case Hexagon::A4_combineri: + // Rdd = combine(Rs,#0) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) && + ((MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) || + (MI->getOperand(2).isGlobal() && MI->getOperand(2).getOffset() == 0))) + return HexagonII::HSIG_A; + break; + case Hexagon::A4_combineir: + // Rdd = combine(#0,Rs) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(2).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) && + ((MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) || + (MI->getOperand(1).isGlobal() && MI->getOperand(1).getOffset() == 0))) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_sxtb: + case Hexagon::A2_sxth: + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + // Rd = sxth/sxtb/zxtb/zxth(Rs) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg)) + return HexagonII::HSIG_A; + break; + } + + return HexagonII::HSIG_None; +} + + +short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr *MI) const { + return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Real); +} + + +// Return first non-debug instruction in the basic block. +MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB) + const { + for (auto MII = BB->instr_begin(), End = BB->instr_end(); MII != End; MII++) { + MachineInstr *MI = &*MII; + if (MI->isDebugValue()) + continue; + return MI; + } + return nullptr; +} + + +unsigned HexagonInstrInfo::getInstrTimingClassLatency( + const InstrItineraryData *ItinData, const MachineInstr *MI) const { + // Default to one cycle for no itinerary. However, an "empty" itinerary may + // still have a MinLatency property, which getStageLatency checks. + if (!ItinData) + return getInstrLatency(ItinData, MI); + + // Get the latency embedded in the itinerary. 
If we're not using timing class
+  // latencies or if we are using BSB scheduling, then restrict the maximum
+  // latency to 1 (that is, either 0 or 1).
+  if (MI->isTransient())
+    return 0;
+  unsigned Latency = ItinData->getStageLatency(MI->getDesc().getSchedClass());
+  if (!EnableTimingClassLatency ||
+      MI->getParent()->getParent()->getSubtarget<HexagonSubtarget>().
+      useBSBScheduling())
+    if (Latency > 1)
+      Latency = 1;
+  return Latency;
+}
+
+
+// Inverts the predication logic.
+// p -> NotP
+// NotP -> P
+bool HexagonInstrInfo::getInvertedPredSense(
+      SmallVectorImpl<MachineOperand> &Cond) const {
+  if (Cond.empty())
+    return false;
+  unsigned Opc = getInvertedPredicatedOpcode(Cond[0].getImm());
+  Cond[0].setImm(Opc);
+  return true;
+}
+
+
+unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
+  int InvPredOpcode;
+  InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc)
+                                        : Hexagon::getTruePredOpcode(Opc);
+  if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate.
+    return InvPredOpcode;
+
+  llvm_unreachable("Unexpected predicated instruction");
+}
+
+
+// Returns the max value that doesn't need to be extended.
+int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
+  const uint64_t F = MI->getDesc().TSFlags;
+  unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+                    & HexagonII::ExtentSignedMask;
+  unsigned bits =  (F >> HexagonII::ExtentBitsPos)
+                    & HexagonII::ExtentBitsMask;
+
+  if (isSigned) // if value is signed
+    return ~(-1U << (bits - 1));
+  else
+    return ~(-1U << bits);
+}
+
+
+unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr* MI) const {
+  const uint64_t F = MI->getDesc().TSFlags;
+  return (F >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask;
+}
+
+
+// Returns the min value that doesn't need to be extended.
+int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
+  const uint64_t F = MI->getDesc().TSFlags;
+  unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+                    & HexagonII::ExtentSignedMask;
+  unsigned bits =  (F >> HexagonII::ExtentBitsPos)
+                    & HexagonII::ExtentBitsMask;
+
+  if (isSigned) // if value is signed
+    return -1U << (bits - 1);
+  else
+    return 0;
+}
+
+
+// Returns opcode of the non-extended equivalent instruction.
+short HexagonInstrInfo::getNonExtOpcode(const MachineInstr *MI) const {
+  // Check if the instruction has a register form that uses register in place
+  // of the extended operand, if so return that as the non-extended form.
+  short NonExtOpcode = Hexagon::getRegForm(MI->getOpcode());
+  if (NonExtOpcode >= 0)
+    return NonExtOpcode;
+
+  if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+    // Check addressing mode and retrieve non-ext equivalent instruction.
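+    // (Same address-mode cases as hasNonExtEquivalent above.)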
+ switch (getAddrMode(MI)) { + case HexagonII::Absolute : + return Hexagon::getBaseWithImmOffset(MI->getOpcode()); + case HexagonII::BaseImmOffset : + return Hexagon::getBaseWithRegOffset(MI->getOpcode()); + case HexagonII::BaseLongOffset: + return Hexagon::getRegShlForm(MI->getOpcode()); + + default: + return -1; + } + } + return -1; +} + + +bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond, + unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const { + if (Cond.empty()) + return false; + assert(Cond.size() == 2); + if (isNewValueJump(Cond[0].getImm()) || Cond[1].isMBB()) { + DEBUG(dbgs() << "No predregs for new-value jumps/endloop"); + return false; + } + PredReg = Cond[1].getReg(); + PredRegPos = 1; + // See IfConversion.cpp why we add RegState::Implicit | RegState::Undef + PredRegFlags = 0; + if (Cond[1].isImplicit()) + PredRegFlags = RegState::Implicit; + if (Cond[1].isUndef()) + PredRegFlags |= RegState::Undef; + return true; +} + + +short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr *MI) const { + return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Pseudo); +} + + +short HexagonInstrInfo::getRegForm(const MachineInstr *MI) const { + return Hexagon::getRegForm(MI->getOpcode()); +} + + +// Return the number of bytes required to encode the instruction. +// Hexagon instructions are fixed length, 4 bytes, unless they +// use a constant extender, which requires another 4 bytes. +// For debug instructions and prolog labels, return 0. +unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const { + if (MI->isDebugValue() || MI->isPosition()) + return 0; + + unsigned Size = MI->getDesc().getSize(); + if (!Size) + // Assume the default insn size in case it cannot be determined + // for whatever reason. + Size = HEXAGON_INSTR_SIZE; + + if (isConstExtended(MI) || isExtended(MI)) + Size += HEXAGON_INSTR_SIZE; + + // Try and compute number of instructions in asm. + if (BranchRelaxAsmLarge && MI->getOpcode() == Hexagon::INLINEASM) { + const MachineBasicBlock &MBB = *MI->getParent(); + const MachineFunction *MF = MBB.getParent(); + const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); + + // Count the number of register definitions to find the asm string. + unsigned NumDefs = 0; + for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); + ++NumDefs) + assert(NumDefs != MI->getNumOperands()-2 && "No asm string?"); + + assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + // Disassemble the AsmStr and approximate number of instructions. + const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + Size = getInlineAsmLength(AsmStr, *MAI); + } + + return Size; +} + + +uint64_t HexagonInstrInfo::getType(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::TypePos) & HexagonII::TypeMask; +} + + +unsigned HexagonInstrInfo::getUnits(const MachineInstr* MI) const { + const TargetSubtargetInfo &ST = MI->getParent()->getParent()->getSubtarget(); + const InstrItineraryData &II = *ST.getInstrItineraryData(); + const InstrStage &IS = *II.beginStage(MI->getDesc().getSchedClass()); + + return IS.getUnits(); +} + + +unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask; +} + + +// Calculate size of the basic block without debug instructions. 
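+// Debug values occupy no encoding space (see getSize above), so they are
+// excluded from these counts.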
+unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const { + return nonDbgMICount(BB->instr_begin(), BB->instr_end()); +} + + +unsigned HexagonInstrInfo::nonDbgBundleSize( + MachineBasicBlock::const_iterator BundleHead) const { + assert(BundleHead->isBundle() && "Not a bundle header"); + auto MII = BundleHead.getInstrIterator(); + // Skip the bundle header. + return nonDbgMICount(++MII, getBundleEnd(BundleHead)); +} + + +/// immediateExtend - Changes the instruction in place to one using an immediate +/// extender. +void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const { + assert((isExtendable(MI)||isConstExtended(MI)) && + "Instruction must be extendable"); + // Find which operand is extendable. + short ExtOpNum = getCExtOpNum(MI); + MachineOperand &MO = MI->getOperand(ExtOpNum); + // This needs to be something we understand. + assert((MO.isMBB() || MO.isImm()) && + "Branch with unknown extendable field type"); + // Mark given operand as extended. + MO.addTargetFlag(HexagonII::HMOTF_ConstExtended); +} + + +bool HexagonInstrInfo::invertAndChangeJumpTarget( + MachineInstr* MI, MachineBasicBlock* NewTarget) const { + DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#" + << NewTarget->getNumber(); MI->dump();); + assert(MI->isBranch()); + unsigned NewOpcode = getInvertedPredicatedOpcode(MI->getOpcode()); + int TargetPos = MI->getNumOperands() - 1; + // In general branch target is the last operand, + // but some implicit defs added at the end might change it. + while ((TargetPos > -1) && !MI->getOperand(TargetPos).isMBB()) + --TargetPos; + assert((TargetPos >= 0) && MI->getOperand(TargetPos).isMBB()); + MI->getOperand(TargetPos).setMBB(NewTarget); + if (EnableBranchPrediction && isPredicatedNew(MI)) { + NewOpcode = reversePrediction(NewOpcode); + } + MI->setDesc(get(NewOpcode)); + return true; +} + + +void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { + /* +++ The code below is used to generate complete set of Hexagon Insn +++ */ + MachineFunction::iterator A = MF.begin(); + MachineBasicBlock &B = *A; + MachineBasicBlock::iterator I = B.begin(); + MachineInstr *MI = &*I; + DebugLoc DL = MI->getDebugLoc(); + MachineInstr *NewMI; + + for (unsigned insn = TargetOpcode::GENERIC_OP_END+1; + insn < Hexagon::INSTRUCTION_LIST_END; ++insn) { + NewMI = BuildMI(B, MI, DL, get(insn)); + DEBUG(dbgs() << "\n" << getName(NewMI->getOpcode()) << + " Class: " << NewMI->getDesc().getSchedClass()); + NewMI->eraseFromParent(); + } + /* --- The code above is used to generate complete set of Hexagon Insn --- */ +} + + +// inverts the predication logic. +// p -> NotP +// NotP -> P +bool HexagonInstrInfo::reversePredSense(MachineInstr* MI) const { + DEBUG(dbgs() << "\nTrying to reverse pred. sense of:"; MI->dump()); + MI->setDesc(get(getInvertedPredicatedOpcode(MI->getOpcode()))); + return true; +} + + +// Reverse the branch prediction. +unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const { + int PredRevOpcode = -1; + if (isPredictedTaken(Opcode)) + PredRevOpcode = Hexagon::notTakenBranchPrediction(Opcode); + else + PredRevOpcode = Hexagon::takenBranchPrediction(Opcode); + assert(PredRevOpcode > 0); + return PredRevOpcode; +} + + +// TODO: Add more rigorous validation. 
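+// For now, accept an empty condition or one whose first operand is the opcode
+// immediate accompanied by at least one more operand.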
+bool HexagonInstrInfo::validateBranchCond(const ArrayRef<MachineOperand> &Cond) + const { + return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1)); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h new file mode 100644 index 0000000..9530d9f --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -0,0 +1,402 @@ +//===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H + +#include "HexagonRegisterInfo.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "HexagonGenInstrInfo.inc" + +namespace llvm { + +struct EVT; +class HexagonSubtarget; + +class HexagonInstrInfo : public HexagonGenInstrInfo { + virtual void anchor(); + const HexagonRegisterInfo RI; + +public: + explicit HexagonInstrInfo(HexagonSubtarget &ST); + + /// TargetInstrInfo overrides. + /// + + /// If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + + /// If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + + /// Analyze the branching code at the end of MBB, returning + /// true if it cannot be understood (e.g. it's a switch dispatch or isn't + /// implemented for a target). Upon success, this returns false and returns + /// with the following information in various cases: + /// + /// 1. If this block ends with no branches (it just falls through to its succ) + /// just return false, leaving TBB/FBB null. + /// 2. If this block ends with only an unconditional branch, it sets TBB to be + /// the destination block. + /// 3. If this block ends with a conditional branch and it falls through to a + /// successor block, it sets TBB to be the branch destination block and a + /// list of operands that evaluate the condition. These operands can be + /// passed to other TargetInstrInfo methods to create new branches. + /// 4. If this block ends with a conditional branch followed by an + /// unconditional branch, it returns the 'true' destination in TBB, the + /// 'false' destination in FBB, and a list of operands that evaluate the + /// condition. 
These operands can be passed to other TargetInstrInfo + /// methods to create new branches. + /// + /// Note that RemoveBranch and InsertBranch must be implemented to support + /// cases where this method returns success. + /// + /// If AllowModify is true, then this routine is allowed to modify the basic + /// block (e.g. delete instructions after the unconditional branch). + /// + bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const override; + + /// Remove the branching code at the end of the specific MBB. + /// This is only invoked in cases where AnalyzeBranch returns success. It + /// returns the number of instructions that were removed. + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + + /// Insert branch code into the end of the specified MachineBasicBlock. + /// The operands to this method are the same as those + /// returned by AnalyzeBranch. This is only invoked in cases where + /// AnalyzeBranch returns success. It returns the number of instructions + /// inserted. + /// + /// It is also invoked by tail merging to add unconditional branches in + /// cases where AnalyzeBranch doesn't apply because there was no original + /// branch to analyze. At least this much must be implemented, else tail + /// merging needs to be disabled. + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, + DebugLoc DL) const override; + + /// Return true if it's profitable to predicate + /// instructions with accumulated instruction latency of "NumCycles" + /// of the specified basic block, where the probability of the instructions + /// being executed is given by Probability, and Confidence is a measure + /// of our confidence that it will be properly predicted. + bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + unsigned ExtraPredCycles, + BranchProbability Probability) const override; + + /// Second variant of isProfitableToIfCvt. This one + /// checks for the case where two basic blocks from true and false path + /// of a if-then-else (diamond) are predicated on mutally exclusive + /// predicates, where the probability of the true path being taken is given + /// by Probability, and Confidence is a measure of our confidence that it + /// will be properly predicted. + bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumTCycles, unsigned ExtraTCycles, + MachineBasicBlock &FMBB, + unsigned NumFCycles, unsigned ExtraFCycles, + BranchProbability Probability) const override; + + /// Return true if it's profitable for if-converter to duplicate instructions + /// of specified accumulated instruction latencies in the specified MBB to + /// enable if-conversion. + /// The probability of the instructions being executed is given by + /// Probability, and Confidence is a measure of our confidence that it + /// will be properly predicted. + bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + BranchProbability Probability) const override; + + /// Emit instructions to copy a pair of physical registers. + /// + /// This function should support copies within any legal register class as + /// well as any cross-class copies created during instruction selection. + /// + /// The source and destination registers may overlap, which may require a + /// careful implementation when multiple copy instructions are required for + /// large registers. 
See for example the ARM target. + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; + + /// Store the specified register of the given register class to the specified + /// stack frame index. The store instruction is to be added to the given + /// machine basic block before the specified machine instruction. If isKill + /// is true, the register operand is the last use and must be marked kill. + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + /// Load the specified register of the given register class from the specified + /// stack frame index. The load instruction is to be added to the given + /// machine basic block before the specified machine instruction. + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + /// This function is called for all pseudo instructions + /// that remain after register allocation. Many pseudo instructions are + /// created to help register allocation. This is the place to convert them + /// into real instructions. The target can edit MI in place, or it can insert + /// new instructions and erase MI. The function should return true if + /// anything was changed. + bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; + + /// Reverses the branch condition of the specified condition list, + /// returning false on success and true if it cannot be reversed. + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) + const override; + + /// Insert a noop into the instruction stream at the specified point. + void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const override; + + /// Returns true if the instruction is already predicated. + bool isPredicated(const MachineInstr *MI) const override; + + /// Convert the instruction into a predicated instruction. + /// It returns true if the operation was successful. + bool PredicateInstruction(MachineInstr *MI, + ArrayRef<MachineOperand> Cond) const override; + + /// Returns true if the first specified predicate + /// subsumes the second, e.g. GE subsumes GT. + bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1, + ArrayRef<MachineOperand> Pred2) const override; + + /// If the specified instruction defines any predicate + /// or condition code register(s) used for predication, returns true as well + /// as the definition predicate(s) by reference. + bool DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const override; + + /// Return true if the specified instruction can be predicated. + /// By default, this returns true for every instruction with a + /// PredicateOperand. + bool isPredicable(MachineInstr *MI) const override; + + /// Test if the given instruction should be considered a scheduling boundary. + /// This primarily includes labels and terminators. + bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const override; + + /// Measure the specified inline asm to determine an approximation of its + /// length. 
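+  /// Used by getSize() to estimate how much space an INLINEASM occupies.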
+ unsigned getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const override; + + /// Allocate and return a hazard recognizer to use for this target when + /// scheduling the machine instructions after register allocation. + ScheduleHazardRecognizer* + CreateTargetPostRAHazardRecognizer(const InstrItineraryData*, + const ScheduleDAG *DAG) const override; + + /// For a comparison instruction, return the source registers + /// in SrcReg and SrcReg2 if having two register operands, and the value it + /// compares against in CmpValue. Return true if the comparison instruction + /// can be analyzed. + bool analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const override; + + /// Compute the instruction latency of a given instruction. + /// If the instruction has higher cost when predicated, it's returned via + /// PredCost. + unsigned getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost = 0) const override; + + /// Create machine specific model for scheduling. + DFAPacketizer * + CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override; + + // Sometimes, it is possible for the target + // to tell, even without aliasing information, that two MIs access different + // memory addresses. This function returns true if two MIs access different + // memory addresses and false otherwise. + bool areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb, + AliasAnalysis *AA = nullptr) + const override; + + + /// HexagonInstrInfo specifics. + /// + + const HexagonRegisterInfo &getRegisterInfo() const { return RI; } + + unsigned createVR(MachineFunction* MF, MVT VT) const; + + bool isAbsoluteSet(const MachineInstr* MI) const; + bool isAccumulator(const MachineInstr *MI) const; + bool isComplex(const MachineInstr *MI) const; + bool isCompoundBranchInstr(const MachineInstr *MI) const; + bool isCondInst(const MachineInstr *MI) const; + bool isConditionalALU32 (const MachineInstr* MI) const; + bool isConditionalLoad(const MachineInstr* MI) const; + bool isConditionalStore(const MachineInstr* MI) const; + bool isConditionalTransfer(const MachineInstr* MI) const; + bool isConstExtended(const MachineInstr *MI) const; + bool isDeallocRet(const MachineInstr *MI) const; + bool isDependent(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const; + bool isDotCurInst(const MachineInstr* MI) const; + bool isDotNewInst(const MachineInstr* MI) const; + bool isDuplexPair(const MachineInstr *MIa, const MachineInstr *MIb) const; + bool isEarlySourceInstr(const MachineInstr *MI) const; + bool isEndLoopN(unsigned Opcode) const; + bool isExpr(unsigned OpType) const; + bool isExtendable(const MachineInstr* MI) const; + bool isExtended(const MachineInstr* MI) const; + bool isFloat(const MachineInstr *MI) const; + bool isHVXMemWithAIndirect(const MachineInstr *I, + const MachineInstr *J) const; + bool isIndirectCall(const MachineInstr *MI) const; + bool isIndirectL4Return(const MachineInstr *MI) const; + bool isJumpR(const MachineInstr *MI) const; + bool isJumpWithinBranchRange(const MachineInstr *MI, unsigned offset) const; + bool isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI, + const MachineInstr *ESMI) const; + bool isLateResultInstr(const MachineInstr *MI) const; + bool isLateSourceInstr(const MachineInstr *MI) const; + bool isLoopN(const MachineInstr *MI) const; + bool isMemOp(const MachineInstr *MI) const; + bool isNewValue(const MachineInstr* MI) const; + bool 
isNewValue(unsigned Opcode) const; + bool isNewValueInst(const MachineInstr* MI) const; + bool isNewValueJump(const MachineInstr* MI) const; + bool isNewValueJump(unsigned Opcode) const; + bool isNewValueStore(const MachineInstr* MI) const; + bool isNewValueStore(unsigned Opcode) const; + bool isOperandExtended(const MachineInstr *MI, unsigned OperandNum) const; + bool isPostIncrement(const MachineInstr* MI) const; + bool isPredicatedNew(const MachineInstr *MI) const; + bool isPredicatedNew(unsigned Opcode) const; + bool isPredicatedTrue(const MachineInstr *MI) const; + bool isPredicatedTrue(unsigned Opcode) const; + bool isPredicated(unsigned Opcode) const; + bool isPredicateLate(unsigned Opcode) const; + bool isPredictedTaken(unsigned Opcode) const; + bool isSaveCalleeSavedRegsCall(const MachineInstr *MI) const; + bool isSolo(const MachineInstr* MI) const; + bool isSpillPredRegOp(const MachineInstr *MI) const; + bool isTC1(const MachineInstr *MI) const; + bool isTC2(const MachineInstr *MI) const; + bool isTC2Early(const MachineInstr *MI) const; + bool isTC4x(const MachineInstr *MI) const; + bool isV60VectorInstruction(const MachineInstr *MI) const; + bool isValidAutoIncImm(const EVT VT, const int Offset) const; + bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const; + bool isVecAcc(const MachineInstr *MI) const; + bool isVecALU(const MachineInstr *MI) const; + bool isVecUsableNextPacket(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const; + + + bool canExecuteInBundle(const MachineInstr *First, + const MachineInstr *Second) const; + bool hasEHLabel(const MachineBasicBlock *B) const; + bool hasNonExtEquivalent(const MachineInstr *MI) const; + bool hasPseudoInstrPair(const MachineInstr *MI) const; + bool hasUncondBranch(const MachineBasicBlock *B) const; + bool mayBeCurLoad(const MachineInstr* MI) const; + bool mayBeNewStore(const MachineInstr* MI) const; + bool producesStall(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const; + bool producesStall(const MachineInstr *MI, + MachineBasicBlock::const_instr_iterator MII) const; + bool predCanBeUsedAsDotNew(const MachineInstr *MI, unsigned PredReg) const; + bool PredOpcodeHasJMP_c(unsigned Opcode) const; + bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const; + + + unsigned getAddrMode(const MachineInstr* MI) const; + unsigned getBaseAndOffset(const MachineInstr *MI, int &Offset, + unsigned &AccessSize) const; + bool getBaseAndOffsetPosition(const MachineInstr *MI, unsigned &BasePos, + unsigned &OffsetPos) const; + SmallVector<MachineInstr*,2> getBranchingInstrs(MachineBasicBlock& MBB) const; + unsigned getCExtOpNum(const MachineInstr *MI) const; + HexagonII::CompoundGroup + getCompoundCandidateGroup(const MachineInstr *MI) const; + unsigned getCompoundOpcode(const MachineInstr *GA, + const MachineInstr *GB) const; + int getCondOpcode(int Opc, bool sense) const; + int getDotCurOp(const MachineInstr* MI) const; + int getDotNewOp(const MachineInstr* MI) const; + int getDotNewPredJumpOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const; + int getDotNewPredOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const; + int getDotOldOp(const int opc) const; + HexagonII::SubInstructionGroup getDuplexCandidateGroup(const MachineInstr *MI) + const; + short getEquivalentHWInstr(const MachineInstr *MI) const; + MachineInstr *getFirstNonDbgInst(MachineBasicBlock *BB) const; + unsigned getInstrTimingClassLatency(const InstrItineraryData *ItinData, + const 
MachineInstr *MI) const; + bool getInvertedPredSense(SmallVectorImpl<MachineOperand> &Cond) const; + unsigned getInvertedPredicatedOpcode(const int Opc) const; + int getMaxValue(const MachineInstr *MI) const; + unsigned getMemAccessSize(const MachineInstr* MI) const; + int getMinValue(const MachineInstr *MI) const; + short getNonExtOpcode(const MachineInstr *MI) const; + bool getPredReg(ArrayRef<MachineOperand> Cond, unsigned &PredReg, + unsigned &PredRegPos, unsigned &PredRegFlags) const; + short getPseudoInstrPair(const MachineInstr *MI) const; + short getRegForm(const MachineInstr *MI) const; + unsigned getSize(const MachineInstr *MI) const; + uint64_t getType(const MachineInstr* MI) const; + unsigned getUnits(const MachineInstr* MI) const; + unsigned getValidSubTargets(const unsigned Opcode) const; + + + /// getInstrTimingClassLatency - Compute the instruction latency of a given + /// instruction using Timing Class information, if available. + unsigned nonDbgBBSize(const MachineBasicBlock *BB) const; + unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const; + + + void immediateExtend(MachineInstr *MI) const; + bool invertAndChangeJumpTarget(MachineInstr* MI, + MachineBasicBlock* NewTarget) const; + void genAllInsnTimingClasses(MachineFunction &MF) const; + bool reversePredSense(MachineInstr* MI) const; + unsigned reversePrediction(unsigned Opcode) const; + bool validateBranchCond(const ArrayRef<MachineOperand> &Cond) const; +}; + +} + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td new file mode 100644 index 0000000..5cfeba7 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td @@ -0,0 +1,5809 @@ +//==- HexagonInstrInfo.td - Target Description for Hexagon -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +include "HexagonInstrFormats.td" +include "HexagonOperands.td" +include "HexagonInstrEnc.td" +// Pattern fragment that combines the value type and the register class +// into a single parameter. +// The pat frags in the definitions below need to have a named register, +// otherwise i32 will be assumed regardless of the register class. The +// name of the register does not matter. +def I1 : PatLeaf<(i1 PredRegs:$R)>; +def I32 : PatLeaf<(i32 IntRegs:$R)>; +def I64 : PatLeaf<(i64 DoubleRegs:$R)>; +def F32 : PatLeaf<(f32 IntRegs:$R)>; +def F64 : PatLeaf<(f64 DoubleRegs:$R)>; + +// Pattern fragments to extract the low and high subregisters from a +// 64-bit value. +def LoReg: OutPatFrag<(ops node:$Rs), + (EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>; +def HiReg: OutPatFrag<(ops node:$Rs), + (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>; + +// SDNode for converting immediate C to C-1. +def DEC_CONST_SIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformSToSM1Imm(imm, SDLoc(N)); +}]>; + +// SDNode for converting immediate C to C-2. +def DEC2_CONST_SIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-2 as an SDNode. 
+ int32_t imm = N->getSExtValue(); + return XformSToSM2Imm(imm, SDLoc(N)); +}]>; + +// SDNode for converting immediate C to C-3. +def DEC3_CONST_SIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-3 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformSToSM3Imm(imm, SDLoc(N)); +}]>; + +// SDNode for converting immediate C to C-1. +def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + uint32_t imm = N->getZExtValue(); + return XformUToUM1Imm(imm, SDLoc(N)); +}]>; + +//===----------------------------------------------------------------------===// +// Compare +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, isCompare = 1, InputType = "imm", isExtendable = 1, + opExtendable = 2 in +class T_CMP <string mnemonic, bits<2> MajOp, bit isNot, Operand ImmOp> + : ALU32Inst <(outs PredRegs:$dst), + (ins IntRegs:$src1, ImmOp:$src2), + "$dst = "#!if(isNot, "!","")#mnemonic#"($src1, #$src2)", + [], "",ALU32_2op_tc_2early_SLOT0123 >, ImmRegRel { + bits<2> dst; + bits<5> src1; + bits<10> src2; + let CextOpcode = mnemonic; + let opExtentBits = !if(!eq(mnemonic, "cmp.gtu"), 9, 10); + let isExtentSigned = !if(!eq(mnemonic, "cmp.gtu"), 0, 1); + + let IClass = 0b0111; + + let Inst{27-24} = 0b0101; + let Inst{23-22} = MajOp; + let Inst{21} = !if(!eq(mnemonic, "cmp.gtu"), 0, src2{9}); + let Inst{20-16} = src1; + let Inst{13-5} = src2{8-0}; + let Inst{4} = isNot; + let Inst{3-2} = 0b00; + let Inst{1-0} = dst; + } + +def C2_cmpeqi : T_CMP <"cmp.eq", 0b00, 0, s10Ext>; +def C2_cmpgti : T_CMP <"cmp.gt", 0b01, 0, s10Ext>; +def C2_cmpgtui : T_CMP <"cmp.gtu", 0b10, 0, u9Ext>; + +class T_CMP_pat <InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred> + : Pat<(i1 (OpNode (i32 IntRegs:$src1), ImmPred:$src2)), + (MI IntRegs:$src1, ImmPred:$src2)>; + +def : T_CMP_pat <C2_cmpeqi, seteq, s10ImmPred>; +def : T_CMP_pat <C2_cmpgti, setgt, s10ImmPred>; +def : T_CMP_pat <C2_cmpgtui, setugt, u9ImmPred>; + +//===----------------------------------------------------------------------===// +// ALU32/ALU + +//===----------------------------------------------------------------------===// +// Add. 
+ +def SDT_Int32Leaf : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; +def SDT_Int32Unary : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, + [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; + +def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; +def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>; + +let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in +class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev, + bit IsComm> + : ALU32_rr<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = "#mnemonic#"($Rs, $Rt)", + [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel, PredRel { + let isCommutable = IsComm; + let BaseOpcode = mnemonic#_rr; + let CextOpcode = mnemonic; + + bits<5> Rs; + bits<5> Rt; + bits<5> Rd; + + let IClass = 0b1111; + let Inst{27} = 0b0; + let Inst{26-24} = MajOp; + let Inst{23-21} = MinOp; + let Inst{20-16} = !if(OpsRev,Rt,Rs); + let Inst{12-8} = !if(OpsRev,Rs,Rt); + let Inst{4-0} = Rd; +} + +let hasSideEffects = 0, hasNewValue = 1 in +class T_ALU32_3op_pred<string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit OpsRev, bit PredNot, bit PredNew> + : ALU32_rr<(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt), + "if ("#!if(PredNot,"!","")#"$Pu"#!if(PredNew,".new","")#") "# + "$Rd = "#mnemonic#"($Rs, $Rt)", + [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel, PredNewRel { + let isPredicated = 1; + let isPredicatedFalse = PredNot; + let isPredicatedNew = PredNew; + let BaseOpcode = mnemonic#_rr; + let CextOpcode = mnemonic; + + bits<2> Pu; + bits<5> Rs; + bits<5> Rt; + bits<5> Rd; + + let IClass = 0b1111; + let Inst{27} = 0b1; + let Inst{26-24} = MajOp; + let Inst{23-21} = MinOp; + let Inst{20-16} = !if(OpsRev,Rt,Rs); + let Inst{13} = PredNew; + let Inst{12-8} = !if(OpsRev,Rs,Rt); + let Inst{7} = PredNot; + let Inst{6-5} = Pu; + let Inst{4-0} = Rd; +} + +class T_ALU32_combineh<string Op1, string Op2, bits<3> MajOp, bits<3> MinOp, + bit OpsRev> + : T_ALU32_3op<"", MajOp, MinOp, OpsRev, 0> { + let AsmString = "$Rd = combine($Rs"#Op1#", $Rt"#Op2#")"; +} + +def A2_combine_hh : T_ALU32_combineh<".h", ".h", 0b011, 0b100, 1>; +def A2_combine_hl : T_ALU32_combineh<".h", ".l", 0b011, 0b101, 1>; +def A2_combine_lh : T_ALU32_combineh<".l", ".h", 0b011, 0b110, 1>; +def A2_combine_ll : T_ALU32_combineh<".l", ".l", 0b011, 0b111, 1>; + +class T_ALU32_3op_sfx<string mnemonic, string suffix, bits<3> MajOp, + bits<3> MinOp, bit OpsRev, bit IsComm> + : T_ALU32_3op<"", MajOp, MinOp, OpsRev, IsComm> { + let AsmString = "$Rd = "#mnemonic#"($Rs, $Rt)"#suffix; +} + +def A2_svaddh : T_ALU32_3op<"vaddh", 0b110, 0b000, 0, 1>; +def A2_svsubh : T_ALU32_3op<"vsubh", 0b110, 0b100, 1, 0>; + +let Defs = [USR_OVF], Itinerary = ALU32_3op_tc_2_SLOT0123 in { + def A2_svaddhs : T_ALU32_3op_sfx<"vaddh", ":sat", 0b110, 0b001, 0, 1>; + def A2_addsat : T_ALU32_3op_sfx<"add", ":sat", 0b110, 0b010, 0, 1>; + def A2_svadduhs : T_ALU32_3op_sfx<"vadduh", ":sat", 0b110, 0b011, 0, 1>; + def A2_svsubhs : T_ALU32_3op_sfx<"vsubh", ":sat", 0b110, 0b101, 1, 0>; + def A2_subsat : T_ALU32_3op_sfx<"sub", ":sat", 0b110, 0b110, 1, 0>; + def A2_svsubuhs : T_ALU32_3op_sfx<"vsubuh", ":sat", 0b110, 0b111, 1, 0>; +} + +let Itinerary = ALU32_3op_tc_2_SLOT0123 in +def A2_svavghs : T_ALU32_3op_sfx<"vavgh", ":rnd", 0b111, 0b001, 0, 1>; + +def A2_svavgh : T_ALU32_3op<"vavgh", 0b111, 0b000, 0, 1>; +def A2_svnavgh : T_ALU32_3op<"vnavgh", 0b111, 0b011, 1, 0>; + +multiclass T_ALU32_3op_p<string mnemonic, 
                             bits<3> MajOp, bits<3> MinOp,
+                            bit OpsRev> {
+  def t    : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 0>;
+  def f    : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 1, 0>;
+  def tnew : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 1>;
+  def fnew : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 1, 1>;
+}
+
+multiclass T_ALU32_3op_A2<string mnemonic, bits<3> MajOp, bits<3> MinOp,
+                          bit OpsRev, bit IsComm> {
+  let isPredicable = 1 in
+  def  A2_#NAME  : T_ALU32_3op <mnemonic, MajOp, MinOp, OpsRev, IsComm>;
+  defm A2_p#NAME : T_ALU32_3op_p<mnemonic, MajOp, MinOp, OpsRev>;
+}
+
+defm add : T_ALU32_3op_A2<"add", 0b011, 0b000, 0, 1>;
+defm and : T_ALU32_3op_A2<"and", 0b001, 0b000, 0, 1>;
+defm or  : T_ALU32_3op_A2<"or",  0b001, 0b001, 0, 1>;
+defm sub : T_ALU32_3op_A2<"sub", 0b011, 0b001, 1, 0>;
+defm xor : T_ALU32_3op_A2<"xor", 0b001, 0b011, 0, 1>;
+
+// Pats for instruction selection.
+class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT>
+  : Pat<(ResT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
+        (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>;
+
+def: BinOp32_pat<add, A2_add, i32>;
+def: BinOp32_pat<and, A2_and, i32>;
+def: BinOp32_pat<or,  A2_or,  i32>;
+def: BinOp32_pat<sub, A2_sub, i32>;
+def: BinOp32_pat<xor, A2_xor, i32>;
+
+// A few special cases producing register pairs:
+let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in {
+  def S2_packhl : T_ALU32_3op <"packhl", 0b101, 0b100, 0, 0>;
+
+  let isPredicable = 1 in
+  def A2_combinew : T_ALU32_3op <"combine", 0b101, 0b000, 0, 0>;
+
+  // Conditional combinew uses "newt/f" instead of "t/fnew".
+  def C2_ccombinewt    : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 0, 0>;
+  def C2_ccombinewf    : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 0>;
+  def C2_ccombinewnewt : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 0, 1>;
+  def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>;
+}
+
+def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>;
+def: BinOp32_pat<HexagonPACKHL,  S2_packhl,   i64>;
+
+let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in
+class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm>
+  : ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
+             "$Pd = "#mnemonic#"($Rs, $Rt)",
+             [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel {
+  let CextOpcode = mnemonic;
+  let isCommutable = IsComm;
+  bits<5> Rs;
+  bits<5> Rt;
+  bits<2> Pd;
+
+  let IClass = 0b1111;
+  let Inst{27-24} = 0b0010;
+  let Inst{22-21} = MinOp;
+  let Inst{20-16} = Rs;
+  let Inst{12-8} = Rt;
+  let Inst{4} = IsNeg;
+  let Inst{3-2} = 0b00;
+  let Inst{1-0} = Pd;
+}
+
+let Itinerary = ALU32_3op_tc_2early_SLOT0123 in {
+  def C2_cmpeq  : T_ALU32_3op_cmp< "cmp.eq",  0b00, 0, 1>;
+  def C2_cmpgt  : T_ALU32_3op_cmp< "cmp.gt",  0b10, 0, 0>;
+  def C2_cmpgtu : T_ALU32_3op_cmp< "cmp.gtu", 0b11, 0, 0>;
+}
+
+// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones
+// that reverse the order of the operands.
+class RevCmp<PatFrag F> : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>;
+
+// Pats for compares. They use PatFrags as operands, not SDNodes,
+// since seteq/setgt/etc. are defined as PatFrags.
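+// Editorial note (not part of the original source): to illustrate the
+// reversed-operand trick, the def below
+//   def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>;
+// matches a node (setlt a, b) -- RevCmp lists the operands of setlt in the
+// opposite order -- and selects it to C2_cmpgt(b, a), so "a < b" is computed
+// by the existing "greater than" compare with its operands swapped.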
+class T_cmp32_rr_pat<InstHexagon MI, PatFrag Op, ValueType VT> + : Pat<(VT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt))>; + +def: T_cmp32_rr_pat<C2_cmpeq, seteq, i1>; +def: T_cmp32_rr_pat<C2_cmpgt, setgt, i1>; +def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>; + +def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>; +def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>; + +let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1 in +def C2_mux: ALU32_rr<(outs IntRegs:$Rd), + (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt), + "$Rd = mux($Pu, $Rs, $Rt)", [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel { + bits<5> Rd; + bits<2> Pu; + bits<5> Rs; + bits<5> Rt; + + let CextOpcode = "mux"; + let InputType = "reg"; + let hasSideEffects = 0; + let IClass = 0b1111; + + let Inst{27-24} = 0b0100; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{6-5} = Pu; + let Inst{4-0} = Rd; +} + +def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), + (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>; + +// Combines the two immediates into a double register. +// Increase complexity to make it greater than any complexity of a combine +// that involves a register. + +let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1, + isExtentSigned = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 1, + AddedComplexity = 75 in +def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8), + "$Rdd = combine(#$s8, #$S8)", + [(set (i64 DoubleRegs:$Rdd), + (i64 (HexagonCOMBINE(i32 s32ImmPred:$s8), (i32 s8ImmPred:$S8))))]> { + bits<5> Rdd; + bits<8> s8; + bits<8> S8; + + let IClass = 0b0111; + let Inst{27-23} = 0b11000; + let Inst{22-16} = S8{7-1}; + let Inst{13} = S8{0}; + let Inst{12-5} = s8; + let Inst{4-0} = Rdd; + } + +//===----------------------------------------------------------------------===// +// Template class for predicated ADD of a reg and an Immediate value. +//===----------------------------------------------------------------------===// +let hasNewValue = 1, hasSideEffects = 0 in +class T_Addri_Pred <bit PredNot, bit PredNew> + : ALU32_ri <(outs IntRegs:$Rd), + (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8), + !if(PredNot, "if (!$Pu", "if ($Pu")#!if(PredNew,".new) $Rd = ", + ") $Rd = ")#"add($Rs, #$s8)"> { + bits<5> Rd; + bits<2> Pu; + bits<5> Rs; + bits<8> s8; + + let isPredicatedNew = PredNew; + let IClass = 0b0111; + + let Inst{27-24} = 0b0100; + let Inst{23} = PredNot; + let Inst{22-21} = Pu; + let Inst{20-16} = Rs; + let Inst{13} = PredNew; + let Inst{12-5} = s8; + let Inst{4-0} = Rd; + } + +//===----------------------------------------------------------------------===// +// A2_addi: Add a signed immediate to a register. +//===----------------------------------------------------------------------===// +let hasNewValue = 1, hasSideEffects = 0 in +class T_Addri <Operand immOp> + : ALU32_ri <(outs IntRegs:$Rd), + (ins IntRegs:$Rs, immOp:$s16), + "$Rd = add($Rs, #$s16)", [], "", ALU32_ADDI_tc_1_SLOT0123> { + bits<5> Rd; + bits<5> Rs; + bits<16> s16; + + let IClass = 0b1011; + + let Inst{27-21} = s16{15-9}; + let Inst{20-16} = Rs; + let Inst{13-5} = s16{8-0}; + let Inst{4-0} = Rd; + } + +//===----------------------------------------------------------------------===// +// Multiclass for ADD of a register and an immediate value. 
+//===----------------------------------------------------------------------===// +multiclass Addri_Pred<string mnemonic, bit PredNot> { + let isPredicatedFalse = PredNot in { + def NAME : T_Addri_Pred<PredNot, 0>; + // Predicate new + def NAME#new : T_Addri_Pred<PredNot, 1>; + } +} + +let isExtendable = 1, isExtentSigned = 1, InputType = "imm" in +multiclass Addri_base<string mnemonic, SDNode OpNode> { + let CextOpcode = mnemonic, BaseOpcode = mnemonic#_ri in { + let opExtendable = 2, opExtentBits = 16, isPredicable = 1 in + def A2_#NAME : T_Addri<s16Ext>; + + let opExtendable = 3, opExtentBits = 8, isPredicated = 1 in { + defm A2_p#NAME#t : Addri_Pred<mnemonic, 0>; + defm A2_p#NAME#f : Addri_Pred<mnemonic, 1>; + } + } +} + +defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel; + +def: Pat<(i32 (add I32:$Rs, s32ImmPred:$s16)), + (i32 (A2_addi I32:$Rs, imm:$s16))>; + +//===----------------------------------------------------------------------===// +// Template class used for the following ALU32 instructions. +// Rd=and(Rs,#s10) +// Rd=or(Rs,#s10) +//===----------------------------------------------------------------------===// +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10, +InputType = "imm", hasNewValue = 1 in +class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp> + : ALU32_ri <(outs IntRegs:$Rd), + (ins IntRegs:$Rs, s10Ext:$s10), + "$Rd = "#mnemonic#"($Rs, #$s10)" , + [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10))]> { + bits<5> Rd; + bits<5> Rs; + bits<10> s10; + let CextOpcode = mnemonic; + + let IClass = 0b0111; + + let Inst{27-24} = 0b0110; + let Inst{23-22} = MinOp; + let Inst{21} = s10{9}; + let Inst{20-16} = Rs; + let Inst{13-5} = s10{8-0}; + let Inst{4-0} = Rd; + } + +def A2_orir : T_ALU32ri_logical<"or", or, 0b10>, ImmRegRel; +def A2_andir : T_ALU32ri_logical<"and", and, 0b00>, ImmRegRel; + +// Subtract register from immediate +// Rd32=sub(#s10,Rs32) +let isExtendable = 1, CextOpcode = "sub", opExtendable = 1, isExtentSigned = 1, + opExtentBits = 10, InputType = "imm", hasNewValue = 1, hasSideEffects = 0 in +def A2_subri: ALU32_ri <(outs IntRegs:$Rd), (ins s10Ext:$s10, IntRegs:$Rs), + "$Rd = sub(#$s10, $Rs)", []>, ImmRegRel { + bits<5> Rd; + bits<10> s10; + bits<5> Rs; + + let IClass = 0b0111; + + let Inst{27-22} = 0b011001; + let Inst{21} = s10{9}; + let Inst{20-16} = Rs; + let Inst{13-5} = s10{8-0}; + let Inst{4-0} = Rd; + } + +// Nop. +let hasSideEffects = 0 in +def A2_nop: ALU32Inst <(outs), (ins), "nop" > { + let IClass = 0b0111; + let Inst{27-24} = 0b1111; +} + +def: Pat<(sub s32ImmPred:$s10, IntRegs:$Rs), + (A2_subri imm:$s10, IntRegs:$Rs)>; + +// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs). +def: Pat<(not (i32 IntRegs:$src1)), + (A2_subri -1, IntRegs:$src1)>; + +let hasSideEffects = 0, hasNewValue = 1 in +class T_tfr16<bit isHi> + : ALU32Inst <(outs IntRegs:$Rx), (ins IntRegs:$src1, u16Imm:$u16), + "$Rx"#!if(isHi, ".h", ".l")#" = #$u16", + [], "$src1 = $Rx" > { + bits<5> Rx; + bits<16> u16; + + let IClass = 0b0111; + let Inst{27-26} = 0b00; + let Inst{25-24} = !if(isHi, 0b10, 0b01); + let Inst{23-22} = u16{15-14}; + let Inst{21} = 0b1; + let Inst{20-16} = Rx; + let Inst{13-0} = u16{13-0}; + } + +def A2_tfril: T_tfr16<0>; +def A2_tfrih: T_tfr16<1>; + +// Conditional transfer is an alias to conditional "Rd = add(Rs, #0)". 
+let isPredicated = 1, hasNewValue = 1, opNewValue = 0 in +class T_tfr_pred<bit isPredNot, bit isPredNew> + : ALU32Inst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if ("#!if(isPredNot, "!", "")# + "$src1"#!if(isPredNew, ".new", "")# + ") $dst = $src2"> { + bits<5> dst; + bits<2> src1; + bits<5> src2; + + let isPredicatedFalse = isPredNot; + let isPredicatedNew = isPredNew; + let IClass = 0b0111; + + let Inst{27-24} = 0b0100; + let Inst{23} = isPredNot; + let Inst{13} = isPredNew; + let Inst{12-5} = 0; + let Inst{4-0} = dst; + let Inst{22-21} = src1; + let Inst{20-16} = src2; + } + +let isPredicable = 1 in +class T_tfr : ALU32Inst<(outs IntRegs:$dst), (ins IntRegs:$src), + "$dst = $src"> { + bits<5> dst; + bits<5> src; + + let IClass = 0b0111; + + let Inst{27-21} = 0b0000011; + let Inst{20-16} = src; + let Inst{13} = 0b0; + let Inst{4-0} = dst; + } + +let InputType = "reg", hasNewValue = 1, hasSideEffects = 0 in +multiclass tfr_base<string CextOp> { + let CextOpcode = CextOp, BaseOpcode = CextOp in { + def NAME : T_tfr; + + // Predicate + def t : T_tfr_pred<0, 0>; + def f : T_tfr_pred<1, 0>; + // Predicate new + def tnew : T_tfr_pred<0, 1>; + def fnew : T_tfr_pred<1, 1>; + } +} + +// Assembler mapped to C2_ccombinew[t|f|newt|newf]. +// Please don't add bits to this instruction as it'll be converted into +// 'combine' before object code emission. +let isPredicated = 1 in +class T_tfrp_pred<bit PredNot, bit PredNew> + : ALU32_rr <(outs DoubleRegs:$dst), + (ins PredRegs:$src1, DoubleRegs:$src2), + "if ("#!if(PredNot, "!", "")#"$src1" + #!if(PredNew, ".new", "")#") $dst = $src2" > { + let isPredicatedFalse = PredNot; + let isPredicatedNew = PredNew; + } + +// Assembler mapped to A2_combinew. +// Please don't add bits to this instruction as it'll be converted into +// 'combine' before object code emission. 
+class T_tfrp : ALU32Inst <(outs DoubleRegs:$dst), + (ins DoubleRegs:$src), + "$dst = $src">; + +let hasSideEffects = 0 in +multiclass TFR64_base<string BaseName> { + let BaseOpcode = BaseName in { + let isPredicable = 1 in + def NAME : T_tfrp; + // Predicate + def t : T_tfrp_pred <0, 0>; + def f : T_tfrp_pred <1, 0>; + // Predicate new + def tnew : T_tfrp_pred <0, 1>; + def fnew : T_tfrp_pred <1, 1>; + } +} + +let InputType = "imm", isExtendable = 1, isExtentSigned = 1, opExtentBits = 12, + isMoveImm = 1, opExtendable = 2, BaseOpcode = "TFRI", CextOpcode = "TFR", + hasSideEffects = 0, isPredicated = 1, hasNewValue = 1 in +class T_TFRI_Pred<bit PredNot, bit PredNew> + : ALU32_ri<(outs IntRegs:$Rd), (ins PredRegs:$Pu, s12Ext:$s12), + "if ("#!if(PredNot,"!","")#"$Pu"#!if(PredNew,".new","")#") $Rd = #$s12", + [], "", ALU32_2op_tc_1_SLOT0123>, ImmRegRel, PredNewRel { + let isPredicatedFalse = PredNot; + let isPredicatedNew = PredNew; + + bits<5> Rd; + bits<2> Pu; + bits<12> s12; + + let IClass = 0b0111; + let Inst{27-24} = 0b1110; + let Inst{23} = PredNot; + let Inst{22-21} = Pu; + let Inst{20} = 0b0; + let Inst{19-16,12-5} = s12; + let Inst{13} = PredNew; + let Inst{4-0} = Rd; +} + +def C2_cmoveit : T_TFRI_Pred<0, 0>; +def C2_cmoveif : T_TFRI_Pred<1, 0>; +def C2_cmovenewit : T_TFRI_Pred<0, 1>; +def C2_cmovenewif : T_TFRI_Pred<1, 1>; + +let InputType = "imm", isExtendable = 1, isExtentSigned = 1, + CextOpcode = "TFR", BaseOpcode = "TFRI", hasNewValue = 1, opNewValue = 0, + isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1, + isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in +def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16", + [(set (i32 IntRegs:$Rd), s32ImmPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>, + ImmRegRel, PredRel { + bits<5> Rd; + bits<16> s16; + + let IClass = 0b0111; + let Inst{27-24} = 0b1000; + let Inst{23-22,20-16,13-5} = s16; + let Inst{4-0} = Rd; +} + +defm A2_tfr : tfr_base<"TFR">, ImmRegRel, PredNewRel; +let isAsmParserOnly = 1 in +defm A2_tfrp : TFR64_base<"TFR64">, PredNewRel; + +// Assembler mapped +let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1, + isAsmParserOnly = 1 in +def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1), + "$dst = #$src1", + [(set (i64 DoubleRegs:$dst), s8Imm64Pred:$src1)]>; + +// TODO: see if this instruction can be deleted.. +let isExtendable = 1, opExtendable = 1, opExtentBits = 6, + isAsmParserOnly = 1 in { +def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64Imm:$src1), + "$dst = #$src1">; +def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst), + (ins s8Ext:$src1, s8Imm:$src2), + "$dst = combine(##$src1, #$src2)">; +} + +//===----------------------------------------------------------------------===// +// ALU32/ALU - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// ALU32/PERM + +//===----------------------------------------------------------------------===// +// Scalar mux register immediate. 
+let hasSideEffects = 0, isExtentSigned = 1, CextOpcode = "MUX", + InputType = "imm", hasNewValue = 1, isExtendable = 1, opExtentBits = 8 in +class T_MUX1 <bit MajOp, dag ins, string AsmStr> + : ALU32Inst <(outs IntRegs:$Rd), ins, AsmStr>, ImmRegRel { + bits<5> Rd; + bits<2> Pu; + bits<8> s8; + bits<5> Rs; + + let IClass = 0b0111; + let Inst{27-24} = 0b0011; + let Inst{23} = MajOp; + let Inst{22-21} = Pu; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-5} = s8; + let Inst{4-0} = Rd; +} + +let opExtendable = 2 in +def C2_muxri : T_MUX1<0b1, (ins PredRegs:$Pu, s8Ext:$s8, IntRegs:$Rs), + "$Rd = mux($Pu, #$s8, $Rs)">; + +let opExtendable = 3 in +def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8), + "$Rd = mux($Pu, $Rs, #$s8)">; + +def : Pat<(i32 (select I1:$Pu, s32ImmPred:$s8, I32:$Rs)), + (C2_muxri I1:$Pu, s32ImmPred:$s8, I32:$Rs)>; + +def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32ImmPred:$s8)), + (C2_muxir I1:$Pu, I32:$Rs, s32ImmPred:$s8)>; + +// C2_muxii: Scalar mux immediates. +let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, + opExtentBits = 8, opExtendable = 2 in +def C2_muxii: ALU32Inst <(outs IntRegs:$Rd), + (ins PredRegs:$Pu, s8Ext:$s8, s8Imm:$S8), + "$Rd = mux($Pu, #$s8, #$S8)" , + [(set (i32 IntRegs:$Rd), + (i32 (select I1:$Pu, s32ImmPred:$s8, s8ImmPred:$S8)))] > { + bits<5> Rd; + bits<2> Pu; + bits<8> s8; + bits<8> S8; + + let IClass = 0b0111; + + let Inst{27-25} = 0b101; + let Inst{24-23} = Pu; + let Inst{22-16} = S8{7-1}; + let Inst{13} = S8{0}; + let Inst{12-5} = s8; + let Inst{4-0} = Rd; + } + +let isCodeGenOnly = 1, isPseudo = 1 in +def MUX64_rr : ALU64_rr<(outs DoubleRegs:$Rd), + (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt), + ".error \"should not emit\" ", []>; + + +//===----------------------------------------------------------------------===// +// template class for non-predicated alu32_2op instructions +// - aslh, asrh, sxtb, sxth, zxth +//===----------------------------------------------------------------------===// +let hasNewValue = 1, opNewValue = 0 in +class T_ALU32_2op <string mnemonic, bits<3> minOp> : + ALU32Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs), + "$Rd = "#mnemonic#"($Rs)", [] > { + bits<5> Rd; + bits<5> Rs; + + let IClass = 0b0111; + + let Inst{27-24} = 0b0000; + let Inst{23-21} = minOp; + let Inst{13} = 0b0; + let Inst{4-0} = Rd; + let Inst{20-16} = Rs; +} + +//===----------------------------------------------------------------------===// +// template class for predicated alu32_2op instructions +// - aslh, asrh, sxtb, sxth, zxtb, zxth +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_ALU32_2op_Pred <string mnemonic, bits<3> minOp, bit isPredNot, + bit isPredNew > : + ALU32Inst <(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs), + !if(isPredNot, "if (!$Pu", "if ($Pu") + #!if(isPredNew, ".new) ",") ")#"$Rd = "#mnemonic#"($Rs)"> { + bits<5> Rd; + bits<2> Pu; + bits<5> Rs; + + let IClass = 0b0111; + + let Inst{27-24} = 0b0000; + let Inst{23-21} = minOp; + let Inst{13} = 0b1; + let Inst{11} = isPredNot; + let Inst{10} = isPredNew; + let Inst{4-0} = Rd; + let Inst{9-8} = Pu; + let Inst{20-16} = Rs; +} + +multiclass ALU32_2op_Pred<string mnemonic, bits<3> minOp, bit PredNot> { + let isPredicatedFalse = PredNot in { + def NAME : T_ALU32_2op_Pred<mnemonic, minOp, PredNot, 0>; + + // Predicate new + let isPredicatedNew = 1 in + def NAME#new : T_ALU32_2op_Pred<mnemonic, minOp, PredNot, 1>; + } +} + +multiclass 
ALU32_2op_base<string mnemonic, bits<3> minOp> {
+  let BaseOpcode = mnemonic in {
+    let isPredicable = 1, hasSideEffects = 0 in
+    def A2_#NAME : T_ALU32_2op<mnemonic, minOp>;
+
+    let isPredicated = 1, hasSideEffects = 0 in {
+      defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
+      defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
+    }
+  }
+}
+
+defm aslh : ALU32_2op_base<"aslh", 0b000>, PredNewRel;
+defm asrh : ALU32_2op_base<"asrh", 0b001>, PredNewRel;
+defm sxtb : ALU32_2op_base<"sxtb", 0b101>, PredNewRel;
+defm sxth : ALU32_2op_base<"sxth", 0b111>, PredNewRel;
+defm zxth : ALU32_2op_base<"zxth", 0b110>, PredNewRel;
+
+// Rd=zxtb(Rs): assembler mapped to Rd=and(Rs,#255).
+// The compiler would want to generate 'zxtb' instead of 'and' because 'zxtb'
+// has predicated forms while 'and' doesn't. Since the integrated assembler
+// can't handle 'mapped' instructions, we need to encode 'zxtb' the same as
+// 'and' where the immediate operand is set to '255'.
+
+let hasNewValue = 1, opNewValue = 0 in
+class T_ZXTB: ALU32Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rs),
+  "$Rd = zxtb($Rs)", [] > { // Rd = and(Rs,255)
+  bits<5> Rd;
+  bits<5> Rs;
+  bits<10> s10 = 255;
+
+  let IClass = 0b0111;
+
+  let Inst{27-22} = 0b011000;
+  let Inst{4-0} = Rd;
+  let Inst{20-16} = Rs;
+  let Inst{21} = s10{9};
+  let Inst{13-5} = s10{8-0};
+}
+
+// Rd=zxtb(Rs): assembler mapped to "Rd=and(Rs,#255)".
+multiclass ZXTB_base <string mnemonic, bits<3> minOp> {
+  let BaseOpcode = mnemonic in {
+    let isPredicable = 1, hasSideEffects = 0 in
+    def A2_#NAME : T_ZXTB;
+
+    let isPredicated = 1, hasSideEffects = 0 in {
+      defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
+      defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
+    }
+  }
+}
+
+defm zxtb : ZXTB_base<"zxtb",0b100>, PredNewRel;
+
+def: Pat<(shl I32:$src1, (i32 16)),   (A2_aslh I32:$src1)>;
+def: Pat<(sra I32:$src1, (i32 16)),   (A2_asrh I32:$src1)>;
+def: Pat<(sext_inreg I32:$src1, i8),  (A2_sxtb I32:$src1)>;
+def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>;
+
+//===----------------------------------------------------------------------===//
+// Template class for vector add and avg
+//===----------------------------------------------------------------------===//
+
+class T_VectALU_64 <string opc, bits<3> majOp, bits<3> minOp,
+                    bit isSat, bit isRnd, bit isCrnd, bit SwapOps >
+  : ALU64_rr < (outs DoubleRegs:$Rdd),
+               (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
+    "$Rdd = "#opc#"($Rss, $Rtt)"#!if(isRnd, ":rnd", "")
+                                #!if(isCrnd,":crnd","")
+                                #!if(isSat, ":sat", ""),
+    [], "", ALU64_tc_2_SLOT23 > {
+    bits<5> Rdd;
+    bits<5> Rss;
+    bits<5> Rtt;
+
+    let IClass = 0b1101;
+
+    let Inst{27-24} = 0b0011;
+    let Inst{23-21} = majOp;
+    let Inst{20-16} = !if (SwapOps, Rtt, Rss);
+    let Inst{12-8} = !if (SwapOps, Rss, Rtt);
+    let Inst{7-5} = minOp;
+    let Inst{4-0} = Rdd;
+  }
+
+// ALU64 - Vector add
+// Rdd=vadd[u][bhw](Rss,Rtt)
+let Itinerary = ALU64_tc_1_SLOT23 in {
+  def A2_vaddub : T_VectALU_64 < "vaddub", 0b000, 0b000, 0, 0, 0, 0>;
+  def A2_vaddh  : T_VectALU_64 < "vaddh",  0b000, 0b010, 0, 0, 0, 0>;
+  def A2_vaddw  : T_VectALU_64 < "vaddw",  0b000, 0b101, 0, 0, 0, 0>;
+}
+
+// Rdd=vadd[u][bhw](Rss,Rtt):sat
+let Defs = [USR_OVF] in {
+  def A2_vaddubs : T_VectALU_64 < "vaddub", 0b000, 0b001, 1, 0, 0, 0>;
+  def A2_vaddhs  : T_VectALU_64 < "vaddh",  0b000, 0b011, 1, 0, 0, 0>;
+  def A2_vadduhs : T_VectALU_64 < "vadduh", 0b000, 0b100, 1, 0, 0, 0>;
+  def A2_vaddws  : T_VectALU_64 < "vaddw",  0b000, 0b110, 1, 0, 0, 0>;
+}
+
+// ALU64 - Vector average
+// Rdd=vavg[u][bhw](Rss,Rtt)
+let Itinerary =
ALU64_tc_1_SLOT23 in { + def A2_vavgub : T_VectALU_64 < "vavgub", 0b010, 0b000, 0, 0, 0, 0>; + def A2_vavgh : T_VectALU_64 < "vavgh", 0b010, 0b010, 0, 0, 0, 0>; + def A2_vavguh : T_VectALU_64 < "vavguh", 0b010, 0b101, 0, 0, 0, 0>; + def A2_vavgw : T_VectALU_64 < "vavgw", 0b011, 0b000, 0, 0, 0, 0>; + def A2_vavguw : T_VectALU_64 < "vavguw", 0b011, 0b011, 0, 0, 0, 0>; +} + +// Rdd=vavg[u][bhw](Rss,Rtt)[:rnd|:crnd] +def A2_vavgubr : T_VectALU_64 < "vavgub", 0b010, 0b001, 0, 1, 0, 0>; +def A2_vavghr : T_VectALU_64 < "vavgh", 0b010, 0b011, 0, 1, 0, 0>; +def A2_vavghcr : T_VectALU_64 < "vavgh", 0b010, 0b100, 0, 0, 1, 0>; +def A2_vavguhr : T_VectALU_64 < "vavguh", 0b010, 0b110, 0, 1, 0, 0>; + +def A2_vavgwr : T_VectALU_64 < "vavgw", 0b011, 0b001, 0, 1, 0, 0>; +def A2_vavgwcr : T_VectALU_64 < "vavgw", 0b011, 0b010, 0, 0, 1, 0>; +def A2_vavguwr : T_VectALU_64 < "vavguw", 0b011, 0b100, 0, 1, 0, 0>; + +// Rdd=vnavg[bh](Rss,Rtt) +let Itinerary = ALU64_tc_1_SLOT23 in { + def A2_vnavgh : T_VectALU_64 < "vnavgh", 0b100, 0b000, 0, 0, 0, 1>; + def A2_vnavgw : T_VectALU_64 < "vnavgw", 0b100, 0b011, 0, 0, 0, 1>; +} + +// Rdd=vnavg[bh](Rss,Rtt)[:rnd|:crnd]:sat +let Defs = [USR_OVF] in { + def A2_vnavghr : T_VectALU_64 < "vnavgh", 0b100, 0b001, 1, 1, 0, 1>; + def A2_vnavghcr : T_VectALU_64 < "vnavgh", 0b100, 0b010, 1, 0, 1, 1>; + def A2_vnavgwr : T_VectALU_64 < "vnavgw", 0b100, 0b100, 1, 1, 0, 1>; + def A2_vnavgwcr : T_VectALU_64 < "vnavgw", 0b100, 0b110, 1, 0, 1, 1>; +} + +// Rdd=vsub[u][bh](Rss,Rtt) +let Itinerary = ALU64_tc_1_SLOT23 in { + def A2_vsubub : T_VectALU_64 < "vsubub", 0b001, 0b000, 0, 0, 0, 1>; + def A2_vsubh : T_VectALU_64 < "vsubh", 0b001, 0b010, 0, 0, 0, 1>; + def A2_vsubw : T_VectALU_64 < "vsubw", 0b001, 0b101, 0, 0, 0, 1>; +} + +// Rdd=vsub[u][bh](Rss,Rtt):sat +let Defs = [USR_OVF] in { + def A2_vsububs : T_VectALU_64 < "vsubub", 0b001, 0b001, 1, 0, 0, 1>; + def A2_vsubhs : T_VectALU_64 < "vsubh", 0b001, 0b011, 1, 0, 0, 1>; + def A2_vsubuhs : T_VectALU_64 < "vsubuh", 0b001, 0b100, 1, 0, 0, 1>; + def A2_vsubws : T_VectALU_64 < "vsubw", 0b001, 0b110, 1, 0, 0, 1>; +} + +// Rdd=vmax[u][bhw](Rss,Rtt) +def A2_vmaxb : T_VectALU_64 < "vmaxb", 0b110, 0b110, 0, 0, 0, 1>; +def A2_vmaxub : T_VectALU_64 < "vmaxub", 0b110, 0b000, 0, 0, 0, 1>; +def A2_vmaxh : T_VectALU_64 < "vmaxh", 0b110, 0b001, 0, 0, 0, 1>; +def A2_vmaxuh : T_VectALU_64 < "vmaxuh", 0b110, 0b010, 0, 0, 0, 1>; +def A2_vmaxw : T_VectALU_64 < "vmaxw", 0b110, 0b011, 0, 0, 0, 1>; +def A2_vmaxuw : T_VectALU_64 < "vmaxuw", 0b101, 0b101, 0, 0, 0, 1>; + +// Rdd=vmin[u][bhw](Rss,Rtt) +def A2_vminb : T_VectALU_64 < "vminb", 0b110, 0b111, 0, 0, 0, 1>; +def A2_vminub : T_VectALU_64 < "vminub", 0b101, 0b000, 0, 0, 0, 1>; +def A2_vminh : T_VectALU_64 < "vminh", 0b101, 0b001, 0, 0, 0, 1>; +def A2_vminuh : T_VectALU_64 < "vminuh", 0b101, 0b010, 0, 0, 0, 1>; +def A2_vminw : T_VectALU_64 < "vminw", 0b101, 0b011, 0, 0, 0, 1>; +def A2_vminuw : T_VectALU_64 < "vminuw", 0b101, 0b100, 0, 0, 0, 1>; + +//===----------------------------------------------------------------------===// +// Template class for vector compare +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_vcmp <string Str, bits<4> minOp> + : ALU64_rr <(outs PredRegs:$Pd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Pd = "#Str#"($Rss, $Rtt)", [], + "", ALU64_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b00100; + let Inst{13} = minOp{3}; + let Inst{7-5} = minOp{2-0}; 
+ let Inst{1-0} = Pd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +class T_vcmp_pat<InstHexagon MI, PatFrag Op, ValueType T> + : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))), + (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>; + +// Vector compare bytes +def A2_vcmpbeq : T_vcmp <"vcmpb.eq", 0b0110>; +def A2_vcmpbgtu : T_vcmp <"vcmpb.gtu", 0b0111>; + +// Vector compare halfwords +def A2_vcmpheq : T_vcmp <"vcmph.eq", 0b0011>; +def A2_vcmphgt : T_vcmp <"vcmph.gt", 0b0100>; +def A2_vcmphgtu : T_vcmp <"vcmph.gtu", 0b0101>; + +// Vector compare words +def A2_vcmpweq : T_vcmp <"vcmpw.eq", 0b0000>; +def A2_vcmpwgt : T_vcmp <"vcmpw.gt", 0b0001>; +def A2_vcmpwgtu : T_vcmp <"vcmpw.gtu", 0b0010>; + +def: T_vcmp_pat<A2_vcmpbeq, seteq, v8i8>; +def: T_vcmp_pat<A2_vcmpbgtu, setugt, v8i8>; +def: T_vcmp_pat<A2_vcmpheq, seteq, v4i16>; +def: T_vcmp_pat<A2_vcmphgt, setgt, v4i16>; +def: T_vcmp_pat<A2_vcmphgtu, setugt, v4i16>; +def: T_vcmp_pat<A2_vcmpweq, seteq, v2i32>; +def: T_vcmp_pat<A2_vcmpwgt, setgt, v2i32>; +def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>; + +//===----------------------------------------------------------------------===// +// ALU32/PERM - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// ALU32/PRED + +//===----------------------------------------------------------------------===// +// No bits needed. If cmp.ge is found the assembler parser will +// transform it to cmp.gt subtracting 1 from the immediate. +let isPseudo = 1 in { +def C2_cmpgei: ALU32Inst < + (outs PredRegs:$Pd), (ins IntRegs:$Rs, s8Ext:$s8), + "$Pd = cmp.ge($Rs, #$s8)">; +def C2_cmpgeui: ALU32Inst < + (outs PredRegs:$Pd), (ins IntRegs:$Rs, u8Ext:$s8), + "$Pd = cmp.geu($Rs, #$s8)">; +} + + +//===----------------------------------------------------------------------===// +// ALU32/PRED - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// ALU64/ALU + +//===----------------------------------------------------------------------===// +// Add. +//===----------------------------------------------------------------------===// +// Template Class +// Add/Subtract halfword +// Rd=add(Rt.L,Rs.[HL])[:sat] +// Rd=sub(Rt.L,Rs.[HL])[:sat] +// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<16] +// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<16] +//===----------------------------------------------------------------------===// + +let hasNewValue = 1, opNewValue = 0 in +class T_XTYPE_ADD_SUB <bits<2> LHbits, bit isSat, bit hasShift, bit isSub> + : ALU64Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rt, IntRegs:$Rs), + "$Rd = "#!if(isSub,"sub","add")#"($Rt." + #!if(hasShift, !if(LHbits{1},"h","l"),"l") #", $Rs." 
+ #!if(hasShift, !if(LHbits{0},"h)","l)"), !if(LHbits{1},"h)","l)")) + #!if(isSat,":sat","") + #!if(hasShift,":<<16",""), [], "", ALU64_tc_1_SLOT23> { + bits<5> Rd; + bits<5> Rt; + bits<5> Rs; + let IClass = 0b1101; + + let Inst{27-23} = 0b01010; + let Inst{22} = hasShift; + let Inst{21} = isSub; + let Inst{7} = isSat; + let Inst{6-5} = LHbits; + let Inst{4-0} = Rd; + let Inst{12-8} = Rt; + let Inst{20-16} = Rs; + } + +//Rd=sub(Rt.L,Rs.[LH]) +def A2_subh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 1>; +def A2_subh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 1>; + +//Rd=add(Rt.L,Rs.[LH]) +def A2_addh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 0>; +def A2_addh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 0>; + +let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in { + //Rd=sub(Rt.L,Rs.[LH]):sat + def A2_subh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 1>; + def A2_subh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 1>; + + //Rd=add(Rt.L,Rs.[LH]):sat + def A2_addh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 0>; + def A2_addh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 0>; +} + +//Rd=sub(Rt.[LH],Rs.[LH]):<<16 +def A2_subh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 1>; +def A2_subh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 1>; +def A2_subh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 1>; +def A2_subh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 1>; + +//Rd=add(Rt.[LH],Rs.[LH]):<<16 +def A2_addh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 0>; +def A2_addh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 0>; +def A2_addh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 0>; +def A2_addh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 0>; + +let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in { + //Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16 + def A2_subh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 1>; + def A2_subh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 1>; + def A2_subh_h16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 1, 1>; + def A2_subh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 1>; + + //Rd=add(Rt.[LH],Rs.[LH]):sat:<<16 + def A2_addh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 0>; + def A2_addh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 0>; + def A2_addh_h16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 1, 0>; + def A2_addh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 0>; +} + +// Add halfword. +def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16), + (A2_addh_l16_ll I32:$src1, I32:$src2)>; + +def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)), + (A2_addh_l16_hl I32:$src1, I32:$src2)>; + +def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)), + (A2_addh_h16_ll I32:$src1, I32:$src2)>; + +// Subtract halfword. 
+def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16), + (A2_subh_l16_ll I32:$src1, I32:$src2)>; + +def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)), + (A2_subh_h16_ll I32:$src1, I32:$src2)>; + +let hasSideEffects = 0, hasNewValue = 1 in +def S2_parityp: ALU64Inst<(outs IntRegs:$Rd), + (ins DoubleRegs:$Rs, DoubleRegs:$Rt), + "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-24} = 0b0000; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{4-0} = Rd; +} + +let hasNewValue = 1, opNewValue = 0, hasSideEffects = 0 in +class T_XTYPE_MIN_MAX < bit isMax, bit isUnsigned > + : ALU64Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rt, IntRegs:$Rs), + "$Rd = "#!if(isMax,"max","min")#!if(isUnsigned,"u","") + #"($Rt, $Rs)", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rt; + bits<5> Rs; + + let IClass = 0b1101; + + let Inst{27-23} = 0b01011; + let Inst{22-21} = !if(isMax, 0b10, 0b01); + let Inst{7} = isUnsigned; + let Inst{4-0} = Rd; + let Inst{12-8} = !if(isMax, Rs, Rt); + let Inst{20-16} = !if(isMax, Rt, Rs); + } + +def A2_min : T_XTYPE_MIN_MAX < 0, 0 >; +def A2_minu : T_XTYPE_MIN_MAX < 0, 1 >; +def A2_max : T_XTYPE_MIN_MAX < 1, 0 >; +def A2_maxu : T_XTYPE_MIN_MAX < 1, 1 >; + +// Here, depending on the operand being selected, we'll either generate a +// min or max instruction. +// Ex: +// (a>b)?a:b --> max(a,b) => Here check performed is '>' and the value selected +// is the larger of two. So, the corresponding HexagonInst is passed in 'Inst'. +// (a>b)?b:a --> min(a,b) => Here check performed is '>' but the smaller value +// is selected and the corresponding HexagonInst is passed in 'SwapInst'. + +multiclass T_MinMax_pats <PatFrag Op, RegisterClass RC, ValueType VT, + InstHexagon Inst, InstHexagon SwapInst> { + def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))), + (VT RC:$src1), (VT RC:$src2)), + (Inst RC:$src1, RC:$src2)>; + def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))), + (VT RC:$src2), (VT RC:$src1)), + (SwapInst RC:$src1, RC:$src2)>; +} + + +multiclass MinMax_pats <PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> { + defm: T_MinMax_pats<Op, IntRegs, i32, Inst, SwapInst>; + + def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1), + (i32 PositiveHalfWord:$src2))), + (i32 PositiveHalfWord:$src1), + (i32 PositiveHalfWord:$src2))), i16), + (Inst IntRegs:$src1, IntRegs:$src2)>; + + def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1), + (i32 PositiveHalfWord:$src2))), + (i32 PositiveHalfWord:$src2), + (i32 PositiveHalfWord:$src1))), i16), + (SwapInst IntRegs:$src1, IntRegs:$src2)>; +} + +let AddedComplexity = 200 in { + defm: MinMax_pats<setge, A2_max, A2_min>; + defm: MinMax_pats<setgt, A2_max, A2_min>; + defm: MinMax_pats<setle, A2_min, A2_max>; + defm: MinMax_pats<setlt, A2_min, A2_max>; + defm: MinMax_pats<setuge, A2_maxu, A2_minu>; + defm: MinMax_pats<setugt, A2_maxu, A2_minu>; + defm: MinMax_pats<setule, A2_minu, A2_maxu>; + defm: MinMax_pats<setult, A2_minu, A2_maxu>; +} + +class T_cmp64_rr<string mnemonic, bits<3> MinOp, bit IsComm> + : ALU64_rr<(outs PredRegs:$Pd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt), + "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", ALU64_tc_2early_SLOT23> { + let isCompare = 1; + let isCommutable = IsComm; + let hasSideEffects = 0; + + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0010100; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-5} = MinOp; + let Inst{1-0} = Pd; +} + +def 
C2_cmpeqp : T_cmp64_rr<"cmp.eq", 0b000, 1>; +def C2_cmpgtp : T_cmp64_rr<"cmp.gt", 0b010, 0>; +def C2_cmpgtup : T_cmp64_rr<"cmp.gtu", 0b100, 0>; + +class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp> + : Pat<(i1 (CmpOp (i64 DoubleRegs:$Rs), (i64 DoubleRegs:$Rt))), + (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>; + +def: T_cmp64_rr_pat<C2_cmpeqp, seteq>; +def: T_cmp64_rr_pat<C2_cmpgtp, setgt>; +def: T_cmp64_rr_pat<C2_cmpgtup, setugt>; +def: T_cmp64_rr_pat<C2_cmpgtp, RevCmp<setlt>>; +def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>; + +def C2_vmux : ALU64_rr<(outs DoubleRegs:$Rd), + (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt), + "$Rd = vmux($Pu, $Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> { + let hasSideEffects = 0; + + bits<5> Rd; + bits<2> Pu; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-24} = 0b0001; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{6-5} = Pu; + let Inst{4-0} = Rd; +} + +class T_ALU64_rr<string mnemonic, string suffix, bits<4> RegType, + bits<3> MajOp, bits<3> MinOp, bit OpsRev, bit IsComm, + string Op2Pfx> + : ALU64_rr<(outs DoubleRegs:$Rd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt), + "$Rd = " #mnemonic# "($Rs, " #Op2Pfx# "$Rt)" #suffix, [], + "", ALU64_tc_1_SLOT23> { + let hasSideEffects = 0; + let isCommutable = IsComm; + + bits<5> Rs; + bits<5> Rt; + bits<5> Rd; + + let IClass = 0b1101; + let Inst{27-24} = RegType; + let Inst{23-21} = MajOp; + let Inst{20-16} = !if (OpsRev,Rt,Rs); + let Inst{12-8} = !if (OpsRev,Rs,Rt); + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; +} + +class T_ALU64_arith<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit IsSat, + bit OpsRev, bit IsComm> + : T_ALU64_rr<mnemonic, !if(IsSat,":sat",""), 0b0011, MajOp, MinOp, OpsRev, + IsComm, "">; + +def A2_addp : T_ALU64_arith<"add", 0b000, 0b111, 0, 0, 1>; +def A2_subp : T_ALU64_arith<"sub", 0b001, 0b111, 0, 1, 0>; + +def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>; +def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>; + +class T_ALU64_logical<string mnemonic, bits<3> MinOp, bit OpsRev, bit IsComm, + bit IsNeg> + : T_ALU64_rr<mnemonic, "", 0b0011, 0b111, MinOp, OpsRev, IsComm, + !if(IsNeg,"~","")>; + +def A2_andp : T_ALU64_logical<"and", 0b000, 0, 1, 0>; +def A2_orp : T_ALU64_logical<"or", 0b010, 0, 1, 0>; +def A2_xorp : T_ALU64_logical<"xor", 0b100, 0, 1, 0>; + +def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>; +def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>; +def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>; + +//===----------------------------------------------------------------------===// +// ALU64/ALU - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/BIT + +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// ALU64/BIT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/PERM + +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// ALU64/PERM - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// 
+// CR + +//===----------------------------------------------------------------------===// +// Logical reductions on predicates. + +// Looping instructions. + +// Pipelined looping instructions. + +// Logical operations on predicates. +let hasSideEffects = 0 in +class T_LOGICAL_1OP<string MnOp, bits<2> OpBits> + : CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps), + "$Pd = " # MnOp # "($Ps)", [], "", CR_tc_2early_SLOT23> { + bits<2> Pd; + bits<2> Ps; + + let IClass = 0b0110; + let Inst{27-23} = 0b10111; + let Inst{22-21} = OpBits; + let Inst{20} = 0b0; + let Inst{17-16} = Ps; + let Inst{13} = 0b0; + let Inst{1-0} = Pd; +} + +def C2_any8 : T_LOGICAL_1OP<"any8", 0b00>; +def C2_all8 : T_LOGICAL_1OP<"all8", 0b01>; +def C2_not : T_LOGICAL_1OP<"not", 0b10>; + +def: Pat<(i1 (not (i1 PredRegs:$Ps))), + (C2_not PredRegs:$Ps)>; + +let hasSideEffects = 0 in +class T_LOGICAL_2OP<string MnOp, bits<3> OpBits, bit IsNeg, bit Rev> + : CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps, PredRegs:$Pt), + "$Pd = " # MnOp # "($Ps, " # !if (IsNeg,"!","") # "$Pt)", + [], "", CR_tc_2early_SLOT23> { + bits<2> Pd; + bits<2> Ps; + bits<2> Pt; + + let IClass = 0b0110; + let Inst{27-24} = 0b1011; + let Inst{23-21} = OpBits; + let Inst{20} = 0b0; + let Inst{17-16} = !if(Rev,Pt,Ps); // Rs and Rt are reversed for some + let Inst{13} = 0b0; // instructions. + let Inst{9-8} = !if(Rev,Ps,Pt); + let Inst{1-0} = Pd; +} + +def C2_and : T_LOGICAL_2OP<"and", 0b000, 0, 1>; +def C2_or : T_LOGICAL_2OP<"or", 0b001, 0, 1>; +def C2_xor : T_LOGICAL_2OP<"xor", 0b010, 0, 0>; +def C2_andn : T_LOGICAL_2OP<"and", 0b011, 1, 1>; +def C2_orn : T_LOGICAL_2OP<"or", 0b111, 1, 1>; + +def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>; +def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>; +def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>; +def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>; +def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>; + +let hasSideEffects = 0, hasNewValue = 1 in +def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt), + "$Rd = vitpack($Ps, $Pt)", [], "", S_2op_tc_1_SLOT23> { + bits<5> Rd; + bits<2> Ps; + bits<2> Pt; + + let IClass = 0b1000; + let Inst{27-24} = 0b1001; + let Inst{22-21} = 0b00; + let Inst{17-16} = Ps; + let Inst{9-8} = Pt; + let Inst{4-0} = Rd; +} + +let hasSideEffects = 0 in +def C2_mask : SInst<(outs DoubleRegs:$Rd), (ins PredRegs:$Pt), + "$Rd = mask($Pt)", [], "", S_2op_tc_1_SLOT23> { + bits<5> Rd; + bits<2> Pt; + + let IClass = 0b1000; + let Inst{27-24} = 0b0110; + let Inst{9-8} = Pt; + let Inst{4-0} = Rd; +} + +// User control register transfer. 
+//===----------------------------------------------------------------------===// +// CR - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// JR + +//===----------------------------------------------------------------------===// + +def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>; + +class CondStr<string CReg, bit True, bit New> { + string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") "; +} +class JumpOpcStr<string Mnemonic, bit New, bit Taken> { + string S = Mnemonic # !if(Taken, ":t", !if(New, ":nt", "")); +} + +let isBranch = 1, isBarrier = 1, Defs = [PC], hasSideEffects = 0, + isPredicable = 1, + isExtendable = 1, opExtendable = 0, isExtentSigned = 1, + opExtentBits = 24, opExtentAlign = 2, InputType = "imm" in +class T_JMP<string ExtStr> + : JInst<(outs), (ins brtarget:$dst), + "jump " # ExtStr # "$dst", + [], "", J_tc_2early_SLOT23> { + bits<24> dst; + let IClass = 0b0101; + + let Inst{27-25} = 0b100; + let Inst{24-16} = dst{23-15}; + let Inst{13-1} = dst{14-2}; +} + +let isBranch = 1, Defs = [PC], hasSideEffects = 0, isPredicated = 1, + isExtendable = 1, opExtendable = 1, isExtentSigned = 1, + opExtentBits = 17, opExtentAlign = 2, InputType = "imm" in +class T_JMP_c<bit PredNot, bit isPredNew, bit isTak, string ExtStr> + : JInst<(outs), (ins PredRegs:$src, brtarget:$dst), + CondStr<"$src", !if(PredNot,0,1), isPredNew>.S # + JumpOpcStr<"jump", isPredNew, isTak>.S # " " # + ExtStr # "$dst", + [], "", J_tc_2early_SLOT23>, ImmRegRel { + let isTaken = isTak; + let isPredicatedFalse = PredNot; + let isPredicatedNew = isPredNew; + bits<2> src; + bits<17> dst; + + let IClass = 0b0101; + + let Inst{27-24} = 0b1100; + let Inst{21} = PredNot; + let Inst{12} = isTak; + let Inst{11} = isPredNew; + let Inst{9-8} = src; + let Inst{23-22} = dst{16-15}; + let Inst{20-16} = dst{14-10}; + let Inst{13} = dst{9}; + let Inst{7-1} = dst{8-2}; + } + +multiclass JMP_Pred<bit PredNot, string ExtStr> { + def NAME : T_JMP_c<PredNot, 0, 0, ExtStr>; // not taken + // Predicate new + def NAME#newpt : T_JMP_c<PredNot, 1, 1, ExtStr>; // taken + def NAME#new : T_JMP_c<PredNot, 1, 0, ExtStr>; // not taken +} + +multiclass JMP_base<string BaseOp, string ExtStr> { + let BaseOpcode = BaseOp in { + def NAME : T_JMP<ExtStr>; + defm t : JMP_Pred<0, ExtStr>; + defm f : JMP_Pred<1, ExtStr>; + } +} + +// Jumps to address stored in a register, JUMPR_MISC +// if ([[!]P[.new]]) jumpr[:t/nt] Rs +let isBranch = 1, isIndirectBranch = 1, isBarrier = 1, Defs = [PC], + isPredicable = 1, hasSideEffects = 0, InputType = "reg" in +class T_JMPr + : JRInst<(outs), (ins IntRegs:$dst), + "jumpr $dst", [], "", J_tc_2early_SLOT2> { + bits<5> dst; + + let IClass = 0b0101; + let Inst{27-21} = 0b0010100; + let Inst{20-16} = dst; +} + +let isBranch = 1, isIndirectBranch = 1, Defs = [PC], isPredicated = 1, + hasSideEffects = 0, InputType = "reg" in +class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak> + : JRInst <(outs), (ins PredRegs:$src, IntRegs:$dst), + CondStr<"$src", !if(PredNot,0,1), isPredNew>.S # + JumpOpcStr<"jumpr", isPredNew, isTak>.S # " $dst", [], + "", J_tc_2early_SLOT2> { + + let isTaken = isTak; + let isPredicatedFalse = PredNot; + let isPredicatedNew = isPredNew; + bits<2> src; + bits<5> dst; + + let IClass = 0b0101; + + let Inst{27-22} = 0b001101; + let Inst{21} = 
PredNot;
+  let Inst{20-16} = dst;
+  let Inst{12} = isTak;
+  let Inst{11} = isPredNew;
+  let Inst{9-8} = src;
+}
+
+multiclass JMPR_Pred<bit PredNot> {
+  def NAME       : T_JMPr_c<PredNot, 0, 0>; // not taken
+  // Predicate new
+  def NAME#newpt : T_JMPr_c<PredNot, 1, 1>; // taken
+  def NAME#new   : T_JMPr_c<PredNot, 1, 0>; // not taken
+}
+
+multiclass JMPR_base<string BaseOp> {
+  let BaseOpcode = BaseOp in {
+    def NAME : T_JMPr;
+    defm t : JMPR_Pred<0>;
+    defm f : JMPR_Pred<1>;
+  }
+}
+
+let isCall = 1, hasSideEffects = 1 in
+class JUMPR_MISC_CALLR<bit isPred, bit isPredNot,
+                       dag InputDag = (ins IntRegs:$Rs)>
+  : JRInst<(outs), InputDag,
+           !if(isPred, !if(isPredNot, "if (!$Pu) callr $Rs",
+                                      "if ($Pu) callr $Rs"),
+                       "callr $Rs"),
+           [], "", J_tc_2early_SLOT2> {
+    bits<5> Rs;
+    bits<2> Pu;
+    let isPredicated = isPred;
+    let isPredicatedFalse = isPredNot;
+
+    let IClass = 0b0101;
+    let Inst{27-25} = 0b000;
+    let Inst{24-23} = !if (isPred, 0b10, 0b01);
+    let Inst{22} = 0;
+    let Inst{21} = isPredNot;
+    let Inst{9-8} = !if (isPred, Pu, 0b00);
+    let Inst{20-16} = Rs;
+
+  }
+
+let Defs = VolatileV3.Regs in {
+  def J2_callrt : JUMPR_MISC_CALLR<1, 0, (ins PredRegs:$Pu, IntRegs:$Rs)>;
+  def J2_callrf : JUMPR_MISC_CALLR<1, 1, (ins PredRegs:$Pu, IntRegs:$Rs)>;
+}
+
+let isTerminator = 1, hasSideEffects = 0 in {
+  defm J2_jump : JMP_base<"JMP", "">, PredNewRel;
+
+  // Deal with explicit assembly:
+  //  - never extend a jump #, always extend a jump ##
+  let isAsmParserOnly = 1 in {
+    defm J2_jump_ext   : JMP_base<"JMP", "##">;
+    defm J2_jump_noext : JMP_base<"JMP", "#">;
+  }
+
+  defm J2_jumpr : JMPR_base<"JMPr">, PredNewRel;
+
+  let isReturn = 1, isCodeGenOnly = 1 in
+  defm JMPret : JMPR_base<"JMPret">, PredNewRel;
+}
+
+def: Pat<(br bb:$dst),
+         (J2_jump brtarget:$dst)>;
+def: Pat<(retflag),
+         (JMPret (i32 R31))>;
+def: Pat<(brcond (i1 PredRegs:$src1), bb:$offset),
+         (J2_jumpt PredRegs:$src1, bb:$offset)>;
+
+// A return through builtin_eh_return.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasSideEffects = 0, + isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in +def EH_RETURN_JMPR : T_JMPr; + +def: Pat<(eh_return), + (EH_RETURN_JMPR (i32 R31))>; +def: Pat<(brind (i32 IntRegs:$dst)), + (J2_jumpr IntRegs:$dst)>; + +//===----------------------------------------------------------------------===// +// JR - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// LD + +//===----------------------------------------------------------------------===// + +// Load - Base with Immediate offset addressing mode +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, AddedComplexity = 20 in +class T_load_io <string mnemonic, RegisterClass RC, bits<4> MajOp, + Operand ImmOp> + : LDInst<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset), + "$dst = "#mnemonic#"($src1 + #$offset)", []>, AddrModeRel { + bits<4> name; + bits<5> dst; + bits<5> src1; + bits<14> offset; + bits<11> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s11_3Ext"), offset{13-3}, + !if (!eq(ImmOpStr, "s11_2Ext"), offset{12-2}, + !if (!eq(ImmOpStr, "s11_1Ext"), offset{11-1}, + /* s11_0Ext */ offset{10-0}))); + let opExtentBits = !if (!eq(ImmOpStr, "s11_3Ext"), 14, + !if (!eq(ImmOpStr, "s11_2Ext"), 13, + !if (!eq(ImmOpStr, "s11_1Ext"), 12, + /* s11_0Ext */ 11))); + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); + + let IClass = 0b1001; + + let Inst{27} = 0b0; + let Inst{26-25} = offsetBits{10-9}; + let Inst{24-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13-5} = offsetBits{8-0}; + let Inst{4-0} = dst; + } + +let opExtendable = 3, isExtentSigned = 0, isPredicated = 1 in +class T_pload_io <string mnemonic, RegisterClass RC, bits<4>MajOp, + Operand ImmOp, bit isNot, bit isPredNew> + : LDInst<(outs RC:$dst), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset), + "if ("#!if(isNot, "!$src1", "$src1") + #!if(isPredNew, ".new", "") + #") $dst = "#mnemonic#"($src2 + #$offset)", + [],"", V2LDST_tc_ld_SLOT01> , AddrModeRel { + bits<5> dst; + bits<2> src1; + bits<5> src2; + bits<9> offset; + bits<6> offsetBits; + string ImmOpStr = !cast<string>(ImmOp); + + let offsetBits = !if (!eq(ImmOpStr, "u6_3Ext"), offset{8-3}, + !if (!eq(ImmOpStr, "u6_2Ext"), offset{7-2}, + !if (!eq(ImmOpStr, "u6_1Ext"), offset{6-1}, + /* u6_0Ext */ offset{5-0}))); + let opExtentBits = !if (!eq(ImmOpStr, "u6_3Ext"), 9, + !if (!eq(ImmOpStr, "u6_2Ext"), 8, + !if (!eq(ImmOpStr, "u6_1Ext"), 7, + /* u6_0Ext */ 6))); + let hasNewValue = !if (!eq(ImmOpStr, "u6_3Ext"), 0, 1); + let isPredicatedNew = isPredNew; + let isPredicatedFalse = isNot; + + let IClass = 0b0100; + + let Inst{27} = 0b0; + let Inst{27} = 0b0; + let Inst{26} = isNot; + let Inst{25} = isPredNew; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = 0b0; + let Inst{12-11} = src1; + let Inst{10-5} = offsetBits; + let Inst{4-0} = dst; + } + +let isExtendable = 1, hasSideEffects = 0, addrMode = BaseImmOffset in +multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, Operand predImmOp, bits<4>MajOp> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { + let isPredicable = 1 in + def L2_#NAME#_io : T_load_io <mnemonic, RC, MajOp, ImmOp>; + + // Predicated + def L2_p#NAME#t_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 0, 0>; + def L2_p#NAME#f_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 1, 0>; + 
+ // Predicated new + def L2_p#NAME#tnew_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 0, 1>; + def L2_p#NAME#fnew_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 1, 1>; + } +} + +let accessSize = ByteAccess in { + defm loadrb: LD_Idxd <"memb", "LDrib", IntRegs, s11_0Ext, u6_0Ext, 0b1000>; + defm loadrub: LD_Idxd <"memub", "LDriub", IntRegs, s11_0Ext, u6_0Ext, 0b1001>; +} + +let accessSize = HalfWordAccess, opExtentAlign = 1 in { + defm loadrh: LD_Idxd <"memh", "LDrih", IntRegs, s11_1Ext, u6_1Ext, 0b1010>; + defm loadruh: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext, 0b1011>; +} + +let accessSize = WordAccess, opExtentAlign = 2 in +defm loadri: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext, 0b1100>; + +let accessSize = DoubleWordAccess, opExtentAlign = 3 in +defm loadrd: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext, 0b1110>; + +let accessSize = HalfWordAccess, opExtentAlign = 1 in { + def L2_loadbsw2_io: T_load_io<"membh", IntRegs, 0b0001, s11_1Ext>; + def L2_loadbzw2_io: T_load_io<"memubh", IntRegs, 0b0011, s11_1Ext>; +} + +let accessSize = WordAccess, opExtentAlign = 2 in { + def L2_loadbzw4_io: T_load_io<"memubh", DoubleRegs, 0b0101, s11_2Ext>; + def L2_loadbsw4_io: T_load_io<"membh", DoubleRegs, 0b0111, s11_2Ext>; +} + +let addrMode = BaseImmOffset, isExtendable = 1, hasSideEffects = 0, + opExtendable = 3, isExtentSigned = 1 in +class T_loadalign_io <string str, bits<4> MajOp, Operand ImmOp> + : LDInst<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset), + "$dst = "#str#"($src2 + #$offset)", [], + "$src1 = $dst">, AddrModeRel { + bits<4> name; + bits<5> dst; + bits<5> src2; + bits<12> offset; + bits<11> offsetBits; + + let offsetBits = !if (!eq(!cast<string>(ImmOp), "s11_1Ext"), offset{11-1}, + /* s11_0Ext */ offset{10-0}); + let IClass = 0b1001; + + let Inst{27} = 0b0; + let Inst{26-25} = offsetBits{10-9}; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13-5} = offsetBits{8-0}; + let Inst{4-0} = dst; + } + +let accessSize = HalfWordAccess, opExtentBits = 12, opExtentAlign = 1 in +def L2_loadalignh_io: T_loadalign_io <"memh_fifo", 0b0010, s11_1Ext>; + +let accessSize = ByteAccess, opExtentBits = 11 in +def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>; + +// Patterns to select load-indexed (i.e. load from base+offset). 
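+// As a rough illustration (referring only to the multiclass and the defm lines
+// below, nothing new is defined here): the instantiation
+//     defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>;
+// produces, among others, the pattern
+//     Pat<(i32 (load (add (i32 IntRegs:$Rs), s30_2ImmPred:$Off))),
+//         (i32 (L2_loadri_io IntRegs:$Rs, imm:$Off))>
+// so a base+offset word load is selected to L2_loadri_io and prints as,
+// e.g., "r0 = memw(r1 + #8)".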
+multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred, + InstHexagon MI> { + def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>; + def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (MI AddrFI:$fi, imm:$Off))>; + def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))), + (VT (MI IntRegs:$Rs, imm:$Off))>; + def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>; +} + +let AddedComplexity = 20 in { + defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>; + defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>; + defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>; + defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>; + defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>; + + defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>; + defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>; + defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>; + defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>; + // No sextloadi1. +} + +// Sign-extending loads of i1 need to replicate the lowest bit throughout +// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should +// do the trick. +let AddedComplexity = 20 in +def: Pat<(i32 (sextloadi1 (i32 IntRegs:$Rs))), + (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>; + +//===----------------------------------------------------------------------===// +// Post increment load +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Template class for non-predicated post increment loads with immediate offset. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, addrMode = PostInc in +class T_load_pi <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<4> MajOp > + : LDInstPI <(outs RC:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, ImmOp:$offset), + "$dst = "#mnemonic#"($src1++#$offset)" , + [], + "$src1 = $dst2" > , + PredNewRel { + bits<5> dst; + bits<5> src1; + bits<7> offset; + bits<4> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3}, + !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}))); + let hasNewValue = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1); + + let IClass = 0b1001; + + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13-12} = 0b00; + let Inst{8-5} = offsetBits; + let Inst{4-0} = dst; + } + +//===----------------------------------------------------------------------===// +// Template class for predicated post increment loads with immediate offset. 
+//===----------------------------------------------------------------------===// +let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in +class T_pload_pi <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<4> MajOp, bit isPredNot, bit isPredNew > + : LDInst <(outs RC:$dst, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset), + !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#"$dst = "#mnemonic#"($src2++#$offset)", + [] , + "$src2 = $dst2" > , + PredNewRel { + bits<5> dst; + bits<2> src1; + bits<5> src2; + bits<7> offset; + bits<4> offsetBits; + + let isPredicatedNew = isPredNew; + let isPredicatedFalse = isPredNot; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3}, + !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}))); + let hasNewValue = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1); + + let IClass = 0b1001; + + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = 0b1; + let Inst{12} = isPredNew; + let Inst{11} = isPredNot; + let Inst{10-9} = src1; + let Inst{8-5} = offsetBits; + let Inst{4-0} = dst; + } + +//===----------------------------------------------------------------------===// +// Multiclass for post increment loads with immediate offset. +//===----------------------------------------------------------------------===// + +multiclass LD_PostInc <string mnemonic, string BaseOp, RegisterClass RC, + Operand ImmOp, bits<4> MajOp> { + let BaseOpcode = "POST_"#BaseOp in { + let isPredicable = 1 in + def L2_#NAME#_pi : T_load_pi < mnemonic, RC, ImmOp, MajOp>; + + // Predicated + def L2_p#NAME#t_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 0, 0>; + def L2_p#NAME#f_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 1, 0>; + + // Predicated new + def L2_p#NAME#tnew_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 0, 1>; + def L2_p#NAME#fnew_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 1, 1>; + } +} + +// post increment byte loads with immediate offset +let accessSize = ByteAccess in { + defm loadrb : LD_PostInc <"memb", "LDrib", IntRegs, s4_0Imm, 0b1000>; + defm loadrub : LD_PostInc <"memub", "LDriub", IntRegs, s4_0Imm, 0b1001>; +} + +// post increment halfword loads with immediate offset +let accessSize = HalfWordAccess, opExtentAlign = 1 in { + defm loadrh : LD_PostInc <"memh", "LDrih", IntRegs, s4_1Imm, 0b1010>; + defm loadruh : LD_PostInc <"memuh", "LDriuh", IntRegs, s4_1Imm, 0b1011>; +} + +// post increment word loads with immediate offset +let accessSize = WordAccess, opExtentAlign = 2 in +defm loadri : LD_PostInc <"memw", "LDriw", IntRegs, s4_2Imm, 0b1100>; + +// post increment doubleword loads with immediate offset +let accessSize = DoubleWordAccess, opExtentAlign = 3 in +defm loadrd : LD_PostInc <"memd", "LDrid", DoubleRegs, s4_3Imm, 0b1110>; + +// Rd=memb[u]h(Rx++#s4:1) +// Rdd=memb[u]h(Rx++#s4:2) +let accessSize = HalfWordAccess, opExtentAlign = 1 in { + def L2_loadbsw2_pi : T_load_pi <"membh", IntRegs, s4_1Imm, 0b0001>; + def L2_loadbzw2_pi : T_load_pi <"memubh", IntRegs, s4_1Imm, 0b0011>; +} +let accessSize = WordAccess, opExtentAlign = 2, hasNewValue = 0 in { + def L2_loadbsw4_pi : T_load_pi <"membh", DoubleRegs, s4_2Imm, 0b0111>; + def L2_loadbzw4_pi : T_load_pi <"memubh", DoubleRegs, s4_2Imm, 0b0101>; +} + +//===----------------------------------------------------------------------===// +// Template class for post increment fifo loads with immediate offset. 
+//===----------------------------------------------------------------------===// +let hasSideEffects = 0, addrMode = PostInc in +class T_loadalign_pi <string mnemonic, Operand ImmOp, bits<4> MajOp > + : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$dst2), + (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset), + "$dst = "#mnemonic#"($src2++#$offset)" , + [], "$src2 = $dst2, $src1 = $dst" > , + PredNewRel { + bits<5> dst; + bits<5> src2; + bits<5> offset; + bits<4> offsetBits; + + let offsetBits = !if (!eq(!cast<string>(ImmOp), "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}); + let IClass = 0b1001; + + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13-12} = 0b00; + let Inst{8-5} = offsetBits; + let Inst{4-0} = dst; + } + +// Ryy=memh_fifo(Rx++#s4:1) +// Ryy=memb_fifo(Rx++#s4:0) +let accessSize = ByteAccess in +def L2_loadalignb_pi : T_loadalign_pi <"memb_fifo", s4_0Imm, 0b0100>; + +let accessSize = HalfWordAccess, opExtentAlign = 1 in +def L2_loadalignh_pi : T_loadalign_pi <"memh_fifo", s4_1Imm, 0b0010>; + +//===----------------------------------------------------------------------===// +// Template class for post increment loads with register offset. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, addrMode = PostInc in +class T_load_pr <string mnemonic, RegisterClass RC, bits<4> MajOp, + MemAccessSize AccessSz> + : LDInstPI <(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$src1, ModRegs:$src2), + "$dst = "#mnemonic#"($src1++$src2)" , + [], "$src1 = $_dst_" > { + bits<5> dst; + bits<5> src1; + bits<1> src2; + + let accessSize = AccessSz; + let IClass = 0b1001; + + let Inst{27-25} = 0b110; + let Inst{24-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = src2; + let Inst{12} = 0b0; + let Inst{7} = 0b0; + let Inst{4-0} = dst; + } + +let hasNewValue = 1 in { + def L2_loadrb_pr : T_load_pr <"memb", IntRegs, 0b1000, ByteAccess>; + def L2_loadrub_pr : T_load_pr <"memub", IntRegs, 0b1001, ByteAccess>; + def L2_loadrh_pr : T_load_pr <"memh", IntRegs, 0b1010, HalfWordAccess>; + def L2_loadruh_pr : T_load_pr <"memuh", IntRegs, 0b1011, HalfWordAccess>; + def L2_loadri_pr : T_load_pr <"memw", IntRegs, 0b1100, WordAccess>; + + def L2_loadbzw2_pr : T_load_pr <"memubh", IntRegs, 0b0011, HalfWordAccess>; +} + +def L2_loadrd_pr : T_load_pr <"memd", DoubleRegs, 0b1110, DoubleWordAccess>; +def L2_loadbzw4_pr : T_load_pr <"memubh", DoubleRegs, 0b0101, WordAccess>; + +// Load predicate. +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in +def LDriw_pred : LDInst<(outs PredRegs:$dst), + (ins IntRegs:$addr, s11_2Ext:$off), + ".error \"should not emit\"", []>; + +let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0 in + def L2_deallocframe : LDInst<(outs), (ins), + "deallocframe", + []> { + let IClass = 0b1001; + + let Inst{27-16} = 0b000000011110; + let Inst{13} = 0b0; + let Inst{4-0} = 0b11110; +} + +// Load / Post increment circular addressing mode. 
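+// Illustrative note (inferred from the AsmString and operands below, not
+// quoted from the architecture manual): the modifier register Mu carries the
+// increment I and the circular-buffer parameters, and CS supplies the buffer
+// start (hence "Uses = [CS]"), so a load written as
+//     r0 = memw(r1 ++ I:circ(m0))
+// post-increments r1 and wraps it within the buffer described by m0/CS.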
+let Uses = [CS], hasSideEffects = 0 in +class T_load_pcr<string mnemonic, RegisterClass RC, bits<4> MajOp> + : LDInst <(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu), + "$dst = "#mnemonic#"($Rz ++ I:circ($Mu))", [], + "$Rz = $_dst_" > { + bits<5> dst; + bits<5> Rz; + bit Mu; + + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); + let IClass = 0b1001; + + let Inst{27-25} = 0b100; + let Inst{24-21} = MajOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12} = 0b0; + let Inst{9} = 0b1; + let Inst{7} = 0b0; + let Inst{4-0} = dst; + } + +let accessSize = ByteAccess in { + def L2_loadrb_pcr : T_load_pcr <"memb", IntRegs, 0b1000>; + def L2_loadrub_pcr : T_load_pcr <"memub", IntRegs, 0b1001>; +} + +let accessSize = HalfWordAccess in { + def L2_loadrh_pcr : T_load_pcr <"memh", IntRegs, 0b1010>; + def L2_loadruh_pcr : T_load_pcr <"memuh", IntRegs, 0b1011>; + def L2_loadbsw2_pcr : T_load_pcr <"membh", IntRegs, 0b0001>; + def L2_loadbzw2_pcr : T_load_pcr <"memubh", IntRegs, 0b0011>; +} + +let accessSize = WordAccess in { + def L2_loadri_pcr : T_load_pcr <"memw", IntRegs, 0b1100>; + let hasNewValue = 0 in { + def L2_loadbzw4_pcr : T_load_pcr <"memubh", DoubleRegs, 0b0101>; + def L2_loadbsw4_pcr : T_load_pcr <"membh", DoubleRegs, 0b0111>; + } +} + +let accessSize = DoubleWordAccess in +def L2_loadrd_pcr : T_load_pcr <"memd", DoubleRegs, 0b1110>; + +// Load / Post increment circular addressing mode. +let Uses = [CS], hasSideEffects = 0 in +class T_loadalign_pcr<string mnemonic, bits<4> MajOp, MemAccessSize AccessSz > + : LDInst <(outs DoubleRegs:$dst, IntRegs:$_dst_), + (ins DoubleRegs:$_src_, IntRegs:$Rz, ModRegs:$Mu), + "$dst = "#mnemonic#"($Rz ++ I:circ($Mu))", [], + "$Rz = $_dst_, $dst = $_src_" > { + bits<5> dst; + bits<5> Rz; + bit Mu; + + let accessSize = AccessSz; + let IClass = 0b1001; + + let Inst{27-25} = 0b100; + let Inst{24-21} = MajOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12} = 0b0; + let Inst{9} = 0b1; + let Inst{7} = 0b0; + let Inst{4-0} = dst; + } + +def L2_loadalignb_pcr : T_loadalign_pcr <"memb_fifo", 0b0100, ByteAccess>; +def L2_loadalignh_pcr : T_loadalign_pcr <"memh_fifo", 0b0010, HalfWordAccess>; + +//===----------------------------------------------------------------------===// +// Circular loads with immediate offset. 
+//===----------------------------------------------------------------------===// +let Uses = [CS], mayLoad = 1, hasSideEffects = 0 in +class T_load_pci <string mnemonic, RegisterClass RC, + Operand ImmOp, bits<4> MajOp> + : LDInstPI<(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$Rz, ImmOp:$offset, ModRegs:$Mu), + "$dst = "#mnemonic#"($Rz ++ #$offset:circ($Mu))", [], + "$Rz = $_dst_"> { + bits<5> dst; + bits<5> Rz; + bits<1> Mu; + bits<7> offset; + bits<4> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); + let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3}, + !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}))); + let IClass = 0b1001; + let Inst{27-25} = 0b100; + let Inst{24-21} = MajOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12} = 0b0; + let Inst{9} = 0b0; + let Inst{8-5} = offsetBits; + let Inst{4-0} = dst; + } + +// Byte variants of circ load +let accessSize = ByteAccess in { + def L2_loadrb_pci : T_load_pci <"memb", IntRegs, s4_0Imm, 0b1000>; + def L2_loadrub_pci : T_load_pci <"memub", IntRegs, s4_0Imm, 0b1001>; +} + +// Half word variants of circ load +let accessSize = HalfWordAccess in { + def L2_loadrh_pci : T_load_pci <"memh", IntRegs, s4_1Imm, 0b1010>; + def L2_loadruh_pci : T_load_pci <"memuh", IntRegs, s4_1Imm, 0b1011>; + def L2_loadbzw2_pci : T_load_pci <"memubh", IntRegs, s4_1Imm, 0b0011>; + def L2_loadbsw2_pci : T_load_pci <"membh", IntRegs, s4_1Imm, 0b0001>; +} + +// Word variants of circ load +let accessSize = WordAccess in +def L2_loadri_pci : T_load_pci <"memw", IntRegs, s4_2Imm, 0b1100>; + +let accessSize = WordAccess, hasNewValue = 0 in { + def L2_loadbzw4_pci : T_load_pci <"memubh", DoubleRegs, s4_2Imm, 0b0101>; + def L2_loadbsw4_pci : T_load_pci <"membh", DoubleRegs, s4_2Imm, 0b0111>; +} + +let accessSize = DoubleWordAccess, hasNewValue = 0 in +def L2_loadrd_pci : T_load_pci <"memd", DoubleRegs, s4_3Imm, 0b1110>; + +//===----------------------------------------------------------------------===// +// Circular loads - Pseudo +// +// Please note that the input operand order in the pseudo instructions +// doesn't match the real instructions. The pseudo instructions' operand +// order should mimic the ordering in the intrinsics. Also, 'src2' doesn't +// appear in the AsmString because it's the same as 'dst'. +//===----------------------------------------------------------------------===// +let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in +class T_load_pci_pseudo <string opc, RegisterClass RC> + : LDInstPI<(outs IntRegs:$_dst_, RC:$dst), + (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4Imm:$src4), + ".error \"$dst = "#opc#"($src1++#$src4:circ($src3))\"", + [], "$src1 = $_dst_">; + +def L2_loadrb_pci_pseudo : T_load_pci_pseudo <"memb", IntRegs>; +def L2_loadrub_pci_pseudo : T_load_pci_pseudo <"memub", IntRegs>; +def L2_loadrh_pci_pseudo : T_load_pci_pseudo <"memh", IntRegs>; +def L2_loadruh_pci_pseudo : T_load_pci_pseudo <"memuh", IntRegs>; +def L2_loadri_pci_pseudo : T_load_pci_pseudo <"memw", IntRegs>; +def L2_loadrd_pci_pseudo : T_load_pci_pseudo <"memd", DoubleRegs>; + + +// TODO: memb_fifo and memh_fifo must take destination register as input. +// One-off circ loads - not enough in common to break into a class.
+let accessSize = ByteAccess in +def L2_loadalignb_pci : T_load_pci <"memb_fifo", DoubleRegs, s4_0Imm, 0b0100>; + +let accessSize = HalfWordAccess, opExtentAlign = 1 in +def L2_loadalignh_pci : T_load_pci <"memh_fifo", DoubleRegs, s4_1Imm, 0b0010>; + +// L[24]_load[wd]_locked: Load word/double with lock. +let isSoloAX = 1 in +class T_load_locked <string mnemonic, RegisterClass RC> + : LD0Inst <(outs RC:$dst), + (ins IntRegs:$src), + "$dst = "#mnemonic#"($src)"> { + bits<5> dst; + bits<5> src; + let IClass = 0b1001; + let Inst{27-21} = 0b0010000; + let Inst{20-16} = src; + let Inst{13-12} = !if (!eq(mnemonic, "memd_locked"), 0b01, 0b00); + let Inst{5} = 0; + let Inst{4-0} = dst; +} +let hasNewValue = 1, accessSize = WordAccess, opNewValue = 0 in + def L2_loadw_locked : T_load_locked <"memw_locked", IntRegs>; +let accessSize = DoubleWordAccess in + def L4_loadd_locked : T_load_locked <"memd_locked", DoubleRegs>; + +// S[24]_store[wd]_locked: Store word/double conditionally. +let isSoloAX = 1, isPredicateLate = 1 in +class T_store_locked <string mnemonic, RegisterClass RC> + : ST0Inst <(outs PredRegs:$Pd), (ins IntRegs:$Rs, RC:$Rt), + mnemonic#"($Rs, $Pd) = $Rt"> { + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1010; + let Inst{27-23} = 0b00001; + let Inst{22} = !if (!eq(mnemonic, "memw_locked"), 0b0, 0b1); + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{1-0} = Pd; +} + +let accessSize = WordAccess in +def S2_storew_locked : T_store_locked <"memw_locked", IntRegs>; + +let accessSize = DoubleWordAccess in +def S4_stored_locked : T_store_locked <"memd_locked", DoubleRegs>; + +//===----------------------------------------------------------------------===// +// Bit-reversed loads with auto-increment register +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_load_pbr<string mnemonic, RegisterClass RC, + MemAccessSize addrSize, bits<4> majOp> + : LDInst + <(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu), + "$dst = "#mnemonic#"($Rz ++ $Mu:brev)" , + [] , "$Rz = $_dst_" > { + + let accessSize = addrSize; + + bits<5> dst; + bits<5> Rz; + bits<1> Mu; + + let IClass = 0b1001; + + let Inst{27-25} = 0b111; + let Inst{24-21} = majOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12} = 0b0; + let Inst{7} = 0b0; + let Inst{4-0} = dst; + } + +let hasNewValue =1, opNewValue = 0 in { + def L2_loadrb_pbr : T_load_pbr <"memb", IntRegs, ByteAccess, 0b1000>; + def L2_loadrub_pbr : T_load_pbr <"memub", IntRegs, ByteAccess, 0b1001>; + def L2_loadrh_pbr : T_load_pbr <"memh", IntRegs, HalfWordAccess, 0b1010>; + def L2_loadruh_pbr : T_load_pbr <"memuh", IntRegs, HalfWordAccess, 0b1011>; + def L2_loadbsw2_pbr : T_load_pbr <"membh", IntRegs, HalfWordAccess, 0b0001>; + def L2_loadbzw2_pbr : T_load_pbr <"memubh", IntRegs, HalfWordAccess, 0b0011>; + def L2_loadri_pbr : T_load_pbr <"memw", IntRegs, WordAccess, 0b1100>; +} + +def L2_loadbzw4_pbr : T_load_pbr <"memubh", DoubleRegs, WordAccess, 0b0101>; +def L2_loadbsw4_pbr : T_load_pbr <"membh", DoubleRegs, WordAccess, 0b0111>; +def L2_loadrd_pbr : T_load_pbr <"memd", DoubleRegs, DoubleWordAccess, 0b1110>; + +def L2_loadalignb_pbr :T_load_pbr <"memb_fifo", DoubleRegs, ByteAccess, 0b0100>; +def L2_loadalignh_pbr :T_load_pbr <"memh_fifo", DoubleRegs, + HalfWordAccess, 0b0010>; + +//===----------------------------------------------------------------------===// +// Bit-reversed loads - Pseudo +// +// Please note that 'src2' doesn't appear in 
the AsmString because +// it's the same as 'dst'. +//===----------------------------------------------------------------------===// +let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in +class T_load_pbr_pseudo <string opc, RegisterClass RC> + : LDInstPI<(outs IntRegs:$_dst_, RC:$dst), + (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), + ".error \"$dst = "#opc#"($src1++$src3:brev)\"", + [], "$src1 = $_dst_">; + +def L2_loadrb_pbr_pseudo : T_load_pbr_pseudo <"memb", IntRegs>; +def L2_loadrub_pbr_pseudo : T_load_pbr_pseudo <"memub", IntRegs>; +def L2_loadrh_pbr_pseudo : T_load_pbr_pseudo <"memh", IntRegs>; +def L2_loadruh_pbr_pseudo : T_load_pbr_pseudo <"memuh", IntRegs>; +def L2_loadri_pbr_pseudo : T_load_pbr_pseudo <"memw", IntRegs>; +def L2_loadrd_pbr_pseudo : T_load_pbr_pseudo <"memd", DoubleRegs>; + +//===----------------------------------------------------------------------===// +// LD - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/ALU + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/ALU - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/COMPLEX + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/COMPLEX - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/MPYH + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Template Class +// MPYS / Multiply signed/unsigned halfwords +//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat] +//===----------------------------------------------------------------------===// + +let hasNewValue = 1, opNewValue = 0 in +class T_M2_mpy < bits<2> LHbits, bit isSat, bit isRnd, + bit hasShift, bit isUnsigned> + : MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = "#!if(isUnsigned,"mpyu","mpy")#"($Rs."#!if(LHbits{1},"h","l") + #", $Rt."#!if(LHbits{0},"h)","l)") + #!if(hasShift,":<<1","") + #!if(isRnd,":rnd","") + #!if(isSat,":sat",""), + [], "", M_tc_3x_SLOT23 > { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1100; + let Inst{23} = hasShift; + let Inst{22} = isUnsigned; + let Inst{21} = isRnd; + let Inst{7} = isSat; + let Inst{6-5} = LHbits; + let Inst{4-0} = Rd; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + } + +//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpy_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 0>; +def M2_mpy_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 0>; +def M2_mpy_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 0>; +def M2_mpy_lh_s0: T_M2_mpy<0b01, 0, 0, 0, 0>; +def M2_mpy_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 0>; +def M2_mpy_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 0>; +def M2_mpy_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 0>; +def M2_mpy_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 0>; + +//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpyu_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 1>; +def M2_mpyu_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 1>; +def M2_mpyu_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 1>; +def M2_mpyu_lh_s0: T_M2_mpy<0b01, 0, 0, 0, 1>; +def
M2_mpyu_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 1>; +def M2_mpyu_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 1>; +def M2_mpyu_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 1>; +def M2_mpyu_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 1>; + +//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]:rnd +def M2_mpy_rnd_ll_s1: T_M2_mpy <0b00, 0, 1, 1, 0>; +def M2_mpy_rnd_ll_s0: T_M2_mpy <0b00, 0, 1, 0, 0>; +def M2_mpy_rnd_lh_s1: T_M2_mpy <0b01, 0, 1, 1, 0>; +def M2_mpy_rnd_lh_s0: T_M2_mpy <0b01, 0, 1, 0, 0>; +def M2_mpy_rnd_hl_s1: T_M2_mpy <0b10, 0, 1, 1, 0>; +def M2_mpy_rnd_hl_s0: T_M2_mpy <0b10, 0, 1, 0, 0>; +def M2_mpy_rnd_hh_s1: T_M2_mpy <0b11, 0, 1, 1, 0>; +def M2_mpy_rnd_hh_s0: T_M2_mpy <0b11, 0, 1, 0, 0>; + +//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat] +let Defs = [USR_OVF] in { + def M2_mpy_sat_ll_s1: T_M2_mpy <0b00, 1, 0, 1, 0>; + def M2_mpy_sat_ll_s0: T_M2_mpy <0b00, 1, 0, 0, 0>; + def M2_mpy_sat_lh_s1: T_M2_mpy <0b01, 1, 0, 1, 0>; + def M2_mpy_sat_lh_s0: T_M2_mpy <0b01, 1, 0, 0, 0>; + def M2_mpy_sat_hl_s1: T_M2_mpy <0b10, 1, 0, 1, 0>; + def M2_mpy_sat_hl_s0: T_M2_mpy <0b10, 1, 0, 0, 0>; + def M2_mpy_sat_hh_s1: T_M2_mpy <0b11, 1, 0, 1, 0>; + def M2_mpy_sat_hh_s0: T_M2_mpy <0b11, 1, 0, 0, 0>; + + def M2_mpy_sat_rnd_ll_s1: T_M2_mpy <0b00, 1, 1, 1, 0>; + def M2_mpy_sat_rnd_ll_s0: T_M2_mpy <0b00, 1, 1, 0, 0>; + def M2_mpy_sat_rnd_lh_s1: T_M2_mpy <0b01, 1, 1, 1, 0>; + def M2_mpy_sat_rnd_lh_s0: T_M2_mpy <0b01, 1, 1, 0, 0>; + def M2_mpy_sat_rnd_hl_s1: T_M2_mpy <0b10, 1, 1, 1, 0>; + def M2_mpy_sat_rnd_hl_s0: T_M2_mpy <0b10, 1, 1, 0, 0>; + def M2_mpy_sat_rnd_hh_s1: T_M2_mpy <0b11, 1, 1, 1, 0>; + def M2_mpy_sat_rnd_hh_s0: T_M2_mpy <0b11, 1, 1, 0, 0>; +} + +//===----------------------------------------------------------------------===// +// Template Class +// MPYS / Multiply signed/unsigned halfwords and add/subtract the +// result from the accumulator.
+//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//===----------------------------------------------------------------------===// + +let hasNewValue = 1, opNewValue = 0 in +class T_M2_mpy_acc < bits<2> LHbits, bit isSat, bit isNac, + bit hasShift, bit isUnsigned > + : MInst_acc<(outs IntRegs:$Rx), (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt), + "$Rx "#!if(isNac,"-= ","+= ")#!if(isUnsigned,"mpyu","mpy") + #"($Rs."#!if(LHbits{1},"h","l") + #", $Rt."#!if(LHbits{0},"h)","l)") + #!if(hasShift,":<<1","") + #!if(isSat,":sat",""), + [], "$dst2 = $Rx", M_tc_3x_SLOT23 > { + bits<5> Rx; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + let Inst{27-24} = 0b1110; + let Inst{23} = hasShift; + let Inst{22} = isUnsigned; + let Inst{21} = isNac; + let Inst{7} = isSat; + let Inst{6-5} = LHbits; + let Inst{4-0} = Rx; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + } + +//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpy_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 0>; +def M2_mpy_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 0>; +def M2_mpy_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 0>; +def M2_mpy_acc_lh_s0: T_M2_mpy_acc <0b01, 0, 0, 0, 0>; +def M2_mpy_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 0>; +def M2_mpy_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 0>; +def M2_mpy_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 0>; +def M2_mpy_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 0>; + +//Rx += mpyu(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpyu_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 1>; +def M2_mpyu_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 1>; +def M2_mpyu_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 1>; +def M2_mpyu_acc_lh_s0: T_M2_mpy_acc <0b01, 0, 0, 0, 1>; +def M2_mpyu_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 1>; +def M2_mpyu_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 1>; +def M2_mpyu_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 1>; +def M2_mpyu_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 1>; + +//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpy_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 0>; +def M2_mpy_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 0>; +def M2_mpy_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 0>; +def M2_mpy_nac_lh_s0: T_M2_mpy_acc <0b01, 0, 1, 0, 0>; +def M2_mpy_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 0>; +def M2_mpy_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 0>; +def M2_mpy_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 0>; +def M2_mpy_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 0>; + +//Rx -= mpyu(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpyu_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 1>; +def M2_mpyu_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 1>; +def M2_mpyu_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 1>; +def M2_mpyu_nac_lh_s0: T_M2_mpy_acc <0b01, 0, 1, 0, 1>; +def M2_mpyu_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 1>; +def M2_mpyu_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 1>; +def M2_mpyu_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 1>; +def M2_mpyu_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 1>; + +//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat +def M2_mpy_acc_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 0, 1, 0>; +def M2_mpy_acc_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 0, 0, 0>; +def M2_mpy_acc_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 0, 1, 0>; +def M2_mpy_acc_sat_lh_s0: T_M2_mpy_acc <0b01, 1, 0, 0, 0>; +def M2_mpy_acc_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 0, 1, 0>; +def M2_mpy_acc_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 0, 0, 0>; +def M2_mpy_acc_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 0, 1, 0>; +def M2_mpy_acc_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 0, 0, 0>; + +//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat +def M2_mpy_nac_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 1, 1, 0>; +def M2_mpy_nac_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 1, 0, 0>; +def M2_mpy_nac_sat_lh_s1: 
T_M2_mpy_acc <0b01, 1, 1, 1, 0>; +def M2_mpy_nac_sat_lh_s0: T_M2_mpy_acc <0b01, 1, 1, 0, 0>; +def M2_mpy_nac_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 1, 1, 0>; +def M2_mpy_nac_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 1, 0, 0>; +def M2_mpy_nac_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 1, 1, 0>; +def M2_mpy_nac_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 1, 0, 0>; + +//===----------------------------------------------------------------------===// +// Template Class +// MPYS / Multiply signed/unsigned halfwords and add/subtract the +// result from the 64-bit destination register. +//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//===----------------------------------------------------------------------===// + +class T_M2_mpyd_acc < bits<2> LHbits, bit isNac, bit hasShift, bit isUnsigned> + : MInst_acc<(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt), + "$Rxx "#!if(isNac,"-= ","+= ")#!if(isUnsigned,"mpyu","mpy") + #"($Rs."#!if(LHbits{1},"h","l") + #", $Rt."#!if(LHbits{0},"h)","l)") + #!if(hasShift,":<<1",""), + [], "$dst2 = $Rxx", M_tc_3x_SLOT23 > { + bits<5> Rxx; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b0110; + let Inst{23} = hasShift; + let Inst{22} = isUnsigned; + let Inst{21} = isNac; + let Inst{7} = 0; + let Inst{6-5} = LHbits; + let Inst{4-0} = Rxx; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + } + +def M2_mpyd_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 0>; +def M2_mpyd_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 0>; +def M2_mpyd_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 0>; +def M2_mpyd_acc_ll_s0: T_M2_mpyd_acc <0b00, 0, 0, 0>; + +def M2_mpyd_acc_hh_s1: T_M2_mpyd_acc <0b11, 0, 1, 0>; +def M2_mpyd_acc_hl_s1: T_M2_mpyd_acc <0b10, 0, 1, 0>; +def M2_mpyd_acc_lh_s1: T_M2_mpyd_acc <0b01, 0, 1, 0>; +def M2_mpyd_acc_ll_s1: T_M2_mpyd_acc <0b00, 0, 1, 0>; + +def M2_mpyd_nac_hh_s0: T_M2_mpyd_acc <0b11, 1, 0, 0>; +def M2_mpyd_nac_hl_s0: T_M2_mpyd_acc <0b10, 1, 0, 0>; +def M2_mpyd_nac_lh_s0: T_M2_mpyd_acc <0b01, 1, 0, 0>; +def M2_mpyd_nac_ll_s0: T_M2_mpyd_acc <0b00, 1, 0, 0>; + +def M2_mpyd_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 0>; +def M2_mpyd_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 0>; +def M2_mpyd_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 0>; +def M2_mpyd_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 0>; + +def M2_mpyud_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 1>; +def M2_mpyud_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 1>; +def M2_mpyud_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 1>; +def M2_mpyud_acc_ll_s0: T_M2_mpyd_acc <0b00, 0, 0, 1>; + +def M2_mpyud_acc_hh_s1: T_M2_mpyd_acc <0b11, 0, 1, 1>; +def M2_mpyud_acc_hl_s1: T_M2_mpyd_acc <0b10, 0, 1, 1>; +def M2_mpyud_acc_lh_s1: T_M2_mpyd_acc <0b01, 0, 1, 1>; +def M2_mpyud_acc_ll_s1: T_M2_mpyd_acc <0b00, 0, 1, 1>; + +def M2_mpyud_nac_hh_s0: T_M2_mpyd_acc <0b11, 1, 0, 1>; +def M2_mpyud_nac_hl_s0: T_M2_mpyd_acc <0b10, 1, 0, 1>; +def M2_mpyud_nac_lh_s0: T_M2_mpyd_acc <0b01, 1, 0, 1>; +def M2_mpyud_nac_ll_s0: T_M2_mpyd_acc <0b00, 1, 0, 1>; + +def M2_mpyud_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 1>; +def M2_mpyud_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 1>; +def M2_mpyud_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 1>; +def M2_mpyud_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 1>; + +//===----------------------------------------------------------------------===// +// Template Class -- Vector Multiply +// Used for complex multiply real or imaginary, dual multiply and even halfwords +//===----------------------------------------------------------------------===// +class T_M2_vmpy < string opc, bits<3> MajOp, bits<3> MinOp, bit hasShift, + bit isRnd, bit isSat > + : MInst <(outs
DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rdd = "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","") + #!if(isRnd,":rnd","") + #!if(isSat,":sat",""), + [] > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat +let Defs = [USR_OVF] in { +def M2_vcmpy_s1_sat_i: T_M2_vmpy <"vcmpyi", 0b110, 0b110, 1, 0, 1>; +def M2_vcmpy_s0_sat_i: T_M2_vmpy <"vcmpyi", 0b010, 0b110, 0, 0, 1>; + +// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat +def M2_vcmpy_s1_sat_r: T_M2_vmpy <"vcmpyr", 0b101, 0b110, 1, 0, 1>; +def M2_vcmpy_s0_sat_r: T_M2_vmpy <"vcmpyr", 0b001, 0b110, 0, 0, 1>; + +// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat +def M2_vdmpys_s1: T_M2_vmpy <"vdmpy", 0b100, 0b100, 1, 0, 1>; +def M2_vdmpys_s0: T_M2_vmpy <"vdmpy", 0b000, 0b100, 0, 0, 1>; + +// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat +def M2_vmpy2es_s1: T_M2_vmpy <"vmpyeh", 0b100, 0b110, 1, 0, 1>; +def M2_vmpy2es_s0: T_M2_vmpy <"vmpyeh", 0b000, 0b110, 0, 0, 1>; + +//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmpyh_s0: T_M2_vmpy <"vmpywoh", 0b000, 0b111, 0, 0, 1>; +def M2_mmpyh_s1: T_M2_vmpy <"vmpywoh", 0b100, 0b111, 1, 0, 1>; +def M2_mmpyh_rs0: T_M2_vmpy <"vmpywoh", 0b001, 0b111, 0, 1, 1>; +def M2_mmpyh_rs1: T_M2_vmpy <"vmpywoh", 0b101, 0b111, 1, 1, 1>; + +//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmpyl_s0: T_M2_vmpy <"vmpyweh", 0b000, 0b101, 0, 0, 1>; +def M2_mmpyl_s1: T_M2_vmpy <"vmpyweh", 0b100, 0b101, 1, 0, 1>; +def M2_mmpyl_rs0: T_M2_vmpy <"vmpyweh", 0b001, 0b101, 0, 1, 1>; +def M2_mmpyl_rs1: T_M2_vmpy <"vmpyweh", 0b101, 0b101, 1, 1, 1>; + +//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmpyuh_s0: T_M2_vmpy <"vmpywouh", 0b010, 0b111, 0, 0, 1>; +def M2_mmpyuh_s1: T_M2_vmpy <"vmpywouh", 0b110, 0b111, 1, 0, 1>; +def M2_mmpyuh_rs0: T_M2_vmpy <"vmpywouh", 0b011, 0b111, 0, 1, 1>; +def M2_mmpyuh_rs1: T_M2_vmpy <"vmpywouh", 0b111, 0b111, 1, 1, 1>; + +//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmpyul_s0: T_M2_vmpy <"vmpyweuh", 0b010, 0b101, 0, 0, 1>; +def M2_mmpyul_s1: T_M2_vmpy <"vmpyweuh", 0b110, 0b101, 1, 0, 1>; +def M2_mmpyul_rs0: T_M2_vmpy <"vmpyweuh", 0b011, 0b101, 0, 1, 1>; +def M2_mmpyul_rs1: T_M2_vmpy <"vmpyweuh", 0b111, 0b101, 1, 1, 1>; +} + +let hasNewValue = 1, opNewValue = 0 in +class T_MType_mpy <string mnemonic, bits<4> RegTyBits, RegisterClass RC, + bits<3> MajOp, bits<3> MinOp, bit isSat = 0, bit isRnd = 0, + string op2Suffix = "", bit isRaw = 0, bit isHi = 0 > + : MInst <(outs IntRegs:$dst), (ins RC:$src1, RC:$src2), + "$dst = "#mnemonic + #"($src1, $src2"#op2Suffix#")" + #!if(MajOp{2}, ":<<1", "") + #!if(isRnd, ":rnd", "") + #!if(isSat, ":sat", "") + #!if(isRaw, !if(isHi, ":raw:hi", ":raw:lo"), ""), [] > { + bits<5> dst; + bits<5> src1; + bits<5> src2; + + let IClass = 0b1110; + + let Inst{27-24} = RegTyBits; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = 0b0; + let Inst{12-8} = src2; + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; + } + +class T_MType_vrcmpy <string mnemonic, bits<3> MajOp, bits<3> MinOp, bit isHi> + : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, 1, 1, "", 1, isHi>; + +class T_MType_dd <string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSat = 0, bit isRnd = 0 > + : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, isSat, isRnd>; + +class T_MType_rr1 <string 
mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSat = 0, bit isRnd = 0 > + : T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd>; + +class T_MType_rr2 <string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSat = 0, bit isRnd = 0, string op2str = "" > + : T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd, op2str>; + +def M2_vradduh : T_MType_dd <"vradduh", 0b000, 0b001, 0, 0>; +def M2_vdmpyrs_s0 : T_MType_dd <"vdmpy", 0b000, 0b000, 1, 1>; +def M2_vdmpyrs_s1 : T_MType_dd <"vdmpy", 0b100, 0b000, 1, 1>; + +let CextOpcode = "mpyi", InputType = "reg" in +def M2_mpyi : T_MType_rr1 <"mpyi", 0b000, 0b000>, ImmRegRel; + +def M2_mpy_up : T_MType_rr1 <"mpy", 0b000, 0b001>; +def M2_mpyu_up : T_MType_rr1 <"mpyu", 0b010, 0b001>; + +def M2_dpmpyss_rnd_s0 : T_MType_rr1 <"mpy", 0b001, 0b001, 0, 1>; + +def M2_vmpy2s_s0pack : T_MType_rr1 <"vmpyh", 0b001, 0b111, 1, 1>; +def M2_vmpy2s_s1pack : T_MType_rr1 <"vmpyh", 0b101, 0b111, 1, 1>; + +def M2_hmmpyh_rs1 : T_MType_rr2 <"mpy", 0b101, 0b100, 1, 1, ".h">; +def M2_hmmpyl_rs1 : T_MType_rr2 <"mpy", 0b111, 0b100, 1, 1, ".l">; + +def M2_cmpyrs_s0 : T_MType_rr2 <"cmpy", 0b001, 0b110, 1, 1>; +def M2_cmpyrs_s1 : T_MType_rr2 <"cmpy", 0b101, 0b110, 1, 1>; +def M2_cmpyrsc_s0 : T_MType_rr2 <"cmpy", 0b011, 0b110, 1, 1, "*">; +def M2_cmpyrsc_s1 : T_MType_rr2 <"cmpy", 0b111, 0b110, 1, 1, "*">; + +// V4 Instructions +def M2_vraddh : T_MType_dd <"vraddh", 0b001, 0b111, 0>; +def M2_mpysu_up : T_MType_rr1 <"mpysu", 0b011, 0b001, 0>; +def M2_mpy_up_s1 : T_MType_rr1 <"mpy", 0b101, 0b010, 0>; +def M2_mpy_up_s1_sat : T_MType_rr1 <"mpy", 0b111, 0b000, 1>; + +def M2_hmmpyh_s1 : T_MType_rr2 <"mpy", 0b101, 0b000, 1, 0, ".h">; +def M2_hmmpyl_s1 : T_MType_rr2 <"mpy", 0b101, 0b001, 1, 0, ".l">; + +def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>; +def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>; +def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>; + +let hasNewValue = 1, opNewValue = 0 in +class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern> + : MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, ImmOp:$u8), + "$Rd ="#!if(isNeg, "- ", "+ ")#"mpyi($Rs, #$u8)" , + pattern, "", M_tc_3x_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<8> u8; + + let IClass = 0b1110; + + let Inst{27-24} = 0b0000; + let Inst{23} = isNeg; + let Inst{13} = 0b0; + let Inst{4-0} = Rd; + let Inst{20-16} = Rs; + let Inst{12-5} = u8; + } + +let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in +def M2_mpysip : T_MType_mpy_ri <0, u8Ext, + [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u32ImmPred:$u8))]>; + +def M2_mpysin : T_MType_mpy_ri <1, u8Imm, + [(set (i32 IntRegs:$Rd), (ineg (mul IntRegs:$Rs, + u8ImmPred:$u8)))]>; + +// Assembler mapped to M2_mpyi +let isAsmParserOnly = 1 in +def M2_mpyui : MInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = mpyui($src1, $src2)">; + +// Rd=mpyi(Rs,#m9) +// s9 is NOT the same as m9 - but it works.. so far. +// Assembler maps to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8) +// depending on the value of m9. See Arch Spec.
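+// For illustration (assumed from the mapping described above, not quoted from
+// the Arch Spec):
+//     Rd=mpyi(Rs,#200)   is accepted as   Rd=+mpyi(Rs,#200)   (M2_mpysip)
+//     Rd=mpyi(Rs,#-200)  is accepted as   Rd=-mpyi(Rs,#200)   (M2_mpysin)
+// i.e. the sign of #m9 chooses between M2_mpysip and M2_mpysin and the
+// magnitude is encoded as the #u8 operand.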
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9, + CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1, + isAsmParserOnly = 1 in +def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2), + "$dst = mpyi($src1, #$src2)", + [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), + s32ImmPred:$src2))]>, ImmRegRel; + +let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3, + InputType = "imm" in +class T_MType_acc_ri <string mnemonic, bits<3> MajOp, Operand ImmOp, + list<dag> pattern = []> + : MInst < (outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, ImmOp:$src3), + "$dst "#mnemonic#"($src2, #$src3)", + pattern, "$src1 = $dst", M_tc_2_SLOT23> { + bits<5> dst; + bits<5> src2; + bits<8> src3; + + let IClass = 0b1110; + + let Inst{27-26} = 0b00; + let Inst{25-23} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = 0b0; + let Inst{12-5} = src3; + let Inst{4-0} = dst; + } + +let InputType = "reg", hasNewValue = 1 in +class T_MType_acc_rr <string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSwap = 0, list<dag> pattern = [], bit hasNot = 0, + bit isSat = 0, bit isShift = 0> + : MInst < (outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "$dst "#mnemonic#"($src2, "#!if(hasNot, "~$src3)","$src3)") + #!if(isShift, ":<<1", "") + #!if(isSat, ":sat", ""), + pattern, "$src1 = $dst", M_tc_2_SLOT23 > { + bits<5> dst; + bits<5> src2; + bits<5> src3; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1111; + let Inst{23-21} = MajOp; + let Inst{20-16} = !if(isSwap, src3, src2); + let Inst{13} = 0b0; + let Inst{12-8} = !if(isSwap, src2, src3); + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; + } + +let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in { + def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8Ext, + [(set (i32 IntRegs:$dst), + (add (mul IntRegs:$src2, u32ImmPred:$src3), + IntRegs:$src1))]>, ImmRegRel; + + def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0, + [(set (i32 IntRegs:$dst), + (add (mul IntRegs:$src2, IntRegs:$src3), + IntRegs:$src1))]>, ImmRegRel; +} + +let CextOpcode = "ADD_acc" in { + let isExtentSigned = 1 in + def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext, + [(set (i32 IntRegs:$dst), + (add (add (i32 IntRegs:$src2), s32ImmPred:$src3), + (i32 IntRegs:$src1)))]>, ImmRegRel; + + def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, + [(set (i32 IntRegs:$dst), + (add (add (i32 IntRegs:$src2), (i32 IntRegs:$src3)), + (i32 IntRegs:$src1)))]>, ImmRegRel; +} + +let CextOpcode = "SUB_acc" in { + let isExtentSigned = 1 in + def M2_naccii : T_MType_acc_ri <"-= add", 0b101, s8Ext>, ImmRegRel; + + def M2_nacci : T_MType_acc_rr <"-= add", 0b100, 0b001, 0>, ImmRegRel; +} + +let Itinerary = M_tc_3x_SLOT23 in +def M2_macsin : T_MType_acc_ri <"-= mpyi", 0b011, u8Ext>; + +def M2_xor_xacc : T_MType_acc_rr < "^= xor", 0b100, 0b011, 0>; +def M2_subacc : T_MType_acc_rr <"+= sub", 0b000, 0b011, 1>; + +class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp, + PatLeaf ImmPred> + : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)), + (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>; + +class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp> + : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))), + (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>; +def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32ImmPred>; + +def : T_MType_acc_pat1 <M2_naccii, add, sub, s32ImmPred>; +def : 
T_MType_acc_pat2 <M2_nacci, add, sub>; + +//===----------------------------------------------------------------------===// +// Template Class -- XType Vector Instructions +//===----------------------------------------------------------------------===// +class T_XTYPE_Vect < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj > + : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rdd = "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"), + [] > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +class T_XTYPE_Vect_acc < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj > + : MInst <(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rdd += "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"), + [], "$dst2 = $Rdd",M_tc_3x_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +class T_XTYPE_Vect_diff < bits<3> MajOp, string opc > + : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rtt, DoubleRegs:$Rss), + "$Rdd = "#opc#"($Rtt, $Rss)", + [], "",M_tc_2_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MajOp; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +// Vector reduce add unsigned bytes: Rdd32=vrmpybu(Rss32,Rtt32) +def A2_vraddub: T_XTYPE_Vect <"vraddub", 0b010, 0b001, 0>; +def A2_vraddub_acc: T_XTYPE_Vect_acc <"vraddub", 0b010, 0b001, 0>; + +// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt) +def A2_vrsadub: T_XTYPE_Vect <"vrsadub", 0b010, 0b010, 0>; +def A2_vrsadub_acc: T_XTYPE_Vect_acc <"vrsadub", 0b010, 0b010, 0>; + +// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss) +def M2_vabsdiffh: T_XTYPE_Vect_diff<0b011, "vabsdiffh">; + +// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss) +def M2_vabsdiffw: T_XTYPE_Vect_diff<0b001, "vabsdiffw">; + +// Vector reduce complex multiply real or imaginary: +// Rdd[+]=vrcmpy[ir](Rss,Rtt[*]) +def M2_vrcmpyi_s0: T_XTYPE_Vect <"vrcmpyi", 0b000, 0b000, 0>; +def M2_vrcmpyi_s0c: T_XTYPE_Vect <"vrcmpyi", 0b010, 0b000, 1>; +def M2_vrcmaci_s0: T_XTYPE_Vect_acc <"vrcmpyi", 0b000, 0b000, 0>; +def M2_vrcmaci_s0c: T_XTYPE_Vect_acc <"vrcmpyi", 0b010, 0b000, 1>; + +def M2_vrcmpyr_s0: T_XTYPE_Vect <"vrcmpyr", 0b000, 0b001, 0>; +def M2_vrcmpyr_s0c: T_XTYPE_Vect <"vrcmpyr", 0b011, 0b001, 1>; +def M2_vrcmacr_s0: T_XTYPE_Vect_acc <"vrcmpyr", 0b000, 0b001, 0>; +def M2_vrcmacr_s0c: T_XTYPE_Vect_acc <"vrcmpyr", 0b011, 0b001, 1>; + +// Vector reduce halfwords: +// Rdd[+]=vrmpyh(Rss,Rtt) +def M2_vrmpy_s0: T_XTYPE_Vect <"vrmpyh", 0b000, 0b010, 0>; +def M2_vrmac_s0: T_XTYPE_Vect_acc <"vrmpyh", 0b000, 0b010, 0>; + +//===----------------------------------------------------------------------===// +// Template Class -- Vector Multiply with accumulation.
+// Used for complex multiply real or imaginary, dual multiply and even halfwords +//===----------------------------------------------------------------------===// +let Defs = [USR_OVF] in +class T_M2_vmpy_acc_sat < string opc, bits<3> MajOp, bits<3> MinOp, + bit hasShift, bit isRnd > + : MInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","") + #!if(isRnd,":rnd","")#":sat", + [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rxx; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +class T_M2_vmpy_acc < string opc, bits<3> MajOp, bits<3> MinOp, + bit hasShift, bit isRnd > + : MInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","") + #!if(isRnd,":rnd",""), + [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rxx; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +// Vector multiply word by signed half with accumulation +// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmacls_s1: T_M2_vmpy_acc_sat <"vmpyweh", 0b100, 0b101, 1, 0>; +def M2_mmacls_s0: T_M2_vmpy_acc_sat <"vmpyweh", 0b000, 0b101, 0, 0>; +def M2_mmacls_rs1: T_M2_vmpy_acc_sat <"vmpyweh", 0b101, 0b101, 1, 1>; +def M2_mmacls_rs0: T_M2_vmpy_acc_sat <"vmpyweh", 0b001, 0b101, 0, 1>; + +def M2_mmachs_s1: T_M2_vmpy_acc_sat <"vmpywoh", 0b100, 0b111, 1, 0>; +def M2_mmachs_s0: T_M2_vmpy_acc_sat <"vmpywoh", 0b000, 0b111, 0, 0>; +def M2_mmachs_rs1: T_M2_vmpy_acc_sat <"vmpywoh", 0b101, 0b111, 1, 1>; +def M2_mmachs_rs0: T_M2_vmpy_acc_sat <"vmpywoh", 0b001, 0b111, 0, 1>; + +// Vector multiply word by unsigned half with accumulation +// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmaculs_s1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b110, 0b101, 1, 0>; +def M2_mmaculs_s0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b010, 0b101, 0, 0>; +def M2_mmaculs_rs1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b111, 0b101, 1, 1>; +def M2_mmaculs_rs0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b011, 0b101, 0, 1>; + +def M2_mmacuhs_s1: T_M2_vmpy_acc_sat <"vmpywouh", 0b110, 0b111, 1, 0>; +def M2_mmacuhs_s0: T_M2_vmpy_acc_sat <"vmpywouh", 0b010, 0b111, 0, 0>; +def M2_mmacuhs_rs1: T_M2_vmpy_acc_sat <"vmpywouh", 0b111, 0b111, 1, 1>; +def M2_mmacuhs_rs0: T_M2_vmpy_acc_sat <"vmpywouh", 0b011, 0b111, 0, 1>; + +// Vector multiply even halfwords with accumulation +// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat] +def M2_vmac2es: T_M2_vmpy_acc <"vmpyeh", 0b001, 0b010, 0, 0>; +def M2_vmac2es_s1: T_M2_vmpy_acc_sat <"vmpyeh", 0b100, 0b110, 1, 0>; +def M2_vmac2es_s0: T_M2_vmpy_acc_sat <"vmpyeh", 0b000, 0b110, 0, 0>; + +// Vector dual multiply with accumulation +// Rxx+=vdmpy(Rss,Rtt)[:sat] +def M2_vdmacs_s1: T_M2_vmpy_acc_sat <"vdmpy", 0b100, 0b100, 1, 0>; +def M2_vdmacs_s0: T_M2_vmpy_acc_sat <"vdmpy", 0b000, 0b100, 0, 0>; + +// Vector complex multiply real or imaginary with accumulation +// Rxx+=vcmpy[ir](Rss,Rtt):sat +def M2_vcmac_s0_sat_r: T_M2_vmpy_acc_sat <"vcmpyr", 0b001, 0b100, 0, 0>; +def M2_vcmac_s0_sat_i: T_M2_vmpy_acc_sat <"vcmpyi", 0b010, 0b100, 0, 0>; + +//===----------------------------------------------------------------------===// +// Template Class -- Multiply signed/unsigned halfwords with and without +// 
saturation and rounding +//===----------------------------------------------------------------------===// +class T_M2_mpyd < bits<2> LHbits, bit isRnd, bit hasShift, bit isUnsigned > + : MInst < (outs DoubleRegs:$Rdd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rdd = "#!if(isUnsigned,"mpyu","mpy")#"($Rs."#!if(LHbits{1},"h","l") + #", $Rt."#!if(LHbits{0},"h)","l)") + #!if(hasShift,":<<1","") + #!if(isRnd,":rnd",""), + [] > { + bits<5> Rdd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b0100; + let Inst{23} = hasShift; + let Inst{22} = isUnsigned; + let Inst{21} = isRnd; + let Inst{6-5} = LHbits; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; +} + +def M2_mpyd_hh_s0: T_M2_mpyd<0b11, 0, 0, 0>; +def M2_mpyd_hl_s0: T_M2_mpyd<0b10, 0, 0, 0>; +def M2_mpyd_lh_s0: T_M2_mpyd<0b01, 0, 0, 0>; +def M2_mpyd_ll_s0: T_M2_mpyd<0b00, 0, 0, 0>; + +def M2_mpyd_hh_s1: T_M2_mpyd<0b11, 0, 1, 0>; +def M2_mpyd_hl_s1: T_M2_mpyd<0b10, 0, 1, 0>; +def M2_mpyd_lh_s1: T_M2_mpyd<0b01, 0, 1, 0>; +def M2_mpyd_ll_s1: T_M2_mpyd<0b00, 0, 1, 0>; + +def M2_mpyd_rnd_hh_s0: T_M2_mpyd<0b11, 1, 0, 0>; +def M2_mpyd_rnd_hl_s0: T_M2_mpyd<0b10, 1, 0, 0>; +def M2_mpyd_rnd_lh_s0: T_M2_mpyd<0b01, 1, 0, 0>; +def M2_mpyd_rnd_ll_s0: T_M2_mpyd<0b00, 1, 0, 0>; + +def M2_mpyd_rnd_hh_s1: T_M2_mpyd<0b11, 1, 1, 0>; +def M2_mpyd_rnd_hl_s1: T_M2_mpyd<0b10, 1, 1, 0>; +def M2_mpyd_rnd_lh_s1: T_M2_mpyd<0b01, 1, 1, 0>; +def M2_mpyd_rnd_ll_s1: T_M2_mpyd<0b00, 1, 1, 0>; + +//Rdd=mpyu(Rs.[HL],Rt.[HL])[:<<1] +def M2_mpyud_hh_s0: T_M2_mpyd<0b11, 0, 0, 1>; +def M2_mpyud_hl_s0: T_M2_mpyd<0b10, 0, 0, 1>; +def M2_mpyud_lh_s0: T_M2_mpyd<0b01, 0, 0, 1>; +def M2_mpyud_ll_s0: T_M2_mpyd<0b00, 0, 0, 1>; + +def M2_mpyud_hh_s1: T_M2_mpyd<0b11, 0, 1, 1>; +def M2_mpyud_hl_s1: T_M2_mpyd<0b10, 0, 1, 1>; +def M2_mpyud_lh_s1: T_M2_mpyd<0b01, 0, 1, 1>; +def M2_mpyud_ll_s1: T_M2_mpyd<0b00, 0, 1, 1>; + +//===----------------------------------------------------------------------===// +// Template Class for xtype mpy: +// Vector multiply +// Complex multiply +// multiply 32X32 and use full result +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_XTYPE_mpy64 <string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSat, bit hasShift, bit isConj> + : MInst <(outs DoubleRegs:$Rdd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rdd = "#mnemonic#"($Rs, $Rt"#!if(isConj,"*)",")") + #!if(hasShift,":<<1","") + #!if(isSat,":sat",""), + [] > { + bits<5> Rdd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b0101; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + } + +//===----------------------------------------------------------------------===// +// Template Class for xtype mpy with accumulation into 64-bit: +// Vector multiply +// Complex multiply +// multiply 32X32 and use full result +//===----------------------------------------------------------------------===// +class T_XTYPE_mpy64_acc <string op1, string op2, bits<3> MajOp, bits<3> MinOp, + bit isSat, bit hasShift, bit isConj> + : MInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt), + "$Rxx "#op2#"= "#op1#"($Rs, $Rt"#!if(isConj,"*)",")") + #!if(hasShift,":<<1","") + #!if(isSat,":sat",""), + + [] , "$dst2 = $Rxx" > { + bits<5> Rxx; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b0111; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-5} = 
MinOp; + let Inst{4-0} = Rxx; + } + +// MPY - Multiply and use full result +// Rdd = mpy[u](Rs,Rt) +def M2_dpmpyss_s0 : T_XTYPE_mpy64 < "mpy", 0b000, 0b000, 0, 0, 0>; +def M2_dpmpyuu_s0 : T_XTYPE_mpy64 < "mpyu", 0b010, 0b000, 0, 0, 0>; + +// Rxx[+-]= mpy[u](Rs,Rt) +def M2_dpmpyss_acc_s0 : T_XTYPE_mpy64_acc < "mpy", "+", 0b000, 0b000, 0, 0, 0>; +def M2_dpmpyss_nac_s0 : T_XTYPE_mpy64_acc < "mpy", "-", 0b001, 0b000, 0, 0, 0>; +def M2_dpmpyuu_acc_s0 : T_XTYPE_mpy64_acc < "mpyu", "+", 0b010, 0b000, 0, 0, 0>; +def M2_dpmpyuu_nac_s0 : T_XTYPE_mpy64_acc < "mpyu", "-", 0b011, 0b000, 0, 0, 0>; + +// Complex multiply real or imaginary +// Rxx=cmpy[ir](Rs,Rt) +def M2_cmpyi_s0 : T_XTYPE_mpy64 < "cmpyi", 0b000, 0b001, 0, 0, 0>; +def M2_cmpyr_s0 : T_XTYPE_mpy64 < "cmpyr", 0b000, 0b010, 0, 0, 0>; + +// Rxx+=cmpy[ir](Rs,Rt) +def M2_cmaci_s0 : T_XTYPE_mpy64_acc < "cmpyi", "+", 0b000, 0b001, 0, 0, 0>; +def M2_cmacr_s0 : T_XTYPE_mpy64_acc < "cmpyr", "+", 0b000, 0b010, 0, 0, 0>; + +// Complex multiply +// Rdd=cmpy(Rs,Rt)[:<<]:sat +def M2_cmpys_s0 : T_XTYPE_mpy64 < "cmpy", 0b000, 0b110, 1, 0, 0>; +def M2_cmpys_s1 : T_XTYPE_mpy64 < "cmpy", 0b100, 0b110, 1, 1, 0>; + +// Rdd=cmpy(Rs,Rt*)[:<<]:sat +def M2_cmpysc_s0 : T_XTYPE_mpy64 < "cmpy", 0b010, 0b110, 1, 0, 1>; +def M2_cmpysc_s1 : T_XTYPE_mpy64 < "cmpy", 0b110, 0b110, 1, 1, 1>; + +// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat +def M2_cmacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b000, 0b110, 1, 0, 0>; +def M2_cnacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b000, 0b111, 1, 0, 0>; +def M2_cmacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b100, 0b110, 1, 1, 0>; +def M2_cnacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b100, 0b111, 1, 1, 0>; + +// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat +def M2_cmacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b010, 0b110, 1, 0, 1>; +def M2_cnacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b010, 0b111, 1, 0, 1>; +def M2_cmacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b110, 0b110, 1, 1, 1>; +def M2_cnacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b110, 0b111, 1, 1, 1>; + +// Vector multiply halfwords +// Rdd=vmpyh(Rs,Rt)[:<<]:sat +//let Defs = [USR_OVF] in { + def M2_vmpy2s_s1 : T_XTYPE_mpy64 < "vmpyh", 0b100, 0b101, 1, 1, 0>; + def M2_vmpy2s_s0 : T_XTYPE_mpy64 < "vmpyh", 0b000, 0b101, 1, 0, 0>; +//} + +// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat] +def M2_vmac2 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b001, 0b001, 0, 0, 0>; +def M2_vmac2s_s1 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b100, 0b101, 1, 1, 0>; +def M2_vmac2s_s0 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b000, 0b101, 1, 0, 0>; + +def: Pat<(i64 (mul (i64 (anyext (i32 IntRegs:$src1))), + (i64 (anyext (i32 IntRegs:$src2))))), + (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>; + +def: Pat<(i64 (mul (i64 (sext (i32 IntRegs:$src1))), + (i64 (sext (i32 IntRegs:$src2))))), + (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>; + +def: Pat<(i64 (mul (is_sext_i32:$src1), + (is_sext_i32:$src2))), + (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>; + +// Multiply and accumulate, use full result. 
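+// For instance (an illustrative C-level view of the patterns below):
+//     int64_t acc; int32_t a, b;
+//     acc += (int64_t)a * (int64_t)b;
+// matches (add i64, (mul (sext i32), (sext i32))) and selects
+// M2_dpmpyss_acc_s0, which prints as, e.g., "r5:4 += mpy(r0, r1)".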
+// Rxx[+-]=mpy(Rs,Rt) + +def: Pat<(i64 (add (i64 DoubleRegs:$src1), + (mul (i64 (sext (i32 IntRegs:$src2))), + (i64 (sext (i32 IntRegs:$src3)))))), + (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (sub (i64 DoubleRegs:$src1), + (mul (i64 (sext (i32 IntRegs:$src2))), + (i64 (sext (i32 IntRegs:$src3)))))), + (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (add (i64 DoubleRegs:$src1), + (mul (i64 (anyext (i32 IntRegs:$src2))), + (i64 (anyext (i32 IntRegs:$src3)))))), + (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (add (i64 DoubleRegs:$src1), + (mul (i64 (zext (i32 IntRegs:$src2))), + (i64 (zext (i32 IntRegs:$src3)))))), + (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (sub (i64 DoubleRegs:$src1), + (mul (i64 (anyext (i32 IntRegs:$src2))), + (i64 (anyext (i32 IntRegs:$src3)))))), + (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (sub (i64 DoubleRegs:$src1), + (mul (i64 (zext (i32 IntRegs:$src2))), + (i64 (zext (i32 IntRegs:$src3)))))), + (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +//===----------------------------------------------------------------------===// +// MTYPE/MPYH - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/MPYS + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/MPYS - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/VB + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/VB - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/VH + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/VH - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ST + +//===----------------------------------------------------------------------===// +/// +// Store doubleword. 
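For orientation, a minimal C sketch (names invented) of a doubleword store through a post-incremented pointer, the addressing form handled by the post-increment store templates that follow; whether the post-increment form, e.g. "memd($Rx++#8) = $Rtt", is actually chosen depends on addressing-mode optimization in the backend.

    #include <stdint.h>

    /* Copy n 64-bit elements; each trip stores through a pointer that is
       then advanced by 8, the shape the S2_storerd_pi-style definitions
       below are meant for. */
    void copy64(int64_t *dst, const int64_t *src, int n) {
        for (int i = 0; i < n; ++i)
            *dst++ = *src++;
    }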
+//===----------------------------------------------------------------------===// +// Template class for non-predicated post increment stores with immediate offset +//===----------------------------------------------------------------------===// +let isPredicable = 1, hasSideEffects = 0, addrMode = PostInc in +class T_store_pi <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<4> MajOp, bit isHalf > + : STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$offset, RC:$src2), + mnemonic#"($src1++#$offset) = $src2"#!if(isHalf, ".h", ""), + [], "$src1 = $_dst_" >, + AddrModeRel { + bits<5> src1; + bits<5> src2; + bits<7> offset; + bits<4> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3}, + !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}))); + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, !if(isHalf,0,1)); + + let IClass = 0b1010; + + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = 0b0; + let Inst{12-8} = src2; + let Inst{7} = 0b0; + let Inst{6-3} = offsetBits; + let Inst{1} = 0b0; + } + +//===----------------------------------------------------------------------===// +// Template class for predicated post increment stores with immediate offset +//===----------------------------------------------------------------------===// +let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in +class T_pstore_pi <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<4> MajOp, bit isHalf, bit isPredNot, bit isPredNew> + : STInst <(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3), + !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2++#$offset) = $src3"#!if(isHalf, ".h", ""), + [], "$src2 = $_dst_" >, + AddrModeRel { + bits<2> src1; + bits<5> src2; + bits<7> offset; + bits<5> src3; + bits<4> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3}, + !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}))); + + // Store upper-half and store doubleword cannot be NV. 
+ let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, !if(isHalf,0,1)); + let isPredicatedNew = isPredNew; + let isPredicatedFalse = isPredNot; + + let IClass = 0b1010; + + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = 0b1; + let Inst{12-8} = src3; + let Inst{7} = isPredNew; + let Inst{6-3} = offsetBits; + let Inst{2} = isPredNot; + let Inst{1-0} = src1; + } + +multiclass ST_PostInc<string mnemonic, string BaseOp, RegisterClass RC, + Operand ImmOp, bits<4> MajOp, bit isHalf = 0 > { + + let BaseOpcode = "POST_"#BaseOp in { + def S2_#NAME#_pi : T_store_pi <mnemonic, RC, ImmOp, MajOp, isHalf>; + + // Predicated + def S2_p#NAME#t_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, isHalf, 0, 0>; + def S2_p#NAME#f_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, isHalf, 1, 0>; + + // Predicated new + def S2_p#NAME#tnew_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, + isHalf, 0, 1>; + def S2_p#NAME#fnew_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, + isHalf, 1, 1>; + } +} + +let accessSize = ByteAccess in +defm storerb: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm, 0b1000>; + +let accessSize = HalfWordAccess in +defm storerh: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm, 0b1010>; + +let accessSize = WordAccess in +defm storeri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm, 0b1100>; + +let accessSize = DoubleWordAccess in +defm storerd: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm, 0b1110>; + +let accessSize = HalfWordAccess, isNVStorable = 0 in +defm storerf: ST_PostInc <"memh", "STrih_H", IntRegs, s4_1Imm, 0b1011, 1>; + +class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset, + InstHexagon MI> + : Pat<(Store Value:$src1, I32:$src2, Offset:$offset), + (MI I32:$src2, imm:$offset, Value:$src1)>; + +def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred, S2_storerb_pi>; +def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>; +def: Storepi_pat<post_store, I32, s4_2ImmPred, S2_storeri_pi>; +def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>; + +//===----------------------------------------------------------------------===// +// Template class for post increment stores with register offset. +//===----------------------------------------------------------------------===// +class T_store_pr <string mnemonic, RegisterClass RC, bits<3> MajOp, + MemAccessSize AccessSz, bit isHalf = 0> + : STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ModRegs:$src2, RC:$src3), + mnemonic#"($src1++$src2) = $src3"#!if(isHalf, ".h", ""), + [], "$src1 = $_dst_" > { + bits<5> src1; + bits<1> src2; + bits<5> src3; + let accessSize = AccessSz; + + // Store upper-half and store doubleword cannot be NV. 
+ let isNVStorable = !if(!eq(mnemonic,"memd"), 0, !if(isHalf,0,1)); + + let IClass = 0b1010; + + let Inst{27-24} = 0b1101; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = src2; + let Inst{12-8} = src3; + let Inst{7} = 0b0; + } + +def S2_storerb_pr : T_store_pr<"memb", IntRegs, 0b000, ByteAccess>; +def S2_storerh_pr : T_store_pr<"memh", IntRegs, 0b010, HalfWordAccess>; +def S2_storeri_pr : T_store_pr<"memw", IntRegs, 0b100, WordAccess>; +def S2_storerd_pr : T_store_pr<"memd", DoubleRegs, 0b110, DoubleWordAccess>; +def S2_storerf_pr : T_store_pr<"memh", IntRegs, 0b011, HalfWordAccess, 1>; + +let opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in +class T_store_io <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<3> MajOp, bit isH = 0> + : STInst <(outs), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + mnemonic#"($src1+#$src2) = $src3"#!if(isH,".h","")>, + AddrModeRel, ImmRegRel { + bits<5> src1; + bits<14> src2; // Actual address offset + bits<5> src3; + bits<11> offsetBits; // Represents offset encoding + + string ImmOpStr = !cast<string>(ImmOp); + + let opExtentBits = !if (!eq(ImmOpStr, "s11_3Ext"), 14, + !if (!eq(ImmOpStr, "s11_2Ext"), 13, + !if (!eq(ImmOpStr, "s11_1Ext"), 12, + /* s11_0Ext */ 11))); + let offsetBits = !if (!eq(ImmOpStr, "s11_3Ext"), src2{13-3}, + !if (!eq(ImmOpStr, "s11_2Ext"), src2{12-2}, + !if (!eq(ImmOpStr, "s11_1Ext"), src2{11-1}, + /* s11_0Ext */ src2{10-0}))); + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1)); + let IClass = 0b1010; + + let Inst{27} = 0b0; + let Inst{26-25} = offsetBits{10-9}; + let Inst{24} = 0b1; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = offsetBits{8}; + let Inst{12-8} = src3; + let Inst{7-0} = offsetBits{7-0}; + } + +let opExtendable = 2, isPredicated = 1 in +class T_pstore_io <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<3>MajOp, bit PredNot, bit isPredNew, bit isH = 0> + : STInst <(outs), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + !if(PredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2+#$src3) = $src4"#!if(isH,".h",""), + [],"",V2LDST_tc_st_SLOT01 >, + AddrModeRel, ImmRegRel { + bits<2> src1; + bits<5> src2; + bits<9> src3; // Actual address offset + bits<5> src4; + bits<6> offsetBits; // Represents offset encoding + + let isPredicatedNew = isPredNew; + let isPredicatedFalse = PredNot; + + string ImmOpStr = !cast<string>(ImmOp); + let opExtentBits = !if (!eq(ImmOpStr, "u6_3Ext"), 9, + !if (!eq(ImmOpStr, "u6_2Ext"), 8, + !if (!eq(ImmOpStr, "u6_1Ext"), 7, + /* u6_0Ext */ 6))); + let offsetBits = !if (!eq(ImmOpStr, "u6_3Ext"), src3{8-3}, + !if (!eq(ImmOpStr, "u6_2Ext"), src3{7-2}, + !if (!eq(ImmOpStr, "u6_1Ext"), src3{6-1}, + /* u6_0Ext */ src3{5-0}))); + // Store upper-half and store doubleword cannot be NV. 
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1)); + + let IClass = 0b0100; + + let Inst{27} = 0b0; + let Inst{26} = PredNot; + let Inst{25} = isPredNew; + let Inst{24} = 0b0; + let Inst{23-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = offsetBits{5}; + let Inst{12-8} = src4; + let Inst{7-3} = offsetBits{4-0}; + let Inst{1-0} = src1; + } + +let isExtendable = 1, hasSideEffects = 0 in +multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, Operand predImmOp, bits<3> MajOp, bit isH = 0> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { + def S2_#NAME#_io : T_store_io <mnemonic, RC, ImmOp, MajOp, isH>; + + // Predicated + def S2_p#NAME#t_io : T_pstore_io<mnemonic, RC, predImmOp, MajOp, 0, 0, isH>; + def S2_p#NAME#f_io : T_pstore_io<mnemonic, RC, predImmOp, MajOp, 1, 0, isH>; + + // Predicated new + def S4_p#NAME#tnew_io : T_pstore_io <mnemonic, RC, predImmOp, + MajOp, 0, 1, isH>; + def S4_p#NAME#fnew_io : T_pstore_io <mnemonic, RC, predImmOp, + MajOp, 1, 1, isH>; + } +} + +let addrMode = BaseImmOffset, InputType = "imm" in { + let accessSize = ByteAccess in + defm storerb: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext, u6_0Ext, 0b000>; + + let accessSize = HalfWordAccess, opExtentAlign = 1 in + defm storerh: ST_Idxd < "memh", "STrih", IntRegs, s11_1Ext, u6_1Ext, 0b010>; + + let accessSize = WordAccess, opExtentAlign = 2 in + defm storeri: ST_Idxd < "memw", "STriw", IntRegs, s11_2Ext, u6_2Ext, 0b100>; + + let accessSize = DoubleWordAccess, isNVStorable = 0, opExtentAlign = 3 in + defm storerd: ST_Idxd < "memd", "STrid", DoubleRegs, s11_3Ext, + u6_3Ext, 0b110>; + + let accessSize = HalfWordAccess, opExtentAlign = 1 in + defm storerf: ST_Idxd < "memh", "STrif", IntRegs, s11_1Ext, + u6_1Ext, 0b011, 1>; +} + +// Patterns for generating stores, where the address takes different forms: +// - frameindex, +// - frameindex + offset, +// - base + offset, +// - simple (base address without offset). +// These would usually be used together (via Storex_pat defined below), but +// in some cases one may want to apply different properties (such as +// AddedComplexity) to the individual patterns. +class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI> + : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>; +class Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + InstHexagon MI> + : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; +class Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + InstHexagon MI> + : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; +class Storex_simple_pat<PatFrag Store, PatFrag Value, InstHexagon MI> + : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)), + (MI IntRegs:$Rs, 0, Value:$Rt)>; + +// Patterns for generating stores, where the address takes different forms, +// and where the value being stored is transformed through the value modifier +// ValueMod. The address forms are same as above. 
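A small C sketch (illustrative only, names invented) of a store whose value must first be narrowed, which is the case the ValueMod variants below address: the stored i64 is expected to go through the LoReg modifier before an ordinary byte store such as S2_storerb_io.

    #include <stdint.h>

    void store_low_byte(uint8_t *p, uint64_t v) {
        /* truncstorei8 of a 64-bit value: expected to be selected via the
           Storexm patterns with the LoReg value modifier. */
        *p = (uint8_t)v;
    }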
+class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, + InstHexagon MI> + : Pat<(Store Value:$Rs, AddrFI:$fi), + (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>; +class Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + PatFrag ValueMod, InstHexagon MI> + : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; +class Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + PatFrag ValueMod, InstHexagon MI> + : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; +class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, + InstHexagon MI> + : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)), + (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>; + +multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred, + InstHexagon MI> { + def: Storex_fi_pat <Store, Value, MI>; + def: Storex_fi_add_pat <Store, Value, ImmPred, MI>; + def: Storex_add_pat <Store, Value, ImmPred, MI>; +} + +multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred, + PatFrag ValueMod, InstHexagon MI> { + def: Storexm_fi_pat <Store, Value, ValueMod, MI>; + def: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>; + def: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>; +} + +// Regular stores in the DAG have two operands: value and address. +// Atomic stores also have two, but they are reversed: address, value. +// To use atomic stores with the patterns, they need to have their operands +// swapped. This relies on the knowledge that the F.Fragment uses names +// "ptr" and "val". +class SwapSt<PatFrag F> + : PatFrag<(ops node:$val, node:$ptr), F.Fragment>; + +let AddedComplexity = 20 in { + defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>; + defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>; + defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>; + defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>; + + defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>; + defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>; + defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>; + defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>; +} + +// Simple patterns should be tried with the least priority. +def: Storex_simple_pat<truncstorei8, I32, S2_storerb_io>; +def: Storex_simple_pat<truncstorei16, I32, S2_storerh_io>; +def: Storex_simple_pat<store, I32, S2_storeri_io>; +def: Storex_simple_pat<store, I64, S2_storerd_io>; + +def: Storex_simple_pat<SwapSt<atomic_store_8>, I32, S2_storerb_io>; +def: Storex_simple_pat<SwapSt<atomic_store_16>, I32, S2_storerh_io>; +def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>; +def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>; + +let AddedComplexity = 20 in { + defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>; + defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>; + defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>; +} + +def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>; +def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>; +def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>; + +// Store predicate. 
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, + isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in +def STriw_pred : STInst<(outs), + (ins IntRegs:$addr, s11_2Ext:$off, PredRegs:$src1), + ".error \"should not emit\"", []>; + +// S2_allocframe: Allocate stack frame. +let Defs = [R29, R30], Uses = [R29, R31, R30], + hasSideEffects = 0, accessSize = DoubleWordAccess in +def S2_allocframe: ST0Inst < + (outs), (ins u11_3Imm:$u11_3), + "allocframe(#$u11_3)" > { + bits<14> u11_3; + + let IClass = 0b1010; + let Inst{27-16} = 0b000010011101; + let Inst{13-11} = 0b000; + let Inst{10-0} = u11_3{13-3}; + } + +// S2_storer[bhwdf]_pci: Store byte/half/word/double. +// S2_storer[bhwdf]_pci -> S2_storerbnew_pci +let Uses = [CS] in +class T_store_pci <string mnemonic, RegisterClass RC, + Operand Imm, bits<4>MajOp, + MemAccessSize AlignSize, string RegSrc = "Rt"> + : STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$Rz, Imm:$offset, ModRegs:$Mu, RC:$Rt), + #mnemonic#"($Rz ++ #$offset:circ($Mu)) = $"#RegSrc#"", + [] , + "$Rz = $_dst_" > { + bits<5> Rz; + bits<7> offset; + bits<1> Mu; + bits<5> Rt; + let accessSize = AlignSize; + let isNVStorable = !if(!eq(mnemonic,"memd"), 0, + !if(!eq(RegSrc,"Rt.h"), 0, 1)); + + let IClass = 0b1010; + let Inst{27-25} = 0b100; + let Inst{24-21} = MajOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12-8} = Rt; + let Inst{7} = 0b0; + let Inst{6-3} = + !if (!eq(!cast<string>(AlignSize), "DoubleWordAccess"), offset{6-3}, + !if (!eq(!cast<string>(AlignSize), "WordAccess"), offset{5-2}, + !if (!eq(!cast<string>(AlignSize), "HalfWordAccess"), offset{4-1}, + /* ByteAccess */ offset{3-0}))); + let Inst{1} = 0b0; + } + +def S2_storerb_pci : T_store_pci<"memb", IntRegs, s4_0Imm, 0b1000, + ByteAccess>; +def S2_storerh_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1010, + HalfWordAccess>; +def S2_storerf_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1011, + HalfWordAccess, "Rt.h">; +def S2_storeri_pci : T_store_pci<"memw", IntRegs, s4_2Imm, 0b1100, + WordAccess>; +def S2_storerd_pci : T_store_pci<"memd", DoubleRegs, s4_3Imm, 0b1110, + DoubleWordAccess>; + +let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 4 in +class T_storenew_pci <string mnemonic, Operand Imm, + bits<2>MajOp, MemAccessSize AlignSize> + : NVInst < (outs IntRegs:$_dst_), + (ins IntRegs:$Rz, Imm:$offset, ModRegs:$Mu, IntRegs:$Nt), + #mnemonic#"($Rz ++ #$offset:circ($Mu)) = $Nt.new", + [], + "$Rz = $_dst_"> { + bits<5> Rz; + bits<6> offset; + bits<1> Mu; + bits<3> Nt; + + let accessSize = AlignSize; + + let IClass = 0b1010; + let Inst{27-21} = 0b1001101; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12-11} = MajOp; + let Inst{10-8} = Nt; + let Inst{7} = 0b0; + let Inst{6-3} = + !if (!eq(!cast<string>(AlignSize), "WordAccess"), offset{5-2}, + !if (!eq(!cast<string>(AlignSize), "HalfWordAccess"), offset{4-1}, + /* ByteAccess */ offset{3-0})); + let Inst{1} = 0b0; + } + +def S2_storerbnew_pci : T_storenew_pci <"memb", s4_0Imm, 0b00, ByteAccess>; +def S2_storerhnew_pci : T_storenew_pci <"memh", s4_1Imm, 0b01, HalfWordAccess>; +def S2_storerinew_pci : T_storenew_pci <"memw", s4_2Imm, 0b10, WordAccess>; + +//===----------------------------------------------------------------------===// +// Circular stores - Pseudo +// +// Please note that the input operand order in the pseudo instructions +// doesn't match with the real instructions. Pseudo instructions operand +// order should mimics the ordering in the intrinsics. 
+//===----------------------------------------------------------------------===// +let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in +class T_store_pci_pseudo <string opc, RegisterClass RC> + : STInstPI<(outs IntRegs:$_dst_), + (ins IntRegs:$src1, RC:$src2, IntRegs:$src3, s4Imm:$src4), + ".error \""#opc#"($src1++#$src4:circ($src3)) = $src2\"", + [], "$_dst_ = $src1">; + +def S2_storerb_pci_pseudo : T_store_pci_pseudo <"memb", IntRegs>; +def S2_storerh_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>; +def S2_storerf_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>; +def S2_storeri_pci_pseudo : T_store_pci_pseudo <"memw", IntRegs>; +def S2_storerd_pci_pseudo : T_store_pci_pseudo <"memd", DoubleRegs>; + +//===----------------------------------------------------------------------===// +// Circular stores with auto-increment register +//===----------------------------------------------------------------------===// +let Uses = [CS] in +class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp, + MemAccessSize AlignSize, string RegSrc = "Rt"> + : STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu, RC:$Rt), + #mnemonic#"($Rz ++ I:circ($Mu)) = $"#RegSrc#"", + [], + "$Rz = $_dst_" > { + bits<5> Rz; + bits<1> Mu; + bits<5> Rt; + + let accessSize = AlignSize; + let isNVStorable = !if(!eq(mnemonic,"memd"), 0, + !if(!eq(RegSrc,"Rt.h"), 0, 1)); + + let IClass = 0b1010; + let Inst{27-25} = 0b100; + let Inst{24-21} = MajOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12-8} = Rt; + let Inst{7} = 0b0; + let Inst{1} = 0b1; + } + +def S2_storerb_pcr : T_store_pcr<"memb", IntRegs, 0b1000, ByteAccess>; +def S2_storerh_pcr : T_store_pcr<"memh", IntRegs, 0b1010, HalfWordAccess>; +def S2_storeri_pcr : T_store_pcr<"memw", IntRegs, 0b1100, WordAccess>; +def S2_storerd_pcr : T_store_pcr<"memd", DoubleRegs, 0b1110, DoubleWordAccess>; +def S2_storerf_pcr : T_store_pcr<"memh", IntRegs, 0b1011, + HalfWordAccess, "Rt.h">; + +//===----------------------------------------------------------------------===// +// Circular .new stores with auto-increment register +//===----------------------------------------------------------------------===// +let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3 in +class T_storenew_pcr <string mnemonic, bits<2>MajOp, + MemAccessSize AlignSize> + : NVInst <(outs IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt), + #mnemonic#"($Rz ++ I:circ($Mu)) = $Nt.new" , + [] , + "$Rz = $_dst_"> { + bits<5> Rz; + bits<1> Mu; + bits<3> Nt; + + let accessSize = AlignSize; + + let IClass = 0b1010; + let Inst{27-21} = 0b1001101; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12-11} = MajOp; + let Inst{10-8} = Nt; + let Inst{7} = 0b0; + let Inst{1} = 0b1; + } + +def S2_storerbnew_pcr : T_storenew_pcr <"memb", 0b00, ByteAccess>; +def S2_storerhnew_pcr : T_storenew_pcr <"memh", 0b01, HalfWordAccess>; +def S2_storerinew_pcr : T_storenew_pcr <"memw", 0b10, WordAccess>; + +//===----------------------------------------------------------------------===// +// Bit-reversed stores with auto-increment register +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_store_pbr<string mnemonic, RegisterClass RC, + MemAccessSize addrSize, bits<3> majOp, + bit isHalf = 0> + : STInst + <(outs IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu, RC:$src), + #mnemonic#"($Rz ++ $Mu:brev) = $src"#!if (!eq(isHalf, 1), ".h", ""), + [], "$Rz = $_dst_" > { + + let accessSize 
= addrSize; + + bits<5> Rz; + bits<1> Mu; + bits<5> src; + + let IClass = 0b1010; + + let Inst{27-24} = 0b1111; + let Inst{23-21} = majOp; + let Inst{7} = 0b0; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12-8} = src; + } + +let isNVStorable = 1 in { + let BaseOpcode = "S2_storerb_pbr" in + def S2_storerb_pbr : T_store_pbr<"memb", IntRegs, ByteAccess, + 0b000>, NewValueRel; + let BaseOpcode = "S2_storerh_pbr" in + def S2_storerh_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess, + 0b010>, NewValueRel; + let BaseOpcode = "S2_storeri_pbr" in + def S2_storeri_pbr : T_store_pbr<"memw", IntRegs, WordAccess, + 0b100>, NewValueRel; +} + +def S2_storerf_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess, 0b011, 1>; +def S2_storerd_pbr : T_store_pbr<"memd", DoubleRegs, DoubleWordAccess, 0b110>; + +//===----------------------------------------------------------------------===// +// Bit-reversed .new stores with auto-increment register +//===----------------------------------------------------------------------===// +let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3, + hasSideEffects = 0 in +class T_storenew_pbr<string mnemonic, MemAccessSize addrSize, bits<2> majOp> + : NVInst <(outs IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt), + #mnemonic#"($Rz ++ $Mu:brev) = $Nt.new", [], + "$Rz = $_dst_">, NewValueRel { + let accessSize = addrSize; + bits<5> Rz; + bits<1> Mu; + bits<3> Nt; + + let IClass = 0b1010; + + let Inst{27-21} = 0b1111101; + let Inst{12-11} = majOp; + let Inst{7} = 0b0; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{10-8} = Nt; + } + +let BaseOpcode = "S2_storerb_pbr" in +def S2_storerbnew_pbr : T_storenew_pbr<"memb", ByteAccess, 0b00>; + +let BaseOpcode = "S2_storerh_pbr" in +def S2_storerhnew_pbr : T_storenew_pbr<"memh", HalfWordAccess, 0b01>; + +let BaseOpcode = "S2_storeri_pbr" in +def S2_storerinew_pbr : T_storenew_pbr<"memw", WordAccess, 0b10>; + +//===----------------------------------------------------------------------===// +// Bit-reversed stores - Pseudo +// +// Please note that the input operand order in the pseudo instructions +// doesn't match with the real instructions. Pseudo instructions operand +// order should mimics the ordering in the intrinsics. +//===----------------------------------------------------------------------===// +let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in +class T_store_pbr_pseudo <string opc, RegisterClass RC> + : STInstPI<(outs IntRegs:$_dst_), + (ins IntRegs:$src1, RC:$src2, IntRegs:$src3), + ".error \""#opc#"($src1++$src3:brev) = $src2\"", + [], "$_dst_ = $src1">; + +def S2_storerb_pbr_pseudo : T_store_pbr_pseudo <"memb", IntRegs>; +def S2_storerh_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>; +def S2_storeri_pbr_pseudo : T_store_pbr_pseudo <"memw", IntRegs>; +def S2_storerf_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>; +def S2_storerd_pbr_pseudo : T_store_pbr_pseudo <"memd", DoubleRegs>; + +//===----------------------------------------------------------------------===// +// ST - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Template class for S_2op instructions. 
+//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_S2op_1 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut, + RegisterClass RCIn, bits<2> MajOp, bits<3> MinOp, bit isSat> + : SInst <(outs RCOut:$dst), (ins RCIn:$src), + "$dst = "#mnemonic#"($src)"#!if(isSat, ":sat", ""), + [], "", S_2op_tc_1_SLOT23 > { + bits<5> dst; + bits<5> src; + + let IClass = 0b1000; + + let Inst{27-24} = RegTyBits; + let Inst{23-22} = MajOp; + let Inst{21} = 0b0; + let Inst{20-16} = src; + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; + } + +class T_S2op_1_di <string mnemonic, bits<2> MajOp, bits<3> MinOp> + : T_S2op_1 <mnemonic, 0b0100, DoubleRegs, IntRegs, MajOp, MinOp, 0>; + +let hasNewValue = 1 in +class T_S2op_1_id <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0> + : T_S2op_1 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, isSat>; + +let hasNewValue = 1 in +class T_S2op_1_ii <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0> + : T_S2op_1 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp, isSat>; + +// Vector sign/zero extend +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def S2_vsxtbh : T_S2op_1_di <"vsxtbh", 0b00, 0b000>; + def S2_vsxthw : T_S2op_1_di <"vsxthw", 0b00, 0b100>; + def S2_vzxtbh : T_S2op_1_di <"vzxtbh", 0b00, 0b010>; + def S2_vzxthw : T_S2op_1_di <"vzxthw", 0b00, 0b110>; +} + +// Vector splat bytes/halfwords +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def S2_vsplatrb : T_S2op_1_ii <"vsplatb", 0b01, 0b111>; + def S2_vsplatrh : T_S2op_1_di <"vsplath", 0b01, 0b010>; +} + +// Sign extend word to doubleword +def A2_sxtw : T_S2op_1_di <"sxtw", 0b01, 0b000>; + +def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>; + +// Vector saturate and pack +let Defs = [USR_OVF] in { + def S2_svsathb : T_S2op_1_ii <"vsathb", 0b10, 0b000>; + def S2_svsathub : T_S2op_1_ii <"vsathub", 0b10, 0b010>; + def S2_vsathb : T_S2op_1_id <"vsathb", 0b00, 0b110>; + def S2_vsathub : T_S2op_1_id <"vsathub", 0b00, 0b000>; + def S2_vsatwh : T_S2op_1_id <"vsatwh", 0b00, 0b010>; + def S2_vsatwuh : T_S2op_1_id <"vsatwuh", 0b00, 0b100>; +} + +// Vector truncate +def S2_vtrunohb : T_S2op_1_id <"vtrunohb", 0b10, 0b000>; +def S2_vtrunehb : T_S2op_1_id <"vtrunehb", 0b10, 0b010>; + +// Swizzle the bytes of a word +def A2_swiz : T_S2op_1_ii <"swiz", 0b10, 0b111>; + +// Saturate +let Defs = [USR_OVF] in { + def A2_sat : T_S2op_1_id <"sat", 0b11, 0b000>; + def A2_satb : T_S2op_1_ii <"satb", 0b11, 0b111>; + def A2_satub : T_S2op_1_ii <"satub", 0b11, 0b110>; + def A2_sath : T_S2op_1_ii <"sath", 0b11, 0b100>; + def A2_satuh : T_S2op_1_ii <"satuh", 0b11, 0b101>; + def A2_roundsat : T_S2op_1_id <"round", 0b11, 0b001, 0b1>; +} + +let Itinerary = S_2op_tc_2_SLOT23 in { + // Vector round and pack + def S2_vrndpackwh : T_S2op_1_id <"vrndwh", 0b10, 0b100>; + + let Defs = [USR_OVF] in + def S2_vrndpackwhs : T_S2op_1_id <"vrndwh", 0b10, 0b110, 1>; + + // Bit reverse + def S2_brev : T_S2op_1_ii <"brev", 0b01, 0b110>; + + // Absolute value word + def A2_abs : T_S2op_1_ii <"abs", 0b10, 0b100>; + + let Defs = [USR_OVF] in + def A2_abssat : T_S2op_1_ii <"abs", 0b10, 0b101, 1>; + + // Negate with saturation + let Defs = [USR_OVF] in + def A2_negsat : T_S2op_1_ii <"neg", 0b10, 0b110, 1>; +} + +def: Pat<(i32 (select (i1 (setlt (i32 IntRegs:$src), 0)), + (i32 (sub 0, (i32 IntRegs:$src))), + (i32 IntRegs:$src))), + (A2_abs IntRegs:$src)>; + +let AddedComplexity = 50 in +def: Pat<(i32 (xor (add (sra (i32 IntRegs:$src), (i32 31)), + (i32 
IntRegs:$src)), + (sra (i32 IntRegs:$src), (i32 31)))), + (A2_abs IntRegs:$src)>; + +class T_S2op_2 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut, + RegisterClass RCIn, bits<3> MajOp, bits<3> MinOp, + bit isSat, bit isRnd, list<dag> pattern = []> + : SInst <(outs RCOut:$dst), + (ins RCIn:$src, u5Imm:$u5), + "$dst = "#mnemonic#"($src, #$u5)"#!if(isSat, ":sat", "") + #!if(isRnd, ":rnd", ""), + pattern, "", S_2op_tc_2_SLOT23> { + bits<5> dst; + bits<5> src; + bits<5> u5; + + let IClass = 0b1000; + + let Inst{27-24} = RegTyBits; + let Inst{23-21} = MajOp; + let Inst{20-16} = src; + let Inst{13} = 0b0; + let Inst{12-8} = u5; + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; + } + +class T_S2op_2_di <string mnemonic, bits<3> MajOp, bits<3> MinOp> + : T_S2op_2 <mnemonic, 0b1000, DoubleRegs, IntRegs, MajOp, MinOp, 0, 0>; + +let hasNewValue = 1 in +class T_S2op_2_id <string mnemonic, bits<3> MajOp, bits<3> MinOp> + : T_S2op_2 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, 0, 0>; + +let hasNewValue = 1 in +class T_S2op_2_ii <string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSat = 0, bit isRnd = 0, list<dag> pattern = []> + : T_S2op_2 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp, + isSat, isRnd, pattern>; + +class T_S2op_shift <string mnemonic, bits<3> MajOp, bits<3> MinOp, SDNode OpNd> + : T_S2op_2_ii <mnemonic, MajOp, MinOp, 0, 0, + [(set (i32 IntRegs:$dst), (OpNd (i32 IntRegs:$src), + (u5ImmPred:$u5)))]>; + +// Vector arithmetic shift right by immediate with truncate and pack +def S2_asr_i_svw_trun : T_S2op_2_id <"vasrw", 0b110, 0b010>; + +// Arithmetic/logical shift right/left by immediate +let Itinerary = S_2op_tc_1_SLOT23 in { + def S2_asr_i_r : T_S2op_shift <"asr", 0b000, 0b000, sra>; + def S2_lsr_i_r : T_S2op_shift <"lsr", 0b000, 0b001, srl>; + def S2_asl_i_r : T_S2op_shift <"asl", 0b000, 0b010, shl>; +} + +// Shift left by immediate with saturation +let Defs = [USR_OVF] in +def S2_asl_i_r_sat : T_S2op_2_ii <"asl", 0b010, 0b010, 1>; + +// Shift right with round +def S2_asr_i_r_rnd : T_S2op_2_ii <"asr", 0b010, 0b000, 0, 1>; + +let isAsmParserOnly = 1 in +def S2_asr_i_r_rnd_goodsyntax + : SInst <(outs IntRegs:$dst), (ins IntRegs:$src, u5Imm:$u5), + "$dst = asrrnd($src, #$u5)", + [], "", S_2op_tc_1_SLOT23>; + +let isAsmParserOnly = 1 in +def A2_not: ALU32_rr<(outs IntRegs:$dst),(ins IntRegs:$src), + "$dst = not($src)">; + +def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5ImmPred:$src2)), + (i32 1))), + (i32 1))), + (S2_asr_i_r_rnd IntRegs:$src1, u5ImmPred:$src2)>; + +class T_S2op_3<string opc, bits<2>MajOp, bits<3>minOp, bits<1> sat = 0> + : SInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss), + "$Rdd = "#opc#"($Rss)"#!if(!eq(sat, 1),":sat","")> { + bits<5> Rss; + bits<5> Rdd; + let IClass = 0b1000; + let Inst{27-24} = 0; + let Inst{23-22} = MajOp; + let Inst{20-16} = Rss; + let Inst{7-5} = minOp; + let Inst{4-0} = Rdd; +} + +def A2_absp : T_S2op_3 <"abs", 0b10, 0b110>; +def A2_negp : T_S2op_3 <"neg", 0b10, 0b101>; +def A2_notp : T_S2op_3 <"not", 0b10, 0b100>; + +// Interleave/deinterleave +def S2_interleave : T_S2op_3 <"interleave", 0b11, 0b101>; +def S2_deinterleave : T_S2op_3 <"deinterleave", 0b11, 0b100>; + +// Vector Complex conjugate +def A2_vconj : T_S2op_3 <"vconj", 0b10, 0b111, 1>; + +// Vector saturate without pack +def S2_vsathb_nopack : T_S2op_3 <"vsathb", 0b00, 0b111>; +def S2_vsathub_nopack : T_S2op_3 <"vsathub", 0b00, 0b100>; +def S2_vsatwh_nopack : T_S2op_3 <"vsatwh", 0b00, 0b110>; +def S2_vsatwuh_nopack : T_S2op_3 <"vsatwuh", 0b00, 0b101>; + +// 
Vector absolute value halfwords with and without saturation +// Rdd64=vabsh(Rss64)[:sat] +def A2_vabsh : T_S2op_3 <"vabsh", 0b01, 0b100>; +def A2_vabshsat : T_S2op_3 <"vabsh", 0b01, 0b101, 1>; + +// Vector absolute value words with and without saturation +def A2_vabsw : T_S2op_3 <"vabsw", 0b01, 0b110>; +def A2_vabswsat : T_S2op_3 <"vabsw", 0b01, 0b111, 1>; + +def : Pat<(not (i64 DoubleRegs:$src1)), + (A2_notp DoubleRegs:$src1)>; + +//===----------------------------------------------------------------------===// +// STYPE/BIT + +//===----------------------------------------------------------------------===// +// Bit count + +let hasSideEffects = 0, hasNewValue = 1 in +class T_COUNT_LEADING<string MnOp, bits<3> MajOp, bits<3> MinOp, bit Is32, + dag Out, dag Inp> + : SInst<Out, Inp, "$Rd = "#MnOp#"($Rs)", [], "", S_2op_tc_1_SLOT23> { + bits<5> Rs; + bits<5> Rd; + let IClass = 0b1000; + let Inst{27} = 0b1; + let Inst{26} = Is32; + let Inst{25-24} = 0b00; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; +} + +class T_COUNT_LEADING_32<string MnOp, bits<3> MajOp, bits<3> MinOp> + : T_COUNT_LEADING<MnOp, MajOp, MinOp, 0b1, + (outs IntRegs:$Rd), (ins IntRegs:$Rs)>; + +class T_COUNT_LEADING_64<string MnOp, bits<3> MajOp, bits<3> MinOp> + : T_COUNT_LEADING<MnOp, MajOp, MinOp, 0b0, + (outs IntRegs:$Rd), (ins DoubleRegs:$Rs)>; + +def S2_cl0 : T_COUNT_LEADING_32<"cl0", 0b000, 0b101>; +def S2_cl1 : T_COUNT_LEADING_32<"cl1", 0b000, 0b110>; +def S2_ct0 : T_COUNT_LEADING_32<"ct0", 0b010, 0b100>; +def S2_ct1 : T_COUNT_LEADING_32<"ct1", 0b010, 0b101>; +def S2_cl0p : T_COUNT_LEADING_64<"cl0", 0b010, 0b010>; +def S2_cl1p : T_COUNT_LEADING_64<"cl1", 0b010, 0b100>; +def S2_clb : T_COUNT_LEADING_32<"clb", 0b000, 0b100>; +def S2_clbp : T_COUNT_LEADING_64<"clb", 0b010, 0b000>; +def S2_clbnorm : T_COUNT_LEADING_32<"normamt", 0b000, 0b111>; + +// Count leading zeros. +def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>; +def: Pat<(i32 (ctlz_zero_undef I32:$Rs)), (S2_cl0 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz_zero_undef I64:$Rss))), (S2_cl0p I64:$Rss)>; + +// Count trailing zeros: 32-bit. +def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>; +def: Pat<(i32 (cttz_zero_undef I32:$Rs)), (S2_ct0 I32:$Rs)>; + +// Count leading ones. +def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; +def: Pat<(i32 (ctlz_zero_undef (not I32:$Rs))), (S2_cl1 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz_zero_undef (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; + +// Count trailing ones: 32-bit. +def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>; +def: Pat<(i32 (cttz_zero_undef (not I32:$Rs))), (S2_ct1 I32:$Rs)>; + +// The 64-bit counts leading/trailing are defined in HexagonInstrInfoV4.td. 
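For illustration (not part of the patch), the C below shows the builtins that lower to the ctlz/cttz nodes matched above; the zero guards avoid the builtins' undefined-at-zero behavior, which the *_zero_undef patterns cover separately. Function names are invented.

    #include <stdint.h>

    int leading_zeros(uint32_t x) {
        /* ctlz: expected to select S2_cl0, "Rd = cl0(Rs)". */
        return x ? __builtin_clz(x) : 32;
    }

    int trailing_zeros(uint32_t x) {
        /* cttz: expected to select S2_ct0, "Rd = ct0(Rs)". */
        return x ? __builtin_ctz(x) : 32;
    }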
+ +// Bit set/clear/toggle + +let hasSideEffects = 0, hasNewValue = 1 in +class T_SCT_BIT_IMM<string MnOp, bits<3> MinOp> + : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, u5Imm:$u5), + "$Rd = "#MnOp#"($Rs, #$u5)", [], "", S_2op_tc_1_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> u5; + let IClass = 0b1000; + let Inst{27-21} = 0b1100110; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = u5; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; +} + +let hasSideEffects = 0, hasNewValue = 1 in +class T_SCT_BIT_REG<string MnOp, bits<2> MinOp> + : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = "#MnOp#"($Rs, $Rt)", [], "", S_3op_tc_1_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + let IClass = 0b1100; + let Inst{27-22} = 0b011010; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-6} = MinOp; + let Inst{4-0} = Rd; +} + +def S2_clrbit_i : T_SCT_BIT_IMM<"clrbit", 0b001>; +def S2_setbit_i : T_SCT_BIT_IMM<"setbit", 0b000>; +def S2_togglebit_i : T_SCT_BIT_IMM<"togglebit", 0b010>; +def S2_clrbit_r : T_SCT_BIT_REG<"clrbit", 0b01>; +def S2_setbit_r : T_SCT_BIT_REG<"setbit", 0b00>; +def S2_togglebit_r : T_SCT_BIT_REG<"togglebit", 0b10>; + +def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, u5ImmPred:$u5)))), + (S2_clrbit_i IntRegs:$Rs, u5ImmPred:$u5)>; +def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, u5ImmPred:$u5))), + (S2_setbit_i IntRegs:$Rs, u5ImmPred:$u5)>; +def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, u5ImmPred:$u5))), + (S2_togglebit_i IntRegs:$Rs, u5ImmPred:$u5)>; +def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, (i32 IntRegs:$Rt))))), + (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))), + (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))), + (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>; + +// Bit test + +let hasSideEffects = 0 in +class T_TEST_BIT_IMM<string MnOp, bits<3> MajOp> + : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u5Imm:$u5), + "$Pd = "#MnOp#"($Rs, #$u5)", + [], "", S_2op_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + bits<5> u5; + let IClass = 0b1000; + let Inst{27-24} = 0b0101; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{13} = 0; + let Inst{12-8} = u5; + let Inst{1-0} = Pd; +} + +let hasSideEffects = 0 in +class T_TEST_BIT_REG<string MnOp, bit IsNeg> + : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Pd = "#MnOp#"($Rs, $Rt)", + [], "", S_3op_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + let IClass = 0b1100; + let Inst{27-22} = 0b011100; + let Inst{21} = IsNeg; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{1-0} = Pd; +} + +def S2_tstbit_i : T_TEST_BIT_IMM<"tstbit", 0b000>; +def S2_tstbit_r : T_TEST_BIT_REG<"tstbit", 0>; + +let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. 
+ def: Pat<(i1 (setne (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)), + (S2_tstbit_i IntRegs:$Rs, u5ImmPred:$u5)>; + def: Pat<(i1 (setne (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)), + (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (trunc (i32 IntRegs:$Rs))), + (S2_tstbit_i IntRegs:$Rs, 0)>; + def: Pat<(i1 (trunc (i64 DoubleRegs:$Rs))), + (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>; +} + +let hasSideEffects = 0 in +class T_TEST_BITS_IMM<string MnOp, bits<2> MajOp, bit IsNeg> + : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u6Imm:$u6), + "$Pd = "#MnOp#"($Rs, #$u6)", + [], "", S_2op_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + bits<6> u6; + let IClass = 0b1000; + let Inst{27-24} = 0b0101; + let Inst{23-22} = MajOp; + let Inst{21} = IsNeg; + let Inst{20-16} = Rs; + let Inst{13-8} = u6; + let Inst{1-0} = Pd; +} + +let hasSideEffects = 0 in +class T_TEST_BITS_REG<string MnOp, bits<2> MajOp, bit IsNeg> + : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Pd = "#MnOp#"($Rs, $Rt)", + [], "", S_3op_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + let IClass = 0b1100; + let Inst{27-24} = 0b0111; + let Inst{23-22} = MajOp; + let Inst{21} = IsNeg; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{1-0} = Pd; +} + +def C2_bitsclri : T_TEST_BITS_IMM<"bitsclr", 0b10, 0>; +def C2_bitsclr : T_TEST_BITS_REG<"bitsclr", 0b10, 0>; +def C2_bitsset : T_TEST_BITS_REG<"bitsset", 0b01, 0>; + +let AddedComplexity = 20 in { // Complexity greater than compare reg-imm. + def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), u6ImmPred:$u6), 0)), + (C2_bitsclri IntRegs:$Rs, u6ImmPred:$u6)>; + def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), 0)), + (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>; +} + +let AddedComplexity = 10 in // Complexity greater than compare reg-reg. +def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)), + (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>; + +//===----------------------------------------------------------------------===// +// STYPE/BIT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/COMPLEX + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// STYPE/COMPLEX - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// XTYPE/PERM + +//===----------------------------------------------------------------------===// + +def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))), + (i32 8)), + (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))), + (i32 16)), + (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))), + (zextloadi8 (i32 IntRegs:$b))), + (A2_swiz (L2_loadri_io IntRegs:$b, 0))>; + +//===----------------------------------------------------------------------===// +// XTYPE/PERM - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/PRED + +//===----------------------------------------------------------------------===// + +// Predicate transfer. 
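A minimal C sketch (name invented) of where the predicate transfers below typically show up: a boolean loaded from memory is read with a byte load and, per the i1-load patterns further down, moved into a predicate register with C2_tfrrp when it is consumed as an i1.

    #include <stdbool.h>

    bool load_flag(const bool *p) {
        /* Expected lowering: L2_loadrub_io followed by a transfer into a
           predicate register (C2_tfrrp), if the value is used as an i1. */
        return *p;
    }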
+let hasSideEffects = 0, hasNewValue = 1 in +def C2_tfrpr : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps), + "$Rd = $Ps", [], "", S_2op_tc_1_SLOT23> { + bits<5> Rd; + bits<2> Ps; + + let IClass = 0b1000; + let Inst{27-24} = 0b1001; + let Inst{22} = 0b1; + let Inst{17-16} = Ps; + let Inst{4-0} = Rd; +} + +// Transfer general register to predicate. +let hasSideEffects = 0 in +def C2_tfrrp: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs), + "$Pd = $Rs", [], "", S_2op_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + + let IClass = 0b1000; + let Inst{27-21} = 0b0101010; + let Inst{20-16} = Rs; + let Inst{1-0} = Pd; +} + +let hasSideEffects = 0, isCodeGenOnly = 1 in +def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src), + "$dst = $src">; + + +// Patterns for loads of i1: +def: Pat<(i1 (load AddrFI:$fi)), + (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>; +def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32ImmPred:$Off))), + (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>; +def: Pat<(i1 (load (i32 IntRegs:$Rs))), + (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>; + +def I1toI32: OutPatFrag<(ops node:$Rs), + (C2_muxii (i1 $Rs), 1, 0)>; + +def I32toI1: OutPatFrag<(ops node:$Rs), + (i1 (C2_tfrrp (i32 $Rs)))>; + +defm: Storexm_pat<store, I1, s32ImmPred, I1toI32, S2_storerb_io>; +def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>; + +//===----------------------------------------------------------------------===// +// STYPE/PRED - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/SHIFT + +//===----------------------------------------------------------------------===// +class S_2OpInstImm<string Mnemonic, bits<3>MajOp, bits<3>MinOp, + Operand Imm, list<dag> pattern = [], bit isRnd = 0> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, Imm:$src2), + "$dst = "#Mnemonic#"($src1, #$src2)"#!if(isRnd, ":rnd", ""), + pattern> { + bits<5> src1; + bits<5> dst; + let IClass = 0b1000; + let Inst{27-24} = 0; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; +} + +class S_2OpInstImmI6<string Mnemonic, SDNode OpNode, bits<3>MinOp> + : S_2OpInstImm<Mnemonic, 0b000, MinOp, u6Imm, + [(set (i64 DoubleRegs:$dst), (OpNode (i64 DoubleRegs:$src1), + u6ImmPred:$src2))]> { + bits<6> src2; + let Inst{13-8} = src2; +} + +// Shift by immediate. +def S2_asr_i_p : S_2OpInstImmI6<"asr", sra, 0b000>; +def S2_asl_i_p : S_2OpInstImmI6<"asl", shl, 0b010>; +def S2_lsr_i_p : S_2OpInstImmI6<"lsr", srl, 0b001>; + +// Shift left by small amount and add. 
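As a quick illustration (function name invented), the C below is the add-of-shifted-operand shape that the S2_addasl_rrri pattern defined next is meant to match, "Rd = addasl(Rt, Rs, #u3)".

    unsigned scaled_add(unsigned base, unsigned idx) {
        /* base + (idx << 2): expected to select S2_addasl_rrri with #2. */
        return base + (idx << 2);
    }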
+let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0 in +def S2_addasl_rrri: SInst <(outs IntRegs:$Rd), + (ins IntRegs:$Rt, IntRegs:$Rs, u3Imm:$u3), + "$Rd = addasl($Rt, $Rs, #$u3)" , + [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rt), + (shl (i32 IntRegs:$Rs), u3ImmPred:$u3)))], + "", S_3op_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rt; + bits<5> Rs; + bits<3> u3; + + let IClass = 0b1100; + + let Inst{27-21} = 0b0100000; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = Rt; + let Inst{7-5} = u3; + let Inst{4-0} = Rd; + } + +//===----------------------------------------------------------------------===// +// STYPE/SHIFT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/VH + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// STYPE/VH - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/VW + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// STYPE/VW - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SYSTEM/SUPER + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SYSTEM/USER + +//===----------------------------------------------------------------------===// +def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>; + +let hasSideEffects = 1, isSoloAX = 1 in +def Y2_barrier : SYSInst<(outs), (ins), + "barrier", + [(HexagonBARRIER)],"",ST_tc_st_SLOT0> { + let Inst{31-28} = 0b1010; + let Inst{27-21} = 0b1000000; +} + +//===----------------------------------------------------------------------===// +// SYSTEM/SUPER - +//===----------------------------------------------------------------------===// + +// Generate frameindex addresses. The main reason for the offset operand is +// that every instruction that is allowed to have frame index as an operand +// will then have that operand followed by an immediate operand (the offset). +// This simplifies the frame-index elimination code. +// +let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1, + isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in { + def TFR_FI : ALU32_ri<(outs IntRegs:$Rd), + (ins IntRegs:$fi, s32Imm:$off), "">; + def TFR_FIA : ALU32_ri<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$fi, s32Imm:$off), "">; +} + +//===----------------------------------------------------------------------===// +// CRUSER - Type. 
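For orientation (illustrative only), a simple counted loop of the kind the hardware-loop instructions defined below are used for; whether loop0/endloop0 is actually emitted depends on the hardware-loops pass, not on these definitions alone.

    int sum(const int *a, int n) {
        int s = 0;
        /* With hardware-loop generation, the trip count may be set up with
           "loop0(start, #n)" and the back edge replaced by ":endloop0". */
        for (int i = 0; i < n; ++i)
            s += a[i];
        return s;
    }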
+//===----------------------------------------------------------------------===// +// HW loop +let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2, + opExtendable = 0, hasSideEffects = 0 in +class LOOP_iBase<string mnemonic, Operand brOp, bit mustExtend = 0> + : CRInst<(outs), (ins brOp:$offset, u10Imm:$src2), + #mnemonic#"($offset, #$src2)", + [], "" , CR_tc_3x_SLOT3> { + bits<9> offset; + bits<10> src2; + + let IClass = 0b0110; + + let Inst{27-22} = 0b100100; + let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1); + let Inst{20-16} = src2{9-5}; + let Inst{12-8} = offset{8-4}; + let Inst{7-5} = src2{4-2}; + let Inst{4-3} = offset{3-2}; + let Inst{1-0} = src2{1-0}; +} + +let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2, + opExtendable = 0, hasSideEffects = 0 in +class LOOP_rBase<string mnemonic, Operand brOp, bit mustExtend = 0> + : CRInst<(outs), (ins brOp:$offset, IntRegs:$src2), + #mnemonic#"($offset, $src2)", + [], "" ,CR_tc_3x_SLOT3> { + bits<9> offset; + bits<5> src2; + + let IClass = 0b0110; + + let Inst{27-22} = 0b000000; + let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1); + let Inst{20-16} = src2; + let Inst{12-8} = offset{8-4}; + let Inst{4-3} = offset{3-2}; + } + +multiclass LOOP_ri<string mnemonic> { + def i : LOOP_iBase<mnemonic, brtarget>; + def r : LOOP_rBase<mnemonic, brtarget>; + + let isCodeGenOnly = 1, isExtended = 1, opExtendable = 0 in { + def iext: LOOP_iBase<mnemonic, brtargetExt, 1>; + def rext: LOOP_rBase<mnemonic, brtargetExt, 1>; + } +} + + +let Defs = [SA0, LC0, USR] in +defm J2_loop0 : LOOP_ri<"loop0">; + +// Interestingly only loop0's appear to set usr.lpcfg +let Defs = [SA1, LC1] in +defm J2_loop1 : LOOP_ri<"loop1">; + +let isBranch = 1, isTerminator = 1, hasSideEffects = 0, + Defs = [PC, LC0], Uses = [SA0, LC0] in { +def ENDLOOP0 : Endloop<(outs), (ins brtarget:$offset), + ":endloop0", + []>; +} + +let isBranch = 1, isTerminator = 1, hasSideEffects = 0, + Defs = [PC, LC1], Uses = [SA1, LC1] in { +def ENDLOOP1 : Endloop<(outs), (ins brtarget:$offset), + ":endloop1", + []>; +} + +// Pipelined loop instructions, sp[123]loop0 +let Defs = [LC0, SA0, P3, USR], hasSideEffects = 0, + isExtentSigned = 1, isExtendable = 1, opExtentBits = 9, opExtentAlign = 2, + opExtendable = 0, isPredicateLate = 1 in +class SPLOOP_iBase<string SP, bits<2> op> + : CRInst <(outs), (ins brtarget:$r7_2, u10Imm:$U10), + "p3 = sp"#SP#"loop0($r7_2, #$U10)" > { + bits<9> r7_2; + bits<10> U10; + + let IClass = 0b0110; + + let Inst{22-21} = op; + let Inst{27-23} = 0b10011; + let Inst{20-16} = U10{9-5}; + let Inst{12-8} = r7_2{8-4}; + let Inst{7-5} = U10{4-2}; + let Inst{4-3} = r7_2{3-2}; + let Inst{1-0} = U10{1-0}; + } + +let Defs = [LC0, SA0, P3, USR], hasSideEffects = 0, + isExtentSigned = 1, isExtendable = 1, opExtentBits = 9, opExtentAlign = 2, + opExtendable = 0, isPredicateLate = 1 in +class SPLOOP_rBase<string SP, bits<2> op> + : CRInst <(outs), (ins brtarget:$r7_2, IntRegs:$Rs), + "p3 = sp"#SP#"loop0($r7_2, $Rs)" > { + bits<9> r7_2; + bits<5> Rs; + + let IClass = 0b0110; + + let Inst{22-21} = op; + let Inst{27-23} = 0b00001; + let Inst{20-16} = Rs; + let Inst{12-8} = r7_2{8-4}; + let Inst{4-3} = r7_2{3-2}; + } + +multiclass SPLOOP_ri<string mnemonic, bits<2> op> { + def i : SPLOOP_iBase<mnemonic, op>; + def r : SPLOOP_rBase<mnemonic, op>; +} + +defm J2_ploop1s : SPLOOP_ri<"1", 0b01>; +defm J2_ploop2s : SPLOOP_ri<"2", 0b10>; +defm J2_ploop3s : SPLOOP_ri<"3", 0b11>; + +// if (Rs[!>=<]=#0) jump:[t/nt] +let Defs = [PC], 
isPredicated = 1, isBranch = 1, hasSideEffects = 0, + hasSideEffects = 0 in +class J2_jump_0_Base<string compare, bit isTak, bits<2> op> + : CRInst <(outs), (ins IntRegs:$Rs, brtarget:$r13_2), + "if ($Rs"#compare#"#0) jump"#!if(isTak, ":t", ":nt")#" $r13_2" > { + bits<5> Rs; + bits<15> r13_2; + + let IClass = 0b0110; + + let Inst{27-24} = 0b0001; + let Inst{23-22} = op; + let Inst{12} = isTak; + let Inst{21} = r13_2{14}; + let Inst{20-16} = Rs; + let Inst{11-1} = r13_2{12-2}; + let Inst{13} = r13_2{13}; + } + +multiclass J2_jump_compare_0<string compare, bits<2> op> { + def NAME : J2_jump_0_Base<compare, 0, op>; + def NAME#pt : J2_jump_0_Base<compare, 1, op>; +} + +defm J2_jumprz : J2_jump_compare_0<"!=", 0b00>; +defm J2_jumprgtez : J2_jump_compare_0<">=", 0b01>; +defm J2_jumprnz : J2_jump_compare_0<"==", 0b10>; +defm J2_jumprltez : J2_jump_compare_0<"<=", 0b11>; + +// Transfer to/from Control/GPR Guest/GPR +let hasSideEffects = 0 in +class TFR_CR_RS_base<RegisterClass CTRC, RegisterClass RC, bit isDouble> + : CRInst <(outs CTRC:$dst), (ins RC:$src), + "$dst = $src", [], "", CR_tc_3x_SLOT3> { + bits<5> dst; + bits<5> src; + + let IClass = 0b0110; + + let Inst{27-25} = 0b001; + let Inst{24} = isDouble; + let Inst{23-21} = 0b001; + let Inst{20-16} = src; + let Inst{4-0} = dst; + } + +def A2_tfrrcr : TFR_CR_RS_base<CtrRegs, IntRegs, 0b0>; +def A4_tfrpcp : TFR_CR_RS_base<CtrRegs64, DoubleRegs, 0b1>; +def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>; +def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>; + +let hasSideEffects = 0 in +class TFR_RD_CR_base<RegisterClass RC, RegisterClass CTRC, bit isSingle> + : CRInst <(outs RC:$dst), (ins CTRC:$src), + "$dst = $src", [], "", CR_tc_3x_SLOT3> { + bits<5> dst; + bits<5> src; + + let IClass = 0b0110; + + let Inst{27-26} = 0b10; + let Inst{25} = isSingle; + let Inst{24-21} = 0b0000; + let Inst{20-16} = src; + let Inst{4-0} = dst; + } + +let hasNewValue = 1, opNewValue = 0 in +def A2_tfrcrr : TFR_RD_CR_base<IntRegs, CtrRegs, 1>; +def A4_tfrcpp : TFR_RD_CR_base<DoubleRegs, CtrRegs64, 0>; +def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>; +def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>; + +// Y4_trace: Send value to etm trace. +let isSoloAX = 1, hasSideEffects = 0 in +def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs), + "trace($Rs)"> { + bits<5> Rs; + + let IClass = 0b0110; + let Inst{27-21} = 0b0010010; + let Inst{20-16} = Rs; + } + +// Support for generating global address. +// Taken from X86InstrInfo.td. 
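A short C sketch (names invented) of where the high/low immediate transfers defined below come into play: taking the address of a global in non-PIC code is typically materialized as a pair of half-word immediates, roughly "Rd.h = #HI(g)" followed by "Rd.l = #LO(g)", though the backend may also use a constant-extended transfer.

    extern int g;

    int *addr_of_g(void) {
        return &g;
    }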
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisPtrTy<0>]>; +def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; +def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; + +// HI/LO Instructions +let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, + hasNewValue = 1, opNewValue = 0 in +class REG_IMMED<string RegHalf, string Op, bit Rs, bits<3> MajOp, bit MinOp> + : ALU32_ri<(outs IntRegs:$dst), + (ins i32imm:$imm_value), + "$dst"#RegHalf#" = #"#Op#"($imm_value)", []> { + bits<5> dst; + bits<32> imm_value; + let IClass = 0b0111; + + let Inst{27} = Rs; + let Inst{26-24} = MajOp; + let Inst{21} = MinOp; + let Inst{20-16} = dst; + let Inst{23-22} = !if (!eq(Op, "LO"), imm_value{15-14}, imm_value{31-30}); + let Inst{13-0} = !if (!eq(Op, "LO"), imm_value{13-0}, imm_value{29-16}); +} + +let isAsmParserOnly = 1 in { + def LO : REG_IMMED<".l", "LO", 0b0, 0b001, 0b1>; + def LO_H : REG_IMMED<".l", "HI", 0b0, 0b001, 0b1>; + def HI : REG_IMMED<".h", "HI", 0b0, 0b010, 0b1>; + def HI_L : REG_IMMED<".h", "LO", 0b0, 0b010, 0b1>; +} + +let isMoveImm = 1, isCodeGenOnly = 1 in +def LO_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), + "$dst.l = #LO($label@GOTREL)", + []>; + +let isMoveImm = 1, isCodeGenOnly = 1 in +def HI_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), + "$dst.h = #HI($label@GOTREL)", + []>; + +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def HI_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.h = #HI($global@GOT)", + []>; + +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def LO_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.l = #LO($global@GOT)", + []>; + +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def HI_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.h = #HI($global@GOTREL)", + []>; + +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def LO_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.l = #LO($global@GOTREL)", + []>; + +// This pattern is incorrect. When we add small data, we should change +// this pattern to use memw(#foo). +// This is for sdata. 
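For illustration (not from the patch), a 64-bit immediate of the kind the CONST64_Int_Real definition below can cover; whether it is emitted as a CONST64 or assembled from 32-bit pieces is a backend decision.

    #include <stdint.h>

    uint64_t wide_const(void) {
        return 0x123456789abcdef0ull;
    }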
+let isMoveImm = 1, isAsmParserOnly = 1 in +def CONST32 : CONSTLDInst<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst = CONST32(#$global)", + [(set (i32 IntRegs:$dst), + (load (HexagonCONST32 tglobaltlsaddr:$global)))]>; + +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in +def CONST32_Int_Real : CONSTLDInst<(outs IntRegs:$dst), (ins i32imm:$global), + "$dst = CONST32(#$global)", + [(set (i32 IntRegs:$dst), imm:$global) ]>; + +// Map TLS addressses to a CONST32 instruction +def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16Ext:$addr)>; +def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16Ext:$label)>; + +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in +def CONST64_Int_Real : CONSTLDInst<(outs DoubleRegs:$dst), (ins i64imm:$global), + "$dst = CONST64(#$global)", + [(set (i64 DoubleRegs:$dst), imm:$global)]>; + +let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1, + isCodeGenOnly = 1 in +def TFR_PdTrue : SInst<(outs PredRegs:$dst), (ins), "", + [(set (i1 PredRegs:$dst), 1)]>; + +let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1, + isCodeGenOnly = 1 in +def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins), "$dst = xor($dst, $dst)", + [(set (i1 PredRegs:$dst), 0)]>; + +// Pseudo instructions. +def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; + +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; + +// For tailcalls a HexagonTCRet SDNode has 3 SDNode Properties - a chain, +// Optional Flag and Variable Arguments. +// Its 1 Operand has pointer type. +def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +let Defs = [R29, R30], Uses = [R31, R30, R29], isPseudo = 1 in +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), + ".error \"should not emit\" ", + [(callseq_start timm:$amt)]>; + +let Defs = [R29, R30, R31], Uses = [R29], isPseudo = 1 in +def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + ".error \"should not emit\" ", + [(callseq_end timm:$amt1, timm:$amt2)]>; + +// Call subroutine indirectly. +let Defs = VolatileV3.Regs in +def J2_callr : JUMPR_MISC_CALLR<0, 1>; + +// Indirect tail-call. +let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, + isTerminator = 1, isCodeGenOnly = 1 in +def TCRETURNr : T_JMPr; + +// Direct tail-calls. +let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, + isTerminator = 1, isCodeGenOnly = 1 in +def TCRETURNi : JInst<(outs), (ins calltarget:$dst), "", []>; + +//Tail calls. +def: Pat<(HexagonTCRet tglobaladdr:$dst), + (TCRETURNi tglobaladdr:$dst)>; +def: Pat<(HexagonTCRet texternalsym:$dst), + (TCRETURNi texternalsym:$dst)>; +def: Pat<(HexagonTCRet (i32 IntRegs:$dst)), + (TCRETURNr IntRegs:$dst)>; + +// Map from r0 = and(r1, 65535) to r0 = zxth(r1) +def: Pat<(and (i32 IntRegs:$src1), 65535), + (A2_zxth IntRegs:$src1)>; + +// Map from r0 = and(r1, 255) to r0 = zxtb(r1). +def: Pat<(and (i32 IntRegs:$src1), 255), + (A2_zxtb IntRegs:$src1)>; + +// Map Add(p1, true) to p1 = not(p1). +// Add(p1, false) should never be produced, +// if it does, it got to be mapped to NOOP. 
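To see why the pattern just below is sound: in the i1 world "true" is the all-ones value (-1) and addition wraps modulo 2, so p plus true equals p XOR 1, i.e. not(p), while p plus false leaves p unchanged, which is why add(p, false) would amount to a no-op. A tiny self-check, illustrative only and not taken from the source:

#include <cassert>

int main() {
  for (unsigned p = 0; p <= 1; ++p) {
    unsigned sum_true  = (p + 1u) & 1u;   // adding "true" (-1 mod 2 == 1)
    unsigned sum_false = (p + 0u) & 1u;   // adding "false"
    assert(sum_true == (p ^ 1u));         // add(p, true)  ==> not(p)
    assert(sum_false == p);               // add(p, false) ==> p (a no-op)
  }
  return 0;
}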
+def: Pat<(add (i1 PredRegs:$src1), -1), + (C2_not PredRegs:$src1)>; + +// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i). +def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s32ImmPred:$src3), + (C2_muxii PredRegs:$src1, s32ImmPred:$src3, s8ImmPred:$src2)>; + +// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) +// => r0 = C2_muxir(p0, r1, #i) +def: Pat<(select (not (i1 PredRegs:$src1)), s32ImmPred:$src2, + (i32 IntRegs:$src3)), + (C2_muxir PredRegs:$src1, IntRegs:$src3, s32ImmPred:$src2)>; + +// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) +// => r0 = C2_muxri (p0, #i, r1) +def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32ImmPred:$src3), + (C2_muxri PredRegs:$src1, s32ImmPred:$src3, IntRegs:$src2)>; + +// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump. +def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset), + (J2_jumpf PredRegs:$src1, bb:$offset)>; + +// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo). +def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)), + (A2_sxtw (LoReg DoubleRegs:$src1))>; + +// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)). +def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)), + (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>; + +// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)). +def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)), + (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>; + +// We want to prevent emitting pnot's as much as possible. +// Map brcond with an unsupported setcc to a J2_jumpf. +def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + bb:$offset), + (J2_jumpf (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)), + bb:$offset)>; + +def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)), + bb:$offset), + (J2_jumpf (C2_cmpeqi (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>; + +def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset), + (J2_jumpf PredRegs:$src1, bb:$offset)>; + +def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset), + (J2_jumpt PredRegs:$src1, bb:$offset)>; + +// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) +def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), bb:$offset), + (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ImmPred:$src2)), + bb:$offset)>; + +// Map from a 64-bit select to an emulated 64-bit mux. +// Hexagon does not support 64-bit MUXes; so emulate with combines. +def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src3)), + (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2), + (HiReg DoubleRegs:$src3)), + (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2), + (LoReg DoubleRegs:$src3)))>; + +// Map from a 1-bit select to logical ops. +// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3). +def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)), + (C2_or (C2_and PredRegs:$src1, PredRegs:$src2), + (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>; + +// Map for truncating from 64 immediates to 32 bit immediates. +def: Pat<(i32 (trunc (i64 DoubleRegs:$src))), + (LoReg DoubleRegs:$src)>; + +// Map for truncating from i64 immediates to i1 bit immediates. +def: Pat<(i1 (trunc (i64 DoubleRegs:$src))), + (C2_tfrrp (LoReg DoubleRegs:$src))>; + +// rs <= rt -> !(rs > rt). 
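The comparison rewrites here and in the patterns that follow all lean on Hexagon exposing only "greater than" style compares: x <= y becomes !(x > y), a signed x >= #c becomes x > #c-1 (DEC_CONST_SIGNED), and an unsigned x >= #u with u > 0 becomes x > #u-1 (DEC_CONST_UNSIGNED), with u == 0 handled by the cmpeq(Rs, Rs) pattern. A brute-force check over a few arbitrary test values (an illustrative sketch, not code from the patch):

#include <cassert>
#include <cstdint>

int main() {
  const int32_t xs[] = {-5, -1, 0, 1, 7, 100};
  const int32_t cs[] = {-3, 0, 2, 7};          // small, so c - 1 cannot wrap
  for (int32_t x : xs)
    for (int32_t y : xs)
      assert((x <= y) == !(x > y));            // setle  -> !cmp.gt
  for (int32_t x : xs)
    for (int32_t c : cs) {
      assert((x >= c) == (x > c - 1));         // setge  -> cmp.gt(Rs, #c-1)
      assert((x <  c) == !(x > c - 1));        // setlt  -> !cmp.gt(Rs, #c-1)
    }
  const uint32_t us[] = {0, 1, 9, 255};
  const uint32_t ks[] = {1, 8, 255};           // k > 0; k == 0 has its own pattern
  for (uint32_t x : us)
    for (uint32_t k : ks)
      assert((x >= k) == (x > k - 1));         // setuge -> cmp.gtu(Rs, #k-1)
  return 0;
}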
+let AddedComplexity = 30 in +def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>; + +// rs <= rt -> !(rs > rt). +def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + (i1 (C2_not (C2_cmpgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>; + +// Rss <= Rtt -> !(Rss > Rtt). +def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>; + +// Map cmpne -> cmpeq. +// Hexagon_TODO: We should improve on this. +// rs != rt -> !(rs == rt). +let AddedComplexity = 30 in +def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpeqi IntRegs:$src1, s32ImmPred:$src2))>; + +// Convert setne back to xor for hexagon since we compute w/ pred registers. +def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))), + (C2_xor PredRegs:$src1, PredRegs:$src2)>; + +// Map cmpne(Rss) -> !cmpew(Rss). +// rs != rt -> !(rs == rt). +def: Pat<(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>; + +// Map cmpge(Rs, Rt) -> !(cmpgt(Rs, Rt). +// rs >= rt -> !(rt > rs). +def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + (i1 (C2_not (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>; + +// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1) +let AddedComplexity = 30 in +def: Pat<(i1 (setge (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>; + +// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). +// rss >= rtt -> !(rtt > rss). +def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>; + +// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm). +// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1). +// rs < rt -> !(rs >= rt). +let AddedComplexity = 30 in +def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, + (DEC_CONST_SIGNED s32ImmPred:$src2)))>; + +// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs) +def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)), + (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>; + +// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1) +def: Pat<(i1 (setuge (i32 IntRegs:$src1), u32ImmPred:$src2)), + (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32ImmPred:$src2))>; + +// Generate cmpgtu(Rs, #u9) +def: Pat<(i1 (setugt (i32 IntRegs:$src1), u32ImmPred:$src2)), + (C2_cmpgtui IntRegs:$src1, u32ImmPred:$src2)>; + +// Map from Rs >= Rt -> !(Rt > Rs). +// rs >= rt -> !(rt > rs). +def: Pat<(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>; + +// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt-1). +// Map from (Rs <= Rt) -> !(Rs > Rt). +def: Pat<(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>; + +// Sign extends. +// i1 -> i32 +def: Pat<(i32 (sext (i1 PredRegs:$src1))), + (C2_muxii PredRegs:$src1, -1, 0)>; + +// i1 -> i64 +def: Pat<(i64 (sext (i1 PredRegs:$src1))), + (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>; + +// Zero extends. 
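One aside before this section continues: the 64-bit mulhu expansion that appears after the zero-extension patterns below assembles the upper half of a 64x64 product from 32-bit partial products (M2_dpmpyuu_s0 / M2_dpmpyuu_acc_s0 plus shifts and adds), i.e. the usual schoolbook decomposition. A plain C++ rendering of that decomposition, checked against the compiler's 128-bit integer extension (assumed available on the host; the helper name is invented):

#include <cassert>
#include <cstdint>

// High 64 bits of an unsigned 64x64 multiply, built from 32-bit halves.
static uint64_t mulhu64(uint64_t a, uint64_t b) {
  uint64_t aL = a & 0xFFFFFFFFu, aH = a >> 32;
  uint64_t bL = b & 0xFFFFFFFFu, bH = b >> 32;
  uint64_t ll = aL * bL, lh = aL * bH, hl = aH * bL, hh = aH * bH;
  uint64_t mid = (ll >> 32) + (lh & 0xFFFFFFFFu) + (hl & 0xFFFFFFFFu);
  return hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
}

int main() {
  const uint64_t tests[] = {0, 1, 0xFFFFFFFFu, 0x100000000ull,
                            0xDEADBEEFCAFEBABEull, ~0ull};
  for (uint64_t a : tests)
    for (uint64_t b : tests)
      assert(mulhu64(a, b) ==
             (uint64_t)(((unsigned __int128)a * b) >> 64));
  return 0;
}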
+// i1 -> i32 +def: Pat<(i32 (zext (i1 PredRegs:$src1))), + (C2_muxii PredRegs:$src1, 1, 0)>; + +// Map from Rs = Pd to Pd = mux(Pd, #1, #0) +def: Pat<(i32 (anyext (i1 PredRegs:$src1))), + (C2_muxii PredRegs:$src1, 1, 0)>; + +// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0)) +def: Pat<(i64 (anyext (i1 PredRegs:$src1))), + (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>; + +// Multiply 64-bit unsigned and use upper result. +def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), + (A2_addp + (M2_dpmpyuu_acc_s0 + (S2_lsr_i_p + (A2_addp + (M2_dpmpyuu_acc_s0 + (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (LoReg $src2)), 32), + (HiReg $src1), + (LoReg $src2)), + (A2_combinew (A2_tfrsi 0), + (LoReg (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2))))), + 32), + (HiReg $src1), + (HiReg $src2)), + (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2)), 32) +)>; + +// Hexagon specific ISD nodes. +def SDTHexagonALLOCA : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA, + [SDNPHasChain]>; + +// The reason for the custom inserter is to record all ALLOCA instructions +// in MachineFunctionInfo. +let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1, + usesCustomInserter = 1 in +def ALLOCA: ALU32Inst<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, u32Imm:$A), "", + [(set (i32 IntRegs:$Rd), + (HexagonALLOCA (i32 IntRegs:$Rs), (i32 imm:$A)))]>; + +let isCodeGenOnly = 1, isPseudo = 1, Uses = [R30], hasSideEffects = 0 in +def ALIGNA : ALU32Inst<(outs IntRegs:$Rd), (ins u32Imm:$A), "", []>; + +def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; +def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>; +let isCodeGenOnly = 1 in +def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = $src1", + [(set (i32 IntRegs:$dst), + (Hexagon_ARGEXTEND (i32 IntRegs:$src1)))]>; + +let AddedComplexity = 100 in +def: Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)), + (i32 IntRegs:$src1)>; + +def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>; +def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>; + +def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi s16Ext:$dst)>; +def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi s16Ext:$dst)>; + +// XTYPE/SHIFT +// +//===----------------------------------------------------------------------===// +// Template Class +// Shift by immediate/register and accumulate/logical +//===----------------------------------------------------------------------===// + +// Rx[+-&|]=asr(Rs,#u5) +// Rx[+-&|^]=lsr(Rs,#u5) +// Rx[+-&|^]=asl(Rs,#u5) + +let hasNewValue = 1, opNewValue = 0 in +class T_shift_imm_acc_r <string opc1, string opc2, SDNode OpNode1, + SDNode OpNode2, bits<3> majOp, bits<2> minOp> + : SInst_acc<(outs IntRegs:$Rx), + (ins IntRegs:$src1, IntRegs:$Rs, u5Imm:$u5), + "$Rx "#opc2#opc1#"($Rs, #$u5)", + [(set (i32 IntRegs:$Rx), + (OpNode2 (i32 IntRegs:$src1), + (OpNode1 (i32 IntRegs:$Rs), u5ImmPred:$u5)))], + "$src1 = $Rx", S_2op_tc_2_SLOT23> { + bits<5> Rx; + bits<5> Rs; + bits<5> u5; + + let IClass = 0b1000; + + let Inst{27-24} = 0b1110; + let Inst{23-22} = majOp{2-1}; + let Inst{13} = 0b0; + let Inst{7} = majOp{0}; + let Inst{6-5} = minOp; + let Inst{4-0} = Rx; + let Inst{20-16} = Rs; + let Inst{12-8} = u5; + } + +// Rx[+-&|]=asr(Rs,Rt) +// Rx[+-&|^]=lsr(Rs,Rt) +// Rx[+-&|^]=asl(Rs,Rt) + +let hasNewValue = 1, opNewValue = 0 in +class T_shift_reg_acc_r <string opc1, string opc2, SDNode OpNode1, + SDNode OpNode2, bits<2> majOp, 
bits<2> minOp> + : SInst_acc<(outs IntRegs:$Rx), + (ins IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt), + "$Rx "#opc2#opc1#"($Rs, $Rt)", + [(set (i32 IntRegs:$Rx), + (OpNode2 (i32 IntRegs:$src1), + (OpNode1 (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))], + "$src1 = $Rx", S_3op_tc_2_SLOT23 > { + bits<5> Rx; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1100; + + let Inst{27-24} = 0b1100; + let Inst{23-22} = majOp; + let Inst{7-6} = minOp; + let Inst{4-0} = Rx; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + } + +// Rxx[+-&|]=asr(Rss,#u6) +// Rxx[+-&|^]=lsr(Rss,#u6) +// Rxx[+-&|^]=asl(Rss,#u6) + +class T_shift_imm_acc_p <string opc1, string opc2, SDNode OpNode1, + SDNode OpNode2, bits<3> majOp, bits<2> minOp> + : SInst_acc<(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$src1, DoubleRegs:$Rss, u6Imm:$u6), + "$Rxx "#opc2#opc1#"($Rss, #$u6)", + [(set (i64 DoubleRegs:$Rxx), + (OpNode2 (i64 DoubleRegs:$src1), + (OpNode1 (i64 DoubleRegs:$Rss), u6ImmPred:$u6)))], + "$src1 = $Rxx", S_2op_tc_2_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<6> u6; + + let IClass = 0b1000; + + let Inst{27-24} = 0b0010; + let Inst{23-22} = majOp{2-1}; + let Inst{7} = majOp{0}; + let Inst{6-5} = minOp; + let Inst{4-0} = Rxx; + let Inst{20-16} = Rss; + let Inst{13-8} = u6; + } + + +// Rxx[+-&|]=asr(Rss,Rt) +// Rxx[+-&|^]=lsr(Rss,Rt) +// Rxx[+-&|^]=asl(Rss,Rt) +// Rxx[+-&|^]=lsl(Rss,Rt) + +class T_shift_reg_acc_p <string opc1, string opc2, SDNode OpNode1, + SDNode OpNode2, bits<3> majOp, bits<2> minOp> + : SInst_acc<(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$src1, DoubleRegs:$Rss, IntRegs:$Rt), + "$Rxx "#opc2#opc1#"($Rss, $Rt)", + [(set (i64 DoubleRegs:$Rxx), + (OpNode2 (i64 DoubleRegs:$src1), + (OpNode1 (i64 DoubleRegs:$Rss), (i32 IntRegs:$Rt))))], + "$src1 = $Rxx", S_3op_tc_2_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rt; + + let IClass = 0b1100; + + let Inst{27-24} = 0b1011; + let Inst{23-21} = majOp; + let Inst{20-16} = Rss; + let Inst{12-8} = Rt; + let Inst{7-6} = minOp; + let Inst{4-0} = Rxx; + } + +//===----------------------------------------------------------------------===// +// Multi-class for the shift instructions with logical/arithmetic operators. 
+//===----------------------------------------------------------------------===// + +multiclass xtype_imm_base<string OpcStr1, string OpcStr2, SDNode OpNode1, + SDNode OpNode2, bits<3> majOp, bits<2> minOp > { + def _i_r#NAME : T_shift_imm_acc_r< OpcStr1, OpcStr2, OpNode1, + OpNode2, majOp, minOp >; + def _i_p#NAME : T_shift_imm_acc_p< OpcStr1, OpcStr2, OpNode1, + OpNode2, majOp, minOp >; +} + +multiclass xtype_imm_acc<string opc1, SDNode OpNode, bits<2>minOp> { + let AddedComplexity = 100 in + defm _acc : xtype_imm_base< opc1, "+= ", OpNode, add, 0b001, minOp>; + + defm _nac : xtype_imm_base< opc1, "-= ", OpNode, sub, 0b000, minOp>; + defm _and : xtype_imm_base< opc1, "&= ", OpNode, and, 0b010, minOp>; + defm _or : xtype_imm_base< opc1, "|= ", OpNode, or, 0b011, minOp>; +} + +multiclass xtype_xor_imm_acc<string opc1, SDNode OpNode, bits<2>minOp> { +let AddedComplexity = 100 in + defm _xacc : xtype_imm_base< opc1, "^= ", OpNode, xor, 0b100, minOp>; +} + +defm S2_asr : xtype_imm_acc<"asr", sra, 0b00>; + +defm S2_lsr : xtype_imm_acc<"lsr", srl, 0b01>, + xtype_xor_imm_acc<"lsr", srl, 0b01>; + +defm S2_asl : xtype_imm_acc<"asl", shl, 0b10>, + xtype_xor_imm_acc<"asl", shl, 0b10>; + +multiclass xtype_reg_acc_r<string opc1, SDNode OpNode, bits<2>minOp> { + let AddedComplexity = 100 in + def _acc : T_shift_reg_acc_r <opc1, "+= ", OpNode, add, 0b11, minOp>; + + def _nac : T_shift_reg_acc_r <opc1, "-= ", OpNode, sub, 0b10, minOp>; + def _and : T_shift_reg_acc_r <opc1, "&= ", OpNode, and, 0b01, minOp>; + def _or : T_shift_reg_acc_r <opc1, "|= ", OpNode, or, 0b00, minOp>; +} + +multiclass xtype_reg_acc_p<string opc1, SDNode OpNode, bits<2>minOp> { + let AddedComplexity = 100 in + def _acc : T_shift_reg_acc_p <opc1, "+= ", OpNode, add, 0b110, minOp>; + + def _nac : T_shift_reg_acc_p <opc1, "-= ", OpNode, sub, 0b100, minOp>; + def _and : T_shift_reg_acc_p <opc1, "&= ", OpNode, and, 0b010, minOp>; + def _or : T_shift_reg_acc_p <opc1, "|= ", OpNode, or, 0b000, minOp>; + def _xor : T_shift_reg_acc_p <opc1, "^= ", OpNode, xor, 0b011, minOp>; +} + +multiclass xtype_reg_acc<string OpcStr, SDNode OpNode, bits<2> minOp > { + defm _r_r : xtype_reg_acc_r <OpcStr, OpNode, minOp>; + defm _r_p : xtype_reg_acc_p <OpcStr, OpNode, minOp>; +} + +defm S2_asl : xtype_reg_acc<"asl", shl, 0b10>; +defm S2_asr : xtype_reg_acc<"asr", sra, 0b00>; +defm S2_lsr : xtype_reg_acc<"lsr", srl, 0b01>; +defm S2_lsl : xtype_reg_acc<"lsl", shl, 0b11>; + +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_S3op_1 <string mnemonic, RegisterClass RC, bits<2> MajOp, bits<3> MinOp, + bit SwapOps, bit isSat = 0, bit isRnd = 0, bit hasShift = 0> + : SInst <(outs RC:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2), + "$dst = "#mnemonic#"($src1, $src2)"#!if(isRnd, ":rnd", "") + #!if(hasShift,":>>1","") + #!if(isSat, ":sat", ""), + [], "", S_3op_tc_2_SLOT23 > { + bits<5> dst; + bits<5> src1; + bits<5> src2; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0001; + let Inst{23-22} = MajOp; + let Inst{20-16} = !if (SwapOps, src2, src1); + let Inst{12-8} = !if (SwapOps, src1, src2); + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; + } + +class T_S3op_64 <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit SwapOps, + bit isSat = 0, bit isRnd = 0, bit hasShift = 0 > + : T_S3op_1 <mnemonic, DoubleRegs, MajOp, MinOp, SwapOps, + isSat, isRnd, hasShift>; + +let Itinerary = S_3op_tc_1_SLOT23 in { + def S2_shuffeb : T_S3op_64 < "shuffeb", 0b00, 0b010, 0>; + def S2_shuffeh : T_S3op_64 < "shuffeh", 
0b00, 0b110, 0>; + def S2_shuffob : T_S3op_64 < "shuffob", 0b00, 0b100, 1>; + def S2_shuffoh : T_S3op_64 < "shuffoh", 0b10, 0b000, 1>; + + def S2_vtrunewh : T_S3op_64 < "vtrunewh", 0b10, 0b010, 0>; + def S2_vtrunowh : T_S3op_64 < "vtrunowh", 0b10, 0b100, 0>; +} + +def S2_lfsp : T_S3op_64 < "lfs", 0b10, 0b110, 0>; + +let hasSideEffects = 0 in +class T_S3op_2 <string mnemonic, bits<3> MajOp, bit SwapOps> + : SInst < (outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, PredRegs:$Pu), + "$Rdd = "#mnemonic#"($Rss, $Rtt, $Pu)", + [], "", S_3op_tc_1_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + bits<2> Pu; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0010; + let Inst{23-21} = MajOp; + let Inst{20-16} = !if (SwapOps, Rtt, Rss); + let Inst{12-8} = !if (SwapOps, Rss, Rtt); + let Inst{6-5} = Pu; + let Inst{4-0} = Rdd; + } + +def S2_valignrb : T_S3op_2 < "valignb", 0b000, 1>; +def S2_vsplicerb : T_S3op_2 < "vspliceb", 0b100, 0>; + +//===----------------------------------------------------------------------===// +// Template class used by vector shift, vector rotate, vector neg, +// 32-bit shift, 64-bit shifts, etc. +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0 in +class T_S3op_3 <string mnemonic, RegisterClass RC, bits<2> MajOp, + bits<2> MinOp, bit isSat = 0, list<dag> pattern = [] > + : SInst <(outs RC:$dst), + (ins RC:$src1, IntRegs:$src2), + "$dst = "#mnemonic#"($src1, $src2)"#!if(isSat, ":sat", ""), + pattern, "", S_3op_tc_1_SLOT23> { + bits<5> dst; + bits<5> src1; + bits<5> src2; + + let IClass = 0b1100; + + let Inst{27-24} = !if(!eq(!cast<string>(RC), "IntRegs"), 0b0110, 0b0011); + let Inst{23-22} = MajOp; + let Inst{20-16} = src1; + let Inst{12-8} = src2; + let Inst{7-6} = MinOp; + let Inst{4-0} = dst; + } + +let hasNewValue = 1 in +class T_S3op_shift32 <string mnemonic, SDNode OpNode, bits<2> MinOp> + : T_S3op_3 <mnemonic, IntRegs, 0b01, MinOp, 0, + [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; + +let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in +class T_S3op_shift32_Sat <string mnemonic, bits<2> MinOp> + : T_S3op_3 <mnemonic, IntRegs, 0b00, MinOp, 1, []>; + + +class T_S3op_shift64 <string mnemonic, SDNode OpNode, bits<2> MinOp> + : T_S3op_3 <mnemonic, DoubleRegs, 0b10, MinOp, 0, + [(set (i64 DoubleRegs:$dst), (OpNode (i64 DoubleRegs:$src1), + (i32 IntRegs:$src2)))]>; + + +class T_S3op_shiftVect <string mnemonic, bits<2> MajOp, bits<2> MinOp> + : T_S3op_3 <mnemonic, DoubleRegs, MajOp, MinOp, 0, []>; + + +// Shift by register +// Rdd=[asr|lsr|asl|lsl](Rss,Rt) + +def S2_asr_r_p : T_S3op_shift64 < "asr", sra, 0b00>; +def S2_lsr_r_p : T_S3op_shift64 < "lsr", srl, 0b01>; +def S2_asl_r_p : T_S3op_shift64 < "asl", shl, 0b10>; +def S2_lsl_r_p : T_S3op_shift64 < "lsl", shl, 0b11>; + +// Rd=[asr|lsr|asl|lsl](Rs,Rt) + +def S2_asr_r_r : T_S3op_shift32<"asr", sra, 0b00>; +def S2_lsr_r_r : T_S3op_shift32<"lsr", srl, 0b01>; +def S2_asl_r_r : T_S3op_shift32<"asl", shl, 0b10>; +def S2_lsl_r_r : T_S3op_shift32<"lsl", shl, 0b11>; + +// Shift by register with saturation +// Rd=asr(Rs,Rt):sat +// Rd=asl(Rs,Rt):sat + +let Defs = [USR_OVF] in { + def S2_asr_r_r_sat : T_S3op_shift32_Sat<"asr", 0b00>; + def S2_asl_r_r_sat : T_S3op_shift32_Sat<"asl", 0b10>; +} + +let hasNewValue = 1, hasSideEffects = 0 in +class T_S3op_8 <string opc, bits<3> MinOp, bit isSat, bit isRnd, bit hasShift, bit hasSplat = 0> + : SInst < (outs IntRegs:$Rd), + (ins DoubleRegs:$Rss, IntRegs:$Rt), + "$Rd = 
"#opc#"($Rss, $Rt"#!if(hasSplat, "*", "")#")" + #!if(hasShift, ":<<1", "") + #!if(isRnd, ":rnd", "") + #!if(isSat, ":sat", ""), + [], "", S_3op_tc_1_SLOT23 > { + bits<5> Rd; + bits<5> Rss; + bits<5> Rt; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0101; + let Inst{20-16} = Rss; + let Inst{12-8} = Rt; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; + } + +def S2_asr_r_svw_trun : T_S3op_8<"vasrw", 0b010, 0, 0, 0>; + +let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in +def S2_vcrotate : T_S3op_shiftVect < "vcrotate", 0b11, 0b00>; + +let hasSideEffects = 0 in +class T_S3op_7 <string mnemonic, bit MajOp > + : SInst <(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, u3Imm:$u3), + "$Rdd = "#mnemonic#"($Rss, $Rtt, #$u3)" , + [], "", S_3op_tc_1_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + bits<3> u3; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0000; + let Inst{23} = MajOp; + let Inst{20-16} = !if(MajOp, Rss, Rtt); + let Inst{12-8} = !if(MajOp, Rtt, Rss); + let Inst{7-5} = u3; + let Inst{4-0} = Rdd; + } + +def S2_valignib : T_S3op_7 < "valignb", 0>; +def S2_vspliceib : T_S3op_7 < "vspliceb", 1>; + +//===----------------------------------------------------------------------===// +// Template class for 'insert bitfield' instructions +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_S3op_insert <string mnemonic, RegisterClass RC> + : SInst <(outs RC:$dst), + (ins RC:$src1, RC:$src2, DoubleRegs:$src3), + "$dst = "#mnemonic#"($src2, $src3)" , + [], "$src1 = $dst", S_3op_tc_1_SLOT23 > { + bits<5> dst; + bits<5> src2; + bits<5> src3; + + let IClass = 0b1100; + + let Inst{27-26} = 0b10; + let Inst{25-24} = !if(!eq(!cast<string>(RC), "IntRegs"), 0b00, 0b10); + let Inst{23} = 0b0; + let Inst{20-16} = src2; + let Inst{12-8} = src3; + let Inst{4-0} = dst; + } + +let hasSideEffects = 0 in +class T_S2op_insert <bits<4> RegTyBits, RegisterClass RC, Operand ImmOp> + : SInst <(outs RC:$dst), (ins RC:$dst2, RC:$src1, ImmOp:$src2, ImmOp:$src3), + "$dst = insert($src1, #$src2, #$src3)", + [], "$dst2 = $dst", S_2op_tc_2_SLOT23> { + bits<5> dst; + bits<5> src1; + bits<6> src2; + bits<6> src3; + bit bit23; + bit bit13; + string ImmOpStr = !cast<string>(ImmOp); + + let bit23 = !if (!eq(ImmOpStr, "u6Imm"), src3{5}, 0); + let bit13 = !if (!eq(ImmOpStr, "u6Imm"), src2{5}, 0); + + let IClass = 0b1000; + + let Inst{27-24} = RegTyBits; + let Inst{23} = bit23; + let Inst{22-21} = src3{4-3}; + let Inst{20-16} = src1; + let Inst{13} = bit13; + let Inst{12-8} = src2{4-0}; + let Inst{7-5} = src3{2-0}; + let Inst{4-0} = dst; + } + +// Rx=insert(Rs,Rtt) +// Rx=insert(Rs,#u5,#U5) +let hasNewValue = 1 in { + def S2_insert_rp : T_S3op_insert <"insert", IntRegs>; + def S2_insert : T_S2op_insert <0b1111, IntRegs, u5Imm>; +} + +// Rxx=insert(Rss,Rtt) +// Rxx=insert(Rss,#u6,#U6) +def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>; +def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6Imm>; + + +def SDTHexagonINSERT: + SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; +def SDTHexagonINSERTRP: + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisVT<3, i64>]>; + +def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>; +def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>; + +def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2), + (S2_insert I32:$Rs, I32:$Rt, u5ImmPred:$u1, 
u5ImmPred:$u2)>; +def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2), + (S2_insertp I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2)>; +def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru), + (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>; +def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru), + (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>; + +let AddedComplexity = 100 in +def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))), + (i32 (extloadi8 (add I32:$b, 3))), + 24, 8), + (i32 16)), + (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))), + (zextloadi8 I32:$b)), + (A2_swiz (L2_loadri_io I32:$b, 0))>; + + +//===----------------------------------------------------------------------===// +// Template class for 'extract bitfield' instructions +//===----------------------------------------------------------------------===// +let hasNewValue = 1, hasSideEffects = 0 in +class T_S3op_extract <string mnemonic, bits<2> MinOp> + : SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs, DoubleRegs:$Rtt), + "$Rd = "#mnemonic#"($Rs, $Rtt)", + [], "", S_3op_tc_2_SLOT23 > { + bits<5> Rd; + bits<5> Rs; + bits<5> Rtt; + + let IClass = 0b1100; + + let Inst{27-22} = 0b100100; + let Inst{20-16} = Rs; + let Inst{12-8} = Rtt; + let Inst{7-6} = MinOp; + let Inst{4-0} = Rd; + } + +let hasSideEffects = 0 in +class T_S2op_extract <string mnemonic, bits<4> RegTyBits, + RegisterClass RC, Operand ImmOp> + : SInst <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2, ImmOp:$src3), + "$dst = "#mnemonic#"($src1, #$src2, #$src3)", + [], "", S_2op_tc_2_SLOT23> { + bits<5> dst; + bits<5> src1; + bits<6> src2; + bits<6> src3; + bit bit23; + bit bit13; + string ImmOpStr = !cast<string>(ImmOp); + + let bit23 = !if (!eq(ImmOpStr, "u6Imm"), src3{5}, + !if (!eq(mnemonic, "extractu"), 0, 1)); + + let bit13 = !if (!eq(ImmOpStr, "u6Imm"), src2{5}, 0); + + let IClass = 0b1000; + + let Inst{27-24} = RegTyBits; + let Inst{23} = bit23; + let Inst{22-21} = src3{4-3}; + let Inst{20-16} = src1; + let Inst{13} = bit13; + let Inst{12-8} = src2{4-0}; + let Inst{7-5} = src3{2-0}; + let Inst{4-0} = dst; + } + +// Extract bitfield + +// Rdd=extractu(Rss,Rtt) +// Rdd=extractu(Rss,#u6,#U6) +def S2_extractup_rp : T_S3op_64 < "extractu", 0b00, 0b000, 0>; +def S2_extractup : T_S2op_extract <"extractu", 0b0001, DoubleRegs, u6Imm>; + +// Rd=extractu(Rs,Rtt) +// Rd=extractu(Rs,#u5,#U5) +let hasNewValue = 1 in { + def S2_extractu_rp : T_S3op_extract<"extractu", 0b00>; + def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5Imm>; +} + +def SDTHexagonEXTRACTU: + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def SDTHexagonEXTRACTURP: + SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i64>]>; + +def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>; +def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>; + +def: Pat<(HexagonEXTRACTU I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3), + (S2_extractu I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3)>; +def: Pat<(HexagonEXTRACTU I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3), + (S2_extractup I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3)>; +def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2), + (S2_extractu_rp I32:$src1, I64:$src2)>; +def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2), + (S2_extractup_rp I64:$src1, I64:$src2)>; + +// Change the sign of the immediate for Rd=-mpyi(Rs,#u8) +def: Pat<(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)), + (M2_mpysin 
IntRegs:$src1, u8ImmPred:$src2)>; + +//===----------------------------------------------------------------------===// +// :raw for of tableindx[bdhw] insns +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class tableidxRaw<string OpStr, bits<2>MinOp> + : SInst <(outs IntRegs:$Rx), + (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, s6Imm:$S6), + "$Rx = "#OpStr#"($Rs, #$u4, #$S6):raw", + [], "$Rx = $_dst_" > { + bits<5> Rx; + bits<5> Rs; + bits<4> u4; + bits<6> S6; + + let IClass = 0b1000; + + let Inst{27-24} = 0b0111; + let Inst{23-22} = MinOp; + let Inst{21} = u4{3}; + let Inst{20-16} = Rs; + let Inst{13-8} = S6; + let Inst{7-5} = u4{2-0}; + let Inst{4-0} = Rx; + } + +def S2_tableidxb : tableidxRaw<"tableidxb", 0b00>; +def S2_tableidxh : tableidxRaw<"tableidxh", 0b01>; +def S2_tableidxw : tableidxRaw<"tableidxw", 0b10>; +def S2_tableidxd : tableidxRaw<"tableidxd", 0b11>; + +//===----------------------------------------------------------------------===// +// Template class for 'table index' instructions which are assembler mapped +// to their :raw format. +//===----------------------------------------------------------------------===// +let isPseudo = 1 in +class tableidx_goodsyntax <string mnemonic> + : SInst <(outs IntRegs:$Rx), + (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, u5Imm:$u5), + "$Rx = "#mnemonic#"($Rs, #$u4, #$u5)", + [], "$Rx = $_dst_" >; + +def S2_tableidxb_goodsyntax : tableidx_goodsyntax<"tableidxb">; +def S2_tableidxh_goodsyntax : tableidx_goodsyntax<"tableidxh">; +def S2_tableidxw_goodsyntax : tableidx_goodsyntax<"tableidxw">; +def S2_tableidxd_goodsyntax : tableidx_goodsyntax<"tableidxd">; + +//===----------------------------------------------------------------------===// +// V3 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV3.td" + +//===----------------------------------------------------------------------===// +// V3 Instructions - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// V4 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV4.td" + +//===----------------------------------------------------------------------===// +// V4 Instructions - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// V5 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV5.td" + +//===----------------------------------------------------------------------===// +// V5 Instructions - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// V60 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV60.td" + +//===----------------------------------------------------------------------===// +// V60 Instructions - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU32/64/Vector + 
+//===----------------------------------------------------------------------===/// + +include "HexagonInstrInfoVector.td" + +include "HexagonInstrAlias.td" diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td new file mode 100644 index 0000000..84d035d --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td @@ -0,0 +1,266 @@ +//=- HexagonInstrInfoV3.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V3 instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +def callv3 : SDNode<"HexagonISD::CALLv3", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +def callv3nr : SDNode<"HexagonISD::CALLv3nr", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +//===----------------------------------------------------------------------===// +// J + +//===----------------------------------------------------------------------===// +// Call subroutine. +let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, isPredicable = 1, + isExtended = 0, isExtendable = 1, opExtendable = 0, + isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2 in +class T_Call<string ExtStr> + : JInst<(outs), (ins calltarget:$dst), + "call " # ExtStr # "$dst", [], "", J_tc_2early_SLOT23> { + let BaseOpcode = "call"; + bits<24> dst; + + let IClass = 0b0101; + let Inst{27-25} = 0b101; + let Inst{24-16,13-1} = dst{23-2}; + let Inst{0} = 0b0; +} + +let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, isPredicated = 1, + isExtended = 0, isExtendable = 1, opExtendable = 1, + isExtentSigned = 1, opExtentBits = 17, opExtentAlign = 2 in +class T_CallPred<bit IfTrue, string ExtStr> + : JInst<(outs), (ins PredRegs:$Pu, calltarget:$dst), + CondStr<"$Pu", IfTrue, 0>.S # "call " # ExtStr # "$dst", + [], "", J_tc_2early_SLOT23> { + let BaseOpcode = "call"; + let isPredicatedFalse = !if(IfTrue,0,1); + bits<2> Pu; + bits<17> dst; + + let IClass = 0b0101; + let Inst{27-24} = 0b1101; + let Inst{23-22,20-16,13,7-1} = dst{16-2}; + let Inst{21} = !if(IfTrue,0,1); + let Inst{11} = 0b0; + let Inst{9-8} = Pu; +} + +multiclass T_Calls<string ExtStr> { + def NAME : T_Call<ExtStr>; + def t : T_CallPred<1, ExtStr>; + def f : T_CallPred<0, ExtStr>; +} + +defm J2_call: T_Calls<"">, PredRel; + +let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs in +def CALLv3nr : T_Call<"">, PredRel; + +//===----------------------------------------------------------------------===// +// J - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// JR + +//===----------------------------------------------------------------------===// +// Call subroutine from register. + +let isCodeGenOnly = 1, Defs = VolatileV3.Regs in { + def CALLRv3nr : JUMPR_MISC_CALLR<0, 1>; // Call, no return. 
+} + +//===----------------------------------------------------------------------===// +// JR - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/ALU + +//===----------------------------------------------------------------------===// + +let Defs = [USR_OVF], Itinerary = ALU64_tc_2_SLOT23 in +def A2_addpsat : T_ALU64_arith<"add", 0b011, 0b101, 1, 0, 1>; + +class T_ALU64_addsp_hl<string suffix, bits<3> MinOp> + : T_ALU64_rr<"add", suffix, 0b0011, 0b011, MinOp, 0, 0, "">; + +def A2_addspl : T_ALU64_addsp_hl<":raw:lo", 0b110>; +def A2_addsph : T_ALU64_addsp_hl<":raw:hi", 0b111>; + +let hasSideEffects = 0, isAsmParserOnly = 1 in +def A2_addsp : ALU64_rr<(outs DoubleRegs:$Rd), + (ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)", + [(set (i64 DoubleRegs:$Rd), (i64 (add (i64 (sext (i32 IntRegs:$Rs))), + (i64 DoubleRegs:$Rt))))], + "", ALU64_tc_1_SLOT23>; + + +let hasSideEffects = 0 in +class T_XTYPE_MIN_MAX_P<bit isMax, bit isUnsigned> + : ALU64Inst<(outs DoubleRegs:$Rd), (ins DoubleRegs:$Rt, DoubleRegs:$Rs), + "$Rd = "#!if(isMax,"max","min")#!if(isUnsigned,"u","") + #"($Rt, $Rs)", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b00111; + let Inst{22-21} = !if(isMax, 0b10, 0b01); + let Inst{20-16} = !if(isMax, Rt, Rs); + let Inst{12-8} = !if(isMax, Rs, Rt); + let Inst{7} = 0b1; + let Inst{6} = !if(isMax, 0b0, 0b1); + let Inst{5} = isUnsigned; + let Inst{4-0} = Rd; +} + +def A2_minp : T_XTYPE_MIN_MAX_P<0, 0>; +def A2_minup : T_XTYPE_MIN_MAX_P<0, 1>; +def A2_maxp : T_XTYPE_MIN_MAX_P<1, 0>; +def A2_maxup : T_XTYPE_MIN_MAX_P<1, 1>; + +multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> { + defm: T_MinMax_pats<Op, DoubleRegs, i64, Inst, SwapInst>; +} + +let AddedComplexity = 200 in { + defm: MinMax_pats_p<setge, A2_maxp, A2_minp>; + defm: MinMax_pats_p<setgt, A2_maxp, A2_minp>; + defm: MinMax_pats_p<setle, A2_minp, A2_maxp>; + defm: MinMax_pats_p<setlt, A2_minp, A2_maxp>; + defm: MinMax_pats_p<setuge, A2_maxup, A2_minup>; + defm: MinMax_pats_p<setugt, A2_maxup, A2_minup>; + defm: MinMax_pats_p<setule, A2_minup, A2_maxup>; + defm: MinMax_pats_p<setult, A2_minup, A2_maxup>; +} + +//===----------------------------------------------------------------------===// +// ALU64/ALU - +//===----------------------------------------------------------------------===// + + + + +//def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset), +// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>; + +//def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset), +// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>; + +//def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset), +// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>; + +//def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset), +// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>; + +//def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset), +// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>; + +// Map call instruction +def : Pat<(callv3 (i32 IntRegs:$dst)), + (J2_callr (i32 IntRegs:$dst))>; +def : Pat<(callv3 tglobaladdr:$dst), + (J2_call tglobaladdr:$dst)>; +def : Pat<(callv3 texternalsym:$dst), + (J2_call texternalsym:$dst)>; +def : Pat<(callv3 tglobaltlsaddr:$dst), + (J2_call tglobaltlsaddr:$dst)>; + +def : Pat<(callv3nr (i32 IntRegs:$dst)), + (CALLRv3nr (i32 IntRegs:$dst))>; +def : 
Pat<(callv3nr tglobaladdr:$dst), + (CALLv3nr tglobaladdr:$dst)>; +def : Pat<(callv3nr texternalsym:$dst), + (CALLv3nr texternalsym:$dst)>; + +//===----------------------------------------------------------------------===// +// :raw form of vrcmpys:hi/lo insns +//===----------------------------------------------------------------------===// +// Vector reduce complex multiply by scalar. +let Defs = [USR_OVF], hasSideEffects = 0 in +class T_vrcmpRaw<string HiLo, bits<3>MajOp>: + MInst<(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rdd = vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, []> { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + let Inst{7-5} = 0b100; + let Inst{4-0} = Rdd; +} + +def M2_vrcmpys_s1_h: T_vrcmpRaw<"hi", 0b101>; +def M2_vrcmpys_s1_l: T_vrcmpRaw<"lo", 0b111>; + +// Assembler mapped to M2_vrcmpys_s1_h or M2_vrcmpys_s1_l +let hasSideEffects = 0, isAsmParserOnly = 1 in +def M2_vrcmpys_s1 + : MInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, IntRegs:$Rt), + "$Rdd=vrcmpys($Rss,$Rt):<<1:sat">; + +// Vector reduce complex multiply by scalar with accumulation. +let Defs = [USR_OVF], hasSideEffects = 0 in +class T_vrcmpys_acc<string HiLo, bits<3>MajOp>: + MInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$_src_, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rxx += vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, [], + "$Rxx = $_src_"> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + let Inst{7-5} = 0b100; + let Inst{4-0} = Rxx; + } + +def M2_vrcmpys_acc_s1_h: T_vrcmpys_acc<"hi", 0b101>; +def M2_vrcmpys_acc_s1_l: T_vrcmpys_acc<"lo", 0b111>; + +// Assembler mapped to M2_vrcmpys_acc_s1_h or M2_vrcmpys_acc_s1_l + +let isAsmParserOnly = 1 in +def M2_vrcmpys_acc_s1 + : MInst <(outs DoubleRegs:$dst), + (ins DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2), + "$dst += vrcmpys($src1, $src2):<<1:sat", [], + "$dst2 = $dst">; + +def M2_vrcmpys_s1rp_h : T_MType_vrcmpy <"vrcmpys", 0b101, 0b110, 1>; +def M2_vrcmpys_s1rp_l : T_MType_vrcmpy <"vrcmpys", 0b101, 0b111, 0>; + +// Assembler mapped to M2_vrcmpys_s1rp_h or M2_vrcmpys_s1rp_l +let isAsmParserOnly = 1 in +def M2_vrcmpys_s1rp + : MInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, IntRegs:$Rt), + "$Rd=vrcmpys($Rss,$Rt):<<1:rnd:sat">; + + +// S2_cabacdecbin: Cabac decode bin. +let Defs = [P0], isPredicateLate = 1, Itinerary = S_3op_tc_1_SLOT23 in +def S2_cabacdecbin : T_S3op_64 < "decbin", 0b11, 0b110, 0>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td new file mode 100644 index 0000000..87d6b35 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -0,0 +1,4251 @@ +//=- HexagonInstrInfoV4.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V4 instructions in TableGen format. 
+// +//===----------------------------------------------------------------------===// + +def DuplexIClass0: InstDuplex < 0 >; +def DuplexIClass1: InstDuplex < 1 >; +def DuplexIClass2: InstDuplex < 2 >; +let isExtendable = 1 in { + def DuplexIClass3: InstDuplex < 3 >; + def DuplexIClass4: InstDuplex < 4 >; + def DuplexIClass5: InstDuplex < 5 >; + def DuplexIClass6: InstDuplex < 6 >; + def DuplexIClass7: InstDuplex < 7 >; +} +def DuplexIClass8: InstDuplex < 8 >; +def DuplexIClass9: InstDuplex < 9 >; +def DuplexIClassA: InstDuplex < 0xA >; +def DuplexIClassB: InstDuplex < 0xB >; +def DuplexIClassC: InstDuplex < 0xC >; +def DuplexIClassD: InstDuplex < 0xD >; +def DuplexIClassE: InstDuplex < 0xE >; +def DuplexIClassF: InstDuplex < 0xF >; + +def addrga: PatLeaf<(i32 AddrGA:$Addr)>; +def addrgp: PatLeaf<(i32 AddrGP:$Addr)>; + +let hasSideEffects = 0 in +class T_Immext<Operand ImmType> + : EXTENDERInst<(outs), (ins ImmType:$imm), + "immext(#$imm)", []> { + bits<32> imm; + let IClass = 0b0000; + + let Inst{27-16} = imm{31-20}; + let Inst{13-0} = imm{19-6}; + } + +def A4_ext : T_Immext<u26_6Imm>; +let isCodeGenOnly = 1 in { + let isBranch = 1 in + def A4_ext_b : T_Immext<brtarget>; + let isCall = 1 in + def A4_ext_c : T_Immext<calltarget>; + def A4_ext_g : T_Immext<globaladdress>; +} + +def BITPOS32 : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-31]. + // As an SDNode. + int32_t imm = N->getSExtValue(); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); +}]>; + + +// Hexagon V4 Architecture spec defines 8 instruction classes: +// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the +// compiler) + +// LD Instructions: +// ======================================== +// Loads (8/16/32/64 bit) +// Deallocframe + +// ST Instructions: +// ======================================== +// Stores (8/16/32/64 bit) +// Allocframe + +// ALU32 Instructions: +// ======================================== +// Arithmetic / Logical (32 bit) +// Vector Halfword + +// XTYPE Instructions (32/64 bit): +// ======================================== +// Arithmetic, Logical, Bit Manipulation +// Multiply (Integer, Fractional, Complex) +// Permute / Vector Permute Operations +// Predicate Operations +// Shift / Shift with Add/Sub/Logical +// Vector Byte ALU +// Vector Halfword (ALU, Shift, Multiply) +// Vector Word (ALU, Shift) + +// J Instructions: +// ======================================== +// Jump/Call PC-relative + +// JR Instructions: +// ======================================== +// Jump/Call Register + +// MEMOP Instructions: +// ======================================== +// Operation on memory (8/16/32 bit) + +// NV Instructions: +// ======================================== +// New-value Jumps +// New-value Stores + +// CR Instructions: +// ======================================== +// Control-Register Transfers +// Hardware Loop Setup +// Predicate Logicals & Reductions + +// SYSTEM Instructions (not implemented in the compiler): +// ======================================== +// Prefetch +// Cache Maintenance +// Bus Operations + + +//===----------------------------------------------------------------------===// +// ALU32 + +//===----------------------------------------------------------------------===// + +class T_ALU32_3op_not<string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit OpsRev> + : T_ALU32_3op<mnemonic, MajOp, MinOp, OpsRev, 0> { + let AsmString = "$Rd = "#mnemonic#"($Rs, ~$Rt)"; +} + +let BaseOpcode = "andn_rr", CextOpcode = "andn" in +def A4_andn : T_ALU32_3op_not<"and", 
0b001, 0b100, 1>; +let BaseOpcode = "orn_rr", CextOpcode = "orn" in +def A4_orn : T_ALU32_3op_not<"or", 0b001, 0b101, 1>; + +let CextOpcode = "rcmp.eq" in +def A4_rcmpeq : T_ALU32_3op<"cmp.eq", 0b011, 0b010, 0, 1>; +let CextOpcode = "!rcmp.eq" in +def A4_rcmpneq : T_ALU32_3op<"!cmp.eq", 0b011, 0b011, 0, 1>; + +def C4_cmpneq : T_ALU32_3op_cmp<"!cmp.eq", 0b00, 1, 1>; +def C4_cmplte : T_ALU32_3op_cmp<"!cmp.gt", 0b10, 1, 0>; +def C4_cmplteu : T_ALU32_3op_cmp<"!cmp.gtu", 0b11, 1, 0>; + +// Pats for instruction selection. + +// A class to embed the usual comparison patfrags within a zext to i32. +// The seteq/setne frags use "lhs" and "rhs" as operands, so use the same +// names, or else the frag's "body" won't match the operands. +class CmpInReg<PatFrag Op> + : PatFrag<(ops node:$lhs, node:$rhs),(i32 (zext (i1 Op.Fragment)))>; + +def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>; +def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>; + +def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>; +def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>; + +def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>; + +class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm> + : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", S_3op_tc_2early_SLOT23>, + ImmRegRel { + let InputType = "reg"; + let CextOpcode = mnemonic; + let isCompare = 1; + let isCommutable = IsComm; + let hasSideEffects = 0; + + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1100; + let Inst{27-21} = 0b0111110; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-5} = MinOp; + let Inst{1-0} = Pd; +} + +def A4_cmpbeq : T_CMP_rrbh<"cmpb.eq", 0b110, 1>; +def A4_cmpbgt : T_CMP_rrbh<"cmpb.gt", 0b010, 0>; +def A4_cmpbgtu : T_CMP_rrbh<"cmpb.gtu", 0b111, 0>; +def A4_cmpheq : T_CMP_rrbh<"cmph.eq", 0b011, 1>; +def A4_cmphgt : T_CMP_rrbh<"cmph.gt", 0b100, 0>; +def A4_cmphgtu : T_CMP_rrbh<"cmph.gtu", 0b101, 0>; + +let AddedComplexity = 100 in { + def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 255), 0)), + (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 255), 0)), + (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>; + def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 65535), 0)), + (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 65535), 0)), + (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>; +} + +class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm, + Operand ImmType, bit IsImmExt, bit IsImmSigned, int ImmBits> + : ALU64Inst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, ImmType:$Imm), + "$Pd = "#mnemonic#"($Rs, #$Imm)", [], "", ALU64_tc_2early_SLOT23>, + ImmRegRel { + let InputType = "imm"; + let CextOpcode = mnemonic; + let isCompare = 1; + let isCommutable = IsComm; + let hasSideEffects = 0; + let isExtendable = IsImmExt; + let opExtendable = !if (IsImmExt, 2, 0); + let isExtentSigned = IsImmSigned; + let opExtentBits = ImmBits; + + bits<2> Pd; + bits<5> Rs; + bits<8> Imm; + + let IClass = 0b1101; + let Inst{27-24} = 0b1101; + let Inst{22-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{12-5} = Imm; + let Inst{4} = 0b0; + let Inst{3} = IsHalf; + let Inst{1-0} = Pd; +} + +def A4_cmpbeqi : T_CMP_ribh<"cmpb.eq", 0b00, 0, 1, u8Imm, 0, 0, 8>; +def A4_cmpbgti : T_CMP_ribh<"cmpb.gt", 0b01, 0, 0, s8Imm, 0, 1, 8>; +def A4_cmpbgtui : T_CMP_ribh<"cmpb.gtu", 0b10, 0, 0, u7Ext, 1, 0, 7>; +def A4_cmpheqi : T_CMP_ribh<"cmph.eq", 0b00, 
1, 1, s8Ext, 1, 1, 8>; +def A4_cmphgti : T_CMP_ribh<"cmph.gt", 0b01, 1, 0, s8Ext, 1, 1, 8>; +def A4_cmphgtui : T_CMP_ribh<"cmph.gtu", 0b10, 1, 0, u7Ext, 1, 0, 7>; + +class T_RCMP_EQ_ri<string mnemonic, bit IsNeg> + : ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s8Ext:$s8), + "$Rd = "#mnemonic#"($Rs, #$s8)", [], "", ALU32_2op_tc_1_SLOT0123>, + ImmRegRel { + let InputType = "imm"; + let CextOpcode = !if (IsNeg, "!rcmp.eq", "rcmp.eq"); + let isExtendable = 1; + let opExtendable = 2; + let isExtentSigned = 1; + let opExtentBits = 8; + let hasNewValue = 1; + + bits<5> Rd; + bits<5> Rs; + bits<8> s8; + + let IClass = 0b0111; + let Inst{27-24} = 0b0011; + let Inst{22} = 0b1; + let Inst{21} = IsNeg; + let Inst{20-16} = Rs; + let Inst{13} = 0b1; + let Inst{12-5} = s8; + let Inst{4-0} = Rd; +} + +def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>; +def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>; + +def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32ImmPred:$s8)))), + (A4_rcmpeqi IntRegs:$Rs, s32ImmPred:$s8)>; +def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32ImmPred:$s8)))), + (A4_rcmpneqi IntRegs:$Rs, s32ImmPred:$s8)>; + +// Preserve the S2_tstbit_r generation +def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))), + (i32 IntRegs:$src1))), 0)))), + (C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>; + +//===----------------------------------------------------------------------===// +// ALU32 - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// ALU32/PERM + +//===----------------------------------------------------------------------===// + +// Combine a word and an immediate into a register pair. +let hasSideEffects = 0, isExtentSigned = 1, isExtendable = 1, + opExtentBits = 8 in +class T_Combine1 <bits<2> MajOp, dag ins, string AsmStr> + : ALU32Inst <(outs DoubleRegs:$Rdd), ins, AsmStr> { + bits<5> Rdd; + bits<5> Rs; + bits<8> s8; + + let IClass = 0b0111; + let Inst{27-24} = 0b0011; + let Inst{22-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{13} = 0b1; + let Inst{12-5} = s8; + let Inst{4-0} = Rdd; + } + +let opExtendable = 2 in +def A4_combineri : T_Combine1<0b00, (ins IntRegs:$Rs, s8Ext:$s8), + "$Rdd = combine($Rs, #$s8)">; + +let opExtendable = 1 in +def A4_combineir : T_Combine1<0b01, (ins s8Ext:$s8, IntRegs:$Rs), + "$Rdd = combine(#$s8, $Rs)">; + +// The complexity of the combines involving immediates should be greater +// than the complexity of the combine with two registers. +let AddedComplexity = 50 in { +def: Pat<(HexagonCOMBINE IntRegs:$r, s32ImmPred:$i), + (A4_combineri IntRegs:$r, s32ImmPred:$i)>; + +def: Pat<(HexagonCOMBINE s32ImmPred:$i, IntRegs:$r), + (A4_combineir s32ImmPred:$i, IntRegs:$r)>; +} + +// A4_combineii: Set two small immediates. +let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in +def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6), + "$Rdd = combine(#$s8, #$U6)"> { + bits<5> Rdd; + bits<8> s8; + bits<6> U6; + + let IClass = 0b0111; + let Inst{27-23} = 0b11001; + let Inst{20-16} = U6{5-1}; + let Inst{13} = U6{0}; + let Inst{12-5} = s8; + let Inst{4-0} = Rdd; + } + +// The complexity of the combine with two immediates should be greater than +// the complexity of a combine involving a register. 
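For context on the pattern that follows: combine(#s8, #U6) writes its first operand into the upper word of the destination pair and its second into the lower word, and giving the two-immediate form the higher AddedComplexity makes instruction selection prefer it over first materializing one of the immediates in a register. The same layout is what the Zext64 fragment defined further below relies on (a zero upper word gives the zero-extension). A small illustrative model of the packing (helper name and values invented for the example):

#include <cassert>
#include <cstdint>

// Models "Rdd = combine(a, b)": a goes to the upper 32 bits, b to the lower.
static uint64_t combine_ii(int32_t hi, uint32_t lo) {
  return (uint64_t(uint32_t(hi)) << 32) | uint64_t(lo);
}

int main() {
  assert(combine_ii(0, 0x2A) == 0x000000000000002Aull);  // zero upper word: zext-style
  assert(combine_ii(-1, 7)   == 0xFFFFFFFF00000007ull);
  return 0;
}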
+let AddedComplexity = 75 in +def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u32ImmPred:$u6), + (A4_combineii imm:$s8, imm:$u6)>; + +//===----------------------------------------------------------------------===// +// ALU32/PERM - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// LD + +//===----------------------------------------------------------------------===// + +def Zext64: OutPatFrag<(ops node:$Rs), + (i64 (A4_combineir 0, (i32 $Rs)))>; +def Sext64: OutPatFrag<(ops node:$Rs), + (i64 (A2_sxtw (i32 $Rs)))>; + +// Patterns to generate indexed loads with different forms of the address: +// - frameindex, +// - base + offset, +// - base (without offset). +multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, + PatLeaf ImmPred, InstHexagon MI> { + def: Pat<(VT (Load AddrFI:$fi)), + (VT (ValueMod (MI AddrFI:$fi, 0)))>; + def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))), + (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; + def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))), + (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; + def: Pat<(VT (Load (i32 IntRegs:$Rs))), + (VT (ValueMod (MI IntRegs:$Rs, 0)))>; +} + +defm: Loadxm_pat<extloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<extloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<extloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>; +defm: Loadxm_pat<zextloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<zextloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<zextloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>; +defm: Loadxm_pat<sextloadi8, i64, Sext64, s32_0ImmPred, L2_loadrb_io>; +defm: Loadxm_pat<sextloadi16, i64, Sext64, s31_1ImmPred, L2_loadrh_io>; + +// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs). +def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>; + +//===----------------------------------------------------------------------===// +// Template class for load instructions with Absolute set addressing mode. 
+//===----------------------------------------------------------------------===// +let isExtended = 1, opExtendable = 2, opExtentBits = 6, addrMode = AbsoluteSet, + hasSideEffects = 0 in +class T_LD_abs_set<string mnemonic, RegisterClass RC, bits<4>MajOp>: + LDInst<(outs RC:$dst1, IntRegs:$dst2), + (ins u6Ext:$addr), + "$dst1 = "#mnemonic#"($dst2 = #$addr)", + []> { + bits<7> name; + bits<5> dst1; + bits<5> dst2; + bits<6> addr; + + let IClass = 0b1001; + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{13-12} = 0b01; + let Inst{4-0} = dst1; + let Inst{20-16} = dst2; + let Inst{11-8} = addr{5-2}; + let Inst{6-5} = addr{1-0}; +} + +let accessSize = ByteAccess, hasNewValue = 1 in { + def L4_loadrb_ap : T_LD_abs_set <"memb", IntRegs, 0b1000>; + def L4_loadrub_ap : T_LD_abs_set <"memub", IntRegs, 0b1001>; +} + +let accessSize = HalfWordAccess, hasNewValue = 1 in { + def L4_loadrh_ap : T_LD_abs_set <"memh", IntRegs, 0b1010>; + def L4_loadruh_ap : T_LD_abs_set <"memuh", IntRegs, 0b1011>; + def L4_loadbsw2_ap : T_LD_abs_set <"membh", IntRegs, 0b0001>; + def L4_loadbzw2_ap : T_LD_abs_set <"memubh", IntRegs, 0b0011>; +} + +let accessSize = WordAccess, hasNewValue = 1 in + def L4_loadri_ap : T_LD_abs_set <"memw", IntRegs, 0b1100>; + +let accessSize = WordAccess in { + def L4_loadbzw4_ap : T_LD_abs_set <"memubh", DoubleRegs, 0b0101>; + def L4_loadbsw4_ap : T_LD_abs_set <"membh", DoubleRegs, 0b0111>; +} + +let accessSize = DoubleWordAccess in +def L4_loadrd_ap : T_LD_abs_set <"memd", DoubleRegs, 0b1110>; + +let accessSize = ByteAccess in + def L4_loadalignb_ap : T_LD_abs_set <"memb_fifo", DoubleRegs, 0b0100>; + +let accessSize = HalfWordAccess in +def L4_loadalignh_ap : T_LD_abs_set <"memh_fifo", DoubleRegs, 0b0010>; + +// Load - Indirect with long offset +let InputType = "imm", addrMode = BaseLongOffset, isExtended = 1, +opExtentBits = 6, opExtendable = 3 in +class T_LoadAbsReg <string mnemonic, string CextOp, RegisterClass RC, + bits<4> MajOp> + : LDInst <(outs RC:$dst), (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3), + "$dst = "#mnemonic#"($src1<<#$src2 + #$src3)", + [] >, ImmRegShl { + bits<5> dst; + bits<5> src1; + bits<2> src2; + bits<6> src3; + let CextOpcode = CextOp; + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); + + let IClass = 0b1001; + let Inst{27-25} = 0b110; + let Inst{24-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = src2{1}; + let Inst{12} = 0b1; + let Inst{11-8} = src3{5-2}; + let Inst{7} = src2{0}; + let Inst{6-5} = src3{1-0}; + let Inst{4-0} = dst; + } + +let accessSize = ByteAccess in { + def L4_loadrb_ur : T_LoadAbsReg<"memb", "LDrib", IntRegs, 0b1000>; + def L4_loadrub_ur : T_LoadAbsReg<"memub", "LDriub", IntRegs, 0b1001>; + def L4_loadalignb_ur : T_LoadAbsReg<"memb_fifo", "LDrib_fifo", + DoubleRegs, 0b0100>; +} + +let accessSize = HalfWordAccess in { + def L4_loadrh_ur : T_LoadAbsReg<"memh", "LDrih", IntRegs, 0b1010>; + def L4_loadruh_ur : T_LoadAbsReg<"memuh", "LDriuh", IntRegs, 0b1011>; + def L4_loadbsw2_ur : T_LoadAbsReg<"membh", "LDribh2", IntRegs, 0b0001>; + def L4_loadbzw2_ur : T_LoadAbsReg<"memubh", "LDriubh2", IntRegs, 0b0011>; + def L4_loadalignh_ur : T_LoadAbsReg<"memh_fifo", "LDrih_fifo", + DoubleRegs, 0b0010>; +} + +let accessSize = WordAccess in { + def L4_loadri_ur : T_LoadAbsReg<"memw", "LDriw", IntRegs, 0b1100>; + def L4_loadbsw4_ur : T_LoadAbsReg<"membh", "LDribh4", DoubleRegs, 0b0111>; + def L4_loadbzw4_ur : T_LoadAbsReg<"memubh", "LDriubh4", DoubleRegs, 0b0101>; +} + +let accessSize = DoubleWordAccess in +def 
L4_loadrd_ur : T_LoadAbsReg<"memd", "LDrid", DoubleRegs, 0b1110>; + + +multiclass T_LoadAbsReg_Pat <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> { + def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$src3)))), + (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3)>; + def : Pat <(VT (ldOp (add IntRegs:$src1, + (HexagonCONST32 tglobaladdr:$src2)))), + (MI IntRegs:$src1, 0, tglobaladdr:$src2)>; + + def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tconstpool:$src3)))), + (MI IntRegs:$src1, u2ImmPred:$src2, tconstpool:$src3)>; + def : Pat <(VT (ldOp (add IntRegs:$src1, + (HexagonCONST32 tconstpool:$src2)))), + (MI IntRegs:$src1, 0, tconstpool:$src2)>; + + def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tjumptable:$src3)))), + (MI IntRegs:$src1, u2ImmPred:$src2, tjumptable:$src3)>; + def : Pat <(VT (ldOp (add IntRegs:$src1, + (HexagonCONST32 tjumptable:$src2)))), + (MI IntRegs:$src1, 0, tjumptable:$src2)>; +} + +let AddedComplexity = 60 in { +defm : T_LoadAbsReg_Pat <sextloadi8, L4_loadrb_ur>; +defm : T_LoadAbsReg_Pat <zextloadi8, L4_loadrub_ur>; +defm : T_LoadAbsReg_Pat <extloadi8, L4_loadrub_ur>; + +defm : T_LoadAbsReg_Pat <sextloadi16, L4_loadrh_ur>; +defm : T_LoadAbsReg_Pat <zextloadi16, L4_loadruh_ur>; +defm : T_LoadAbsReg_Pat <extloadi16, L4_loadruh_ur>; + +defm : T_LoadAbsReg_Pat <load, L4_loadri_ur>; +defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, i64>; +} + +//===----------------------------------------------------------------------===// +// Template classes for the non-predicated load instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +class T_load_rr <string mnemonic, RegisterClass RC, bits<3> MajOp>: + LDInst<(outs RC:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$u2), + "$dst = "#mnemonic#"($src1 + $src2<<#$u2)", + [], "", V4LDST_tc_ld_SLOT01>, ImmRegShl, AddrModeRel { + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<2> u2; + + let IClass = 0b0011; + + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{12-8} = src2; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{4-0} = dst; + } + +//===----------------------------------------------------------------------===// +// Template classes for the predicated load instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +let isPredicated = 1 in +class T_pload_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, + bit isNot, bit isPredNew>: + LDInst <(outs RC:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$u2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#"$dst = "#mnemonic#"($src2+$src3<<#$u2)", + [], "", V4LDST_tc_ld_SLOT01>, AddrModeRel { + bits<5> dst; + bits<2> src1; + bits<5> src2; + bits<5> src3; + bits<2> u2; + + let isPredicatedFalse = isNot; + let isPredicatedNew = isPredNew; + + let IClass = 0b0011; + + let Inst{27-26} = 0b00; + let Inst{25} = isPredNew; + let Inst{24} = isNot; + let Inst{23-21} = MajOp; + let Inst{20-16} = src2; + let Inst{12-8} = src3; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{6-5} = src1; + let Inst{4-0} = dst; + } + +//===----------------------------------------------------------------------===// +// multiclass for load instructions with base + register offset +// addressing mode 
+//===----------------------------------------------------------------------===// +let hasSideEffects = 0, addrMode = BaseRegOffset in +multiclass ld_idxd_shl <string mnemonic, string CextOp, RegisterClass RC, + bits<3> MajOp > { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl, + InputType = "reg" in { + let isPredicable = 1 in + def L4_#NAME#_rr : T_load_rr <mnemonic, RC, MajOp>; + + // Predicated + def L4_p#NAME#t_rr : T_pload_rr <mnemonic, RC, MajOp, 0, 0>; + def L4_p#NAME#f_rr : T_pload_rr <mnemonic, RC, MajOp, 1, 0>; + + // Predicated new + def L4_p#NAME#tnew_rr : T_pload_rr <mnemonic, RC, MajOp, 0, 1>; + def L4_p#NAME#fnew_rr : T_pload_rr <mnemonic, RC, MajOp, 1, 1>; + } +} + +let hasNewValue = 1, accessSize = ByteAccess in { + defm loadrb : ld_idxd_shl<"memb", "LDrib", IntRegs, 0b000>; + defm loadrub : ld_idxd_shl<"memub", "LDriub", IntRegs, 0b001>; +} + +let hasNewValue = 1, accessSize = HalfWordAccess in { + defm loadrh : ld_idxd_shl<"memh", "LDrih", IntRegs, 0b010>; + defm loadruh : ld_idxd_shl<"memuh", "LDriuh", IntRegs, 0b011>; +} + +let hasNewValue = 1, accessSize = WordAccess in +defm loadri : ld_idxd_shl<"memw", "LDriw", IntRegs, 0b100>; + +let accessSize = DoubleWordAccess in +defm loadrd : ld_idxd_shl<"memd", "LDrid", DoubleRegs, 0b110>; + +// 'def pats' for load instructions with base + register offset and non-zero +// immediate value. Immediate value is used to left-shift the second +// register operand. +class Loadxs_pat<PatFrag Load, ValueType VT, InstHexagon MI> + : Pat<(VT (Load (add (i32 IntRegs:$Rs), + (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2))))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>; + +let AddedComplexity = 40 in { + def: Loadxs_pat<extloadi8, i32, L4_loadrub_rr>; + def: Loadxs_pat<zextloadi8, i32, L4_loadrub_rr>; + def: Loadxs_pat<sextloadi8, i32, L4_loadrb_rr>; + def: Loadxs_pat<extloadi16, i32, L4_loadruh_rr>; + def: Loadxs_pat<zextloadi16, i32, L4_loadruh_rr>; + def: Loadxs_pat<sextloadi16, i32, L4_loadrh_rr>; + def: Loadxs_pat<load, i32, L4_loadri_rr>; + def: Loadxs_pat<load, i64, L4_loadrd_rr>; +} + +// 'def pats' for load instruction base + register offset and +// zero immediate value. +class Loadxs_simple_pat<PatFrag Load, ValueType VT, InstHexagon MI> + : Pat<(VT (Load (add (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>; + +let AddedComplexity = 20 in { + def: Loadxs_simple_pat<extloadi8, i32, L4_loadrub_rr>; + def: Loadxs_simple_pat<zextloadi8, i32, L4_loadrub_rr>; + def: Loadxs_simple_pat<sextloadi8, i32, L4_loadrb_rr>; + def: Loadxs_simple_pat<extloadi16, i32, L4_loadruh_rr>; + def: Loadxs_simple_pat<zextloadi16, i32, L4_loadruh_rr>; + def: Loadxs_simple_pat<sextloadi16, i32, L4_loadrh_rr>; + def: Loadxs_simple_pat<load, i32, L4_loadri_rr>; + def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>; +} + +// zext i1->i64 +def: Pat<(i64 (zext (i1 PredRegs:$src1))), + (Zext64 (C2_muxii PredRegs:$src1, 1, 0))>; + +// zext i32->i64 +def: Pat<(i64 (zext (i32 IntRegs:$src1))), + (Zext64 IntRegs:$src1)>; + +//===----------------------------------------------------------------------===// +// LD - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ST + +//===----------------------------------------------------------------------===// +/// +//===----------------------------------------------------------------------===// +// Template class for store instructions with Absolute set addressing mode. 
+//===----------------------------------------------------------------------===// +let isExtended = 1, opExtendable = 1, opExtentBits = 6, + addrMode = AbsoluteSet in +class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC, + bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0> + : STInst<(outs IntRegs:$dst), + (ins u6Ext:$addr, RC:$src), + mnemonic#"($dst = #$addr) = $src"#!if(isHalf, ".h","")>, NewValueRel { + bits<5> dst; + bits<6> addr; + bits<5> src; + let accessSize = AccessSz; + let BaseOpcode = BaseOp#"_AbsSet"; + + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1)); + + let IClass = 0b1010; + + let Inst{27-24} = 0b1011; + let Inst{23-21} = MajOp; + let Inst{20-16} = dst; + let Inst{13} = 0b0; + let Inst{12-8} = src; + let Inst{7} = 0b1; + let Inst{5-0} = addr; + } + +def S4_storerb_ap : T_ST_absset <"memb", "STrib", IntRegs, 0b000, ByteAccess>; +def S4_storerh_ap : T_ST_absset <"memh", "STrih", IntRegs, 0b010, + HalfWordAccess>; +def S4_storeri_ap : T_ST_absset <"memw", "STriw", IntRegs, 0b100, WordAccess>; + +let isNVStorable = 0 in { + def S4_storerf_ap : T_ST_absset <"memh", "STrif", IntRegs, + 0b011, HalfWordAccess, 1>; + def S4_storerd_ap : T_ST_absset <"memd", "STrid", DoubleRegs, + 0b110, DoubleWordAccess>; +} + +let opExtendable = 1, isNewValue = 1, isNVStore = 1, opNewValue = 2, +isExtended = 1, opExtentBits= 6 in +class T_ST_absset_nv <string mnemonic, string BaseOp, bits<2> MajOp, + MemAccessSize AccessSz > + : NVInst <(outs IntRegs:$dst), + (ins u6Ext:$addr, IntRegs:$src), + mnemonic#"($dst = #$addr) = $src.new">, NewValueRel { + bits<5> dst; + bits<6> addr; + bits<3> src; + let accessSize = AccessSz; + let BaseOpcode = BaseOp#"_AbsSet"; + + let IClass = 0b1010; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = dst; + let Inst{13-11} = 0b000; + let Inst{12-11} = MajOp; + let Inst{10-8} = src; + let Inst{7} = 0b1; + let Inst{5-0} = addr; + } + +let mayStore = 1, addrMode = AbsoluteSet in { + def S4_storerbnew_ap : T_ST_absset_nv <"memb", "STrib", 0b00, ByteAccess>; + def S4_storerhnew_ap : T_ST_absset_nv <"memh", "STrih", 0b01, HalfWordAccess>; + def S4_storerinew_ap : T_ST_absset_nv <"memw", "STriw", 0b10, WordAccess>; +} + +let isExtended = 1, opExtendable = 2, opExtentBits = 6, InputType = "imm", + addrMode = BaseLongOffset, AddedComplexity = 40 in +class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC, + bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0> + : STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, RC:$src4), + mnemonic#"($src1<<#$src2 + #$src3) = $src4"#!if(isHalf, ".h",""), + []>, ImmRegShl, NewValueRel { + + bits<5> src1; + bits<2> src2; + bits<6> src3; + bits<5> src4; + + let accessSize = AccessSz; + let CextOpcode = CextOp; + let BaseOpcode = CextOp#"_shl"; + + // Store upper-half and store doubleword cannot be NV. 
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1)); + + let IClass = 0b1010; + + let Inst{27-24} =0b1101; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = src2{1}; + let Inst{12-8} = src4; + let Inst{7} = 0b1; + let Inst{6} = src2{0}; + let Inst{5-0} = src3; +} + +def S4_storerb_ur : T_StoreAbsReg <"memb", "STrib", IntRegs, 0b000, ByteAccess>; +def S4_storerh_ur : T_StoreAbsReg <"memh", "STrih", IntRegs, 0b010, + HalfWordAccess>; +def S4_storerf_ur : T_StoreAbsReg <"memh", "STrif", IntRegs, 0b011, + HalfWordAccess, 1>; +def S4_storeri_ur : T_StoreAbsReg <"memw", "STriw", IntRegs, 0b100, WordAccess>; +def S4_storerd_ur : T_StoreAbsReg <"memd", "STrid", DoubleRegs, 0b110, + DoubleWordAccess>; + +let AddedComplexity = 40 in +multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT, + PatFrag stOp> { + def : Pat<(stOp (VT RC:$src4), + (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), + u32ImmPred:$src3)), + (MI IntRegs:$src1, u2ImmPred:$src2, u32ImmPred:$src3, RC:$src4)>; + + def : Pat<(stOp (VT RC:$src4), + (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$src3))), + (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>; + + def : Pat<(stOp (VT RC:$src4), + (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))), + (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>; +} + +defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, i64, store>; +defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, i32, store>; +defm : T_StoreAbsReg_Pats <S4_storerb_ur, IntRegs, i32, truncstorei8>; +defm : T_StoreAbsReg_Pats <S4_storerh_ur, IntRegs, i32, truncstorei16>; + +let mayStore = 1, isNVStore = 1, isExtended = 1, addrMode = BaseLongOffset, + opExtentBits = 6, isNewValue = 1, opNewValue = 3, opExtendable = 2 in +class T_StoreAbsRegNV <string mnemonic, string CextOp, bits<2> MajOp, + MemAccessSize AccessSz> + : NVInst <(outs ), + (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, IntRegs:$src4), + mnemonic#"($src1<<#$src2 + #$src3) = $src4.new">, NewValueRel { + bits<5> src1; + bits<2> src2; + bits<6> src3; + bits<3> src4; + + let CextOpcode = CextOp; + let BaseOpcode = CextOp#"_shl"; + let IClass = 0b1010; + + let Inst{27-21} = 0b1101101; + let Inst{12-11} = 0b00; + let Inst{7} = 0b1; + let Inst{20-16} = src1; + let Inst{13} = src2{1}; + let Inst{12-11} = MajOp; + let Inst{10-8} = src4; + let Inst{6} = src2{0}; + let Inst{5-0} = src3; + } + +def S4_storerbnew_ur : T_StoreAbsRegNV <"memb", "STrib", 0b00, ByteAccess>; +def S4_storerhnew_ur : T_StoreAbsRegNV <"memh", "STrih", 0b01, HalfWordAccess>; +def S4_storerinew_ur : T_StoreAbsRegNV <"memw", "STriw", 0b10, WordAccess>; + +//===----------------------------------------------------------------------===// +// Template classes for the non-predicated store instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +let isPredicable = 1 in +class T_store_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isH> + : STInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, RC:$Rt), + mnemonic#"($Rs + $Ru<<#$u2) = $Rt"#!if(isH, ".h",""), + [],"",V4LDST_tc_st_SLOT01>, ImmRegShl, AddrModeRel { + + bits<5> Rs; + bits<5> Ru; + bits<2> u2; + bits<5> Rt; + + // Store upper-half and store doubleword cannot be NV. 
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1)); + + let IClass = 0b0011; + + let Inst{27-24} = 0b1011; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{12-8} = Ru; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{4-0} = Rt; + } + +//===----------------------------------------------------------------------===// +// Template classes for the predicated store instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +let isPredicated = 1 in +class T_pstore_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, + bit isNot, bit isPredNew, bit isH> + : STInst <(outs), + (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, RC:$Rt), + + !if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Rt"#!if(isH, ".h",""), + [], "", V4LDST_tc_st_SLOT01> , AddrModeRel{ + bits<2> Pv; + bits<5> Rs; + bits<5> Ru; + bits<2> u2; + bits<5> Rt; + + let isPredicatedFalse = isNot; + let isPredicatedNew = isPredNew; + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1)); + + let IClass = 0b0011; + + let Inst{27-26} = 0b01; + let Inst{25} = isPredNew; + let Inst{24} = isNot; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{12-8} = Ru; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{6-5} = Pv; + let Inst{4-0} = Rt; + } + +//===----------------------------------------------------------------------===// +// Template classes for the new-value store instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +let isPredicable = 1, isNewValue = 1, opNewValue = 3 in +class T_store_new_rr <string mnemonic, bits<2> MajOp> : + NVInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, IntRegs:$Nt), + mnemonic#"($Rs + $Ru<<#$u2) = $Nt.new", + [],"",V4LDST_tc_st_SLOT0>, ImmRegShl, AddrModeRel { + + bits<5> Rs; + bits<5> Ru; + bits<2> u2; + bits<3> Nt; + + let IClass = 0b0011; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = Rs; + let Inst{12-8} = Ru; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{4-3} = MajOp; + let Inst{2-0} = Nt; + } + +//===----------------------------------------------------------------------===// +// Template classes for the predicated new-value store instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +let isPredicated = 1, isNewValue = 1, opNewValue = 4 in +class T_pstore_new_rr <string mnemonic, bits<2> MajOp, bit isNot, bit isPredNew> + : NVInst<(outs), + (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, IntRegs:$Nt), + !if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Nt.new", + [], "", V4LDST_tc_st_SLOT0>, AddrModeRel { + bits<2> Pv; + bits<5> Rs; + bits<5> Ru; + bits<2> u2; + bits<3> Nt; + + let isPredicatedFalse = isNot; + let isPredicatedNew = isPredNew; + + let IClass = 0b0011; + let Inst{27-26} = 0b01; + let Inst{25} = isPredNew; + let Inst{24} = isNot; + let Inst{23-21} = 0b101; + let Inst{20-16} = Rs; + let Inst{12-8} = Ru; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{6-5} = Pv; + let Inst{4-3} = MajOp; + let Inst{2-0} = Nt; + } + +//===----------------------------------------------------------------------===// +// multiclass for store instructions with base + register offset addressing +// 
mode +//===----------------------------------------------------------------------===// +let isNVStorable = 1 in +multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC, + bits<3> MajOp, bit isH = 0> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in { + def S4_#NAME#_rr : T_store_rr <mnemonic, RC, MajOp, isH>; + + // Predicated + def S4_p#NAME#t_rr : T_pstore_rr <mnemonic, RC, MajOp, 0, 0, isH>; + def S4_p#NAME#f_rr : T_pstore_rr <mnemonic, RC, MajOp, 1, 0, isH>; + + // Predicated new + def S4_p#NAME#tnew_rr : T_pstore_rr <mnemonic, RC, MajOp, 0, 1, isH>; + def S4_p#NAME#fnew_rr : T_pstore_rr <mnemonic, RC, MajOp, 1, 1, isH>; + } +} + +//===----------------------------------------------------------------------===// +// multiclass for new-value store instructions with base + register offset +// addressing mode. +//===----------------------------------------------------------------------===// +let mayStore = 1, isNVStore = 1 in +multiclass ST_Idxd_shl_nv <string mnemonic, string CextOp, RegisterClass RC, + bits<2> MajOp> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in { + def S4_#NAME#new_rr : T_store_new_rr<mnemonic, MajOp>; + + // Predicated + def S4_p#NAME#newt_rr : T_pstore_new_rr <mnemonic, MajOp, 0, 0>; + def S4_p#NAME#newf_rr : T_pstore_new_rr <mnemonic, MajOp, 1, 0>; + + // Predicated new + def S4_p#NAME#newtnew_rr : T_pstore_new_rr <mnemonic, MajOp, 0, 1>; + def S4_p#NAME#newfnew_rr : T_pstore_new_rr <mnemonic, MajOp, 1, 1>; + } +} + +let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0 in { + let accessSize = ByteAccess in + defm storerb: ST_Idxd_shl<"memb", "STrib", IntRegs, 0b000>, + ST_Idxd_shl_nv<"memb", "STrib", IntRegs, 0b00>; + + let accessSize = HalfWordAccess in + defm storerh: ST_Idxd_shl<"memh", "STrih", IntRegs, 0b010>, + ST_Idxd_shl_nv<"memh", "STrih", IntRegs, 0b01>; + + let accessSize = WordAccess in + defm storeri: ST_Idxd_shl<"memw", "STriw", IntRegs, 0b100>, + ST_Idxd_shl_nv<"memw", "STriw", IntRegs, 0b10>; + + let isNVStorable = 0, accessSize = DoubleWordAccess in + defm storerd: ST_Idxd_shl<"memd", "STrid", DoubleRegs, 0b110>; + + let isNVStorable = 0, accessSize = HalfWordAccess in + defm storerf: ST_Idxd_shl<"memh", "STrif", IntRegs, 0b011, 1>; +} + +class Storexs_pat<PatFrag Store, PatFrag Value, InstHexagon MI> + : Pat<(Store Value:$Ru, (add (i32 IntRegs:$Rs), + (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2)))), + (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>; + +let AddedComplexity = 40 in { + def: Storexs_pat<truncstorei8, I32, S4_storerb_rr>; + def: Storexs_pat<truncstorei16, I32, S4_storerh_rr>; + def: Storexs_pat<store, I32, S4_storeri_rr>; + def: Storexs_pat<store, I64, S4_storerd_rr>; +} + +// memd(Rx++#s4:3)=Rtt +// memd(Rx++#s4:3:circ(Mu))=Rtt +// memd(Rx++I:circ(Mu))=Rtt +// memd(Rx++Mu)=Rtt +// memd(Rx++Mu:brev)=Rtt +// memd(gp+#u16:3)=Rtt + +// Store doubleword conditionally. +// if ([!]Pv[.new]) memd(#u6)=Rtt +// TODO: needs to be implemented. 
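+
+// For illustration: with the Storexs_pat patterns above, a 32-bit store to an
+// address of the form (Rs + (Rt << #u2)), such as a C array store "p[i] = v"
+// for a 4-byte element type, matches Storexs_pat<store, I32, S4_storeri_rr>
+// and is emitted with the T_store_rr syntax "memw($Rs + $Ru<<#$u2) = $Rt".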
+
+//===----------------------------------------------------------------------===//
+// Template classes for store instructions with base + immediate offset
+// addressing mode and an immediate stored value.
+//===----------------------------------------------------------------------===//
+let isPredicable = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 8,
+    opExtendable = 2 in
+class T_StoreImm <string mnemonic, Operand OffsetOp, bits<2> MajOp >
+  : STInst <(outs ), (ins IntRegs:$Rs, OffsetOp:$offset, s8Ext:$S8),
+      mnemonic#"($Rs+#$offset)=#$S8",
+      [], "", V4LDST_tc_st_SLOT01>,
+      ImmRegRel, PredNewRel {
+    bits<5> Rs;
+    bits<8> S8;
+    bits<8> offset;
+    bits<6> offsetBits;
+
+    string OffsetOpStr = !cast<string>(OffsetOp);
+    let offsetBits = !if (!eq(OffsetOpStr, "u6_2Imm"), offset{7-2},
+                     !if (!eq(OffsetOpStr, "u6_1Imm"), offset{6-1},
+                                      /* u6_0Imm */ offset{5-0}));
+
+    let IClass = 0b0011;
+
+    let Inst{27-25} = 0b110;
+    let Inst{22-21} = MajOp;
+    let Inst{20-16} = Rs;
+    let Inst{12-7} = offsetBits;
+    let Inst{13} = S8{7};
+    let Inst{6-0} = S8{6-0};
+  }
+
+let isPredicated = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 6,
+    opExtendable = 3 in
+class T_StoreImm_pred <string mnemonic, Operand OffsetOp, bits<2> MajOp,
+                       bit isPredNot, bit isPredNew >
+  : STInst <(outs ),
+            (ins PredRegs:$Pv, IntRegs:$Rs, OffsetOp:$offset, s6Ext:$S6),
+      !if(isPredNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
+       ") ")#mnemonic#"($Rs+#$offset)=#$S6",
+      [], "", V4LDST_tc_st_SLOT01>,
+      ImmRegRel, PredNewRel {
+    bits<2> Pv;
+    bits<5> Rs;
+    bits<6> S6;
+    bits<8> offset;
+    bits<6> offsetBits;
+
+    string OffsetOpStr = !cast<string>(OffsetOp);
+    let offsetBits = !if (!eq(OffsetOpStr, "u6_2Imm"), offset{7-2},
+                     !if (!eq(OffsetOpStr, "u6_1Imm"), offset{6-1},
+                                      /* u6_0Imm */ offset{5-0}));
+    let isPredicatedNew = isPredNew;
+    let isPredicatedFalse = isPredNot;
+
+    let IClass = 0b0011;
+
+    let Inst{27-25} = 0b100;
+    let Inst{24} = isPredNew;
+    let Inst{23} = isPredNot;
+    let Inst{22-21} = MajOp;
+    let Inst{20-16} = Rs;
+    let Inst{13} = S6{5};
+    let Inst{12-7} = offsetBits;
+    let Inst{6-5} = Pv;
+    let Inst{4-0} = S6{4-0};
+  }
+
+
+//===----------------------------------------------------------------------===//
+// multiclass for store instructions with base + immediate offset
+// addressing mode and immediate stored value.
+// mem[bhw](Rs+#u6:[012])=#S8
+// if ([!]Pv[.new]) mem[bhw](Rs+#u6:[012])=#S6
+//===----------------------------------------------------------------------===//
+
+multiclass ST_Imm_Pred <string mnemonic, Operand OffsetOp, bits<2> MajOp,
+                        bit PredNot> {
+  def _io : T_StoreImm_pred <mnemonic, OffsetOp, MajOp, PredNot, 0>;
+  // Predicate new
+  def new_io : T_StoreImm_pred <mnemonic, OffsetOp, MajOp, PredNot, 1>;
+}
+
+multiclass ST_Imm <string mnemonic, string CextOp, Operand OffsetOp,
+                   bits<2> MajOp> {
+  let CextOpcode = CextOp, BaseOpcode = CextOp#_imm in {
+    def _io : T_StoreImm <mnemonic, OffsetOp, MajOp>;
+
+    defm t : ST_Imm_Pred <mnemonic, OffsetOp, MajOp, 0>;
+    defm f : ST_Imm_Pred <mnemonic, OffsetOp, MajOp, 1>;
+  }
+}
+
+let hasSideEffects = 0, addrMode = BaseImmOffset,
+    InputType = "imm" in {
+  let accessSize = ByteAccess in
+  defm S4_storeirb : ST_Imm<"memb", "STrib", u6_0Imm, 0b00>;
+
+  let accessSize = HalfWordAccess in
+  defm S4_storeirh : ST_Imm<"memh", "STrih", u6_1Imm, 0b01>;
+
+  let accessSize = WordAccess in
+  defm S4_storeiri : ST_Imm<"memw", "STriw", u6_2Imm, 0b10>;
+}
+
+def IMM_BYTE : SDNodeXForm<imm, [{
+  // -1 etc is represented as 255 etc
+  // assigning to a byte restores our desired signed value.
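+  // For example, a DAG immediate of 255 truncates to int8_t -1, which is then
+  // re-emitted below as the desired signed byte immediate.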
+ int8_t imm = N->getSExtValue(); + return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); +}]>; + +def IMM_HALF : SDNodeXForm<imm, [{ + // -1 etc is represented as 65535 etc + // assigning to a short restores our desired signed value. + int16_t imm = N->getSExtValue(); + return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); +}]>; + +def IMM_WORD : SDNodeXForm<imm, [{ + // -1 etc can be represented as 4294967295 etc + // Currently, it's not doing this. But some optimization + // might convert -1 to a large +ve number. + // assigning to a word restores our desired signed value. + int32_t imm = N->getSExtValue(); + return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); +}]>; + +def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>; +def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>; +def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>; + +let AddedComplexity = 40 in { + // Not using frameindex patterns for these stores, because the offset + // is not extendable. This could cause problems during removing the frame + // indices, since the offset with respect to R29/R30 may not fit in the + // u6 field. + def: Storexm_add_pat<truncstorei8, s32ImmPred, u6_0ImmPred, ToImmByte, + S4_storeirb_io>; + def: Storexm_add_pat<truncstorei16, s32ImmPred, u6_1ImmPred, ToImmHalf, + S4_storeirh_io>; + def: Storexm_add_pat<store, s32ImmPred, u6_2ImmPred, ToImmWord, + S4_storeiri_io>; +} + +def: Storexm_simple_pat<truncstorei8, s32ImmPred, ToImmByte, S4_storeirb_io>; +def: Storexm_simple_pat<truncstorei16, s32ImmPred, ToImmHalf, S4_storeirh_io>; +def: Storexm_simple_pat<store, s32ImmPred, ToImmWord, S4_storeiri_io>; + +// memb(Rx++#s4:0:circ(Mu))=Rt +// memb(Rx++I:circ(Mu))=Rt +// memb(Rx++Mu)=Rt +// memb(Rx++Mu:brev)=Rt +// memb(gp+#u16:0)=Rt + +// Store halfword. +// TODO: needs to be implemented +// memh(Re=#U6)=Rt.H +// memh(Rs+#s11:1)=Rt.H +// memh(Rs+Ru<<#u2)=Rt.H +// TODO: needs to be implemented. + +// memh(Ru<<#u2+#U6)=Rt.H +// memh(Rx++#s4:1:circ(Mu))=Rt.H +// memh(Rx++#s4:1:circ(Mu))=Rt +// memh(Rx++I:circ(Mu))=Rt.H +// memh(Rx++I:circ(Mu))=Rt +// memh(Rx++Mu)=Rt.H +// memh(Rx++Mu)=Rt +// memh(Rx++Mu:brev)=Rt.H +// memh(Rx++Mu:brev)=Rt +// memh(gp+#u16:1)=Rt +// if ([!]Pv[.new]) memh(#u6)=Rt.H +// if ([!]Pv[.new]) memh(#u6)=Rt + +// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H +// TODO: needs to be implemented. + +// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H +// TODO: Needs to be implemented. + +// Store word. +// memw(Re=#U6)=Rt +// TODO: Needs to be implemented. 
+// memw(Rx++#s4:2)=Rt +// memw(Rx++#s4:2:circ(Mu))=Rt +// memw(Rx++I:circ(Mu))=Rt +// memw(Rx++Mu)=Rt +// memw(Rx++Mu:brev)=Rt + +//===----------------------------------------------------------------------=== +// ST - +//===----------------------------------------------------------------------=== + + +//===----------------------------------------------------------------------===// +// NV/ST + +//===----------------------------------------------------------------------===// + +let opNewValue = 2, opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in +class T_store_io_nv <string mnemonic, RegisterClass RC, + Operand ImmOp, bits<2>MajOp> + : NVInst_V4 <(outs), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + mnemonic#"($src1+#$src2) = $src3.new", + [],"",ST_tc_st_SLOT0> { + bits<5> src1; + bits<13> src2; // Actual address offset + bits<3> src3; + bits<11> offsetBits; // Represents offset encoding + + let opExtentBits = !if (!eq(mnemonic, "memb"), 11, + !if (!eq(mnemonic, "memh"), 12, + !if (!eq(mnemonic, "memw"), 13, 0))); + + let opExtentAlign = !if (!eq(mnemonic, "memb"), 0, + !if (!eq(mnemonic, "memh"), 1, + !if (!eq(mnemonic, "memw"), 2, 0))); + + let offsetBits = !if (!eq(mnemonic, "memb"), src2{10-0}, + !if (!eq(mnemonic, "memh"), src2{11-1}, + !if (!eq(mnemonic, "memw"), src2{12-2}, 0))); + + let IClass = 0b1010; + + let Inst{27} = 0b0; + let Inst{26-25} = offsetBits{10-9}; + let Inst{24-21} = 0b1101; + let Inst{20-16} = src1; + let Inst{13} = offsetBits{8}; + let Inst{12-11} = MajOp; + let Inst{10-8} = src3; + let Inst{7-0} = offsetBits{7-0}; + } + +let opExtendable = 2, opNewValue = 3, isPredicated = 1 in +class T_pstore_io_nv <string mnemonic, RegisterClass RC, Operand predImmOp, + bits<2>MajOp, bit PredNot, bit isPredNew> + : NVInst_V4 <(outs), + (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC:$src4), + !if(PredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2+#$src3) = $src4.new", + [],"",V2LDST_tc_st_SLOT0> { + bits<2> src1; + bits<5> src2; + bits<9> src3; + bits<3> src4; + bits<6> offsetBits; // Represents offset encoding + + let isPredicatedNew = isPredNew; + let isPredicatedFalse = PredNot; + let opExtentBits = !if (!eq(mnemonic, "memb"), 6, + !if (!eq(mnemonic, "memh"), 7, + !if (!eq(mnemonic, "memw"), 8, 0))); + + let opExtentAlign = !if (!eq(mnemonic, "memb"), 0, + !if (!eq(mnemonic, "memh"), 1, + !if (!eq(mnemonic, "memw"), 2, 0))); + + let offsetBits = !if (!eq(mnemonic, "memb"), src3{5-0}, + !if (!eq(mnemonic, "memh"), src3{6-1}, + !if (!eq(mnemonic, "memw"), src3{7-2}, 0))); + + let IClass = 0b0100; + + let Inst{27} = 0b0; + let Inst{26} = PredNot; + let Inst{25} = isPredNew; + let Inst{24-21} = 0b0101; + let Inst{20-16} = src2; + let Inst{13} = offsetBits{5}; + let Inst{12-11} = MajOp; + let Inst{10-8} = src4; + let Inst{7-3} = offsetBits{4-0}; + let Inst{2} = 0b0; + let Inst{1-0} = src1; + } + +// multiclass for new-value store instructions with base + immediate offset. 
+// +let mayStore = 1, isNVStore = 1, isNewValue = 1, hasSideEffects = 0, + isExtendable = 1 in +multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, Operand predImmOp, bits<2> MajOp> { + + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { + def S2_#NAME#new_io : T_store_io_nv <mnemonic, RC, ImmOp, MajOp>; + // Predicated + def S2_p#NAME#newt_io :T_pstore_io_nv <mnemonic, RC, predImmOp, MajOp, 0, 0>; + def S2_p#NAME#newf_io :T_pstore_io_nv <mnemonic, RC, predImmOp, MajOp, 1, 0>; + // Predicated new + def S4_p#NAME#newtnew_io :T_pstore_io_nv <mnemonic, RC, predImmOp, + MajOp, 0, 1>; + def S4_p#NAME#newfnew_io :T_pstore_io_nv <mnemonic, RC, predImmOp, + MajOp, 1, 1>; + } +} + +let addrMode = BaseImmOffset, InputType = "imm" in { + let accessSize = ByteAccess in + defm storerb: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext, + u6_0Ext, 0b00>, AddrModeRel; + + let accessSize = HalfWordAccess, opExtentAlign = 1 in + defm storerh: ST_Idxd_nv<"memh", "STrih", IntRegs, s11_1Ext, + u6_1Ext, 0b01>, AddrModeRel; + + let accessSize = WordAccess, opExtentAlign = 2 in + defm storeri: ST_Idxd_nv<"memw", "STriw", IntRegs, s11_2Ext, + u6_2Ext, 0b10>, AddrModeRel; +} + +//===----------------------------------------------------------------------===// +// Post increment loads with register offset. +//===----------------------------------------------------------------------===// + +let hasNewValue = 1 in +def L2_loadbsw2_pr : T_load_pr <"membh", IntRegs, 0b0001, HalfWordAccess>; + +def L2_loadbsw4_pr : T_load_pr <"membh", DoubleRegs, 0b0111, WordAccess>; + +let hasSideEffects = 0, addrMode = PostInc in +class T_loadalign_pr <string mnemonic, bits<4> MajOp, MemAccessSize AccessSz> + : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$_dst_), + (ins DoubleRegs:$src1, IntRegs:$src2, ModRegs:$src3), + "$dst = "#mnemonic#"($src2++$src3)", [], + "$src1 = $dst, $src2 = $_dst_"> { + bits<5> dst; + bits<5> src2; + bits<1> src3; + + let accessSize = AccessSz; + let IClass = 0b1001; + + let Inst{27-25} = 0b110; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = src3; + let Inst{12} = 0b0; + let Inst{7} = 0b0; + let Inst{4-0} = dst; + } + +def L2_loadalignb_pr : T_loadalign_pr <"memb_fifo", 0b0100, ByteAccess>; +def L2_loadalignh_pr : T_loadalign_pr <"memh_fifo", 0b0010, HalfWordAccess>; + +//===----------------------------------------------------------------------===// +// Template class for non-predicated post increment .new stores +// mem[bhwd](Rx++#s4:[0123])=Nt.new +//===----------------------------------------------------------------------===// +let isPredicable = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1, + isNewValue = 1, opNewValue = 3 in +class T_StorePI_nv <string mnemonic, Operand ImmOp, bits<2> MajOp > + : NVInstPI_V4 <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$offset, IntRegs:$src2), + mnemonic#"($src1++#$offset) = $src2.new", + [], "$src1 = $_dst_">, + AddrModeRel { + bits<5> src1; + bits<3> src2; + bits<7> offset; + bits<4> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0})); + let IClass = 0b1010; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = src1; + let Inst{13} = 0b0; + let Inst{12-11} = MajOp; + let Inst{10-8} = src2; + let Inst{7} = 0b0; + let Inst{6-3} = offsetBits; + let Inst{1} = 0b0; + } + +//===----------------------------------------------------------------------===// 
+// Template class for predicated post increment .new stores +// if([!]Pv[.new]) mem[bhwd](Rx++#s4:[0123])=Nt.new +//===----------------------------------------------------------------------===// +let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1, + isNewValue = 1, opNewValue = 4 in +class T_StorePI_nv_pred <string mnemonic, Operand ImmOp, + bits<2> MajOp, bit isPredNot, bit isPredNew > + : NVInstPI_V4 <(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, + ImmOp:$offset, IntRegs:$src3), + !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2++#$offset) = $src3.new", + [], "$src2 = $_dst_">, + AddrModeRel { + bits<2> src1; + bits<5> src2; + bits<3> src3; + bits<7> offset; + bits<4> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0})); + let isPredicatedNew = isPredNew; + let isPredicatedFalse = isPredNot; + + let IClass = 0b1010; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = src2; + let Inst{13} = 0b1; + let Inst{12-11} = MajOp; + let Inst{10-8} = src3; + let Inst{7} = isPredNew; + let Inst{6-3} = offsetBits; + let Inst{2} = isPredNot; + let Inst{1-0} = src1; + } + +multiclass ST_PostInc_Pred_nv<string mnemonic, Operand ImmOp, + bits<2> MajOp, bit PredNot> { + def _pi : T_StorePI_nv_pred <mnemonic, ImmOp, MajOp, PredNot, 0>; + + // Predicate new + def new_pi : T_StorePI_nv_pred <mnemonic, ImmOp, MajOp, PredNot, 1>; +} + +multiclass ST_PostInc_nv<string mnemonic, string BaseOp, Operand ImmOp, + bits<2> MajOp> { + let BaseOpcode = "POST_"#BaseOp in { + def S2_#NAME#_pi : T_StorePI_nv <mnemonic, ImmOp, MajOp>; + + // Predicated + defm S2_p#NAME#t : ST_PostInc_Pred_nv <mnemonic, ImmOp, MajOp, 0>; + defm S2_p#NAME#f : ST_PostInc_Pred_nv <mnemonic, ImmOp, MajOp, 1>; + } +} + +let accessSize = ByteAccess in +defm storerbnew: ST_PostInc_nv <"memb", "STrib", s4_0Imm, 0b00>; + +let accessSize = HalfWordAccess in +defm storerhnew: ST_PostInc_nv <"memh", "STrih", s4_1Imm, 0b01>; + +let accessSize = WordAccess in +defm storerinew: ST_PostInc_nv <"memw", "STriw", s4_2Imm, 0b10>; + +//===----------------------------------------------------------------------===// +// Template class for post increment .new stores with register offset +//===----------------------------------------------------------------------===// +let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3 in +class T_StorePI_RegNV <string mnemonic, bits<2> MajOp, MemAccessSize AccessSz> + : NVInstPI_V4 <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ModRegs:$src2, IntRegs:$src3), + #mnemonic#"($src1++$src2) = $src3.new", + [], "$src1 = $_dst_"> { + bits<5> src1; + bits<1> src2; + bits<3> src3; + let accessSize = AccessSz; + + let IClass = 0b1010; + + let Inst{27-21} = 0b1101101; + let Inst{20-16} = src1; + let Inst{13} = src2; + let Inst{12-11} = MajOp; + let Inst{10-8} = src3; + let Inst{7} = 0b0; + } + +def S2_storerbnew_pr : T_StorePI_RegNV<"memb", 0b00, ByteAccess>; +def S2_storerhnew_pr : T_StorePI_RegNV<"memh", 0b01, HalfWordAccess>; +def S2_storerinew_pr : T_StorePI_RegNV<"memw", 0b10, WordAccess>; + +// memb(Rx++#s4:0:circ(Mu))=Nt.new +// memb(Rx++I:circ(Mu))=Nt.new +// memb(Rx++Mu:brev)=Nt.new +// memh(Rx++#s4:1:circ(Mu))=Nt.new +// memh(Rx++I:circ(Mu))=Nt.new +// memh(Rx++Mu)=Nt.new +// memh(Rx++Mu:brev)=Nt.new + +// memw(Rx++#s4:2:circ(Mu))=Nt.new +// memw(Rx++I:circ(Mu))=Nt.new +// memw(Rx++Mu)=Nt.new +// 
memw(Rx++Mu:brev)=Nt.new + +//===----------------------------------------------------------------------===// +// NV/ST - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// NV/J + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// multiclass/template class for the new-value compare jumps with the register +// operands. +//===----------------------------------------------------------------------===// + +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11, + opExtentAlign = 2 in +class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum, + bit isNegCond, bit isTak> + : NVInst_V4<(outs), + (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), + "if ("#!if(isNegCond, "!","")#mnemonic# + "($src1"#!if(!eq(NvOpNum, 0),".new, ",", ")# + "$src2"#!if(!eq(NvOpNum, 1),".new))","))")#" jump:" + #!if(isTak, "t","nt")#" $offset", []> { + + bits<5> src1; + bits<5> src2; + bits<3> Ns; // New-Value Operand + bits<5> RegOp; // Non-New-Value Operand + bits<11> offset; + + let isTaken = isTak; + let isPredicatedFalse = isNegCond; + let opNewValue{0} = NvOpNum; + + let Ns = !if(!eq(NvOpNum, 0), src1{2-0}, src2{2-0}); + let RegOp = !if(!eq(NvOpNum, 0), src2, src1); + + let IClass = 0b0010; + let Inst{27-26} = 0b00; + let Inst{25-23} = majOp; + let Inst{22} = isNegCond; + let Inst{18-16} = Ns; + let Inst{13} = isTak; + let Inst{12-8} = RegOp; + let Inst{21-20} = offset{10-9}; + let Inst{7-1} = offset{8-2}; +} + + +multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit NvOpNum, + bit isNegCond> { + // Branch not taken: + def _nt: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>; + // Branch taken: + def _t : NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>; +} + +// NvOpNum = 0 -> First Operand is a new-value Register +// NvOpNum = 1 -> Second Operand is a new-value Register + +multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp, + bit NvOpNum> { + let BaseOpcode = BaseOp#_NVJ in { + defm _t_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond + defm _f_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond + } +} + +// if ([!]cmp.eq(Ns.new,Rt)) jump:[n]t #r9:2 +// if ([!]cmp.gt(Ns.new,Rt)) jump:[n]t #r9:2 +// if ([!]cmp.gtu(Ns.new,Rt)) jump:[n]t #r9:2 +// if ([!]cmp.gt(Rt,Ns.new)) jump:[n]t #r9:2 +// if ([!]cmp.gtu(Rt,Ns.new)) jump:[n]t #r9:2 + +let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1, + Defs = [PC], hasSideEffects = 0 in { + defm J4_cmpeq : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel; + defm J4_cmpgt : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel; + defm J4_cmpgtu : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel; + defm J4_cmplt : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel; + defm J4_cmpltu : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel; +} + +//===----------------------------------------------------------------------===// +// multiclass/template class for the new-value compare jumps instruction +// with a register and an unsigned immediate (U5) operand. 
+//===----------------------------------------------------------------------===//
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11,
+    opExtentAlign = 2 in
+class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond,
+                     bit isTak>
+  : NVInst_V4<(outs),
+              (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
+          "if ("#!if(isNegCond, "!","")#mnemonic#"($src1.new, #$src2)) jump:"
+          #!if(isTak, "t","nt")#" $offset", []> {
+
+    let isTaken = isTak;
+    let isPredicatedFalse = isNegCond;
+
+    bits<3> src1;
+    bits<5> src2;
+    bits<11> offset;
+
+    let IClass = 0b0010;
+    let Inst{26} = 0b1;
+    let Inst{25-23} = majOp;
+    let Inst{22} = isNegCond;
+    let Inst{18-16} = src1;
+    let Inst{13} = isTak;
+    let Inst{12-8} = src2;
+    let Inst{21-20} = offset{10-9};
+    let Inst{7-1} = offset{8-2};
+}
+
+multiclass NVJri_cond<string mnemonic, bits<3> majOp, bit isNegCond> {
+  // Branch not taken:
+  def _nt: NVJri_template<mnemonic, majOp, isNegCond, 0>;
+  // Branch taken:
+  def _t : NVJri_template<mnemonic, majOp, isNegCond, 1>;
+}
+
+multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> {
+  let BaseOpcode = BaseOp#_NVJri in {
+    defm _t_jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond
+    defm _f_jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond
+  }
+}
+
+// if ([!]cmp.eq(Ns.new,#U5)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Ns.new,#U5)) jump:[n]t #r9:2
+// if ([!]cmp.gtu(Ns.new,#U5)) jump:[n]t #r9:2
+
+let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
+    Defs = [PC], hasSideEffects = 0 in {
+  defm J4_cmpeqi  : NVJri_base<"cmp.eq",  "CMPEQ",  0b000>, PredRel;
+  defm J4_cmpgti  : NVJri_base<"cmp.gt",  "CMPGT",  0b001>, PredRel;
+  defm J4_cmpgtui : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel;
+}
+
+//===----------------------------------------------------------------------===//
+// multiclass/template class for the new-value compare jumps instruction
+// with a register and a hardcoded 0/-1 immediate value.
+//===----------------------------------------------------------------------===//
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 11,
+    opExtentAlign = 2 in
+class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal,
+                            bit isNegCond, bit isTak>
+  : NVInst_V4<(outs),
+              (ins IntRegs:$src1, brtarget:$offset),
+          "if ("#!if(isNegCond, "!","")#mnemonic
+          #"($src1.new, #"#ImmVal#")) jump:"
+          #!if(isTak, "t","nt")#" $offset", []> {
+
+    let isTaken = isTak;
+    let isPredicatedFalse = isNegCond;
+
+    bits<3> src1;
+    bits<11> offset;
+    let IClass = 0b0010;
+    let Inst{26} = 0b1;
+    let Inst{25-23} = majOp;
+    let Inst{22} = isNegCond;
+    let Inst{18-16} = src1;
+    let Inst{13} = isTak;
+    let Inst{21-20} = offset{10-9};
+    let Inst{7-1} = offset{8-2};
+}
+
+multiclass NVJ_ConstImm_cond<string mnemonic, bits<3> majOp, string ImmVal,
+                             bit isNegCond> {
+  // Branch not taken:
+  def _nt: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>;
+  // Branch taken:
+  def _t : NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>;
+}
+
+multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp,
+                             string ImmVal> {
+  let BaseOpcode = BaseOp#_NVJ_ConstImm in {
+    defm _t_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True
+    defm _f_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False
+  }
+}
+
+// if ([!]tstbit(Ns.new,#0)) jump:[n]t #r9:2
+// if ([!]cmp.eq(Ns.new,#-1)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Ns.new,#-1)) jump:[n]t #r9:2
+
+let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
+    Defs = [PC], hasSideEffects = 0 in {
+  defm J4_tstbit0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel;
+  defm J4_cmpeqn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel;
+  defm J4_cmpgtn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel;
+}
+
+// J4_hintjumpr: Hint the target of an indirect jump.
+let isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def J4_hintjumpr: JRInst < + (outs), + (ins IntRegs:$Rs), + "hintjr($Rs)"> { + bits<5> Rs; + let IClass = 0b0101; + let Inst{27-21} = 0b0010101; + let Inst{20-16} = Rs; + } + +//===----------------------------------------------------------------------===// +// NV/J - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// CR + +//===----------------------------------------------------------------------===// + +// PC-relative add +let hasNewValue = 1, isExtendable = 1, opExtendable = 1, + isExtentSigned = 0, opExtentBits = 6, hasSideEffects = 0, Uses = [PC] in +def C4_addipc : CRInst <(outs IntRegs:$Rd), (ins u6Ext:$u6), + "$Rd = add(pc, #$u6)", [], "", CR_tc_2_SLOT3 > { + bits<5> Rd; + bits<6> u6; + + let IClass = 0b0110; + let Inst{27-16} = 0b101001001001; + let Inst{12-7} = u6; + let Inst{4-0} = Rd; + } + + + +let hasSideEffects = 0 in +class T_LOGICAL_3OP<string MnOp1, string MnOp2, bits<2> OpBits, bit IsNeg> + : CRInst<(outs PredRegs:$Pd), + (ins PredRegs:$Ps, PredRegs:$Pt, PredRegs:$Pu), + "$Pd = " # MnOp1 # "($Ps, " # MnOp2 # "($Pt, " # + !if (IsNeg,"!","") # "$Pu))", + [], "", CR_tc_2early_SLOT23> { + bits<2> Pd; + bits<2> Ps; + bits<2> Pt; + bits<2> Pu; + + let IClass = 0b0110; + let Inst{27-24} = 0b1011; + let Inst{23} = IsNeg; + let Inst{22-21} = OpBits; + let Inst{20} = 0b1; + let Inst{17-16} = Ps; + let Inst{13} = 0b0; + let Inst{9-8} = Pt; + let Inst{7-6} = Pu; + let Inst{1-0} = Pd; +} + +def C4_and_and : T_LOGICAL_3OP<"and", "and", 0b00, 0>; +def C4_and_or : T_LOGICAL_3OP<"and", "or", 0b01, 0>; +def C4_or_and : T_LOGICAL_3OP<"or", "and", 0b10, 0>; +def C4_or_or : T_LOGICAL_3OP<"or", "or", 0b11, 0>; +def C4_and_andn : T_LOGICAL_3OP<"and", "and", 0b00, 1>; +def C4_and_orn : T_LOGICAL_3OP<"and", "or", 0b01, 1>; +def C4_or_andn : T_LOGICAL_3OP<"or", "and", 0b10, 1>; +def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>; + +// op(Ps, op(Pt, Pu)) +class LogLog_pat<SDNode Op1, SDNode Op2, InstHexagon MI> + : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))), + (MI I1:$Ps, I1:$Pt, I1:$Pu)>; + +// op(Ps, op(Pt, ~Pu)) +class LogLogNot_pat<SDNode Op1, SDNode Op2, InstHexagon MI> + : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))), + (MI I1:$Ps, I1:$Pt, I1:$Pu)>; + +def: LogLog_pat<and, and, C4_and_and>; +def: LogLog_pat<and, or, C4_and_or>; +def: LogLog_pat<or, and, C4_or_and>; +def: LogLog_pat<or, or, C4_or_or>; + +def: LogLogNot_pat<and, and, C4_and_andn>; +def: LogLogNot_pat<and, or, C4_and_orn>; +def: LogLogNot_pat<or, and, C4_or_andn>; +def: LogLogNot_pat<or, or, C4_or_orn>; + +//===----------------------------------------------------------------------===// +// PIC: Support for PIC compilations. 
The patterns and SD nodes defined +// below are needed to support code generation for PIC +//===----------------------------------------------------------------------===// + +def SDT_HexagonAtGot + : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; +def SDT_HexagonAtPcrel + : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +// AT_GOT address-of-GOT, address-of-global, offset-in-global +def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; +// AT_PCREL address-of-global +def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; + +def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), + (L2_loadri_io I32:$got, imm:$addr)>; +def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), + (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; +def: Pat<(HexagonAtPcrel I32:$addr), + (C4_addipc imm:$addr)>; + +//===----------------------------------------------------------------------===// +// CR - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// XTYPE/ALU + +//===----------------------------------------------------------------------===// + +// Logical with-not instructions. +def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>; +def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>; + +def: Pat<(i64 (and (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))), + (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>; +def: Pat<(i64 (or (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))), + (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>; + +let hasNewValue = 1, hasSideEffects = 0 in +def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0101111; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{4-0} = Rd; +} + +// Add and accumulate. 
+// Rd=add(Rs,add(Ru,#s6)) +let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 6, + opExtendable = 3 in +def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6), + "$Rd = add($Rs, add($Ru, #$s6))" , + [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs), + (add (i32 IntRegs:$Ru), s32ImmPred:$s6)))], + "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Ru; + bits<6> s6; + + let IClass = 0b1101; + + let Inst{27-23} = 0b10110; + let Inst{22-21} = s6{5-4}; + let Inst{20-16} = Rs; + let Inst{13} = s6{3}; + let Inst{12-8} = Rd; + let Inst{7-5} = s6{2-0}; + let Inst{4-0} = Ru; + } + +let isExtentSigned = 1, hasSideEffects = 0, hasNewValue = 1, isExtendable = 1, + opExtentBits = 6, opExtendable = 2 in +def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd), + (ins IntRegs:$Rs, s6Ext:$s6, IntRegs:$Ru), + "$Rd = add($Rs, sub(#$s6, $Ru))", + [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<6> s6; + bits<5> Ru; + + let IClass = 0b1101; + + let Inst{27-23} = 0b10111; + let Inst{22-21} = s6{5-4}; + let Inst{20-16} = Rs; + let Inst{13} = s6{3}; + let Inst{12-8} = Rd; + let Inst{7-5} = s6{2-0}; + let Inst{4-0} = Ru; + } + +// Rd=add(Rs,sub(#s6,Ru)) +def: Pat<(add (i32 IntRegs:$src1), (sub s32ImmPred:$src2, + (i32 IntRegs:$src3))), + (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; + +// Rd=sub(add(Rs,#s6),Ru) +def: Pat<(sub (add (i32 IntRegs:$src1), s32ImmPred:$src2), + (i32 IntRegs:$src3)), + (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; + +// Rd=add(sub(Rs,Ru),#s6) +def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)), + (s32ImmPred:$src2)), + (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; + + +// Add or subtract doublewords with carry. +//TODO: +// Rdd=add(Rss,Rtt,Px):carry +//TODO: +// Rdd=sub(Rss,Rtt,Px):carry + +// Extract bitfield +// Rdd=extract(Rss,#u6,#U6) +// Rdd=extract(Rss,Rtt) +// Rd=extract(Rs,Rtt) +// Rd=extract(Rs,#u5,#U5) + +def S4_extractp_rp : T_S3op_64 < "extract", 0b11, 0b100, 0>; +def S4_extractp : T_S2op_extract <"extract", 0b1010, DoubleRegs, u6Imm>; + +let hasNewValue = 1 in { + def S4_extract_rp : T_S3op_extract<"extract", 0b01>; + def S4_extract : T_S2op_extract <"extract", 0b1101, IntRegs, u5Imm>; +} + +// Complex add/sub halfwords/words +let Defs = [USR_OVF] in { + def S4_vxaddsubh : T_S3op_64 < "vxaddsubh", 0b01, 0b100, 0, 1>; + def S4_vxaddsubw : T_S3op_64 < "vxaddsubw", 0b01, 0b000, 0, 1>; + def S4_vxsubaddh : T_S3op_64 < "vxsubaddh", 0b01, 0b110, 0, 1>; + def S4_vxsubaddw : T_S3op_64 < "vxsubaddw", 0b01, 0b010, 0, 1>; +} + +let Defs = [USR_OVF] in { + def S4_vxaddsubhr : T_S3op_64 < "vxaddsubh", 0b11, 0b000, 0, 1, 1, 1>; + def S4_vxsubaddhr : T_S3op_64 < "vxsubaddh", 0b11, 0b010, 0, 1, 1, 1>; +} + +let Itinerary = M_tc_3x_SLOT23, Defs = [USR_OVF] in { + def M4_mac_up_s1_sat: T_MType_acc_rr<"+= mpy", 0b011, 0b000, 0, [], 0, 1, 1>; + def M4_nac_up_s1_sat: T_MType_acc_rr<"-= mpy", 0b011, 0b001, 0, [], 0, 1, 1>; +} + +// Logical xor with xor accumulation. 
+// Rxx^=xor(Rss,Rtt) +let hasSideEffects = 0 in +def M4_xor_xacc + : SInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rxx ^= xor($Rss, $Rtt)", + [(set (i64 DoubleRegs:$Rxx), + (xor (i64 DoubleRegs:$dst2), (xor (i64 DoubleRegs:$Rss), + (i64 DoubleRegs:$Rtt))))], + "$dst2 = $Rxx", S_3op_tc_1_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1100; + + let Inst{27-22} = 0b101010; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rxx; + } + +// Rotate and reduce bytes +// Rdd=vrcrotate(Rss,Rt,#u2) +let hasSideEffects = 0 in +def S4_vrcrotate + : SInst <(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2), + "$Rdd = vrcrotate($Rss, $Rt, #$u2)", + [], "", S_3op_tc_3x_SLOT23> { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rt; + bits<2> u2; + + let IClass = 0b1100; + + let Inst{27-22} = 0b001111; + let Inst{20-16} = Rss; + let Inst{13} = u2{1}; + let Inst{12-8} = Rt; + let Inst{7-6} = 0b11; + let Inst{5} = u2{0}; + let Inst{4-0} = Rdd; + } + +// Rotate and reduce bytes with accumulation +// Rxx+=vrcrotate(Rss,Rt,#u2) +let hasSideEffects = 0 in +def S4_vrcrotate_acc + : SInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2), + "$Rxx += vrcrotate($Rss, $Rt, #$u2)", [], + "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rt; + bits<2> u2; + + let IClass = 0b1100; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = Rss; + let Inst{13} = u2{1}; + let Inst{12-8} = Rt; + let Inst{5} = u2{0}; + let Inst{4-0} = Rxx; + } + +// Vector reduce conditional negate halfwords +let hasSideEffects = 0 in +def S2_vrcnegh + : SInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt), + "$Rxx += vrcnegh($Rss, $Rt)", [], + "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rt; + + let IClass = 0b1100; + + let Inst{27-21} = 0b1011001; + let Inst{20-16} = Rss; + let Inst{13} = 0b1; + let Inst{12-8} = Rt; + let Inst{7-5} = 0b111; + let Inst{4-0} = Rxx; + } + +// Split bitfield +def A4_bitspliti : T_S2op_2_di <"bitsplit", 0b110, 0b100>; + +// Arithmetic/Convergent round +def A4_cround_ri : T_S2op_2_ii <"cround", 0b111, 0b000>; + +def A4_round_ri : T_S2op_2_ii <"round", 0b111, 0b100>; + +let Defs = [USR_OVF] in +def A4_round_ri_sat : T_S2op_2_ii <"round", 0b111, 0b110, 1>; + +// Logical-logical words. +// Compound or-and -- Rx=or(Ru,and(Rx,#s10)) +let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 10, + opExtendable = 3 in +def S4_or_andix: + ALU64Inst<(outs IntRegs:$Rx), + (ins IntRegs:$Ru, IntRegs:$_src_, s10Ext:$s10), + "$Rx = or($Ru, and($_src_, #$s10))" , + [(set (i32 IntRegs:$Rx), + (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32ImmPred:$s10)))] , + "$_src_ = $Rx", ALU64_tc_2_SLOT23> { + bits<5> Rx; + bits<5> Ru; + bits<10> s10; + + let IClass = 0b1101; + + let Inst{27-22} = 0b101001; + let Inst{20-16} = Rx; + let Inst{21} = s10{9}; + let Inst{13-5} = s10{8-0}; + let Inst{4-0} = Ru; + } + +// Miscellaneous ALU64 instructions. 
+// +let hasNewValue = 1, hasSideEffects = 0 in +def A4_modwrapu: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = modwrap($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0011111; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-5} = 0b111; + let Inst{4-0} = Rd; +} + +let hasSideEffects = 0 in +def A4_bitsplit: ALU64Inst<(outs DoubleRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = bitsplit($Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-24} = 0b0100; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{4-0} = Rd; +} + +let hasSideEffects = 0 in +def dep_S2_packhl: ALU64Inst<(outs DoubleRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = packhl($Rs, $Rt):deprecated", [], "", ALU64_tc_1_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-24} = 0b0100; + let Inst{21} = 0b0; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{4-0} = Rd; +} + +let hasNewValue = 1, hasSideEffects = 0 in +def dep_A2_addsat: ALU64Inst<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = add($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0101100; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7} = 0b0; + let Inst{4-0} = Rd; +} + +let hasNewValue = 1, hasSideEffects = 0 in +def dep_A2_subsat: ALU64Inst<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = sub($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0101100; + let Inst{20-16} = Rt; + let Inst{12-8} = Rs; + let Inst{7} = 0b1; + let Inst{4-0} = Rd; +} + +// Rx[&|]=xor(Rs,Rt) +def M4_or_xor : T_MType_acc_rr < "|= xor", 0b110, 0b001, 0>; +def M4_and_xor : T_MType_acc_rr < "&= xor", 0b010, 0b010, 0>; + +// Rx[&|^]=or(Rs,Rt) +def M4_xor_or : T_MType_acc_rr < "^= or", 0b110, 0b011, 0>; + +let CextOpcode = "ORr_ORr" in +def M4_or_or : T_MType_acc_rr < "|= or", 0b110, 0b000, 0>; +def M4_and_or : T_MType_acc_rr < "&= or", 0b010, 0b001, 0>; + +// Rx[&|^]=and(Rs,Rt) +def M4_xor_and : T_MType_acc_rr < "^= and", 0b110, 0b010, 0>; + +let CextOpcode = "ORr_ANDr" in +def M4_or_and : T_MType_acc_rr < "|= and", 0b010, 0b011, 0>; +def M4_and_and : T_MType_acc_rr < "&= and", 0b010, 0b000, 0>; + +// Rx[&|^]=and(Rs,~Rt) +def M4_xor_andn : T_MType_acc_rr < "^= and", 0b001, 0b010, 0, [], 1>; +def M4_or_andn : T_MType_acc_rr < "|= and", 0b001, 0b000, 0, [], 1>; +def M4_and_andn : T_MType_acc_rr < "&= and", 0b001, 0b001, 0, [], 1>; + +def: T_MType_acc_pat2 <M4_or_xor, xor, or>; +def: T_MType_acc_pat2 <M4_and_xor, xor, and>; +def: T_MType_acc_pat2 <M4_or_and, and, or>; +def: T_MType_acc_pat2 <M4_and_and, and, and>; +def: T_MType_acc_pat2 <M4_xor_and, and, xor>; +def: T_MType_acc_pat2 <M4_or_or, or, or>; +def: T_MType_acc_pat2 <M4_and_or, or, and>; +def: T_MType_acc_pat2 <M4_xor_or, or, xor>; + +class T_MType_acc_pat3 <InstHexagon MI, SDNode firstOp, SDNode secOp> + : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, + (not IntRegs:$src3)))), + (i32 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3))>; + +def: T_MType_acc_pat3 <M4_or_andn, and, or>; +def: T_MType_acc_pat3 <M4_and_andn, and, and>; +def: T_MType_acc_pat3 <M4_xor_andn, and, xor>; + +// Compound or-or and or-and +let isExtentSigned = 1, 
InputType = "imm", hasNewValue = 1, isExtendable = 1, + opExtentBits = 10, opExtendable = 3 in +class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode> + : MInst_acc <(outs IntRegs:$Rx), + (ins IntRegs:$src1, IntRegs:$Rs, s10Ext:$s10), + "$Rx |= "#mnemonic#"($Rs, #$s10)", + [(set (i32 IntRegs:$Rx), (or (i32 IntRegs:$src1), + (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10)))], + "$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel { + bits<5> Rx; + bits<5> Rs; + bits<10> s10; + + let IClass = 0b1101; + + let Inst{27-24} = 0b1010; + let Inst{23-22} = MajOp; + let Inst{20-16} = Rs; + let Inst{21} = s10{9}; + let Inst{13-5} = s10{8-0}; + let Inst{4-0} = Rx; + } + +let CextOpcode = "ORr_ANDr" in +def S4_or_andi : T_CompOR <"and", 0b00, and>; + +let CextOpcode = "ORr_ORr" in +def S4_or_ori : T_CompOR <"or", 0b10, or>; + +// Modulo wrap +// Rd=modwrap(Rs,Rt) +// Round +// Rd=cround(Rs,#u5) +// Rd=cround(Rs,Rt) +// Rd=round(Rs,#u5)[:sat] +// Rd=round(Rs,Rt)[:sat] +// Vector reduce add unsigned halfwords +// Rd=vraddh(Rss,Rtt) +// Vector add bytes +// Rdd=vaddb(Rss,Rtt) +// Vector conditional negate +// Rdd=vcnegh(Rss,Rt) +// Rxx+=vrcnegh(Rss,Rt) +// Vector maximum bytes +// Rdd=vmaxb(Rtt,Rss) +// Vector reduce maximum halfwords +// Rxx=vrmaxh(Rss,Ru) +// Rxx=vrmaxuh(Rss,Ru) +// Vector reduce maximum words +// Rxx=vrmaxuw(Rss,Ru) +// Rxx=vrmaxw(Rss,Ru) +// Vector minimum bytes +// Rdd=vminb(Rtt,Rss) +// Vector reduce minimum halfwords +// Rxx=vrminh(Rss,Ru) +// Rxx=vrminuh(Rss,Ru) +// Vector reduce minimum words +// Rxx=vrminuw(Rss,Ru) +// Rxx=vrminw(Rss,Ru) +// Vector subtract bytes +// Rdd=vsubb(Rss,Rtt) + +//===----------------------------------------------------------------------===// +// XTYPE/ALU - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// XTYPE/BIT + +//===----------------------------------------------------------------------===// + +// Bit reverse +def S2_brevp : T_S2op_3 <"brev", 0b11, 0b110>; + +// Bit count +def S2_ct0p : T_COUNT_LEADING_64<"ct0", 0b111, 0b010>; +def S2_ct1p : T_COUNT_LEADING_64<"ct1", 0b111, 0b100>; +def S4_clbpnorm : T_COUNT_LEADING_64<"normamt", 0b011, 0b000>; + +// Count trailing zeros: 64-bit. +def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>; +def: Pat<(i32 (trunc (cttz_zero_undef I64:$Rss))), (S2_ct0p I64:$Rss)>; + +// Count trailing ones: 64-bit. +def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; +def: Pat<(i32 (trunc (cttz_zero_undef (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; + +// Define leading/trailing patterns that require zero-extensions to 64 bits. 
+def: Pat<(i64 (ctlz I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>; +def: Pat<(i64 (ctlz_zero_undef I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>; +def: Pat<(i64 (cttz I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>; +def: Pat<(i64 (cttz_zero_undef I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>; +def: Pat<(i64 (ctlz (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>; +def: Pat<(i64 (ctlz_zero_undef (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>; +def: Pat<(i64 (cttz (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>; +def: Pat<(i64 (cttz_zero_undef (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>; + + +let hasSideEffects = 0, hasNewValue = 1 in +def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6Imm:$s6), + "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> { + bits<5> Rs; + bits<5> Rd; + bits<6> s6; + let IClass = 0b1000; + let Inst{27-24} = 0b1100; + let Inst{23-21} = 0b001; + let Inst{20-16} = Rs; + let Inst{13-8} = s6; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rd; +} + +let hasSideEffects = 0, hasNewValue = 1 in +def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6Imm:$s6), + "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> { + bits<5> Rs; + bits<5> Rd; + bits<6> s6; + let IClass = 0b1000; + let Inst{27-24} = 0b1000; + let Inst{23-21} = 0b011; + let Inst{20-16} = Rs; + let Inst{13-8} = s6; + let Inst{7-5} = 0b010; + let Inst{4-0} = Rd; +} + + +// Bit test/set/clear +def S4_ntstbit_i : T_TEST_BIT_IMM<"!tstbit", 0b001>; +def S4_ntstbit_r : T_TEST_BIT_REG<"!tstbit", 1>; + +let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. + def: Pat<(i1 (seteq (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)), + (S4_ntstbit_i (i32 IntRegs:$Rs), u5ImmPred:$u5)>; + def: Pat<(i1 (seteq (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)), + (S4_ntstbit_r (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))>; +} + +// Add extra complexity to prefer these instructions over bitsset/bitsclr. +// The reason is that tstbit/ntstbit can be folded into a compound instruction: +// if ([!]tstbit(...)) jump ... +let AddedComplexity = 100 in +def: Pat<(i1 (setne (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))), + (S2_tstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>; + +let AddedComplexity = 100 in +def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))), + (S4_ntstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>; + +def C4_nbitsset : T_TEST_BITS_REG<"!bitsset", 0b01, 1>; +def C4_nbitsclr : T_TEST_BITS_REG<"!bitsclr", 0b10, 1>; +def C4_nbitsclri : T_TEST_BITS_IMM<"!bitsclr", 0b10, 1>; + +// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be +// represented as a compare against "value & 0xFF", which is an exact match +// for cmpb (same for cmph). The patterns below do not contain any additional +// complexity that would make them preferable, and if they were actually used +// instead of cmpb/cmph, they would result in a compare against register that +// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF). 
+def: Pat<(i1 (setne (and I32:$Rs, u6ImmPred:$u6), 0)), + (C4_nbitsclri I32:$Rs, u6ImmPred:$u6)>; +def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)), + (C4_nbitsclr I32:$Rs, I32:$Rt)>; +def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), + (C4_nbitsset I32:$Rs, I32:$Rt)>; + +//===----------------------------------------------------------------------===// +// XTYPE/BIT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// XTYPE/MPY + +//===----------------------------------------------------------------------===// + +// Rd=add(#u6,mpyi(Rs,#U6)) -- Multiply by immed and add immed. + +let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1 in +def M4_mpyri_addi : MInst<(outs IntRegs:$Rd), + (ins u6Ext:$u6, IntRegs:$Rs, u6Imm:$U6), + "$Rd = add(#$u6, mpyi($Rs, #$U6))" , + [(set (i32 IntRegs:$Rd), + (add (mul (i32 IntRegs:$Rs), u6ImmPred:$U6), + u32ImmPred:$u6))] ,"",ALU64_tc_3x_SLOT23> { + bits<5> Rd; + bits<6> u6; + bits<5> Rs; + bits<6> U6; + + let IClass = 0b1101; + + let Inst{27-24} = 0b1000; + let Inst{23} = U6{5}; + let Inst{22-21} = u6{5-4}; + let Inst{20-16} = Rs; + let Inst{13} = u6{3}; + let Inst{12-8} = Rd; + let Inst{7-5} = u6{2-0}; + let Inst{4-0} = U6{4-0}; + } + +// Rd=add(#u6,mpyi(Rs,Rt)) +let CextOpcode = "ADD_MPY", InputType = "imm", hasNewValue = 1, + isExtendable = 1, opExtentBits = 6, opExtendable = 1 in +def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd), + (ins u6Ext:$u6, IntRegs:$Rs, IntRegs:$Rt), + "$Rd = add(#$u6, mpyi($Rs, $Rt))" , + [(set (i32 IntRegs:$Rd), + (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32ImmPred:$u6))], + "", ALU64_tc_3x_SLOT23>, ImmRegRel { + bits<5> Rd; + bits<6> u6; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b01110; + let Inst{22-21} = u6{5-4}; + let Inst{20-16} = Rs; + let Inst{13} = u6{3}; + let Inst{12-8} = Rt; + let Inst{7-5} = u6{2-0}; + let Inst{4-0} = Rd; + } + +let hasNewValue = 1 in +class T_AddMpy <bit MajOp, PatLeaf ImmPred, dag ins> + : ALU64Inst <(outs IntRegs:$dst), ins, + "$dst = add($src1, mpyi("#!if(MajOp,"$src3, #$src2))", + "#$src2, $src3))"), + [(set (i32 IntRegs:$dst), + (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), ImmPred:$src2)))], + "", ALU64_tc_3x_SLOT23> { + bits<5> dst; + bits<5> src1; + bits<8> src2; + bits<5> src3; + + let IClass = 0b1101; + + bits<6> ImmValue = !if(MajOp, src2{5-0}, src2{7-2}); + + let Inst{27-24} = 0b1111; + let Inst{23} = MajOp; + let Inst{22-21} = ImmValue{5-4}; + let Inst{20-16} = src3; + let Inst{13} = ImmValue{3}; + let Inst{12-8} = dst; + let Inst{7-5} = ImmValue{2-0}; + let Inst{4-0} = src1; + } + +def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred, + (ins IntRegs:$src1, u6_2Imm:$src2, IntRegs:$src3)>; + +let isExtendable = 1, opExtentBits = 6, opExtendable = 3, + CextOpcode = "ADD_MPY", InputType = "imm" in +def M4_mpyri_addr : T_AddMpy<0b1, u32ImmPred, + (ins IntRegs:$src1, IntRegs:$src3, u6Ext:$src2)>, ImmRegRel; + +// Rx=add(Ru,mpyi(Rx,Rs)) +let CextOpcode = "ADD_MPY", InputType = "reg", hasNewValue = 1 in +def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx), + (ins IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs), + "$Rx = add($Ru, mpyi($_src_, $Rs))", + [(set (i32 IntRegs:$Rx), (add (i32 IntRegs:$Ru), + (mul (i32 IntRegs:$_src_), (i32 IntRegs:$Rs))))], + "$_src_ = $Rx", M_tc_3x_SLOT23>, ImmRegRel { + bits<5> Rx; + bits<5> Ru; + bits<5> Rs; + + let IClass = 0b1110; + + let Inst{27-21} = 0b0011000; + let Inst{12-8} = Rx; + 
let Inst{4-0} = Ru; + let Inst{20-16} = Rs; + } + + +// Vector reduce multiply word by signed half (32x16) +//Rdd=vrmpyweh(Rss,Rtt)[:<<1] +def M4_vrmpyeh_s0 : T_M2_vmpy<"vrmpyweh", 0b010, 0b100, 0, 0, 0>; +def M4_vrmpyeh_s1 : T_M2_vmpy<"vrmpyweh", 0b110, 0b100, 1, 0, 0>; + +//Rdd=vrmpywoh(Rss,Rtt)[:<<1] +def M4_vrmpyoh_s0 : T_M2_vmpy<"vrmpywoh", 0b001, 0b010, 0, 0, 0>; +def M4_vrmpyoh_s1 : T_M2_vmpy<"vrmpywoh", 0b101, 0b010, 1, 0, 0>; + +//Rdd+=vrmpyweh(Rss,Rtt)[:<<1] +def M4_vrmpyeh_acc_s0: T_M2_vmpy_acc<"vrmpyweh", 0b001, 0b110, 0, 0>; +def M4_vrmpyeh_acc_s1: T_M2_vmpy_acc<"vrmpyweh", 0b101, 0b110, 1, 0>; + +//Rdd=vrmpywoh(Rss,Rtt)[:<<1] +def M4_vrmpyoh_acc_s0: T_M2_vmpy_acc<"vrmpywoh", 0b011, 0b110, 0, 0>; +def M4_vrmpyoh_acc_s1: T_M2_vmpy_acc<"vrmpywoh", 0b111, 0b110, 1, 0>; + +// Vector multiply halfwords, signed by unsigned +// Rdd=vmpyhsu(Rs,Rt)[:<<]:sat +def M2_vmpy2su_s0 : T_XTYPE_mpy64 < "vmpyhsu", 0b000, 0b111, 1, 0, 0>; +def M2_vmpy2su_s1 : T_XTYPE_mpy64 < "vmpyhsu", 0b100, 0b111, 1, 1, 0>; + +// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat +def M2_vmac2su_s0 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b011, 0b101, 1, 0, 0>; +def M2_vmac2su_s1 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b111, 0b101, 1, 1, 0>; + +// Vector polynomial multiply halfwords +// Rdd=vpmpyh(Rs,Rt) +def M4_vpmpyh : T_XTYPE_mpy64 < "vpmpyh", 0b110, 0b111, 0, 0, 0>; + +// Rxx^=vpmpyh(Rs,Rt) +def M4_vpmpyh_acc : T_XTYPE_mpy64_acc < "vpmpyh", "^", 0b101, 0b111, 0, 0, 0>; + +// Polynomial multiply words +// Rdd=pmpyw(Rs,Rt) +def M4_pmpyw : T_XTYPE_mpy64 < "pmpyw", 0b010, 0b111, 0, 0, 0>; + +// Rxx^=pmpyw(Rs,Rt) +def M4_pmpyw_acc : T_XTYPE_mpy64_acc < "pmpyw", "^", 0b001, 0b111, 0, 0, 0>; + +//===----------------------------------------------------------------------===// +// XTYPE/MPY - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/Vector compare +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Template class for vector compare +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0 in +class T_vcmpImm <string Str, bits<2> cmpOp, bits<2> minOp, Operand ImmOprnd> + : ALU64_rr <(outs PredRegs:$Pd), + (ins DoubleRegs:$Rss, ImmOprnd:$Imm), + "$Pd = "#Str#"($Rss, #$Imm)", + [], "", ALU64_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rss; + bits<32> Imm; + bits<8> ImmBits; + let ImmBits{6-0} = Imm{6-0}; + let ImmBits{7} = !if (!eq(cmpOp,0b10), 0b0, Imm{7}); // 0 for vcmp[bhw].gtu + + let IClass = 0b1101; + + let Inst{27-24} = 0b1100; + let Inst{22-21} = cmpOp; + let Inst{20-16} = Rss; + let Inst{12-5} = ImmBits; + let Inst{4-3} = minOp; + let Inst{1-0} = Pd; + } + +// Vector compare bytes +def A4_vcmpbgt : T_vcmp <"vcmpb.gt", 0b1010>; +def: T_vcmp_pat<A4_vcmpbgt, setgt, v8i8>; + +let AsmString = "$Pd = any8(vcmpb.eq($Rss, $Rtt))" in +def A4_vcmpbeq_any : T_vcmp <"any8(vcmpb.gt", 0b1000>; + +def A4_vcmpbeqi : T_vcmpImm <"vcmpb.eq", 0b00, 0b00, u8Imm>; +def A4_vcmpbgti : T_vcmpImm <"vcmpb.gt", 0b01, 0b00, s8Imm>; +def A4_vcmpbgtui : T_vcmpImm <"vcmpb.gtu", 0b10, 0b00, u7Imm>; + +// Vector compare halfwords +def A4_vcmpheqi : T_vcmpImm <"vcmph.eq", 0b00, 0b01, s8Imm>; +def A4_vcmphgti : T_vcmpImm <"vcmph.gt", 0b01, 0b01, s8Imm>; +def A4_vcmphgtui : T_vcmpImm <"vcmph.gtu", 0b10, 0b01, u7Imm>; + +// Vector compare words +def A4_vcmpweqi : T_vcmpImm 
<"vcmpw.eq", 0b00, 0b10, s8Imm>; +def A4_vcmpwgti : T_vcmpImm <"vcmpw.gt", 0b01, 0b10, s8Imm>; +def A4_vcmpwgtui : T_vcmpImm <"vcmpw.gtu", 0b10, 0b10, u7Imm>; + +//===----------------------------------------------------------------------===// +// XTYPE/SHIFT + +//===----------------------------------------------------------------------===// +// Shift by immediate and accumulate/logical. +// Rx=add(#u8,asl(Rx,#U5)) Rx=add(#u8,lsr(Rx,#U5)) +// Rx=sub(#u8,asl(Rx,#U5)) Rx=sub(#u8,lsr(Rx,#U5)) +// Rx=and(#u8,asl(Rx,#U5)) Rx=and(#u8,lsr(Rx,#U5)) +// Rx=or(#u8,asl(Rx,#U5)) Rx=or(#u8,lsr(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, + hasNewValue = 1, opNewValue = 0 in +class T_S4_ShiftOperate<string MnOp, string MnSh, SDNode Op, SDNode Sh, + bit asl_lsr, bits<2> MajOp, InstrItinClass Itin> + : MInst_acc<(outs IntRegs:$Rd), (ins u8Ext:$u8, IntRegs:$Rx, u5Imm:$U5), + "$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))", + [(set (i32 IntRegs:$Rd), + (Op (Sh I32:$Rx, u5ImmPred:$U5), u32ImmPred:$u8))], + "$Rd = $Rx", Itin> { + + bits<5> Rd; + bits<8> u8; + bits<5> Rx; + bits<5> U5; + + let IClass = 0b1101; + let Inst{27-24} = 0b1110; + let Inst{23-21} = u8{7-5}; + let Inst{20-16} = Rd; + let Inst{13} = u8{4}; + let Inst{12-8} = U5; + let Inst{7-5} = u8{3-1}; + let Inst{4} = asl_lsr; + let Inst{3} = u8{0}; + let Inst{2-1} = MajOp; +} + +multiclass T_ShiftOperate<string mnemonic, SDNode Op, bits<2> MajOp, + InstrItinClass Itin> { + def _asl_ri : T_S4_ShiftOperate<mnemonic, "asl", Op, shl, 0, MajOp, Itin>; + def _lsr_ri : T_S4_ShiftOperate<mnemonic, "lsr", Op, srl, 1, MajOp, Itin>; +} + +let AddedComplexity = 200 in { + defm S4_addi : T_ShiftOperate<"add", add, 0b10, ALU64_tc_2_SLOT23>; + defm S4_andi : T_ShiftOperate<"and", and, 0b00, ALU64_tc_2_SLOT23>; +} + +let AddedComplexity = 30 in +defm S4_ori : T_ShiftOperate<"or", or, 0b01, ALU64_tc_1_SLOT23>; + +defm S4_subi : T_ShiftOperate<"sub", sub, 0b11, ALU64_tc_1_SLOT23>; + +let AddedComplexity = 200 in { + def: Pat<(add addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)), + (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; + def: Pat<(add addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)), + (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; + def: Pat<(sub addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)), + (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; + def: Pat<(sub addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)), + (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; +} + +// Vector conditional negate +// Rdd=vcnegh(Rss,Rt) +let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in +def S2_vcnegh : T_S3op_shiftVect < "vcnegh", 0b11, 0b01>; + +// Rd=[cround|round](Rs,Rt) +let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in { + def A4_cround_rr : T_S3op_3 < "cround", IntRegs, 0b11, 0b00>; + def A4_round_rr : T_S3op_3 < "round", IntRegs, 0b11, 0b10>; +} + +// Rd=round(Rs,Rt):sat +let hasNewValue = 1, Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in +def A4_round_rr_sat : T_S3op_3 < "round", IntRegs, 0b11, 0b11, 1>; + +// Rd=[cmpyiwh|cmpyrwh](Rss,Rt):<<1:rnd:sat +let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in { + def M4_cmpyi_wh : T_S3op_8<"cmpyiwh", 0b100, 1, 1, 1>; + def M4_cmpyr_wh : T_S3op_8<"cmpyrwh", 0b110, 1, 1, 1>; +} + +// Rdd=[add|sub](Rss,Rtt,Px):carry +let isPredicateLate = 1, hasSideEffects = 0 in +class T_S3op_carry <string mnemonic, bits<3> MajOp> + : SInst < (outs DoubleRegs:$Rdd, PredRegs:$Px), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, 
PredRegs:$Pu), + "$Rdd = "#mnemonic#"($Rss, $Rtt, $Pu):carry", + [], "$Px = $Pu", S_3op_tc_1_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + bits<2> Pu; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0010; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + let Inst{6-5} = Pu; + let Inst{4-0} = Rdd; + } + +def A4_addp_c : T_S3op_carry < "add", 0b110 >; +def A4_subp_c : T_S3op_carry < "sub", 0b111 >; + +let Itinerary = S_3op_tc_3_SLOT23, hasSideEffects = 0 in +class T_S3op_6 <string mnemonic, bits<3> MinOp, bit isUnsigned> + : SInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Ru), + "$Rxx = "#mnemonic#"($Rss, $Ru)" , + [] , "$dst2 = $Rxx"> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Ru; + + let IClass = 0b1100; + + let Inst{27-21} = 0b1011001; + let Inst{20-16} = Rss; + let Inst{13} = isUnsigned; + let Inst{12-8} = Rxx; + let Inst{7-5} = MinOp; + let Inst{4-0} = Ru; + } + +// Vector reduce maximum halfwords +// Rxx=vrmax[u]h(Rss,Ru) +def A4_vrmaxh : T_S3op_6 < "vrmaxh", 0b001, 0>; +def A4_vrmaxuh : T_S3op_6 < "vrmaxuh", 0b001, 1>; + +// Vector reduce maximum words +// Rxx=vrmax[u]w(Rss,Ru) +def A4_vrmaxw : T_S3op_6 < "vrmaxw", 0b010, 0>; +def A4_vrmaxuw : T_S3op_6 < "vrmaxuw", 0b010, 1>; + +// Vector reduce minimum halfwords +// Rxx=vrmin[u]h(Rss,Ru) +def A4_vrminh : T_S3op_6 < "vrminh", 0b101, 0>; +def A4_vrminuh : T_S3op_6 < "vrminuh", 0b101, 1>; + +// Vector reduce minimum words +// Rxx=vrmin[u]w(Rss,Ru) +def A4_vrminw : T_S3op_6 < "vrminw", 0b110, 0>; +def A4_vrminuw : T_S3op_6 < "vrminuw", 0b110, 1>; + +// Shift an immediate left by register amount. +let hasNewValue = 1, hasSideEffects = 0 in +def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6Imm:$s6, IntRegs:$Rt), + "$Rd = lsl(#$s6, $Rt)" , + [(set (i32 IntRegs:$Rd), (shl s6ImmPred:$s6, + (i32 IntRegs:$Rt)))], + "", S_3op_tc_1_SLOT23> { + bits<5> Rd; + bits<6> s6; + bits<5> Rt; + + let IClass = 0b1100; + + let Inst{27-22} = 0b011010; + let Inst{20-16} = s6{5-1}; + let Inst{12-8} = Rt; + let Inst{7-6} = 0b11; + let Inst{4-0} = Rd; + let Inst{5} = s6{0}; + } + +//===----------------------------------------------------------------------===// +// XTYPE/SHIFT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MEMOP: Word, Half, Byte +//===----------------------------------------------------------------------===// + +def MEMOPIMM : SDNodeXForm<imm, [{ + // Call the transformation function XformM5ToU5Imm to get the negative + // immediate's positive counterpart. + int32_t imm = N->getSExtValue(); + return XformM5ToU5Imm(imm, SDLoc(N)); +}]>; + +def MEMOPIMM_HALF : SDNodeXForm<imm, [{ + // -1 .. -31 represented as 65535..65515 + // assigning to a short restores our desired signed value. + // Call the transformation function XformM5ToU5Imm to get the negative + // immediate's positive counterpart. + int16_t imm = N->getSExtValue(); + return XformM5ToU5Imm(imm, SDLoc(N)); +}]>; + +def MEMOPIMM_BYTE : SDNodeXForm<imm, [{ + // -1 .. -31 represented as 255..235 + // assigning to a char restores our desired signed value. + // Call the transformation function XformM5ToU5Imm to get the negative + // immediate's positive counterpart. + int8_t imm = N->getSExtValue(); + return XformM5ToU5Imm(imm, SDLoc(N)); +}]>; + +def SETMEMIMM : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-31]. + // As an SDNode. 
+ int32_t imm = N->getSExtValue(); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); +}]>; + +def CLRMEMIMM : SDNodeXForm<imm, [{ + // Return the bit position we will clear [0-31]. + // As an SDNode. + // we bit negate the value first + int32_t imm = ~(N->getSExtValue()); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); +}]>; + +def SETMEMIMM_SHORT : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-15]. + // As an SDNode. + int16_t imm = N->getSExtValue(); + return XformMskToBitPosU4Imm(imm, SDLoc(N)); +}]>; + +def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{ + // Return the bit position we will clear [0-15]. + // As an SDNode. + // we bit negate the value first + int16_t imm = ~(N->getSExtValue()); + return XformMskToBitPosU4Imm(imm, SDLoc(N)); +}]>; + +def SETMEMIMM_BYTE : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-7]. + // As an SDNode. + int8_t imm = N->getSExtValue(); + return XformMskToBitPosU3Imm(imm, SDLoc(N)); +}]>; + +def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{ + // Return the bit position we will clear [0-7]. + // As an SDNode. + // we bit negate the value first + int8_t imm = ~(N->getSExtValue()); + return XformMskToBitPosU3Imm(imm, SDLoc(N)); +}]>; + +//===----------------------------------------------------------------------===// +// Template class for MemOp instructions with the register value. +//===----------------------------------------------------------------------===// +class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp, + string memOp, bits<2> memOpBits> : + MEMInst_V4<(outs), + (ins IntRegs:$base, ImmOp:$offset, IntRegs:$delta), + opc#"($base+#$offset)"#memOp#"$delta", + []>, + Requires<[UseMEMOP]> { + + bits<5> base; + bits<5> delta; + bits<32> offset; + bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2 + + let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0}, + !if (!eq(opcBits, 0b01), offset{6-1}, + !if (!eq(opcBits, 0b10), offset{7-2},0))); + + let opExtentAlign = opcBits; + let IClass = 0b0011; + let Inst{27-24} = 0b1110; + let Inst{22-21} = opcBits; + let Inst{20-16} = base; + let Inst{13} = 0b0; + let Inst{12-7} = offsetBits; + let Inst{6-5} = memOpBits; + let Inst{4-0} = delta; +} + +//===----------------------------------------------------------------------===// +// Template class for MemOp instructions with the immediate value. +//===----------------------------------------------------------------------===// +class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp, + string memOp, bits<2> memOpBits> : + MEMInst_V4 <(outs), + (ins IntRegs:$base, ImmOp:$offset, u5Imm:$delta), + opc#"($base+#$offset)"#memOp#"#$delta" + #!if(memOpBits{1},")", ""), // clrbit, setbit - include ')' + []>, + Requires<[UseMEMOP]> { + + bits<5> base; + bits<5> delta; + bits<32> offset; + bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2 + + let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0}, + !if (!eq(opcBits, 0b01), offset{6-1}, + !if (!eq(opcBits, 0b10), offset{7-2},0))); + + let opExtentAlign = opcBits; + let IClass = 0b0011; + let Inst{27-24} = 0b1111; + let Inst{22-21} = opcBits; + let Inst{20-16} = base; + let Inst{13} = 0b0; + let Inst{12-7} = offsetBits; + let Inst{6-5} = memOpBits; + let Inst{4-0} = delta; +} + +// multiclass to define MemOp instructions with register operand. 
+multiclass MemOp_rr<string opc, bits<2> opcBits, Operand ImmOp> { + def L4_add#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add + def L4_sub#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub + def L4_and#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and + def L4_or#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or +} + +// multiclass to define MemOp instructions with immediate Operand. +multiclass MemOp_ri<string opc, bits<2> opcBits, Operand ImmOp> { + def L4_iadd#NAME : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >; + def L4_isub#NAME : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >; + def L4_iand#NAME : MemOp_ri_base<opc, opcBits, ImmOp, " = clrbit(", 0b10>; + def L4_ior#NAME : MemOp_ri_base<opc, opcBits, ImmOp, " = setbit(", 0b11>; +} + +multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> { + defm _#NAME : MemOp_rr <opc, opcBits, ImmOp>; + defm _#NAME : MemOp_ri <opc, opcBits, ImmOp>; +} + +// Define MemOp instructions. +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in { + let opExtentBits = 6, accessSize = ByteAccess in + defm memopb_io : MemOp_base <"memb", 0b00, u6_0Ext>; + + let opExtentBits = 7, accessSize = HalfWordAccess in + defm memoph_io : MemOp_base <"memh", 0b01, u6_1Ext>; + + let opExtentBits = 8, accessSize = WordAccess in + defm memopw_io : MemOp_base <"memw", 0b10, u6_2Ext>; +} + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for ALU operations on the memory +// Here value used for the ALU operation is an immediate value. +// mem[bh](Rs+#0) += #U5 +// mem[bh](Rs+#u6) += #U5 +//===----------------------------------------------------------------------===// + +multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred, + InstHexagon MI, SDNode OpNode> { + let AddedComplexity = 180 in + def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend), + IntRegs:$addr), + (MI IntRegs:$addr, 0, u5ImmPred:$addend)>; + + let AddedComplexity = 190 in + def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, ImmPred:$offset)), + u5ImmPred:$addend), + (add IntRegs:$base, ImmPred:$offset)), + (MI IntRegs:$base, ImmPred:$offset, u5ImmPred:$addend)>; +} + +multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred, + InstHexagon addMI, InstHexagon subMI> { + defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, addMI, add>; + defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, subMI, sub>; +} + +multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Half Word + defm: MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred, + L4_iadd_memoph_io, L4_isub_memoph_io>; + // Byte + defm: MemOpi_u5ALUOp <ldOpByte, truncstorei8, u32ImmPred, + L4_iadd_memopb_io, L4_isub_memopb_io>; +} + +let Predicates = [UseMEMOP] in { + defm: MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend + defm: MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend + defm: MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend + + // Word + defm: MemOpi_u5ALUOp <load, store, u30_2ImmPred, L4_iadd_memopw_io, + L4_isub_memopw_io>; +} + +//===----------------------------------------------------------------------===// +// multiclass to define 'Def Pats' for ALU operations on the memory. +// Here value used for the ALU operation is a negative value. 
+// mem[bh](Rs+#0) += #m5 +// mem[bh](Rs+#u6) += #m5 +//===----------------------------------------------------------------------===// + +multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred, + PatLeaf immPred, SDNodeXForm xformFunc, + InstHexagon MI> { + let AddedComplexity = 190 in + def: Pat<(stOp (add (ldOp IntRegs:$addr), immPred:$subend), IntRegs:$addr), + (MI IntRegs:$addr, 0, (xformFunc immPred:$subend))>; + + let AddedComplexity = 195 in + def: Pat<(stOp (add (ldOp (add IntRegs:$base, ImmPred:$offset)), + immPred:$subend), + (add IntRegs:$base, ImmPred:$offset)), + (MI IntRegs:$base, ImmPred:$offset, (xformFunc immPred:$subend))>; +} + +multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Half Word + defm: MemOpi_m5Pats <ldOpHalf, truncstorei16, u31_1ImmPred, m5HImmPred, + MEMOPIMM_HALF, L4_isub_memoph_io>; + // Byte + defm: MemOpi_m5Pats <ldOpByte, truncstorei8, u32ImmPred, m5BImmPred, + MEMOPIMM_BYTE, L4_isub_memopb_io>; +} + +let Predicates = [UseMEMOP] in { + defm: MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend + defm: MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend + defm: MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend + + // Word + defm: MemOpi_m5Pats <load, store, u30_2ImmPred, m5ImmPred, + MEMOPIMM, L4_isub_memopw_io>; +} + +//===----------------------------------------------------------------------===// +// Multiclass to define 'def Pats' for bit operations on the memory. +// mem[bhw](Rs+#0) = [clrbit|setbit](#U5) +// mem[bhw](Rs+#u6) = [clrbit|setbit](#U5) +//===----------------------------------------------------------------------===// + +multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred, + PatLeaf extPred, SDNodeXForm xformFunc, InstHexagon MI, + SDNode OpNode> { + + // mem[bhw](Rs+#u6:[012]) = [clrbit|setbit](#U5) + let AddedComplexity = 250 in + def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), + immPred:$bitend), + (add IntRegs:$base, extPred:$offset)), + (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>; + + // mem[bhw](Rs+#0) = [clrbit|setbit](#U5) + let AddedComplexity = 225 in + def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), immPred:$bitend), IntRegs:$addr), + (MI IntRegs:$addr, 0, (xformFunc immPred:$bitend))>; +} + +multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf> { + // Byte - clrbit + defm: MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u32ImmPred, + CLRMEMIMM_BYTE, L4_iand_memopb_io, and>; + // Byte - setbit + defm: MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u32ImmPred, + SETMEMIMM_BYTE, L4_ior_memopb_io, or>; + // Half Word - clrbit + defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u31_1ImmPred, + CLRMEMIMM_SHORT, L4_iand_memoph_io, and>; + // Half Word - setbit + defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u31_1ImmPred, + SETMEMIMM_SHORT, L4_ior_memoph_io, or>; +} + +let Predicates = [UseMEMOP] in { + // mem[bh](Rs+#0) = [clrbit|setbit](#U5) + // mem[bh](Rs+#u6:[01]) = [clrbit|setbit](#U5) + defm: MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend + defm: MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend + defm: MemOpi_bitExtType<extloadi8, extloadi16>; // any extend + + // memw(Rs+#0) = [clrbit|setbit](#U5) + // memw(Rs+#u6:2) = [clrbit|setbit](#U5) + defm: MemOpi_bitPats<load, store, Clr5ImmPred, u30_2ImmPred, CLRMEMIMM, + L4_iand_memopw_io, and>; + defm: MemOpi_bitPats<load, store, Set5ImmPred, u30_2ImmPred, SETMEMIMM, + L4_ior_memopw_io, or>; +} + 
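The SDNodeXForm definitions above only describe their intended arithmetic in comments; the XformM5ToU5Imm / XformMskToBitPos* helpers live on the C++ side of the backend. Below is a minimal standalone C++ sketch of that arithmetic, assuming the helpers do nothing beyond what the comments state (negate an m5 addend to obtain its positive u5 counterpart, and turn a one-bit mask into a bit position, complementing the mask first for clrbit). The function names m5ToU5 and maskToBitPos are illustrative only, not backend APIs.

// Illustrative sketch only (not part of the backend): the arithmetic that the
// MEMOPIMM*/SETMEMIMM*/CLRMEMIMM* comments above describe.
#include <cassert>
#include <cstdint>

// A negative byte addend in [-31,-1] (seen as 255..235 when zero-extended)
// becomes the positive #U5 operand of the "-=" memop form.
std::uint8_t m5ToU5(std::int8_t m5) {
  assert(m5 >= -31 && m5 <= -1);
  return static_cast<std::uint8_t>(-m5);          // e.g. 0xFD (-3) -> 3
}

// A single-bit mask becomes the bit position used by setbit(#U5); for clrbit
// the AND mask is complemented first, as the CLRMEMIMM comments note.
std::uint8_t maskToBitPos(std::uint32_t mask) {
  assert(mask != 0 && (mask & (mask - 1)) == 0);  // must be a power of two
  std::uint8_t pos = 0;
  while ((mask & 1u) == 0) { mask >>= 1; ++pos; }
  return pos;
}

int main() {
  assert(m5ToU5(static_cast<std::int8_t>(0xFD)) == 3);  // x += -3   ->  x -= #3
  assert(maskToBitPos(0x10u) == 4);                     // x |= 0x10 ->  setbit(#4)
  assert(maskToBitPos(~0xFFFFFFEFu) == 4);              // x &= ~0x10 -> clrbit(#4)
  return 0;
}

This also shows why the m5 patterns above select the L4_isub_* memops rather than L4_iadd_*: the hardware memop immediate is unsigned, so a negative addend is flipped into a positive subtrahend before instruction selection.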
+//===----------------------------------------------------------------------===// +// Multiclass to define 'def Pats' for ALU operations on the memory +// where addend is a register. +// mem[bhw](Rs+#0) [+-&|]= Rt +// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt +//===----------------------------------------------------------------------===// + +multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred, + InstHexagon MI, SDNode OpNode> { + let AddedComplexity = 141 in + // mem[bhw](Rs+#0) [+-&|]= Rt + def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), (i32 IntRegs:$addend)), + IntRegs:$addr), + (MI IntRegs:$addr, 0, (i32 IntRegs:$addend))>; + + // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt + let AddedComplexity = 150 in + def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), + (i32 IntRegs:$orend)), + (add IntRegs:$base, extPred:$offset)), + (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend))>; +} + +multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf extPred, + InstHexagon addMI, InstHexagon subMI, + InstHexagon andMI, InstHexagon orMI> { + defm: MemOpr_Pats <ldOp, stOp, extPred, addMI, add>; + defm: MemOpr_Pats <ldOp, stOp, extPred, subMI, sub>; + defm: MemOpr_Pats <ldOp, stOp, extPred, andMI, and>; + defm: MemOpr_Pats <ldOp, stOp, extPred, orMI, or>; +} + +multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Half Word + defm: MemOPr_ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred, + L4_add_memoph_io, L4_sub_memoph_io, + L4_and_memoph_io, L4_or_memoph_io>; + // Byte + defm: MemOPr_ALUOp <ldOpByte, truncstorei8, u32ImmPred, + L4_add_memopb_io, L4_sub_memopb_io, + L4_and_memopb_io, L4_or_memopb_io>; +} + +// Define 'def Pats' for MemOps with register addend. +let Predicates = [UseMEMOP] in { + // Byte, Half Word + defm: MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend + defm: MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend + defm: MemOPr_ExtType<extloadi8, extloadi16>; // any extend + // Word + defm: MemOPr_ALUOp <load, store, u30_2ImmPred, L4_add_memopw_io, + L4_sub_memopw_io, L4_and_memopw_io, L4_or_memopw_io>; +} + +//===----------------------------------------------------------------------===// +// XTYPE/PRED + +//===----------------------------------------------------------------------===// + +// Hexagon V4 only supports these flavors of byte/half compare instructions: +// EQ/GT/GTU. Other flavors like GE/GEU/LT/LTU/LE/LEU are not supported by +// hardware. However, compiler can still implement these patterns through +// appropriate patterns combinations based on current implemented patterns. +// The implemented patterns are: EQ/GT/GTU. +// Missing patterns are: GE/GEU/LT/LTU/LE/LEU. + +// Following instruction is not being extended as it results into the +// incorrect code for negative numbers. +// Pd=cmpb.eq(Rs,#u8) + +// p=!cmp.eq(r1,#s10) +def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10Ext>; +def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10Ext>; +def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9Ext>; + +def : T_CMP_pat <C4_cmpneqi, setne, s32ImmPred>; +def : T_CMP_pat <C4_cmpltei, setle, s32ImmPred>; +def : T_CMP_pat <C4_cmplteui, setule, u9ImmPred>; + +// rs <= rt -> !(rs > rt). +/* +def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>; +// (C4_cmpltei IntRegs:$src1, s32ImmPred:$src2)>; +*/ +// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1). 
+def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>; + +// rs != rt -> !(rs == rt). +def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C4_cmpneqi IntRegs:$src1, s32ImmPred:$src2)>; + +// SDNode for converting immediate C to C-1. +def DEC_CONST_BYTE : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformU7ToU7M1Imm(imm, SDLoc(N)); +}]>; + +// For the sequence +// zext( setult ( and(Rs, 255), u8)) +// Use the isdigit transformation below + +// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)' +// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;. +// The isdigit transformation relies on two 'clever' aspects: +// 1) The data type is unsigned which allows us to eliminate a zero test after +// biasing the expression by 48. We are depending on the representation of +// the unsigned types, and semantics. +// 2) The front end has converted <= 9 into < 10 on entry to LLVM +// +// For the C code: +// retval = ((c>='0') & (c<='9')) ? 1 : 0; +// The code is transformed upstream of llvm into +// retval = (c-48) < 10 ? 1 : 0; +let AddedComplexity = 139 in +def: Pat<(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)), + u7StrictPosImmPred:$src2)))), + (C2_muxii (A4_cmpbgtui IntRegs:$src1, + (DEC_CONST_BYTE u7StrictPosImmPred:$src2)), + 0, 1)>; + +//===----------------------------------------------------------------------===// +// XTYPE/PRED - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Multiclass for DeallocReturn +//===----------------------------------------------------------------------===// +class L4_RETURN<string mnemonic, bit isNot, bit isPredNew, bit isTak> + : LD0Inst<(outs), (ins PredRegs:$src), + !if(isNot, "if (!$src", "if ($src")# + !if(isPredNew, ".new) ", ") ")#mnemonic# + !if(isPredNew, #!if(isTak,":t", ":nt"),""), + [], "", LD_tc_3or4stall_SLOT0> { + + bits<2> src; + let BaseOpcode = "L4_RETURN"; + let isPredicatedFalse = isNot; + let isPredicatedNew = isPredNew; + let isTaken = isTak; + let IClass = 0b1001; + + let Inst{27-16} = 0b011000011110; + + let Inst{13} = isNot; + let Inst{12} = isTak; + let Inst{11} = isPredNew; + let Inst{10} = 0b0; + let Inst{9-8} = src; + let Inst{4-0} = 0b11110; + } + +// Produce all predicated forms, p, !p, p.new, !p.new, :t, :nt +multiclass L4_RETURN_PRED<string mnemonic, bit PredNot> { + let isPredicated = 1 in { + def _#NAME# : L4_RETURN <mnemonic, PredNot, 0, 1>; + def _#NAME#new_pnt : L4_RETURN <mnemonic, PredNot, 1, 0>; + def _#NAME#new_pt : L4_RETURN <mnemonic, PredNot, 1, 1>; + } +} + +multiclass LD_MISC_L4_RETURN<string mnemonic> { + let isBarrier = 1, isPredicable = 1 in + def NAME : LD0Inst <(outs), (ins), mnemonic, [], "", + LD_tc_3or4stall_SLOT0> { + let BaseOpcode = "L4_RETURN"; + let IClass = 0b1001; + let Inst{27-16} = 0b011000011110; + let Inst{13-10} = 0b0000; + let Inst{4-0} = 0b11110; + } + defm t : L4_RETURN_PRED<mnemonic, 0 >; + defm f : L4_RETURN_PRED<mnemonic, 1 >; +} + +let isReturn = 1, isTerminator = 1, + Defs = [R29, R30, R31, PC], Uses = [R30], hasSideEffects = 0 in +defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel; + +// Restore registers and dealloc return function call. 
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, + Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in { + def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">; + let isExtended = 1, opExtendable = 0 in + def RESTORE_DEALLOC_RET_JMP_V4_EXT : T_JMP<"">; +} + +// Restore registers and dealloc frame before a tail call. +let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in { + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel; + let isExtended = 1, opExtendable = 0 in + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT : T_Call<"">, PredRel; +} + +// Save registers function call. +let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in { + def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel; + let isExtended = 1, opExtendable = 0 in + def SAVE_REGISTERS_CALL_V4_EXT : T_Call<"">, PredRel; +} + +//===----------------------------------------------------------------------===// +// Template class for non predicated store instructions with +// GP-Relative or absolute addressing. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, isPredicable = 1 in +class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<2>MajOp, bit isAbs, bit isHalf> + : STInst<(outs), (ins ImmOp:$addr, RC:$src), + mnemonic # "(#$addr) = $src"#!if(isHalf, ".h",""), + [], "", V2LDST_tc_st_SLOT01> { + bits<19> addr; + bits<5> src; + bits<16> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3}, + !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2}, + !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1}, + /* u16_0Imm */ addr{15-0}))); + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1)); + + let IClass = 0b0100; + let Inst{27} = 1; + let Inst{26-25} = offsetBits{15-14}; + let Inst{24} = 0b0; + let Inst{23-22} = MajOp; + let Inst{21} = isHalf; + let Inst{20-16} = offsetBits{13-9}; + let Inst{13} = offsetBits{8}; + let Inst{12-8} = src; + let Inst{7-0} = offsetBits{7-0}; + } + +//===----------------------------------------------------------------------===// +// Template class for predicated store instructions with +// GP-Relative or absolute addressing. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, isPredicated = 1, opExtentBits = 6, opExtendable = 1 in +class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp, + bit isHalf, bit isNot, bit isNew> + : STInst<(outs), (ins PredRegs:$src1, u32MustExt:$absaddr, RC: $src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ", + ") ")#mnemonic#"(#$absaddr) = $src2"#!if(isHalf, ".h",""), + [], "", ST_tc_st_SLOT01>, AddrModeRel { + bits<2> src1; + bits<6> absaddr; + bits<5> src2; + + let isPredicatedNew = isNew; + let isPredicatedFalse = isNot; + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1)); + + let IClass = 0b1010; + + let Inst{27-24} = 0b1111; + let Inst{23-22} = MajOp; + let Inst{21} = isHalf; + let Inst{17-16} = absaddr{5-4}; + let Inst{13} = isNew; + let Inst{12-8} = src2; + let Inst{7} = 0b1; + let Inst{6-3} = absaddr{3-0}; + let Inst{2} = isNot; + let Inst{1-0} = src1; + } + +//===----------------------------------------------------------------------===// +// Template class for predicated store instructions with absolute addressing. 
+//===----------------------------------------------------------------------===// +class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<2> MajOp, bit isHalf> + : T_StoreAbsGP <mnemonic, RC, u32MustExt, MajOp, 1, isHalf>, + AddrModeRel { + string ImmOpStr = !cast<string>(ImmOp); + let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, + !if (!eq(ImmOpStr, "u16_2Imm"), 18, + !if (!eq(ImmOpStr, "u16_1Imm"), 17, + /* u16_0Imm */ 16))); + + let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3, + !if (!eq(ImmOpStr, "u16_2Imm"), 2, + !if (!eq(ImmOpStr, "u16_1Imm"), 1, + /* u16_0Imm */ 0))); +} + +//===----------------------------------------------------------------------===// +// Multiclass for store instructions with absolute addressing. +//===----------------------------------------------------------------------===// +let addrMode = Absolute, isExtended = 1 in +multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, bits<2> MajOp, bit isHalf = 0> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 0, isPredicable = 1 in + def S2_#NAME#abs : T_StoreAbs <mnemonic, RC, ImmOp, MajOp, isHalf>; + + // Predicated + def S4_p#NAME#t_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 0, 0>; + def S4_p#NAME#f_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 1, 0>; + + // .new Predicated + def S4_p#NAME#tnew_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 0, 1>; + def S4_p#NAME#fnew_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 1, 1>; + } +} + +//===----------------------------------------------------------------------===// +// Template class for non predicated new-value store instructions with +// GP-Relative or absolute addressing. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, isPredicable = 1, mayStore = 1, isNVStore = 1, + isNewValue = 1, opNewValue = 1 in +class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp, bit isAbs> + : NVInst_V4<(outs), (ins u32Imm:$addr, IntRegs:$src), + mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src.new", + [], "", V2LDST_tc_st_SLOT0> { + bits<19> addr; + bits<3> src; + bits<16> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3}, + !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2}, + !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1}, + /* u16_0Imm */ addr{15-0}))); + let IClass = 0b0100; + + let Inst{27} = 1; + let Inst{26-25} = offsetBits{15-14}; + let Inst{24-21} = 0b0101; + let Inst{20-16} = offsetBits{13-9}; + let Inst{13} = offsetBits{8}; + let Inst{12-11} = MajOp; + let Inst{10-8} = src; + let Inst{7-0} = offsetBits{7-0}; + } + +//===----------------------------------------------------------------------===// +// Template class for predicated new-value store instructions with +// absolute addressing. 
+//===----------------------------------------------------------------------===// +let hasSideEffects = 0, isPredicated = 1, mayStore = 1, isNVStore = 1, + isNewValue = 1, opNewValue = 2, opExtentBits = 6, opExtendable = 1 in +class T_StoreAbs_NV_Pred <string mnemonic, bits<2> MajOp, bit isNot, bit isNew> + : NVInst_V4<(outs), (ins PredRegs:$src1, u6Ext:$absaddr, IntRegs:$src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ", + ") ")#mnemonic#"(#$absaddr) = $src2.new", + [], "", ST_tc_st_SLOT0>, AddrModeRel { + bits<2> src1; + bits<6> absaddr; + bits<3> src2; + + let isPredicatedNew = isNew; + let isPredicatedFalse = isNot; + + let IClass = 0b1010; + + let Inst{27-24} = 0b1111; + let Inst{23-21} = 0b101; + let Inst{17-16} = absaddr{5-4}; + let Inst{13} = isNew; + let Inst{12-11} = MajOp; + let Inst{10-8} = src2; + let Inst{7} = 0b1; + let Inst{6-3} = absaddr{3-0}; + let Inst{2} = isNot; + let Inst{1-0} = src1; +} + +//===----------------------------------------------------------------------===// +// Template class for non-predicated new-value store instructions with +// absolute addressing. +//===----------------------------------------------------------------------===// +class T_StoreAbs_NV <string mnemonic, Operand ImmOp, bits<2> MajOp> + : T_StoreAbsGP_NV <mnemonic, ImmOp, MajOp, 1>, AddrModeRel { + + string ImmOpStr = !cast<string>(ImmOp); + let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, + !if (!eq(ImmOpStr, "u16_2Imm"), 18, + !if (!eq(ImmOpStr, "u16_1Imm"), 17, + /* u16_0Imm */ 16))); + + let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3, + !if (!eq(ImmOpStr, "u16_2Imm"), 2, + !if (!eq(ImmOpStr, "u16_1Imm"), 1, + /* u16_0Imm */ 0))); +} + +//===----------------------------------------------------------------------===// +// Multiclass for new-value store instructions with absolute addressing. 
+//===----------------------------------------------------------------------===// +let addrMode = Absolute, isExtended = 1 in +multiclass ST_Abs_NV <string mnemonic, string CextOp, Operand ImmOp, + bits<2> MajOp> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 0, isPredicable = 1 in + def S2_#NAME#newabs : T_StoreAbs_NV <mnemonic, ImmOp, MajOp>; + + // Predicated + def S4_p#NAME#newt_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 0, 0>; + def S4_p#NAME#newf_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 1, 0>; + + // .new Predicated + def S4_p#NAME#newtnew_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 0, 1>; + def S4_p#NAME#newfnew_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 1, 1>; + } +} + +//===----------------------------------------------------------------------===// +// Stores with absolute addressing +//===----------------------------------------------------------------------===// +let accessSize = ByteAccess in +defm storerb : ST_Abs <"memb", "STrib", IntRegs, u16_0Imm, 0b00>, + ST_Abs_NV <"memb", "STrib", u16_0Imm, 0b00>; + +let accessSize = HalfWordAccess in +defm storerh : ST_Abs <"memh", "STrih", IntRegs, u16_1Imm, 0b01>, + ST_Abs_NV <"memh", "STrih", u16_1Imm, 0b01>; + +let accessSize = WordAccess in +defm storeri : ST_Abs <"memw", "STriw", IntRegs, u16_2Imm, 0b10>, + ST_Abs_NV <"memw", "STriw", u16_2Imm, 0b10>; + +let isNVStorable = 0, accessSize = DoubleWordAccess in +defm storerd : ST_Abs <"memd", "STrid", DoubleRegs, u16_3Imm, 0b11>; + +let isNVStorable = 0, accessSize = HalfWordAccess in +defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>; + +//===----------------------------------------------------------------------===// +// GP-relative stores. +// mem[bhwd](#global)=Rt +// Once predicated, these instructions map to absolute addressing mode. +// if ([!]Pv[.new]) mem[bhwd](##global)=Rt +//===----------------------------------------------------------------------===// + +let isAsmParserOnly = 1 in +class T_StoreGP <string mnemonic, string BaseOp, RegisterClass RC, + Operand ImmOp, bits<2> MajOp, bit isHalf = 0> + : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, 0, isHalf> { + // Set BaseOpcode same as absolute addressing instructions so that + // non-predicated GP-Rel instructions can have relate with predicated + // Absolute instruction. + let BaseOpcode = BaseOp#_abs; + } + +let isAsmParserOnly = 1 in +multiclass ST_GP <string mnemonic, string BaseOp, Operand ImmOp, + bits<2> MajOp, bit isHalf = 0> { + // Set BaseOpcode same as absolute addressing instructions so that + // non-predicated GP-Rel instructions can have relate with predicated + // Absolute instruction. 
+ let BaseOpcode = BaseOp#_abs in { + def NAME#gp : T_StoreAbsGP <mnemonic, IntRegs, ImmOp, MajOp, + 0, isHalf>; + // New-value store + def NAME#newgp : T_StoreAbsGP_NV <mnemonic, ImmOp, MajOp, 0> ; + } +} + +let accessSize = ByteAccess in +defm S2_storerb : ST_GP<"memb", "STrib", u16_0Imm, 0b00>, NewValueRel; + +let accessSize = HalfWordAccess in +defm S2_storerh : ST_GP<"memh", "STrih", u16_1Imm, 0b01>, NewValueRel; + +let accessSize = WordAccess in +defm S2_storeri : ST_GP<"memw", "STriw", u16_2Imm, 0b10>, NewValueRel; + +let isNVStorable = 0, accessSize = DoubleWordAccess in +def S2_storerdgp : T_StoreGP <"memd", "STrid", DoubleRegs, + u16_3Imm, 0b11>, PredNewRel; + +let isNVStorable = 0, accessSize = HalfWordAccess in +def S2_storerfgp : T_StoreGP <"memh", "STrif", IntRegs, + u16_1Imm, 0b01, 1>, PredNewRel; + +class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI> + : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>; + +class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod, + InstHexagon MI> + : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>; + +class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI> + : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>; + +class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod, + InstHexagon MI> + : Pat<(Store Value:$val, Addr:$addr), + (MI Addr:$addr, (ValueMod Value:$val))>; + +def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbgp>; +def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhgp>; +def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storerigp>; +def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdgp>; + +let AddedComplexity = 100 in { + def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>; + def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>; + def: Storea_pat<store, I32, addrgp, S2_storerigp>; + def: Storea_pat<store, I64, addrgp, S2_storerdgp>; + + // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" + // to "r0 = 1; memw(#foo) = r0" + let AddedComplexity = 100 in + def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), + (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>; +} + +//===----------------------------------------------------------------------===// +// Template class for non predicated load instructions with +// absolute addressing mode. 
+//===----------------------------------------------------------------------===// +let isPredicable = 1, hasSideEffects = 0 in +class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<3> MajOp> + : LDInst <(outs RC:$dst), (ins ImmOp:$addr), + "$dst = "#mnemonic# "(#$addr)", + [], "", V2LDST_tc_ld_SLOT01> { + bits<5> dst; + bits<19> addr; + bits<16> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3}, + !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2}, + !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1}, + /* u16_0Imm */ addr{15-0}))); + + let IClass = 0b0100; + + let Inst{27} = 0b1; + let Inst{26-25} = offsetBits{15-14}; + let Inst{24} = 0b1; + let Inst{23-21} = MajOp; + let Inst{20-16} = offsetBits{13-9}; + let Inst{13-5} = offsetBits{8-0}; + let Inst{4-0} = dst; + } + +class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<3> MajOp> + : T_LoadAbsGP <mnemonic, RC, u32MustExt, MajOp>, AddrModeRel { + + string ImmOpStr = !cast<string>(ImmOp); + let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, + !if (!eq(ImmOpStr, "u16_2Imm"), 18, + !if (!eq(ImmOpStr, "u16_1Imm"), 17, + /* u16_0Imm */ 16))); + + let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3, + !if (!eq(ImmOpStr, "u16_2Imm"), 2, + !if (!eq(ImmOpStr, "u16_1Imm"), 1, + /* u16_0Imm */ 0))); + } + +//===----------------------------------------------------------------------===// +// Template class for predicated load instructions with +// absolute addressing mode. +//===----------------------------------------------------------------------===// +let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opExtentBits = 6, + opExtendable = 2 in +class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp, + bit isPredNot, bit isPredNew> + : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u32MustExt:$absaddr), + !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#"$dst = "#mnemonic#"(#$absaddr)">, AddrModeRel { + bits<5> dst; + bits<2> src1; + bits<6> absaddr; + + let isPredicatedNew = isPredNew; + let isPredicatedFalse = isPredNot; + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); + + let IClass = 0b1001; + + let Inst{27-24} = 0b1111; + let Inst{23-21} = MajOp; + let Inst{20-16} = absaddr{5-1}; + let Inst{13} = 0b1; + let Inst{12} = isPredNew; + let Inst{11} = isPredNot; + let Inst{10-9} = src1; + let Inst{8} = absaddr{0}; + let Inst{7} = 0b1; + let Inst{4-0} = dst; + } + +//===----------------------------------------------------------------------===// +// Multiclass for the load instructions with absolute addressing mode. 
+//===----------------------------------------------------------------------===// +multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bits<3> MajOp, + bit PredNot> { + def _abs : T_LoadAbs_Pred <mnemonic, RC, MajOp, PredNot, 0>; + // Predicate new + def new_abs : T_LoadAbs_Pred <mnemonic, RC, MajOp, PredNot, 1>; +} + +let addrMode = Absolute, isExtended = 1 in +multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, bits<3> MajOp> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 1, isPredicable = 1 in + def L4_#NAME#_abs: T_LoadAbs <mnemonic, RC, ImmOp, MajOp>; + + // Predicated + defm L4_p#NAME#t : LD_Abs_Pred<mnemonic, RC, MajOp, 0>; + defm L4_p#NAME#f : LD_Abs_Pred<mnemonic, RC, MajOp, 1>; + } +} + +let accessSize = ByteAccess, hasNewValue = 1 in { + defm loadrb : LD_Abs<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>; + defm loadrub : LD_Abs<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>; +} + +let accessSize = HalfWordAccess, hasNewValue = 1 in { + defm loadrh : LD_Abs<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>; + defm loadruh : LD_Abs<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>; +} + +let accessSize = WordAccess, hasNewValue = 1 in +defm loadri : LD_Abs<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>; + +let accessSize = DoubleWordAccess in +defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; + +//===----------------------------------------------------------------------===// +// multiclass for load instructions with GP-relative addressing mode. +// Rx=mem[bhwd](##global) +// Once predicated, these instructions map to absolute addressing mode. +// if ([!]Pv[.new]) Rx=mem[bhwd](##global) +//===----------------------------------------------------------------------===// + +let isAsmParserOnly = 1 in +class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp, + bits<3> MajOp> + : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp>, PredNewRel { + let BaseOpcode = BaseOp#_abs; + } + +let accessSize = ByteAccess, hasNewValue = 1 in { + def L2_loadrbgp : T_LoadGP<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>; + def L2_loadrubgp : T_LoadGP<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>; +} + +let accessSize = HalfWordAccess, hasNewValue = 1 in { + def L2_loadrhgp : T_LoadGP<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>; + def L2_loadruhgp : T_LoadGP<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>; +} + +let accessSize = WordAccess, hasNewValue = 1 in +def L2_loadrigp : T_LoadGP<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>; + +let accessSize = DoubleWordAccess in +def L2_loadrdgp : T_LoadGP<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; + +def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>; +def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>; +def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>; +def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>; + +// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd +def: Loadam_pat<load, i1, addrga, I32toI1, L4_loadrub_abs>; +def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>; + +def: Stoream_pat<store, I1, addrga, I1toI32, S2_storerbabs>; +def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>; + +// Map from load(globaladdress) -> mem[u][bhwd](#foo) +class LoadGP_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> + : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))), + (VT (MI tglobaladdr:$global))>; + +let AddedComplexity = 100 in { + def: LoadGP_pats <extloadi8, L2_loadrbgp>; + def: LoadGP_pats <sextloadi8, 
L2_loadrbgp>; + def: LoadGP_pats <zextloadi8, L2_loadrubgp>; + def: LoadGP_pats <extloadi16, L2_loadrhgp>; + def: LoadGP_pats <sextloadi16, L2_loadrhgp>; + def: LoadGP_pats <zextloadi16, L2_loadruhgp>; + def: LoadGP_pats <load, L2_loadrigp>; + def: LoadGP_pats <load, L2_loadrdgp, i64>; +} + +// When the Interprocedural Global Variable optimizer realizes that a certain +// global variable takes only two constant values, it shrinks the global to +// a boolean. Catch those loads here in the following 3 patterns. +let AddedComplexity = 100 in { + def: LoadGP_pats <extloadi1, L2_loadrubgp>; + def: LoadGP_pats <zextloadi1, L2_loadrubgp>; +} + +// Transfer global address into a register +def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>; +def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi s16Ext:$Rs)>; +def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>; + +let AddedComplexity = 30 in { + def: Storea_pat<truncstorei8, I32, u32ImmPred, S2_storerbabs>; + def: Storea_pat<truncstorei16, I32, u32ImmPred, S2_storerhabs>; + def: Storea_pat<store, I32, u32ImmPred, S2_storeriabs>; +} + +let AddedComplexity = 30 in { + def: Loada_pat<load, i32, u32ImmPred, L4_loadri_abs>; + def: Loada_pat<sextloadi8, i32, u32ImmPred, L4_loadrb_abs>; + def: Loada_pat<zextloadi8, i32, u32ImmPred, L4_loadrub_abs>; + def: Loada_pat<sextloadi16, i32, u32ImmPred, L4_loadrh_abs>; + def: Loada_pat<zextloadi16, i32, u32ImmPred, L4_loadruh_abs>; +} + +// Indexed store word - global address. +// memw(Rs+#u6:2)=#S8 +let AddedComplexity = 100 in +def: Storex_add_pat<store, addrga, u6_2ImmPred, S4_storeiri_io>; + +// Load from a global address that has only one use in the current basic block. +let AddedComplexity = 100 in { + def: Loada_pat<extloadi8, i32, addrga, L4_loadrub_abs>; + def: Loada_pat<sextloadi8, i32, addrga, L4_loadrb_abs>; + def: Loada_pat<zextloadi8, i32, addrga, L4_loadrub_abs>; + + def: Loada_pat<extloadi16, i32, addrga, L4_loadruh_abs>; + def: Loada_pat<sextloadi16, i32, addrga, L4_loadrh_abs>; + def: Loada_pat<zextloadi16, i32, addrga, L4_loadruh_abs>; + + def: Loada_pat<load, i32, addrga, L4_loadri_abs>; + def: Loada_pat<load, i64, addrga, L4_loadrd_abs>; +} + +// Store to a global address that has only one use in the current basic block. +let AddedComplexity = 100 in { + def: Storea_pat<truncstorei8, I32, addrga, S2_storerbabs>; + def: Storea_pat<truncstorei16, I32, addrga, S2_storerhabs>; + def: Storea_pat<store, I32, addrga, S2_storeriabs>; + def: Storea_pat<store, I64, addrga, S2_storerdabs>; + + def: Stoream_pat<truncstorei32, I64, addrga, LoReg, S2_storeriabs>; +} + +// i8/i16/i32 -> i64 loads +// We need a complexity of 120 here to override preceding handling of +// zextload. 
+let AddedComplexity = 120 in { + def: Loadam_pat<extloadi8, i64, addrga, Zext64, L4_loadrub_abs>; + def: Loadam_pat<sextloadi8, i64, addrga, Sext64, L4_loadrb_abs>; + def: Loadam_pat<zextloadi8, i64, addrga, Zext64, L4_loadrub_abs>; + + def: Loadam_pat<extloadi16, i64, addrga, Zext64, L4_loadruh_abs>; + def: Loadam_pat<sextloadi16, i64, addrga, Sext64, L4_loadrh_abs>; + def: Loadam_pat<zextloadi16, i64, addrga, Zext64, L4_loadruh_abs>; + + def: Loadam_pat<extloadi32, i64, addrga, Zext64, L4_loadri_abs>; + def: Loadam_pat<sextloadi32, i64, addrga, Sext64, L4_loadri_abs>; + def: Loadam_pat<zextloadi32, i64, addrga, Zext64, L4_loadri_abs>; +} + +let AddedComplexity = 100 in { + def: Loada_pat<extloadi8, i32, addrgp, L4_loadrub_abs>; + def: Loada_pat<sextloadi8, i32, addrgp, L4_loadrb_abs>; + def: Loada_pat<zextloadi8, i32, addrgp, L4_loadrub_abs>; + + def: Loada_pat<extloadi16, i32, addrgp, L4_loadruh_abs>; + def: Loada_pat<sextloadi16, i32, addrgp, L4_loadrh_abs>; + def: Loada_pat<zextloadi16, i32, addrgp, L4_loadruh_abs>; + + def: Loada_pat<load, i32, addrgp, L4_loadri_abs>; + def: Loada_pat<load, i64, addrgp, L4_loadrd_abs>; +} + +let AddedComplexity = 100 in { + def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbabs>; + def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhabs>; + def: Storea_pat<store, I32, addrgp, S2_storeriabs>; + def: Storea_pat<store, I64, addrgp, S2_storerdabs>; +} + +def: Loada_pat<atomic_load_8, i32, addrgp, L4_loadrub_abs>; +def: Loada_pat<atomic_load_16, i32, addrgp, L4_loadruh_abs>; +def: Loada_pat<atomic_load_32, i32, addrgp, L4_loadri_abs>; +def: Loada_pat<atomic_load_64, i64, addrgp, L4_loadrd_abs>; + +def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbabs>; +def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhabs>; +def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storeriabs>; +def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdabs>; + +let Constraints = "@earlyclobber $dst" in +def Insert4 : PseudoM<(outs DoubleRegs:$dst), (ins IntRegs:$a, IntRegs:$b, + IntRegs:$c, IntRegs:$d), + ".error \"Should never try to emit Insert4\"", + [(set (i64 DoubleRegs:$dst), + (or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$b), (i32 65535))))), + (i32 16)), + (i64 (zext (i32 (and (i32 IntRegs:$a), (i32 65535)))))), + (shl (i64 (anyext (i32 (and (i32 IntRegs:$c), (i32 65535))))), + (i32 32))), + (shl (i64 (anyext (i32 IntRegs:$d))), (i32 48))))]>; + +//===----------------------------------------------------------------------===// +// :raw for of boundscheck:hi:lo insns +//===----------------------------------------------------------------------===// + +// A4_boundscheck_lo: Detect if a register is within bounds. +let hasSideEffects = 0 in +def A4_boundscheck_lo: ALU64Inst < + (outs PredRegs:$Pd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Pd = boundscheck($Rss, $Rtt):raw:lo"> { + bits<2> Pd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b00100; + let Inst{13} = 0b1; + let Inst{7-5} = 0b100; + let Inst{1-0} = Pd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +// A4_boundscheck_hi: Detect if a register is within bounds. 
+let hasSideEffects = 0 in +def A4_boundscheck_hi: ALU64Inst < + (outs PredRegs:$Pd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Pd = boundscheck($Rss, $Rtt):raw:hi"> { + bits<2> Pd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b00100; + let Inst{13} = 0b1; + let Inst{7-5} = 0b101; + let Inst{1-0} = Pd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +let hasSideEffects = 0, isAsmParserOnly = 1 in +def A4_boundscheck : MInst < + (outs PredRegs:$Pd), (ins IntRegs:$Rs, DoubleRegs:$Rtt), + "$Pd=boundscheck($Rs,$Rtt)">; + +// A4_tlbmatch: Detect if a VA/ASID matches a TLB entry. +let isPredicateLate = 1, hasSideEffects = 0 in +def A4_tlbmatch : ALU64Inst<(outs PredRegs:$Pd), + (ins DoubleRegs:$Rs, IntRegs:$Rt), + "$Pd = tlbmatch($Rs, $Rt)", + [], "", ALU64_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-23} = 0b00100; + let Inst{20-16} = Rs; + let Inst{13} = 0b1; + let Inst{12-8} = Rt; + let Inst{7-5} = 0b011; + let Inst{1-0} = Pd; + } + +// We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH +// because the SDNode ISD::PREFETCH has properties MayLoad and MayStore. +// We don't really want either one here. +def SDTHexagonDCFETCH : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>; +def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH, + [SDNPHasChain]>; + +// Use LD0Inst for dcfetch, but set "mayLoad" to 0 because this doesn't +// really do a load. +let hasSideEffects = 1, mayLoad = 0 in +def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3), + "dcfetch($Rs + #$u11_3)", + [(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3)], + "", LD_tc_ld_SLOT0> { + bits<5> Rs; + bits<14> u11_3; + + let IClass = 0b1001; + let Inst{27-21} = 0b0100000; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{10-0} = u11_3{13-3}; +} + +//===----------------------------------------------------------------------===// +// Compound instructions +//===----------------------------------------------------------------------===// + +let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1, + isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, + opExtentBits = 11, opExtentAlign = 2, opExtendable = 1, + isTerminator = 1 in +class CJInst_tstbit_R0<string px, bit np, string tnt> + : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2), + ""#px#" = tstbit($Rs, #0); if (" + #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { + bits<4> Rs; + bits<11> r9_2; + + // np: !p[01] + let isPredicatedFalse = np; + // tnt: Taken/Not Taken + let isBrTaken = !if (!eq(tnt, "t"), "true", "false"); + let isTaken = !if (!eq(tnt, "t"), 1, 0); + + let IClass = 0b0001; + let Inst{27-26} = 0b00; + let Inst{25} = !if (!eq(px, "!p1"), 1, + !if (!eq(px, "p1"), 1, 0)); + let Inst{24-23} = 0b11; + let Inst{22} = np; + let Inst{21-20} = r9_2{10-9}; + let Inst{19-16} = Rs; + let Inst{13} = !if (!eq(tnt, "t"), 1, 0); + let Inst{9-8} = 0b11; + let Inst{7-1} = r9_2{8-2}; +} + +let Defs = [PC, P0], Uses = [P0] in { + def J4_tstbit0_tp0_jump_nt : CJInst_tstbit_R0<"p0", 0, "nt">; + def J4_tstbit0_tp0_jump_t : CJInst_tstbit_R0<"p0", 0, "t">; + def J4_tstbit0_fp0_jump_nt : CJInst_tstbit_R0<"p0", 1, "nt">; + def J4_tstbit0_fp0_jump_t : CJInst_tstbit_R0<"p0", 1, "t">; +} + +let Defs = [PC, P1], Uses = [P1] in { + def J4_tstbit0_tp1_jump_nt : CJInst_tstbit_R0<"p1", 0, "nt">; + def J4_tstbit0_tp1_jump_t : CJInst_tstbit_R0<"p1", 0, "t">; + def J4_tstbit0_fp1_jump_nt : 
CJInst_tstbit_R0<"p1", 1, "nt">; + def J4_tstbit0_fp1_jump_t : CJInst_tstbit_R0<"p1", 1, "t">; +} + + +let isBranch = 1, hasSideEffects = 0, + isExtentSigned = 1, isPredicated = 1, isPredicatedNew = 1, + isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, + opExtendable = 2, isTerminator = 1 in +class CJInst_RR<string px, string op, bit np, string tnt> + : InstHexagon<(outs), (ins IntRegs:$Rs, IntRegs:$Rt, brtarget:$r9_2), + ""#px#" = cmp."#op#"($Rs, $Rt); if (" + #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { + bits<4> Rs; + bits<4> Rt; + bits<11> r9_2; + + // np: !p[01] + let isPredicatedFalse = np; + // tnt: Taken/Not Taken + let isBrTaken = !if (!eq(tnt, "t"), "true", "false"); + let isTaken = !if (!eq(tnt, "t"), 1, 0); + + let IClass = 0b0001; + let Inst{27-23} = !if (!eq(op, "eq"), 0b01000, + !if (!eq(op, "gt"), 0b01001, + !if (!eq(op, "gtu"), 0b01010, 0))); + let Inst{22} = np; + let Inst{21-20} = r9_2{10-9}; + let Inst{19-16} = Rs; + let Inst{13} = !if (!eq(tnt, "t"), 1, 0); + // px: Predicate reg 0/1 + let Inst{12} = !if (!eq(px, "!p1"), 1, + !if (!eq(px, "p1"), 1, 0)); + let Inst{11-8} = Rt; + let Inst{7-1} = r9_2{8-2}; +} + +// P[10] taken/not taken. +multiclass T_tnt_CJInst_RR<string op, bit np> { + let Defs = [PC, P0], Uses = [P0] in { + def NAME#p0_jump_nt : CJInst_RR<"p0", op, np, "nt">; + def NAME#p0_jump_t : CJInst_RR<"p0", op, np, "t">; + } + let Defs = [PC, P1], Uses = [P1] in { + def NAME#p1_jump_nt : CJInst_RR<"p1", op, np, "nt">; + def NAME#p1_jump_t : CJInst_RR<"p1", op, np, "t">; + } +} +// Predicate / !Predicate +multiclass T_pnp_CJInst_RR<string op>{ + defm J4_cmp#NAME#_t : T_tnt_CJInst_RR<op, 0>; + defm J4_cmp#NAME#_f : T_tnt_CJInst_RR<op, 1>; +} +// TypeCJ Instructions compare RR and jump +defm eq : T_pnp_CJInst_RR<"eq">; +defm gt : T_pnp_CJInst_RR<"gt">; +defm gtu : T_pnp_CJInst_RR<"gtu">; + +let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1, + isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, opExtentBits = 11, + opExtentAlign = 2, opExtendable = 2, isTerminator = 1 in +class CJInst_RU5<string px, string op, bit np, string tnt> + : InstHexagon<(outs), (ins IntRegs:$Rs, u5Imm:$U5, brtarget:$r9_2), + ""#px#" = cmp."#op#"($Rs, #$U5); if (" + #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { + bits<4> Rs; + bits<5> U5; + bits<11> r9_2; + + // np: !p[01] + let isPredicatedFalse = np; + // tnt: Taken/Not Taken + let isBrTaken = !if (!eq(tnt, "t"), "true", "false"); + let isTaken = !if (!eq(tnt, "t"), 1, 0); + + let IClass = 0b0001; + let Inst{27-26} = 0b00; + // px: Predicate reg 0/1 + let Inst{25} = !if (!eq(px, "!p1"), 1, + !if (!eq(px, "p1"), 1, 0)); + let Inst{24-23} = !if (!eq(op, "eq"), 0b00, + !if (!eq(op, "gt"), 0b01, + !if (!eq(op, "gtu"), 0b10, 0))); + let Inst{22} = np; + let Inst{21-20} = r9_2{10-9}; + let Inst{19-16} = Rs; + let Inst{13} = !if (!eq(tnt, "t"), 1, 0); + let Inst{12-8} = U5; + let Inst{7-1} = r9_2{8-2}; +} +// P[10] taken/not taken. 
+multiclass T_tnt_CJInst_RU5<string op, bit np> { + let Defs = [PC, P0], Uses = [P0] in { + def NAME#p0_jump_nt : CJInst_RU5<"p0", op, np, "nt">; + def NAME#p0_jump_t : CJInst_RU5<"p0", op, np, "t">; + } + let Defs = [PC, P1], Uses = [P1] in { + def NAME#p1_jump_nt : CJInst_RU5<"p1", op, np, "nt">; + def NAME#p1_jump_t : CJInst_RU5<"p1", op, np, "t">; + } +} +// Predicate / !Predicate +multiclass T_pnp_CJInst_RU5<string op>{ + defm J4_cmp#NAME#i_t : T_tnt_CJInst_RU5<op, 0>; + defm J4_cmp#NAME#i_f : T_tnt_CJInst_RU5<op, 1>; +} +// TypeCJ Instructions compare RI and jump +defm eq : T_pnp_CJInst_RU5<"eq">; +defm gt : T_pnp_CJInst_RU5<"gt">; +defm gtu : T_pnp_CJInst_RU5<"gtu">; + +let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1, + isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, + isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 1, + isTerminator = 1 in +class CJInst_Rn1<string px, string op, bit np, string tnt> + : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2), + ""#px#" = cmp."#op#"($Rs,#-1); if (" + #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { + bits<4> Rs; + bits<11> r9_2; + + // np: !p[01] + let isPredicatedFalse = np; + // tnt: Taken/Not Taken + let isBrTaken = !if (!eq(tnt, "t"), "true", "false"); + let isTaken = !if (!eq(tnt, "t"), 1, 0); + + let IClass = 0b0001; + let Inst{27-26} = 0b00; + let Inst{25} = !if (!eq(px, "!p1"), 1, + !if (!eq(px, "p1"), 1, 0)); + + let Inst{24-23} = 0b11; + let Inst{22} = np; + let Inst{21-20} = r9_2{10-9}; + let Inst{19-16} = Rs; + let Inst{13} = !if (!eq(tnt, "t"), 1, 0); + let Inst{9-8} = !if (!eq(op, "eq"), 0b00, + !if (!eq(op, "gt"), 0b01, 0)); + let Inst{7-1} = r9_2{8-2}; +} + +// P[10] taken/not taken. +multiclass T_tnt_CJInst_Rn1<string op, bit np> { + let Defs = [PC, P0], Uses = [P0] in { + def NAME#p0_jump_nt : CJInst_Rn1<"p0", op, np, "nt">; + def NAME#p0_jump_t : CJInst_Rn1<"p0", op, np, "t">; + } + let Defs = [PC, P1], Uses = [P1] in { + def NAME#p1_jump_nt : CJInst_Rn1<"p1", op, np, "nt">; + def NAME#p1_jump_t : CJInst_Rn1<"p1", op, np, "t">; + } +} +// Predicate / !Predicate +multiclass T_pnp_CJInst_Rn1<string op>{ + defm J4_cmp#NAME#n1_t : T_tnt_CJInst_Rn1<op, 0>; + defm J4_cmp#NAME#n1_f : T_tnt_CJInst_Rn1<op, 1>; +} +// TypeCJ Instructions compare -1 and jump +defm eq : T_pnp_CJInst_Rn1<"eq">; +defm gt : T_pnp_CJInst_Rn1<"gt">; + +// J4_jumpseti: Direct unconditional jump and set register to immediate. +let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1, + isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11, + opExtentAlign = 2, opExtendable = 2 in +def J4_jumpseti: CJInst < + (outs IntRegs:$Rd), + (ins u6Imm:$U6, brtarget:$r9_2), + "$Rd = #$U6 ; jump $r9_2"> { + bits<4> Rd; + bits<6> U6; + bits<11> r9_2; + + let IClass = 0b0001; + let Inst{27-24} = 0b0110; + let Inst{21-20} = r9_2{10-9}; + let Inst{19-16} = Rd; + let Inst{13-8} = U6; + let Inst{7-1} = r9_2{8-2}; + } + +// J4_jumpsetr: Direct unconditional jump and transfer register. 
+let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1, + isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11, + opExtentAlign = 2, opExtendable = 2 in +def J4_jumpsetr: CJInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, brtarget:$r9_2), + "$Rd = $Rs ; jump $r9_2"> { + bits<4> Rd; + bits<4> Rs; + bits<11> r9_2; + + let IClass = 0b0001; + let Inst{27-24} = 0b0111; + let Inst{21-20} = r9_2{10-9}; + let Inst{11-8} = Rd; + let Inst{19-16} = Rs; + let Inst{7-1} = r9_2{8-2}; + } + +// Duplex instructions +//===----------------------------------------------------------------------===// +include "HexagonIsetDx.td" diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td new file mode 100644 index 0000000..823961f --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td @@ -0,0 +1,937 @@ +//=- HexagonInstrInfoV5.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V5 instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// XTYPE/MPY +//===----------------------------------------------------------------------===// + + //Rdd[+]=vrmpybsu(Rss,Rtt) +let Predicates = [HasV5T] in { + def M5_vrmpybsu: T_XTYPE_Vect<"vrmpybsu", 0b110, 0b001, 0>; + def M5_vrmacbsu: T_XTYPE_Vect_acc<"vrmpybsu", 0b110, 0b001, 0>; + + //Rdd[+]=vrmpybu(Rss,Rtt) + def M5_vrmpybuu: T_XTYPE_Vect<"vrmpybu", 0b100, 0b001, 0>; + def M5_vrmacbuu: T_XTYPE_Vect_acc<"vrmpybu", 0b100, 0b001, 0>; + + def M5_vdmpybsu: T_M2_vmpy<"vdmpybsu", 0b101, 0b001, 0, 0, 1>; + def M5_vdmacbsu: T_M2_vmpy_acc_sat <"vdmpybsu", 0b001, 0b001, 0, 0>; +} + +// Vector multiply bytes +// Rdd=vmpyb[s]u(Rs,Rt) +let Predicates = [HasV5T] in { + def M5_vmpybsu: T_XTYPE_mpy64 <"vmpybsu", 0b010, 0b001, 0, 0, 0>; + def M5_vmpybuu: T_XTYPE_mpy64 <"vmpybu", 0b100, 0b001, 0, 0, 0>; + + // Rxx+=vmpyb[s]u(Rs,Rt) + def M5_vmacbsu: T_XTYPE_mpy64_acc <"vmpybsu", "+", 0b110, 0b001, 0, 0, 0>; + def M5_vmacbuu: T_XTYPE_mpy64_acc <"vmpybu", "+", 0b100, 0b001, 0, 0, 0>; + + // Rd=vaddhub(Rss,Rtt):sat + let hasNewValue = 1, opNewValue = 0 in + def A5_vaddhubs: T_S3op_1 <"vaddhub", IntRegs, 0b01, 0b001, 0, 1>; +} + +def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6Imm, + [(set I64:$dst, + (sra (i64 (add (i64 (sra I64:$src1, u6ImmPred:$src2)), 1)), + (i32 1)))], 1>, + Requires<[HasV5T]> { + bits<6> src2; + let Inst{13-8} = src2; +} + +let isAsmParserOnly = 1 in +def S2_asr_i_p_rnd_goodsyntax + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), + "$dst = asrrnd($src1, #$src2)">; + +def C4_fastcorner9 : T_LOGICAL_2OP<"fastcorner9", 0b000, 0, 0>, + Requires<[HasV5T]> { + let Inst{13,7,4} = 0b111; +} + +def C4_fastcorner9_not : T_LOGICAL_2OP<"!fastcorner9", 0b000, 0, 0>, + Requires<[HasV5T]> { + let Inst{20,13,7,4} = 0b1111; +} + +def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, + SDTCisPtrTy<1>]>; +def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>; + +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in +def FCONST32_nsdata : LDInst<(outs IntRegs:$dst), (ins 
globaladdress:$global), + "$dst = CONST32(#$global)", + [(set F32:$dst, + (HexagonFCONST32 tglobaladdr:$global))]>, + Requires<[HasV5T]>; + +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in +def CONST64_Float_Real : LDInst<(outs DoubleRegs:$dst), (ins f64imm:$src1), + "$dst = CONST64(#$src1)", + [(set F64:$dst, fpimm:$src1)]>, + Requires<[HasV5T]>; + +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in +def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1), + "$dst = CONST32(#$src1)", + [(set F32:$dst, fpimm:$src1)]>, + Requires<[HasV5T]>; + +// Transfer immediate float. +// Only works with single precision fp value. +// For double precision, use CONST64_float_real, as 64bit transfer +// can only hold 40-bit values - 32 from const ext + 8 bit immediate. +// Make sure that complexity is more than the CONST32 pattern in +// HexagonInstrInfo.td patterns. +let isExtended = 1, opExtendable = 1, isMoveImm = 1, isReMaterializable = 1, + isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT, + isCodeGenOnly = 1, isPseudo = 1 in +def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32Ext:$src1), + "$dst = #$src1", + [(set F32:$dst, fpimm:$src1)]>, + Requires<[HasV5T]>; + +let isExtended = 1, opExtendable = 2, isPredicated = 1, hasSideEffects = 0, + validSubTargets = HasV5SubT, isCodeGenOnly = 1, isPseudo = 1 in +def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, f32Ext:$src2), + "if ($src1) $dst = #$src2", []>, + Requires<[HasV5T]>; + +let isExtended = 1, opExtendable = 2, isPredicated = 1, isPredicatedFalse = 1, + hasSideEffects = 0, validSubTargets = HasV5SubT, isPseudo = 1 in +def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, f32Ext:$src2), + "if (!$src1) $dst = #$src2", []>, + Requires<[HasV5T]>; + +def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>, + SDTCisVT<1, i64>]>; + +def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>; + +let hasNewValue = 1, validSubTargets = HasV5SubT in +def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss), + "$Rd = popcount($Rss)", + [(set I32:$Rd, (HexagonPOPCOUNT I64:$Rss))], "", S_2op_tc_2_SLOT23>, + Requires<[HasV5T]> { + bits<5> Rd; + bits<5> Rss; + + let IClass = 0b1000; + + let Inst{27-21} = 0b1000011; + let Inst{7-5} = 0b011; + let Inst{4-0} = Rd; + let Inst{20-16} = Rss; + } + +defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>; +defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>; + +defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>; +defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>; +def: Storex_simple_pat<store, F32, S2_storeri_io>; +def: Storex_simple_pat<store, F64, S2_storerd_io>; + +let isFP = 1, hasNewValue = 1, opNewValue = 0 in +class T_MInstFloat <string mnemonic, bits<3> MajOp, bits<3> MinOp> + : MInst<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = "#mnemonic#"($Rs, $Rt)", [], + "" , M_tc_3or4x_SLOT23 > , + Requires<[HasV5T]> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1011; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = Rt; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; + } + +let isCommutable = 1 in { + def F2_sfadd : T_MInstFloat < "sfadd", 0b000, 0b000>; + def F2_sfmpy : T_MInstFloat < "sfmpy", 0b010, 0b000>; +} + +def F2_sfsub : T_MInstFloat < "sfsub", 0b000, 0b001>; + +def: Pat<(f32 (fadd F32:$src1, F32:$src2)), + (F2_sfadd F32:$src1, F32:$src2)>; + +def: Pat<(f32 
(fsub F32:$src1, F32:$src2)), + (F2_sfsub F32:$src1, F32:$src2)>; + +def: Pat<(f32 (fmul F32:$src1, F32:$src2)), + (F2_sfmpy F32:$src1, F32:$src2)>; + +let Itinerary = M_tc_3x_SLOT23 in { + def F2_sfmax : T_MInstFloat < "sfmax", 0b100, 0b000>; + def F2_sfmin : T_MInstFloat < "sfmin", 0b100, 0b001>; +} + +let AddedComplexity = 100, Predicates = [HasV5T] in { + def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)), + F32:$src1, F32:$src2)), + (F2_sfmin F32:$src1, F32:$src2)>; + + def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)), + F32:$src2, F32:$src1)), + (F2_sfmin F32:$src1, F32:$src2)>; + + def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)), + F32:$src1, F32:$src2)), + (F2_sfmax F32:$src1, F32:$src2)>; + + def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)), + F32:$src2, F32:$src1)), + (F2_sfmax F32:$src1, F32:$src2)>; +} + +def F2_sffixupn : T_MInstFloat < "sffixupn", 0b110, 0b000>; +def F2_sffixupd : T_MInstFloat < "sffixupd", 0b110, 0b001>; + +// F2_sfrecipa: Reciprocal approximation for division. +let isPredicateLate = 1, isFP = 1, +hasSideEffects = 0, hasNewValue = 1 in +def F2_sfrecipa: MInst < + (outs IntRegs:$Rd, PredRegs:$Pe), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd, $Pe = sfrecipa($Rs, $Rt)">, + Requires<[HasV5T]> { + bits<5> Rd; + bits<2> Pe; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + let Inst{27-21} = 0b1011111; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = Rt; + let Inst{7} = 0b1; + let Inst{6-5} = Pe; + let Inst{4-0} = Rd; + } + +// F2_dfcmpeq: Floating point compare for equal. +let isCompare = 1, isFP = 1 in +class T_fcmp <string mnemonic, RegisterClass RC, bits<3> MinOp, + list<dag> pattern = [] > + : ALU64Inst <(outs PredRegs:$dst), (ins RC:$src1, RC:$src2), + "$dst = "#mnemonic#"($src1, $src2)", pattern, + "" , ALU64_tc_2early_SLOT23 > , + Requires<[HasV5T]> { + bits<2> dst; + bits<5> src1; + bits<5> src2; + + let IClass = 0b1101; + + let Inst{27-21} = 0b0010111; + let Inst{20-16} = src1; + let Inst{12-8} = src2; + let Inst{7-5} = MinOp; + let Inst{1-0} = dst; + } + +class T_fcmp64 <string mnemonic, PatFrag OpNode, bits<3> MinOp> + : T_fcmp <mnemonic, DoubleRegs, MinOp, + [(set I1:$dst, (OpNode F64:$src1, F64:$src2))]> { + let IClass = 0b1101; + let Inst{27-21} = 0b0010111; +} + +class T_fcmp32 <string mnemonic, PatFrag OpNode, bits<3> MinOp> + : T_fcmp <mnemonic, IntRegs, MinOp, + [(set I1:$dst, (OpNode F32:$src1, F32:$src2))]> { + let IClass = 0b1100; + let Inst{27-21} = 0b0111111; +} + +def F2_dfcmpeq : T_fcmp64<"dfcmp.eq", setoeq, 0b000>; +def F2_dfcmpgt : T_fcmp64<"dfcmp.gt", setogt, 0b001>; +def F2_dfcmpge : T_fcmp64<"dfcmp.ge", setoge, 0b010>; +def F2_dfcmpuo : T_fcmp64<"dfcmp.uo", setuo, 0b011>; + +def F2_sfcmpge : T_fcmp32<"sfcmp.ge", setoge, 0b000>; +def F2_sfcmpuo : T_fcmp32<"sfcmp.uo", setuo, 0b001>; +def F2_sfcmpeq : T_fcmp32<"sfcmp.eq", setoeq, 0b011>; +def F2_sfcmpgt : T_fcmp32<"sfcmp.gt", setogt, 0b100>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for ordered gt, ge, eq operations. 
+//===----------------------------------------------------------------------===// + +let Predicates = [HasV5T] in +multiclass T_fcmp_pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), + (IntMI F32:$src1, F32:$src2)>; + // DoubleRegs + def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), + (DoubleMI F64:$src1, F64:$src2)>; +} + +defm : T_fcmp_pats <seteq, F2_sfcmpeq, F2_dfcmpeq>; +defm : T_fcmp_pats <setgt, F2_sfcmpgt, F2_dfcmpgt>; +defm : T_fcmp_pats <setge, F2_sfcmpge, F2_dfcmpge>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations. +//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass unord_Pats <PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (IntMI F32:$src1, F32:$src2))>; + + // DoubleRegs + def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (DoubleMI F64:$src1, F64:$src2))>; +} + +defm : unord_Pats <setuge, F2_sfcmpge, F2_dfcmpge>; +defm : unord_Pats <setugt, F2_sfcmpgt, F2_dfcmpgt>; +defm : unord_Pats <setueq, F2_sfcmpeq, F2_dfcmpeq>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for the following dags: +// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2)) +// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2) +// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2) +// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2)) +//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass eq_ordgePats <PatFrag cmpOp, InstHexagon IntMI, + InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (C2_not (IntMI F32:$src1, F32:$src2))>; + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (IntMI F32:$src1, F32:$src2)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (IntMI F32:$src1, F32:$src2)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (C2_not (IntMI F32:$src1, F32:$src2))>; + + // DoubleRegs + def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (C2_not (DoubleMI F64:$src1, F64:$src2))>; + def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (DoubleMI F64:$src1, F64:$src2)>; + def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (DoubleMI F64:$src1, F64:$src2)>; + def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (C2_not (DoubleMI F64:$src1, F64:$src2))>; +} + +defm : eq_ordgePats<setoeq, F2_sfcmpeq, F2_dfcmpeq>; +defm : eq_ordgePats<setoge, F2_sfcmpge, F2_dfcmpge>; +defm : eq_ordgePats<setogt, F2_sfcmpgt, F2_dfcmpgt>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for the following dags: +// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1)) +// seteq(setolt(op1, op2), 1) -> setogt(op2, op1) +// setne(setolt(op1, op2), 0) -> setogt(op2, op1) +// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1)) +//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass eq_ordltPats <PatFrag cmpOp, InstHexagon IntMI, + InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (C2_not 
(IntMI F32:$src2, F32:$src1))>; + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (IntMI F32:$src2, F32:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (IntMI F32:$src2, F32:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (C2_not (IntMI F32:$src2, F32:$src1))>; + + // DoubleRegs + def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (C2_not (DoubleMI F64:$src2, F64:$src1))>; + def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (DoubleMI F64:$src2, F64:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (DoubleMI F64:$src2, F64:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (C2_not (DoubleMI F64:$src2, F64:$src1))>; +} + +defm : eq_ordltPats<setole, F2_sfcmpge, F2_dfcmpge>; +defm : eq_ordltPats<setolt, F2_sfcmpgt, F2_dfcmpgt>; + + +// o. seto inverse of setuo. http://llvm.org/docs/LangRef.html#i_fcmp +let Predicates = [HasV5T] in { + def: Pat<(i1 (seto F32:$src1, F32:$src2)), + (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>; + def: Pat<(i1 (seto F32:$src1, fpimm:$src2)), + (C2_not (F2_sfcmpuo (TFRI_f fpimm:$src2), F32:$src1))>; + def: Pat<(i1 (seto F64:$src1, F64:$src2)), + (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>; + def: Pat<(i1 (seto F64:$src1, fpimm:$src2)), + (C2_not (F2_dfcmpuo (CONST64_Float_Real fpimm:$src2), F64:$src1))>; +} + +// Ordered lt. +let Predicates = [HasV5T] in { + def: Pat<(i1 (setolt F32:$src1, F32:$src2)), + (F2_sfcmpgt F32:$src2, F32:$src1)>; + def: Pat<(i1 (setolt F32:$src1, fpimm:$src2)), + (F2_sfcmpgt (f32 (TFRI_f fpimm:$src2)), F32:$src1)>; + def: Pat<(i1 (setolt F64:$src1, F64:$src2)), + (F2_dfcmpgt F64:$src2, F64:$src1)>; + def: Pat<(i1 (setolt F64:$src1, fpimm:$src2)), + (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>; +} + +// Unordered lt. +let Predicates = [HasV5T] in { + def: Pat<(i1 (setult F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (F2_sfcmpgt F32:$src2, F32:$src1))>; + def: Pat<(i1 (setult F32:$src1, fpimm:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)), + (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1))>; + def: Pat<(i1 (setult F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (F2_dfcmpgt F64:$src2, F64:$src1))>; + def: Pat<(i1 (setult F64:$src1, fpimm:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)), + (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1))>; +} + +// Ordered le. +let Predicates = [HasV5T] in { + // rs <= rt -> rt >= rs. + def: Pat<(i1 (setole F32:$src1, F32:$src2)), + (F2_sfcmpge F32:$src2, F32:$src1)>; + def: Pat<(i1 (setole F32:$src1, fpimm:$src2)), + (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>; + + // Rss <= Rtt -> Rtt >= Rss. + def: Pat<(i1 (setole F64:$src1, F64:$src2)), + (F2_dfcmpge F64:$src2, F64:$src1)>; + def: Pat<(i1 (setole F64:$src1, fpimm:$src2)), + (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>; +} + +// Unordered le. +let Predicates = [HasV5T] in { +// rs <= rt -> rt >= rs. 
+ def: Pat<(i1 (setule F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (F2_sfcmpge F32:$src2, F32:$src1))>; + def: Pat<(i1 (setule F32:$src1, fpimm:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)), + (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1))>; + def: Pat<(i1 (setule F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (F2_dfcmpge F64:$src2, F64:$src1))>; + def: Pat<(i1 (setule F64:$src1, fpimm:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)), + (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1))>; +} + +// Ordered ne. +let Predicates = [HasV5T] in { + def: Pat<(i1 (setone F32:$src1, F32:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>; + def: Pat<(i1 (setone F64:$src1, F64:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>; + def: Pat<(i1 (setone F32:$src1, fpimm:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>; + def: Pat<(i1 (setone F64:$src1, fpimm:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>; +} + +// Unordered ne. +let Predicates = [HasV5T] in { + def: Pat<(i1 (setune F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>; + def: Pat<(i1 (setune F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>; + def: Pat<(i1 (setune F32:$src1, fpimm:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2))))>; + def: Pat<(i1 (setune F64:$src1, fpimm:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, + (CONST64_Float_Real fpimm:$src2))))>; +} + +// Besides set[o|u][comparions], we also need set[comparisons]. +let Predicates = [HasV5T] in { + // lt. + def: Pat<(i1 (setlt F32:$src1, F32:$src2)), + (F2_sfcmpgt F32:$src2, F32:$src1)>; + def: Pat<(i1 (setlt F32:$src1, fpimm:$src2)), + (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1)>; + def: Pat<(i1 (setlt F64:$src1, F64:$src2)), + (F2_dfcmpgt F64:$src2, F64:$src1)>; + def: Pat<(i1 (setlt F64:$src1, fpimm:$src2)), + (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>; + + // le. + // rs <= rt -> rt >= rs. + def: Pat<(i1 (setle F32:$src1, F32:$src2)), + (F2_sfcmpge F32:$src2, F32:$src1)>; + def: Pat<(i1 (setle F32:$src1, fpimm:$src2)), + (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>; + + // Rss <= Rtt -> Rtt >= Rss. + def: Pat<(i1 (setle F64:$src1, F64:$src2)), + (F2_dfcmpge F64:$src2, F64:$src1)>; + def: Pat<(i1 (setle F64:$src1, fpimm:$src2)), + (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>; + + // ne. 
+ def: Pat<(i1 (setne F32:$src1, F32:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>; + def: Pat<(i1 (setne F64:$src1, F64:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>; + def: Pat<(i1 (setne F32:$src1, fpimm:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>; + def: Pat<(i1 (setne F64:$src1, fpimm:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>; +} + +// F2 convert template classes: +let isFP = 1 in +class F2_RDD_RSS_CONVERT<string mnemonic, bits<3> MinOp, + SDNode Op, PatLeaf RCOut, PatLeaf RCIn, + string chop =""> + : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss), + "$Rdd = "#mnemonic#"($Rss)"#chop, + [(set RCOut:$Rdd, (Op RCIn:$Rss))], "", + S_2op_tc_3or4x_SLOT23> { + bits<5> Rdd; + bits<5> Rss; + + let IClass = 0b1000; + + let Inst{27-21} = 0b0000111; + let Inst{20-16} = Rss; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + } + +let isFP = 1 in +class F2_RDD_RS_CONVERT<string mnemonic, bits<3> MinOp, + SDNode Op, PatLeaf RCOut, PatLeaf RCIn, + string chop =""> + : SInst <(outs DoubleRegs:$Rdd), (ins IntRegs:$Rs), + "$Rdd = "#mnemonic#"($Rs)"#chop, + [(set RCOut:$Rdd, (Op RCIn:$Rs))], "", + S_2op_tc_3or4x_SLOT23> { + bits<5> Rdd; + bits<5> Rs; + + let IClass = 0b1000; + + let Inst{27-21} = 0b0100100; + let Inst{20-16} = Rs; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + } + +let isFP = 1, hasNewValue = 1 in +class F2_RD_RSS_CONVERT<string mnemonic, bits<3> MinOp, + SDNode Op, PatLeaf RCOut, PatLeaf RCIn, + string chop =""> + : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss), + "$Rd = "#mnemonic#"($Rss)"#chop, + [(set RCOut:$Rd, (Op RCIn:$Rss))], "", + S_2op_tc_3or4x_SLOT23> { + bits<5> Rd; + bits<5> Rss; + + let IClass = 0b1000; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MinOp; + let Inst{20-16} = Rss; + let Inst{7-5} = 0b001; + let Inst{4-0} = Rd; + } + +let isFP = 1, hasNewValue = 1 in +class F2_RD_RS_CONVERT<string mnemonic, bits<3> MajOp, bits<3> MinOp, + SDNode Op, PatLeaf RCOut, PatLeaf RCIn, + string chop =""> + : SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs), + "$Rd = "#mnemonic#"($Rs)"#chop, + [(set RCOut:$Rd, (Op RCIn:$Rs))], "", + S_2op_tc_3or4x_SLOT23> { + bits<5> Rd; + bits<5> Rs; + + let IClass = 0b1000; + + let Inst{27-24} = 0b1011; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; + } + +// Convert single precision to double precision and vice-versa. +def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000, + fextend, F64, F32>; + +def F2_conv_df2sf : F2_RD_RSS_CONVERT <"convert_df2sf", 0b000, + fround, F32, F64>; + +// Convert Integer to Floating Point. +def F2_conv_d2sf : F2_RD_RSS_CONVERT <"convert_d2sf", 0b010, + sint_to_fp, F32, I64>; +def F2_conv_ud2sf : F2_RD_RSS_CONVERT <"convert_ud2sf", 0b001, + uint_to_fp, F32, I64>; +def F2_conv_uw2sf : F2_RD_RS_CONVERT <"convert_uw2sf", 0b001, 0b000, + uint_to_fp, F32, I32>; +def F2_conv_w2sf : F2_RD_RS_CONVERT <"convert_w2sf", 0b010, 0b000, + sint_to_fp, F32, I32>; +def F2_conv_d2df : F2_RDD_RSS_CONVERT <"convert_d2df", 0b011, + sint_to_fp, F64, I64>; +def F2_conv_ud2df : F2_RDD_RSS_CONVERT <"convert_ud2df", 0b010, + uint_to_fp, F64, I64>; +def F2_conv_uw2df : F2_RDD_RS_CONVERT <"convert_uw2df", 0b001, + uint_to_fp, F64, I32>; +def F2_conv_w2df : F2_RDD_RS_CONVERT <"convert_w2df", 0b010, + sint_to_fp, F64, I32>; + +// Convert Floating Point to Integer - default. 
+def F2_conv_df2uw_chop : F2_RD_RSS_CONVERT <"convert_df2uw", 0b101, + fp_to_uint, I32, F64, ":chop">; +def F2_conv_df2w_chop : F2_RD_RSS_CONVERT <"convert_df2w", 0b111, + fp_to_sint, I32, F64, ":chop">; +def F2_conv_sf2uw_chop : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b001, + fp_to_uint, I32, F32, ":chop">; +def F2_conv_sf2w_chop : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b001, + fp_to_sint, I32, F32, ":chop">; +def F2_conv_df2d_chop : F2_RDD_RSS_CONVERT <"convert_df2d", 0b110, + fp_to_sint, I64, F64, ":chop">; +def F2_conv_df2ud_chop : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b111, + fp_to_uint, I64, F64, ":chop">; +def F2_conv_sf2d_chop : F2_RDD_RS_CONVERT <"convert_sf2d", 0b110, + fp_to_sint, I64, F32, ":chop">; +def F2_conv_sf2ud_chop : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b101, + fp_to_uint, I64, F32, ":chop">; + +// Convert Floating Point to Integer: non-chopped. +let AddedComplexity = 20, Predicates = [HasV5T, IEEERndNearV5T] in { + def F2_conv_df2d : F2_RDD_RSS_CONVERT <"convert_df2d", 0b000, + fp_to_sint, I64, F64>; + def F2_conv_df2ud : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b001, + fp_to_uint, I64, F64>; + def F2_conv_sf2ud : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b011, + fp_to_uint, I64, F32>; + def F2_conv_sf2d : F2_RDD_RS_CONVERT <"convert_sf2d", 0b100, + fp_to_sint, I64, F32>; + def F2_conv_df2uw : F2_RD_RSS_CONVERT <"convert_df2uw", 0b011, + fp_to_uint, I32, F64>; + def F2_conv_df2w : F2_RD_RSS_CONVERT <"convert_df2w", 0b100, + fp_to_sint, I32, F64>; + def F2_conv_sf2uw : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b000, + fp_to_uint, I32, F32>; + def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000, + fp_to_sint, I32, F32>; +} + +// Fix up radicand. +let isFP = 1, hasNewValue = 1 in +def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs), + "$Rd = sffixupr($Rs)", + [], "" , S_2op_tc_3or4x_SLOT23>, Requires<[HasV5T]> { + bits<5> Rd; + bits<5> Rs; + + let IClass = 0b1000; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = Rs; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rd; + } + +// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. +let Predicates = [HasV5T] in { + def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>; + def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>; + def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>; + def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>; +} + +// F2_sffma: Floating-point fused multiply add. +let isFP = 1, hasNewValue = 1 in +class T_sfmpy_acc <bit isSub, bit isLib> + : MInst<(outs IntRegs:$Rx), + (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt), + "$Rx "#!if(isSub, "-=","+=")#" sfmpy($Rs, $Rt)"#!if(isLib, ":lib",""), + [], "$dst2 = $Rx" , M_tc_3_SLOT23 > , + Requires<[HasV5T]> { + bits<5> Rx; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-21} = 0b1111000; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = Rt; + let Inst{7} = 0b1; + let Inst{6} = isLib; + let Inst{5} = isSub; + let Inst{4-0} = Rx; + } + +def F2_sffma: T_sfmpy_acc <0, 0>; +def F2_sffms: T_sfmpy_acc <1, 0>; +def F2_sffma_lib: T_sfmpy_acc <0, 1>; +def F2_sffms_lib: T_sfmpy_acc <1, 1>; + +def : Pat <(f32 (fma F32:$src2, F32:$src3, F32:$src1)), + (F2_sffma F32:$src1, F32:$src2, F32:$src3)>; + +// Floating-point fused multiply add w/ additional scaling (2**pu). 
+let isFP = 1, hasNewValue = 1 in +def F2_sffma_sc: MInst < + (outs IntRegs:$Rx), + (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt, PredRegs:$Pu), + "$Rx += sfmpy($Rs, $Rt, $Pu):scale" , + [], "$dst2 = $Rx" , M_tc_3_SLOT23 > , + Requires<[HasV5T]> { + bits<5> Rx; + bits<5> Rs; + bits<5> Rt; + bits<2> Pu; + + let IClass = 0b1110; + + let Inst{27-21} = 0b1111011; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = Rt; + let Inst{7} = 0b1; + let Inst{6-5} = Pu; + let Inst{4-0} = Rx; + } + +let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 3, + isPseudo = 1, InputType = "imm" in +def MUX_ir_f : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, f32Ext:$src3), + "$dst = mux($src1, $src2, #$src3)", + [(set F32:$dst, (f32 (select I1:$src1, F32:$src2, fpimm:$src3)))]>, + Requires<[HasV5T]>; + +let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 2, + isPseudo = 1, InputType = "imm" in +def MUX_ri_f : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, f32Ext:$src2, IntRegs:$src3), + "$dst = mux($src1, #$src2, $src3)", + [(set F32:$dst, (f32 (select I1:$src1, fpimm:$src2, F32:$src3)))]>, + Requires<[HasV5T]>; + +def: Pat<(select I1:$src1, F32:$src2, F32:$src3), + (C2_mux I1:$src1, F32:$src2, F32:$src3)>, + Requires<[HasV5T]>; + +def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4), + (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src4, F32:$src3)>, + Requires<[HasV5T]>; + +def: Pat<(select I1:$src1, F64:$src2, F64:$src3), + (C2_vmux I1:$src1, F64:$src2, F64:$src3)>, + Requires<[HasV5T]>; + +def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4), + (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>, + Requires<[HasV5T]>; + +// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) +// => r0 = MUX_ir_f(p0, #i, r1) +def: Pat<(select (not I1:$src1), fpimm:$src2, F32:$src3), + (MUX_ir_f I1:$src1, F32:$src3, fpimm:$src2)>, + Requires<[HasV5T]>; + +// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) +// => r0 = MUX_ri_f(p0, r1, #i) +def: Pat<(select (not I1:$src1), F32:$src2, fpimm:$src3), + (MUX_ri_f I1:$src1, fpimm:$src3, F32:$src2)>, + Requires<[HasV5T]>; + +def: Pat<(i32 (fp_to_sint F64:$src1)), + (LoReg (F2_conv_df2d_chop F64:$src1))>, + Requires<[HasV5T]>; + +//===----------------------------------------------------------------------===// +// :natural forms of vasrh and vasrhub insns +//===----------------------------------------------------------------------===// +// S5_asrhub_rnd_sat: Vector arithmetic shift right by immediate with round, +// saturate, and pack. +let Defs = [USR_OVF], hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_ASRHUB<bit isSat> + : SInst <(outs IntRegs:$Rd), + (ins DoubleRegs:$Rss, u4Imm:$u4), + "$Rd = vasrhub($Rss, #$u4):"#!if(isSat, "sat", "raw"), + [], "", S_2op_tc_2_SLOT23>, + Requires<[HasV5T]> { + bits<5> Rd; + bits<5> Rss; + bits<4> u4; + + let IClass = 0b1000; + + let Inst{27-21} = 0b1000011; + let Inst{20-16} = Rss; + let Inst{13-12} = 0b00; + let Inst{11-8} = u4; + let Inst{7-6} = 0b10; + let Inst{5} = isSat; + let Inst{4-0} = Rd; + } + +def S5_asrhub_rnd_sat : T_ASRHUB <0>; +def S5_asrhub_sat : T_ASRHUB <1>; + +let isAsmParserOnly = 1 in +def S5_asrhub_rnd_sat_goodsyntax + : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, u4Imm:$u4), + "$Rd = vasrhub($Rss, #$u4):rnd:sat">, Requires<[HasV5T]>; + +// S5_vasrhrnd: Vector arithmetic shift right by immediate with round. 
+let hasSideEffects = 0 in +def S5_vasrhrnd : SInst <(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, u4Imm:$u4), + "$Rdd = vasrh($Rss, #$u4):raw">, + Requires<[HasV5T]> { + bits<5> Rdd; + bits<5> Rss; + bits<4> u4; + + let IClass = 0b1000; + + let Inst{27-21} = 0b0000001; + let Inst{20-16} = Rss; + let Inst{13-12} = 0b00; + let Inst{11-8} = u4; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rdd; + } + +let isAsmParserOnly = 1 in +def S5_vasrhrnd_goodsyntax + : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, u4Imm:$u4), + "$Rdd = vasrh($Rss,#$u4):rnd">, Requires<[HasV5T]>; + +// Floating point reciprocal square root approximation +let Uses = [USR], isPredicateLate = 1, isFP = 1, + hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, + validSubTargets = HasV5SubT in +def F2_sfinvsqrta: SInst < + (outs IntRegs:$Rd, PredRegs:$Pe), + (ins IntRegs:$Rs), + "$Rd, $Pe = sfinvsqrta($Rs)" > , + Requires<[HasV5T]> { + bits<5> Rd; + bits<2> Pe; + bits<5> Rs; + + let IClass = 0b1000; + + let Inst{27-21} = 0b1011111; + let Inst{20-16} = Rs; + let Inst{7} = 0b0; + let Inst{6-5} = Pe; + let Inst{4-0} = Rd; + } + +// Complex multiply 32x16 +let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in { + def M4_cmpyi_whc : T_S3op_8<"cmpyiwh", 0b101, 1, 1, 1, 1>; + def M4_cmpyr_whc : T_S3op_8<"cmpyrwh", 0b111, 1, 1, 1, 1>; +} + +// Classify floating-point value +let isFP = 1 in + def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>; + +let isFP = 1 in +def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5), + "$Pd = dfclass($Rss, #$u5)", + [], "" , ALU64_tc_2early_SLOT23 > , Requires<[HasV5T]> { + bits<2> Pd; + bits<5> Rss; + bits<5> u5; + + let IClass = 0b1101; + let Inst{27-21} = 0b1100100; + let Inst{20-16} = Rss; + let Inst{12-10} = 0b000; + let Inst{9-5} = u5; + let Inst{4-3} = 0b10; + let Inst{1-0} = Pd; + } + +// Instructions to create floating point constant +class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg> + : ALU64Inst<(outs RC:$dst), (ins u10Imm:$src), + "$dst = "#mnemonic#"(#$src)"#!if(isNeg, ":neg", ":pos"), + [], "", ALU64_tc_3x_SLOT23>, Requires<[HasV5T]> { + bits<5> dst; + bits<10> src; + + let IClass = 0b1101; + let Inst{27-24} = RegType; + let Inst{23} = 0b0; + let Inst{22} = isNeg; + let Inst{21} = src{9}; + let Inst{13-5} = src{8-0}; + let Inst{4-0} = dst; + } + +let hasNewValue = 1, opNewValue = 0 in { +def F2_sfimm_p : T_fimm <"sfmake", IntRegs, 0b0110, 0>; +def F2_sfimm_n : T_fimm <"sfmake", IntRegs, 0b0110, 1>; +} + +def F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>; +def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>; + +def : Pat <(fabs (f32 IntRegs:$src1)), + (S2_clrbit_i (f32 IntRegs:$src1), 31)>, + Requires<[HasV5T]>; + +def : Pat <(fneg (f32 IntRegs:$src1)), + (S2_togglebit_i (f32 IntRegs:$src1), 31)>, + Requires<[HasV5T]>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td new file mode 100644 index 0000000..897ada0 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td @@ -0,0 +1,2241 @@ +//=- HexagonInstrInfoV60.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V60 instructions in TableGen format. 
+// +//===----------------------------------------------------------------------===// + + +// Vector store +let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in +{ + class VSTInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = CVI_VM_ST, + IType type = TypeCVI_VM_ST> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>, OpcodeHexagon; + +} + +// Vector load +let Predicates = [HasV60T, UseHVX] in +let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in + class V6_LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = CVI_VM_LD, + IType type = TypeCVI_VM_LD> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>; + +let Predicates = [HasV60T, UseHVX] in +let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in +class V6_STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = CVI_VM_ST, + IType type = TypeCVI_VM_ST> +: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>; + +//===----------------------------------------------------------------------===// +// Vector loads with base + immediate offset +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, accessSize = Vector64Access in +class T_vload_ai<string asmStr> + : V6_LDInst <(outs VectorRegs:$dst), (ins IntRegs:$src1, s4_6Imm:$src2), + asmStr>; + +let isCodeGenOnly = 1, addrMode = BaseImmOffset, accessSize = Vector128Access in +class T_vload_ai_128B<string asmStr> + : V6_LDInst <(outs VectorRegs128B:$dst), (ins IntRegs:$src1, s4_7Imm:$src2), + asmStr>; + +let isCVLoadable = 1, hasNewValue = 1 in { + def V6_vL32b_ai : T_vload_ai <"$dst = vmem($src1+#$src2)">, + V6_vL32b_ai_enc; + def V6_vL32b_nt_ai : T_vload_ai <"$dst = vmem($src1+#$src2):nt">, + V6_vL32b_nt_ai_enc; + // 128B + def V6_vL32b_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2)">, + V6_vL32b_ai_128B_enc; + def V6_vL32b_nt_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2):nt">, + V6_vL32b_nt_ai_128B_enc; +} + +let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU, hasNewValue = 1 in { + def V6_vL32Ub_ai : T_vload_ai <"$dst = vmemu($src1+#$src2)">, + V6_vL32Ub_ai_enc; + def V6_vL32Ub_ai_128B : T_vload_ai_128B <"$dst = vmemu($src1+#$src2)">, + V6_vL32Ub_ai_128B_enc; +} + +let Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD, isCVLoad = 1, + hasNewValue = 1 in { + def V6_vL32b_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2)">, + V6_vL32b_cur_ai_enc; + def V6_vL32b_nt_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2):nt">, + V6_vL32b_nt_cur_ai_enc; + // 128B + def V6_vL32b_cur_ai_128B : T_vload_ai_128B + <"$dst.cur = vmem($src1+#$src2)">, + V6_vL32b_cur_ai_128B_enc; + def V6_vL32b_nt_cur_ai_128B : T_vload_ai_128B + <"$dst.cur = vmem($src1+#$src2):nt">, + V6_vL32b_nt_cur_ai_128B_enc; +} + + +let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD, hasNewValue = 1 in { + def V6_vL32b_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2)">, + V6_vL32b_tmp_ai_enc; + def V6_vL32b_nt_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2):nt">, + V6_vL32b_nt_tmp_ai_enc; + // 128B + def V6_vL32b_tmp_ai_128B : T_vload_ai_128B + <"$dst.tmp = vmem($src1+#$src2)">, + V6_vL32b_tmp_ai_128B_enc; + def V6_vL32b_nt_tmp_ai_128B : T_vload_ai_128B + <"$dst.tmp = vmem($src1+#$src2)">, + V6_vL32b_nt_tmp_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Vector stores with 
base + immediate offset - unconditional +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, accessSize = Vector64Access in +class T_vstore_ai <string mnemonic, string baseOp, Operand ImmOp, + RegisterClass RC, bit isNT> + : V6_STInst <(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + mnemonic#"($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3">, NewValueRel { + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_ai_64B <string mnemonic, string baseOp, bit isNT = 0> + : T_vstore_ai <mnemonic, baseOp, s4_6Imm, VectorRegs, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_ai_128B <string mnemonic, string baseOp, bit isNT = 0> + : T_vstore_ai <mnemonic, baseOp#"128B", s4_7Imm, VectorRegs128B, isNT>; + +let isNVStorable = 1 in { + def V6_vS32b_ai : T_vstore_ai_64B <"vmem", "vS32b_ai">, + V6_vS32b_ai_enc; + def V6_vS32b_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai">, + V6_vS32b_ai_128B_enc; +} + +let isNVStorable = 1, isNonTemporal = 1 in { + def V6_vS32b_nt_ai : T_vstore_ai_64B <"vmem", "vS32b_ai", 1>, + V6_vS32b_nt_ai_enc; + def V6_vS32b_nt_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai", 1>, + V6_vS32b_nt_ai_128B_enc; +} + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_ai : T_vstore_ai_64B <"vmemu", "vs32Ub_ai">, + V6_vS32Ub_ai_enc; + def V6_vS32Ub_ai_128B : T_vstore_ai_128B <"vmemu", "vs32Ub_ai">, + V6_vS32Ub_ai_128B_enc; +} +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - unconditional new +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, isNewValue = 1, opNewValue = 2, isNVStore = 1, + Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST in +class T_vstore_new_ai <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT> + : V6_STInst <(outs ), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + "vmem($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3.new">, NewValueRel { + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_ai_64B <string baseOp, bit isNT = 0> + : T_vstore_new_ai <baseOp, s4_6Imm, VectorRegs, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_ai_128B <string baseOp, bit isNT = 0> + : T_vstore_new_ai <baseOp#"128B", s4_7Imm, VectorRegs128B, isNT>; + +def V6_vS32b_new_ai : T_vstore_new_ai_64B <"vS32b_ai">, V6_vS32b_new_ai_enc; +def V6_vS32b_new_ai_128B : T_vstore_new_ai_128B <"vS32b_ai">, + V6_vS32b_new_ai_128B_enc; + +let isNonTemporal = 1 in { + def V6_vS32b_nt_new_ai : T_vstore_new_ai_64B<"vS32b_ai", 1>, + V6_vS32b_nt_new_ai_enc; + def V6_vS32b_nt_new_ai_128B : T_vstore_new_ai_128B<"vS32b_ai", 1>, + V6_vS32b_nt_new_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - conditional +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, isPredicated = 1 in +class T_vstore_pred_ai <string mnemonic, string baseOp, Operand ImmOp, + RegisterClass RC, bit isPredNot = 0, bit isNT = 0> + : V6_STInst <(outs), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) " + #mnemonic#"($src2+#$src3)"#!if(isNT, ":nt", "")#" = $src4">, NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access 
in +class T_vstore_pred_ai_64B <string mnemonic, string baseOp, + bit isPredNot = 0, bit isNT = 0> + : T_vstore_pred_ai <mnemonic, baseOp, s4_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_pred_ai_128B <string mnemonic, string baseOp, + bit isPredNot = 0, bit isNT = 0> + : T_vstore_pred_ai <mnemonic, baseOp#"128B", s4_7Imm, VectorRegs128B, + isPredNot, isNT>; + +let isNVStorable = 1 in { + def V6_vS32b_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai">, + V6_vS32b_pred_ai_enc; + def V6_vS32b_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1>, + V6_vS32b_npred_ai_enc; + // 128B + def V6_vS32b_pred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai">, + V6_vS32b_pred_ai_128B_enc; + def V6_vS32b_npred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai", 1>, + V6_vS32b_npred_ai_128B_enc; +} +let isNVStorable = 1, isNonTemporal = 1 in { + def V6_vS32b_nt_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 0, 1>, + V6_vS32b_nt_pred_ai_enc; + def V6_vS32b_nt_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1, 1>, + V6_vS32b_nt_npred_ai_enc; + // 128B + def V6_vS32b_nt_pred_ai_128B : T_vstore_pred_ai_128B + <"vmem", "vS32b_ai", 0, 1>, + V6_vS32b_nt_pred_ai_128B_enc; + def V6_vS32b_nt_npred_ai_128B : T_vstore_pred_ai_128B + <"vmem", "vS32b_ai", 1, 1>, + V6_vS32b_nt_npred_ai_128B_enc; +} + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai">, + V6_vS32Ub_pred_ai_enc; + def V6_vS32Ub_npred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai", 1>, + V6_vS32Ub_npred_ai_enc; + // 128B + def V6_vS32Ub_pred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai">, + V6_vS32Ub_pred_ai_128B_enc; + def V6_vS32Ub_npred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai", 1>, + V6_vS32Ub_npred_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - byte-enabled aligned +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset in +class T_vstore_qpred_ai <Operand ImmOp, RegisterClass RC, + bit isPredNot = 0, bit isNT = 0> + : V6_STInst <(outs), + (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)" + #!if(isNT, ":nt", "")#" = $src4"> { + let isPredicatedFalse = isPredNot; +} + +let accessSize = Vector64Access in +class T_vstore_qpred_ai_64B <bit isPredNot = 0, bit isNT = 0> + : T_vstore_qpred_ai <s4_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_qpred_ai_128B <bit isPredNot = 0, bit isNT = 0> + : T_vstore_qpred_ai <s4_7Imm, VectorRegs128B, isPredNot, isNT>; + +def V6_vS32b_qpred_ai : T_vstore_qpred_ai_64B, V6_vS32b_qpred_ai_enc; +def V6_vS32b_nqpred_ai : T_vstore_qpred_ai_64B <1>, + V6_vS32b_nqpred_ai_enc; +def V6_vS32b_nt_qpred_ai : T_vstore_qpred_ai_64B <0, 1>, + V6_vS32b_nt_qpred_ai_enc; +def V6_vS32b_nt_nqpred_ai : T_vstore_qpred_ai_64B <1, 1>, + V6_vS32b_nt_nqpred_ai_enc; +// 128B +def V6_vS32b_qpred_ai_128B : T_vstore_qpred_ai_128B, V6_vS32b_qpred_ai_128B_enc; +def V6_vS32b_nqpred_ai_128B : T_vstore_qpred_ai_128B<1>, + V6_vS32b_nqpred_ai_128B_enc; +def V6_vS32b_nt_qpred_ai_128B : T_vstore_qpred_ai_128B<0, 1>, + V6_vS32b_nt_qpred_ai_128B_enc; +def V6_vS32b_nt_nqpred_ai_128B : T_vstore_qpred_ai_128B<1, 1>, + V6_vS32b_nt_nqpred_ai_128B_enc; + + 
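// Editor's note (illustrative, not part of the upstream diff): the
// "byte-enabled" store classes above take a vector predicate register (Qv)
// rather than a scalar predicate, so the asm strings they build look like
//   if (q0) vmem(r1+#2) = v3            // V6_vS32b_qpred_ai
//   if (!q0) vmem(r1+#2):nt = v3        // V6_vS32b_nt_nqpred_ai
// Assuming the usual HVX semantics, each set bit in Qv enables one byte of
// the store; a scalar sketch of the effect for a 64-byte vector would be:
//   for (i = 0; i < 64; ++i)
//     if (q[i]) dst[i] = v[i];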
+//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - conditional new +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, isPredicated = 1, isNewValue = 1, opNewValue = 3, + isNVStore = 1, Type = TypeCVI_VM_NEW_ST, Itinerary = CVI_VM_NEW_ST in +class T_vstore_new_pred_ai <string baseOp, Operand ImmOp, RegisterClass RC, + bit isPredNot, bit isNT> + : V6_STInst <(outs), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)" + #!if(isNT, ":nt", "")#" = $src4.new">, NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_pred_ai_64B <string baseOp, bit isPredNot = 0, bit isNT = 0> + : T_vstore_new_pred_ai <baseOp, s4_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_pred_ai_128B <string baseOp, bit isPredNot = 0, bit isNT = 0> + : T_vstore_new_pred_ai <baseOp#"128B", s4_7Imm, VectorRegs128B, + isPredNot, isNT>; + + +def V6_vS32b_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai">, + V6_vS32b_new_pred_ai_enc; +def V6_vS32b_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1>, + V6_vS32b_new_npred_ai_enc; +// 128B +def V6_vS32b_new_pred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai">, + V6_vS32b_new_pred_ai_128B_enc; +def V6_vS32b_new_npred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai", 1>, + V6_vS32b_new_npred_ai_128B_enc; +let isNonTemporal = 1 in { + def V6_vS32b_nt_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 0, 1>, + V6_vS32b_nt_new_pred_ai_enc; + def V6_vS32b_nt_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1, 1>, + V6_vS32b_nt_new_npred_ai_enc; + // 128B + def V6_vS32b_nt_new_pred_ai_128B : T_vstore_new_pred_ai_128B + <"vS32b_ai", 0, 1>, + V6_vS32b_nt_new_pred_ai_128B_enc; + def V6_vS32b_nt_new_npred_ai_128B : T_vstore_new_pred_ai_128B + <"vS32b_ai", 1, 1>, + V6_vS32b_nt_new_npred_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector loads with immediate offset. 
+//===----------------------------------------------------------------------===// +let addrMode = PostInc, hasNewValue = 1 in +class T_vload_pi<string asmStr, Operand ImmOp, RegisterClass RC> + : V6_LDInst <(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$src2), asmStr, [], + "$src1 = $_dst_">; + +let accessSize = Vector64Access in +class T_vload_pi_64B <string asmStr> + : T_vload_pi <asmStr, s3_6Imm, VectorRegs>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vload_pi_128B <string asmStr> + : T_vload_pi <asmStr, s3_7Imm, VectorRegs128B>; + +let isCVLoadable = 1 in { + def V6_vL32b_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2)">, + V6_vL32b_pi_enc; + def V6_vL32b_nt_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2):nt">, + V6_vL32b_nt_pi_enc; + // 128B + def V6_vL32b_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2)">, + V6_vL32b_pi_128B_enc; + def V6_vL32b_nt_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2):nt">, + V6_vL32b_nt_pi_128B_enc; +} + +let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in { + def V6_vL32Ub_pi : T_vload_pi_64B <"$dst = vmemu($src1++#$src2)">, + V6_vL32Ub_pi_enc; + // 128B + def V6_vL32Ub_pi_128B : T_vload_pi_128B <"$dst = vmemu($src1++#$src2)">, + V6_vL32Ub_pi_128B_enc; +} + +let isCVLoad = 1, Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD in { + def V6_vL32b_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2)">, + V6_vL32b_cur_pi_enc; + def V6_vL32b_nt_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2):nt">, + V6_vL32b_nt_cur_pi_enc; + // 128B + def V6_vL32b_cur_pi_128B : T_vload_pi_128B + <"$dst.cur = vmem($src1++#$src2)">, + V6_vL32b_cur_pi_128B_enc; + def V6_vL32b_nt_cur_pi_128B : T_vload_pi_128B + <"$dst.cur = vmem($src1++#$src2):nt">, + V6_vL32b_nt_cur_pi_128B_enc; +} + +let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in { + def V6_vL32b_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2)">, + V6_vL32b_tmp_pi_enc; + def V6_vL32b_nt_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2):nt">, + V6_vL32b_nt_tmp_pi_enc; + //128B + def V6_vL32b_tmp_pi_128B : T_vload_pi_128B + <"$dst.tmp = vmem($src1++#$src2)">, + V6_vL32b_tmp_pi_128B_enc; + def V6_vL32b_nt_tmp_pi_128B : T_vload_pi_128B + <"$dst.tmp = vmem($src1++#$src2):nt">, + V6_vL32b_nt_tmp_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector stores with immediate offset. 
+//===----------------------------------------------------------------------===// +let addrMode = PostInc in +class T_vstore_pi <string mnemonic, string baseOp, Operand ImmOp, + RegisterClass RC, bit isNT> + : V6_STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + mnemonic#"($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3", [], + "$src1 = $_dst_">, NewValueRel; + +let accessSize = Vector64Access in +class T_vstore_pi_64B <string mnemonic, string baseOp, bit isNT = 0> + : T_vstore_pi <mnemonic, baseOp, s3_6Imm, VectorRegs, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_pi_128B <string mnemonic, string baseOp, bit isNT = 0> + : T_vstore_pi <mnemonic, baseOp, s3_7Imm, VectorRegs128B, isNT>; + +let isNVStorable = 1 in { + def V6_vS32b_pi : T_vstore_pi_64B <"vmem", "vS32b_pi">, V6_vS32b_pi_enc; + def V6_vS32b_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi">, + V6_vS32b_pi_128B_enc; +} + +let isNVStorable = 1 , isNonTemporal = 1 in { + def V6_vS32b_nt_pi : T_vstore_pi_64B <"vmem", "vS32b_pi", 1>, + V6_vS32b_nt_pi_enc; + def V6_vS32b_nt_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi", 1>, + V6_vS32b_nt_pi_128B_enc; +} + + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pi : T_vstore_pi_64B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pi_enc; + def V6_vS32Ub_pi_128B : T_vstore_pi_128B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment unconditional .new vector stores with immediate offset. +//===----------------------------------------------------------------------===// +let addrMode = PostInc, isNVStore = 1 in +let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1, + opNewValue = 3, isNVStore = 1 in +class T_vstore_new_pi <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT> + : V6_STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + "vmem($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [], + "$src1 = $_dst_">, NewValueRel { + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_pi_64B <string baseOp, bit isNT = 0> + : T_vstore_new_pi <baseOp, s3_6Imm, VectorRegs, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_pi_128B <string baseOp, bit isNT = 0> + : T_vstore_new_pi <baseOp#"128B", s3_7Imm, VectorRegs128B, isNT>; + + +def V6_vS32b_new_pi : T_vstore_new_pi_64B <"vS32b_pi">, + V6_vS32b_new_pi_enc; +def V6_vS32b_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi">, + V6_vS32b_new_pi_128B_enc; + +let isNonTemporal = 1 in { + def V6_vS32b_nt_new_pi : T_vstore_new_pi_64B <"vS32b_pi", 1>, + V6_vS32b_nt_new_pi_enc; + def V6_vS32b_nt_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi", 1>, + V6_vS32b_nt_new_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment conditional vector stores with immediate offset +//===----------------------------------------------------------------------===// +let isPredicated = 1, addrMode = PostInc in +class T_vstore_pred_pi <string mnemonic, string baseOp, Operand ImmOp, + RegisterClass RC, bit isPredNot, bit isNT> + : V6_STInst<(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++#$src3)" + #!if(isNT, ":nt", "")#" = $src4", [], + "$src2 = $_dst_">, NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + 
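+// Illustrative syntax for the class above (registers and immediate chosen
+// arbitrarily):
+//   if (p0) vmem(r0++#1) = v1
+//   if (!p0) vmem(r0++#1):nt = v1
+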
+let accessSize = Vector64Access in +class T_vstore_pred_pi_64B <string mnemonic, string baseOp, + bit isPredNot = 0, bit isNT = 0> + : T_vstore_pred_pi <mnemonic, baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_pred_pi_128B <string mnemonic, string baseOp, + bit isPredNot = 0, bit isNT = 0> + : T_vstore_pred_pi <mnemonic, baseOp#"128B", s3_7Imm, VectorRegs128B, + isPredNot, isNT>; + +let isNVStorable = 1 in { + def V6_vS32b_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi">, + V6_vS32b_pred_pi_enc; + def V6_vS32b_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1>, + V6_vS32b_npred_pi_enc; + // 128B + def V6_vS32b_pred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi">, + V6_vS32b_pred_pi_128B_enc; + def V6_vS32b_npred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi", 1>, + V6_vS32b_npred_pi_128B_enc; +} +let isNVStorable = 1, isNonTemporal = 1 in { + def V6_vS32b_nt_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 0, 1>, + V6_vS32b_nt_pred_pi_enc; + def V6_vS32b_nt_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1, 1>, + V6_vS32b_nt_npred_pi_enc; + // 128B + def V6_vS32b_nt_pred_pi_128B : T_vstore_pred_pi_128B + <"vmem", "vS32b_pi", 0, 1>, + V6_vS32b_nt_pred_pi_128B_enc; + def V6_vS32b_nt_npred_pi_128B : T_vstore_pred_pi_128B + <"vmem", "vS32b_pi", 1, 1>, + V6_vS32b_nt_npred_pi_128B_enc; +} + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pred_pi_enc; + def V6_vS32Ub_npred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi", 1>, + V6_vS32Ub_npred_pi_enc; + // 128B + def V6_vS32Ub_pred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pred_pi_128B_enc; + def V6_vS32Ub_npred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi", 1>, + V6_vS32Ub_npred_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector stores with immediate offset - byte-enabled aligned +//===----------------------------------------------------------------------===// +let addrMode = PostInc in +class T_vstore_qpred_pi <Operand ImmOp, RegisterClass RC, bit isPredNot = 0, + bit isNT = 0> + : V6_STInst <(outs IntRegs:$_dst_), + (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)" + #!if(isNT, ":nt", "")#" = $src4", [], + "$src2 = $_dst_">; + +let accessSize = Vector64Access in +class T_vstore_qpred_pi_64B <bit isPredNot = 0, bit isNT = 0> + : T_vstore_qpred_pi <s3_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_qpred_pi_128B <bit isPredNot = 0, bit isNT = 0> + : T_vstore_qpred_pi <s3_7Imm, VectorRegs128B, isPredNot, isNT>; + +def V6_vS32b_qpred_pi : T_vstore_qpred_pi_64B, V6_vS32b_qpred_pi_enc; +def V6_vS32b_nqpred_pi : T_vstore_qpred_pi_64B <1>, V6_vS32b_nqpred_pi_enc; +// 128B +def V6_vS32b_qpred_pi_128B : T_vstore_qpred_pi_128B, + V6_vS32b_qpred_pi_128B_enc; +def V6_vS32b_nqpred_pi_128B : T_vstore_qpred_pi_128B<1>, + V6_vS32b_nqpred_pi_128B_enc; + +let isNonTemporal = 1 in { + def V6_vS32b_nt_qpred_pi : T_vstore_qpred_pi_64B <0, 1>, + V6_vS32b_nt_qpred_pi_enc; + def V6_vS32b_nt_nqpred_pi : T_vstore_qpred_pi_64B <1, 1>, + V6_vS32b_nt_nqpred_pi_enc; + // 128B + def V6_vS32b_nt_qpred_pi_128B : T_vstore_qpred_pi_128B<0, 1>, + V6_vS32b_nt_qpred_pi_128B_enc; + def V6_vS32b_nt_nqpred_pi_128B : T_vstore_qpred_pi_128B<1, 1>, + 
V6_vS32b_nt_nqpred_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment conditional .new vector stores with immediate offset +//===----------------------------------------------------------------------===// +let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1, + isNewValue = 1, opNewValue = 4, addrMode = PostInc, isNVStore = 1 in +class T_vstore_new_pred_pi <string baseOp, Operand ImmOp, RegisterClass RC, + bit isPredNot, bit isNT> + : V6_STInst <(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)" + #!if(isNT, ":nt", "")#" = $src4.new", [], + "$src2 = $_dst_"> , NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_pred_pi_64B <string baseOp, bit isPredNot = 0, bit isNT = 0> + : T_vstore_new_pred_pi <baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_pred_pi_128B <string baseOp, bit isPredNot = 0, bit isNT = 0> + : T_vstore_new_pred_pi <baseOp#"128B", s3_7Imm, VectorRegs128B, + isPredNot, isNT>; + +def V6_vS32b_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi">, + V6_vS32b_new_pred_pi_enc; +def V6_vS32b_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1>, + V6_vS32b_new_npred_pi_enc; +// 128B +def V6_vS32b_new_pred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi">, + V6_vS32b_new_pred_pi_128B_enc; +def V6_vS32b_new_npred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi", 1>, + V6_vS32b_new_npred_pi_128B_enc; +let isNonTemporal = 1 in { + def V6_vS32b_nt_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 0, 1>, + V6_vS32b_nt_new_pred_pi_enc; + def V6_vS32b_nt_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1, 1>, + V6_vS32b_nt_new_npred_pi_enc; + // 128B + def V6_vS32b_nt_new_pred_pi_128B : T_vstore_new_pred_pi_128B + <"vS32b_pi", 0, 1>, + V6_vS32b_nt_new_pred_pi_128B_enc; + def V6_vS32b_nt_new_npred_pi_128B : T_vstore_new_pred_pi_128B + <"vS32b_pi", 1, 1>, + V6_vS32b_nt_new_npred_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector loads with register offset +//===----------------------------------------------------------------------===// +let hasNewValue = 1 in +class T_vload_ppu<string asmStr> + : V6_LDInst <(outs VectorRegs:$dst, IntRegs:$_dst_), + (ins IntRegs:$src1, ModRegs:$src2), asmStr, [], + "$src1 = $_dst_">, NewValueRel; + +let isCVLoadable = 1 in { + def V6_vL32b_ppu : T_vload_ppu <"$dst = vmem($src1++$src2)">, + V6_vL32b_ppu_enc; + def V6_vL32b_nt_ppu : T_vload_ppu <"$dst = vmem($src1++$src2):nt">, + V6_vL32b_nt_ppu_enc; +} + +let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in +def V6_vL32Ub_ppu : T_vload_ppu <"$dst = vmemu($src1++$src2)">, + V6_vL32Ub_ppu_enc; + +let isCVLoad = 1, Itinerary = CVI_VM_CUR_LD, Type = TypeCVI_VM_CUR_LD in { + def V6_vL32b_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2)">, + V6_vL32b_cur_ppu_enc; + def V6_vL32b_nt_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2):nt">, + V6_vL32b_nt_cur_ppu_enc; +} + +let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in { + def V6_vL32b_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2)">, + V6_vL32b_tmp_ppu_enc; + def V6_vL32b_nt_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2):nt">, + V6_vL32b_nt_tmp_ppu_enc; +} + 
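+// Illustrative syntax for the register-offset post-increment loads above,
+// with arbitrary registers (m0 is a modifier register from ModRegs):
+//   v0 = vmem(r1++m0)
+//   v0.cur = vmem(r1++m0):nt
+//   v0.tmp = vmem(r1++m0)
+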
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with register offset
+//===----------------------------------------------------------------------===//
+class T_vstore_ppu <string mnemonic, bit isNT = 0>
+  : V6_STInst <(outs IntRegs:$_dst_),
+        (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
+      mnemonic#"($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3", [],
+      "$src1 = $_dst_">, NewValueRel;
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
+  def V6_vS32b_ppu : T_vstore_ppu <"vmem">,
+                     V6_vS32b_ppu_enc;
+  let isNonTemporal = 1, BaseOpcode = "vS32b_ppu" in
+  def V6_vS32b_nt_ppu : T_vstore_ppu <"vmem", 1>,
+                        V6_vS32b_nt_ppu_enc;
+}
+
+let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in
+def V6_vS32Ub_ppu : T_vstore_ppu <"vmemu">, V6_vS32Ub_ppu_enc;
+
+//===----------------------------------------------------------------------===//
+// Post increment .new vector stores with register offset
+//===----------------------------------------------------------------------===//
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1,
+    opNewValue = 3, isNVStore = 1 in
+class T_vstore_new_ppu <bit isNT = 0>
+  : V6_STInst <(outs IntRegs:$_dst_),
+        (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
+      "vmem($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [],
+      "$src1 = $_dst_">, NewValueRel;
+
+let BaseOpcode = "vS32b_ppu" in
+def V6_vS32b_new_ppu : T_vstore_new_ppu, V6_vS32b_new_ppu_enc;
+
+let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in
+def V6_vS32b_nt_new_ppu : T_vstore_new_ppu<1>, V6_vS32b_nt_new_ppu_enc;
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional vector stores with register offset
+//===----------------------------------------------------------------------===//
+let isPredicated = 1 in
+class T_vstore_pred_ppu <string mnemonic, bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst<(outs IntRegs:$_dst_),
+        (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
+      "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++$src3)"
+        #!if(isNT, ":nt", "")#" = $src4", [],
+      "$src2 = $_dst_">, NewValueRel {
+  let isPredicatedFalse = isPredNot;
+}
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
+  def V6_vS32b_pred_ppu : T_vstore_pred_ppu<"vmem">, V6_vS32b_pred_ppu_enc;
+  def V6_vS32b_npred_ppu: T_vstore_pred_ppu<"vmem", 1>, V6_vS32b_npred_ppu_enc;
+}
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in {
+  def V6_vS32b_nt_pred_ppu : T_vstore_pred_ppu <"vmem", 0, 1>,
+                             V6_vS32b_nt_pred_ppu_enc;
+  def V6_vS32b_nt_npred_ppu : T_vstore_pred_ppu <"vmem", 1, 1>,
+                              V6_vS32b_nt_npred_ppu_enc;
+}
+
+let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU,
+    Type = TypeCVI_VM_STU in {
+  def V6_vS32Ub_pred_ppu : T_vstore_pred_ppu <"vmemu">,
+                           V6_vS32Ub_pred_ppu_enc;
+  def V6_vS32Ub_npred_ppu : T_vstore_pred_ppu <"vmemu", 1>,
+                            V6_vS32Ub_npred_ppu_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with register offset - byte-enabled aligned
+//===----------------------------------------------------------------------===//
+class T_vstore_qpred_ppu <bit isPredNot = 0, bit isNT = 0>
+  : V6_STInst <(outs IntRegs:$_dst_),
+        (ins VecPredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
+      "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)"
+        #!if(isNT, ":nt", "")#" = $src4", [],
+      "$src2 = $_dst_">,
NewValueRel; + +def V6_vS32b_qpred_ppu : T_vstore_qpred_ppu, V6_vS32b_qpred_ppu_enc; +def V6_vS32b_nqpred_ppu : T_vstore_qpred_ppu<1>, V6_vS32b_nqpred_ppu_enc; +def V6_vS32b_nt_qpred_ppu : T_vstore_qpred_ppu<0, 1>, + V6_vS32b_nt_qpred_ppu_enc; +def V6_vS32b_nt_nqpred_ppu : T_vstore_qpred_ppu<1, 1>, + V6_vS32b_nt_nqpred_ppu_enc; + +//===----------------------------------------------------------------------===// +// Post increment conditional .new vector stores with register offset +//===----------------------------------------------------------------------===// +let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1, + isNewValue = 1, opNewValue = 4, isNVStore = 1 in +class T_vstore_new_pred_ppu <bit isPredNot = 0, bit isNT = 0> + : V6_STInst <(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4), + "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)" + #!if(isNT, ":nt", "")#" = $src4.new", [], + "$src2 = $_dst_">, NewValueRel { + let isPredicatedFalse = isPredNot; +} + +let BaseOpcode = "vS32b_ppu" in { + def V6_vS32b_new_pred_ppu : T_vstore_new_pred_ppu, + V6_vS32b_new_pred_ppu_enc; + def V6_vS32b_new_npred_ppu : T_vstore_new_pred_ppu<1>, + V6_vS32b_new_npred_ppu_enc; +} + +let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in { +def V6_vS32b_nt_new_pred_ppu : T_vstore_new_pred_ppu<0, 1>, + V6_vS32b_nt_new_pred_ppu_enc; +def V6_vS32b_nt_new_npred_ppu : T_vstore_new_pred_ppu<1, 1>, + V6_vS32b_nt_new_npred_ppu_enc; +} + +let isPseudo = 1, validSubTargets = HasV60SubT in +class STrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC>: + VSTInst<(outs), (ins IntRegs:$addr, ImmOp:$off, RC:$src), + #mnemonic#"($addr+#$off) = $src", []>; + +def STrivv_indexed: STrivv_template<"vvmem", s4_6Imm, VecDblRegs>, + Requires<[HasV60T, UseHVXSgl]>; +def STrivv_indexed_128B: STrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>, + Requires<[HasV60T, UseHVXDbl]>; + +multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> { + def : Pat<(store (VTSgl VecDblRegs:$src1), IntRegs:$addr), + (STrivv_indexed IntRegs:$addr, #0, (VTSgl VecDblRegs:$src1))>, + Requires<[UseHVXSgl]>; + + def : Pat<(store (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), + (STrivv_indexed_128B IntRegs:$addr, #0, + (VTDbl VecDblRegs128B:$src1))>, + Requires<[UseHVXDbl]>; +} + +defm : STrivv_pats <v128i8, v256i8>; +defm : STrivv_pats <v64i16, v128i16>; +defm : STrivv_pats <v32i32, v64i32>; +defm : STrivv_pats <v16i64, v32i64>; + + +multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { + // Aligned stores + def : Pat<(store (VTSgl VectorRegs:$src1), IntRegs:$addr), + (V6_vS32b_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; + + // 128B Aligned stores + def : Pat<(store (VTDbl VectorRegs128B:$src1), IntRegs:$addr), + (V6_vS32b_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; + + // Fold Add R+IFF into vector store. + let AddedComplexity = 10 in + def : Pat<(store (VTSgl VectorRegs:$src1), + (add IntRegs:$src2, s4_6ImmPred:$offset)), + (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset, + (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; + + // Fold Add R+IFF into vector store 128B. 
+ let AddedComplexity = 10 in + def : Pat<(store (VTDbl VectorRegs128B:$src1), + (add IntRegs:$src2, s4_7ImmPred:$offset)), + (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset, + (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; +} + +defm : vS32b_ai_pats <v64i8, v128i8>; +defm : vS32b_ai_pats <v32i16, v64i16>; +defm : vS32b_ai_pats <v16i32, v32i32>; +defm : vS32b_ai_pats <v8i64, v16i64>; + +let isPseudo = 1, validSubTargets = HasV60SubT in +class LDrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC> + : V6_LDInst <(outs RC:$dst), (ins IntRegs:$addr, ImmOp:$off), + "$dst="#mnemonic#"($addr+#$off)", + []>, + Requires<[HasV60T,UseHVXSgl]>; + +def LDrivv_indexed: LDrivv_template<"vvmem", s4_6Imm, VecDblRegs>; +def LDrivv_indexed_128B: LDrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>; + +multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> { + def : Pat < (VTSgl (load IntRegs:$addr)), + (LDrivv_indexed IntRegs:$addr, #0) >, + Requires<[UseHVXSgl]>; + + def : Pat < (VTDbl (load IntRegs:$addr)), + (LDrivv_indexed_128B IntRegs:$addr, #0) >, + Requires<[UseHVXDbl]>; +} + +defm : LDrivv_pats <v128i8, v256i8>; +defm : LDrivv_pats <v64i16, v128i16>; +defm : LDrivv_pats <v32i32, v64i32>; +defm : LDrivv_pats <v16i64, v32i64>; + +multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { + // Aligned loads + def : Pat < (VTSgl (load IntRegs:$addr)), + (V6_vL32b_ai IntRegs:$addr, #0) >, + Requires<[UseHVXSgl]>; + + // 128B Load + def : Pat < (VTDbl (load IntRegs:$addr)), + (V6_vL32b_ai_128B IntRegs:$addr, #0) >, + Requires<[UseHVXDbl]>; + + // Fold Add R+IFF into vector load. + let AddedComplexity = 10 in + def : Pat<(VTDbl (load (add IntRegs:$src2, s4_7ImmPred:$offset))), + (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>, + Requires<[UseHVXDbl]>; + + let AddedComplexity = 10 in + def : Pat<(VTSgl (load (add IntRegs:$src2, s4_6ImmPred:$offset))), + (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>, + Requires<[UseHVXSgl]>; +} + +defm : vL32b_ai_pats <v64i8, v128i8>; +defm : vL32b_ai_pats <v32i16, v64i16>; +defm : vL32b_ai_pats <v16i32, v32i32>; +defm : vL32b_ai_pats <v8i64, v16i64>; + +// Store vector predicate pseudo. +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, + isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { +def STriq_pred_V6 : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VecPredRegs:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; + +def STriq_pred_vec_V6 : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; + +def STriq_pred_V6_128B : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VecPredRegs128B:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; + +def STriq_pred_vec_V6_128B : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +// Load vector predicate pseudo. 
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def LDriq_pred_V6 : LDInst<(outs VecPredRegs:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def LDriq_pred_vec_V6 : LDInst<(outs VectorRegs:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def LDriq_pred_V6_128B : LDInst<(outs VecPredRegs128B:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +def LDriq_pred_vec_V6_128B : LDInst<(outs VectorRegs128B:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +// Store vector pseudo. +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, + isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { +def STriv_pseudo_V6 : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def STriv_pseudo_V6_128B : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, + isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { +def STrivv_pseudo_V6 : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VecDblRegs:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def STrivv_pseudo_V6_128B : STInst<(outs), + (ins IntRegs:$base, s32Imm:$offset, VecDblRegs128B:$src1), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +// Load vector pseudo. 
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def LDriv_pseudo_V6 : LDInst<(outs VectorRegs:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def LDriv_pseudo_V6_128B : LDInst<(outs VectorRegs128B:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def LDrivv_pseudo_V6 : LDInst<(outs VecDblRegs:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def LDrivv_pseudo_V6_128B : LDInst<(outs VecDblRegs128B:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +class VSELInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = "", InstrItinClass itin = CVI_VA_DV, + IType type = TypeCVI_VA_DV> + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>; + +let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def VSelectPseudo_V6 : VSELInst<(outs VectorRegs:$dst), + (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def VSelectDblPseudo_V6 : VSELInst<(outs VecDblRegs:$dst), + (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +} + +def : Pat <(v16i32 (selectcc (i32 IntRegs:$lhs), (i32 IntRegs:$rhs), + (v16i32 VectorRegs:$tval), + (v16i32 VectorRegs:$fval), SETEQ)), + (v16i32 (VSelectPseudo_V6 (i32 (C2_cmpeq (i32 IntRegs:$lhs), + (i32 IntRegs:$rhs))), + (v16i32 VectorRegs:$tval), + (v16i32 VectorRegs:$fval)))>; + + +let hasNewValue = 1 in +class T_vmpy <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2), + asmString >; + +multiclass T_vmpy <string asmString, RegisterClass RCout, + RegisterClass RCin> { + def NAME : T_vmpy <asmString, RCout, RCin>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_vmpy <asmString, !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +multiclass T_vmpy_VV <string asmString>: + T_vmpy <asmString, VectorRegs, VectorRegs>; + +multiclass T_vmpy_WW <string asmString>: + T_vmpy <asmString, VecDblRegs, VecDblRegs>; + +multiclass T_vmpy_VW <string asmString>: + T_vmpy <asmString, VectorRegs, VecDblRegs>; + +multiclass T_vmpy_WV <string asmString>: + T_vmpy <asmString, VecDblRegs, VectorRegs>; + +defm V6_vtmpyb :T_vmpy_WW<"$dst.h = vtmpy($src1.b,$src2.b)">, V6_vtmpyb_enc; +defm V6_vtmpybus :T_vmpy_WW<"$dst.h = vtmpy($src1.ub,$src2.b)">, V6_vtmpybus_enc; +defm V6_vdsaduh :T_vmpy_WW<"$dst.uw = vdsad($src1.uh,$src2.uh)">, V6_vdsaduh_enc; +defm V6_vmpybus :T_vmpy_WV<"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybus_enc; +defm V6_vmpabus :T_vmpy_WW<"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabus_enc; +defm V6_vmpahb :T_vmpy_WW<"$dst.w = vmpa($src1.h,$src2.b)">, V6_vmpahb_enc; +defm V6_vmpyh :T_vmpy_WV<"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyh_enc; +defm V6_vmpyuh :T_vmpy_WV<"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuh_enc; +defm V6_vmpyiwh :T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_enc; +defm V6_vtmpyhb :T_vmpy_WW<"$dst.w = 
vtmpy($src1.h,$src2.b)">, V6_vtmpyhb_enc; +defm V6_vmpyub :T_vmpy_WV<"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyub_enc; + +let Itinerary = CVI_VX_LONG, Type = TypeCVI_VX in +defm V6_vmpyihb :T_vmpy_VV<"$dst.h = vmpyi($src1.h,$src2.b)">, V6_vmpyihb_enc; + +defm V6_vdmpybus_dv : + T_vmpy_WW <"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_dv_enc; +defm V6_vdmpyhsusat : + T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.uh):sat">, V6_vdmpyhsusat_enc; +defm V6_vdmpyhsuisat : + T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.uh,#1):sat">, V6_vdmpyhsuisat_enc; +defm V6_vdmpyhsat : + T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhsat_enc; +defm V6_vdmpyhisat : + T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhisat_enc; +defm V6_vdmpyhb_dv : + T_vmpy_WW <"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_dv_enc; +defm V6_vmpyhss : + T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:sat">, V6_vmpyhss_enc; +defm V6_vmpyhsrs : + T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhsrs_enc; + +let Itinerary = CVI_VP, Type = TypeCVI_VP in +defm V6_vror : T_vmpy_VV <"$dst = vror($src1,$src2)">, V6_vror_enc; + +let Itinerary = CVI_VX, Type = TypeCVI_VX in { +defm V6_vdmpyhb : T_vmpy_VV<"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_enc; +defm V6_vrmpybus : T_vmpy_VV<"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybus_enc; +defm V6_vdmpybus : T_vmpy_VV<"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_enc; +defm V6_vmpyiwb : T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.b)">, V6_vmpyiwb_enc; +defm V6_vrmpyub : T_vmpy_VV<"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyub_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vasrw : T_vmpy_VV <"$dst.w = vasr($src1.w,$src2)">, V6_vasrw_enc; +defm V6_vasrh : T_vmpy_VV <"$dst.h = vasr($src1.h,$src2)">, V6_vasrh_enc; +defm V6_vaslw : T_vmpy_VV <"$dst.w = vasl($src1.w,$src2)">, V6_vaslw_enc; +defm V6_vaslh : T_vmpy_VV <"$dst.h = vasl($src1.h,$src2)">, V6_vaslh_enc; +defm V6_vlsrw : T_vmpy_VV <"$dst.uw = vlsr($src1.uw,$src2)">, V6_vlsrw_enc; +defm V6_vlsrh : T_vmpy_VV <"$dst.uh = vlsr($src1.uh,$src2)">, V6_vlsrh_enc; +} + +let hasNewValue = 1 in +class T_HVX_alu <string asmString, InstrItinClass itin, + RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2), + asmString >{ + let Itinerary = itin; + let Type = !cast<IType>("Type"#itin); +} + +multiclass T_HVX_alu <string asmString, RegisterClass RCout, + RegisterClass RCin, InstrItinClass itin> { + def NAME : T_HVX_alu <asmString, itin, RCout, RCin>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_alu <asmString, itin, + !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +multiclass T_HVX_alu_VV <string asmString>: + T_HVX_alu <asmString, VectorRegs, VectorRegs, CVI_VA>; + +multiclass T_HVX_alu_WW <string asmString>: + T_HVX_alu <asmString, VecDblRegs, VecDblRegs, CVI_VA_DV>; + +multiclass T_HVX_alu_WV <string asmString>: + T_HVX_alu <asmString, VecDblRegs, VectorRegs, CVI_VX_DV>; + + +let Itinerary = CVI_VX, Type = TypeCVI_VX in { +defm V6_vrmpyubv : + T_HVX_alu_VV <"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyubv_enc; +defm V6_vrmpybv : + T_HVX_alu_VV <"$dst.w = vrmpy($src1.b,$src2.b)">, V6_vrmpybv_enc; +defm V6_vrmpybusv : + T_HVX_alu_VV <"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybusv_enc; +defm V6_vabsdiffub : + T_HVX_alu_VV <"$dst.ub = vabsdiff($src1.ub,$src2.ub)">, V6_vabsdiffub_enc; +defm V6_vabsdiffh : + T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.h,$src2.h)">, V6_vabsdiffh_enc; +defm V6_vabsdiffuh 
: + T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.uh,$src2.uh)">, V6_vabsdiffuh_enc; +defm V6_vabsdiffw : + T_HVX_alu_VV <"$dst.uw = vabsdiff($src1.w,$src2.w)">, V6_vabsdiffw_enc; +} + +let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in { +defm V6_vdmpyhvsat : + T_HVX_alu_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhvsat_enc; +defm V6_vmpyhvsrs : + T_HVX_alu_VV<"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhvsrs_enc; +defm V6_vmpyih : + T_HVX_alu_VV <"$dst.h = vmpyi($src1.h,$src2.h)">, V6_vmpyih_enc; +} + +defm V6_vand : + T_HVX_alu_VV <"$dst = vand($src1,$src2)">, V6_vand_enc; +defm V6_vor : + T_HVX_alu_VV <"$dst = vor($src1,$src2)">, V6_vor_enc; +defm V6_vxor : + T_HVX_alu_VV <"$dst = vxor($src1,$src2)">, V6_vxor_enc; +defm V6_vaddw : + T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_enc; +defm V6_vaddubsat : + T_HVX_alu_VV <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_enc; +defm V6_vadduhsat : + T_HVX_alu_VV <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_enc; +defm V6_vaddhsat : + T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_enc; +defm V6_vaddwsat : + T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_enc; +defm V6_vsubb : + T_HVX_alu_VV <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_enc; +defm V6_vsubh : + T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_enc; +defm V6_vsubw : + T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_enc; +defm V6_vsububsat : + T_HVX_alu_VV <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_enc; +defm V6_vsubuhsat : + T_HVX_alu_VV <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_enc; +defm V6_vsubhsat : + T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_enc; +defm V6_vsubwsat : + T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_enc; +defm V6_vavgub : + T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub)">, V6_vavgub_enc; +defm V6_vavguh : + T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh)">, V6_vavguh_enc; +defm V6_vavgh : + T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h)">, V6_vavgh_enc; +defm V6_vavgw : + T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w)">, V6_vavgw_enc; +defm V6_vnavgub : + T_HVX_alu_VV <"$dst.b = vnavg($src1.ub,$src2.ub)">, V6_vnavgub_enc; +defm V6_vnavgh : + T_HVX_alu_VV <"$dst.h = vnavg($src1.h,$src2.h)">, V6_vnavgh_enc; +defm V6_vnavgw : + T_HVX_alu_VV <"$dst.w = vnavg($src1.w,$src2.w)">, V6_vnavgw_enc; +defm V6_vavgubrnd : + T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub):rnd">, V6_vavgubrnd_enc; +defm V6_vavguhrnd : + T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh):rnd">, V6_vavguhrnd_enc; +defm V6_vavghrnd : + T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h):rnd">, V6_vavghrnd_enc; +defm V6_vavgwrnd : + T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w):rnd">, V6_vavgwrnd_enc; + +defm V6_vmpybv : + T_HVX_alu_WV <"$dst.h = vmpy($src1.b,$src2.b)">, V6_vmpybv_enc; +defm V6_vmpyubv : + T_HVX_alu_WV <"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyubv_enc; +defm V6_vmpybusv : + T_HVX_alu_WV <"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybusv_enc; +defm V6_vmpyhv : + T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyhv_enc; +defm V6_vmpyuhv : + T_HVX_alu_WV <"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuhv_enc; +defm V6_vmpyhus : + T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.uh)">, V6_vmpyhus_enc; +defm V6_vaddubh : + T_HVX_alu_WV <"$dst.h = vadd($src1.ub,$src2.ub)">, V6_vaddubh_enc; +defm V6_vadduhw : + T_HVX_alu_WV <"$dst.w = vadd($src1.uh,$src2.uh)">, V6_vadduhw_enc; +defm V6_vaddhw : + T_HVX_alu_WV <"$dst.w = 
vadd($src1.h,$src2.h)">, V6_vaddhw_enc; +defm V6_vsububh : + T_HVX_alu_WV <"$dst.h = vsub($src1.ub,$src2.ub)">, V6_vsububh_enc; +defm V6_vsubuhw : + T_HVX_alu_WV <"$dst.w = vsub($src1.uh,$src2.uh)">, V6_vsubuhw_enc; +defm V6_vsubhw : + T_HVX_alu_WV <"$dst.w = vsub($src1.h,$src2.h)">, V6_vsubhw_enc; + +defm V6_vaddb_dv : + T_HVX_alu_WW <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_dv_enc; +defm V6_vaddh_dv : + T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_dv_enc; +defm V6_vaddw_dv : + T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_dv_enc; +defm V6_vaddubsat_dv : + T_HVX_alu_WW <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_dv_enc; +defm V6_vadduhsat_dv : + T_HVX_alu_WW <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_dv_enc; +defm V6_vaddhsat_dv : + T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_dv_enc; +defm V6_vaddwsat_dv : + T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_dv_enc; +defm V6_vsubb_dv : + T_HVX_alu_WW <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_dv_enc; +defm V6_vsubh_dv : + T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_dv_enc; +defm V6_vsubw_dv : + T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_dv_enc; +defm V6_vsububsat_dv : + T_HVX_alu_WW <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_dv_enc; +defm V6_vsubuhsat_dv : + T_HVX_alu_WW <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_dv_enc; +defm V6_vsubhsat_dv : + T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_dv_enc; +defm V6_vsubwsat_dv : + T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_dv_enc; + +let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV in { +defm V6_vmpabusv : + T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabusv_enc; +defm V6_vmpabuuv : + T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.ub)">, V6_vmpabuuv_enc; +} + +let isAccumulator = 1, hasNewValue = 1 in +class T_HVX_vmpyacc <string asmString, InstrItinClass itin, RegisterClass RCout, + RegisterClass RCin1, RegisterClass RCin2> + : CVI_VA_Resource1 <(outs RCout:$dst), + (ins RCout:$_src_, RCin1:$src1, RCin2:$src2), asmString, + [], "$dst = $_src_" > { + let Itinerary = itin; + let Type = !cast<IType>("Type"#itin); +} + +multiclass T_HVX_vmpyacc_both <string asmString, RegisterClass RCout, + RegisterClass RCin1, RegisterClass RCin2, InstrItinClass itin > { + def NAME : T_HVX_vmpyacc <asmString, itin, RCout, RCin1, RCin2>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vmpyacc <asmString, itin, + !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin1#"128B"), + !cast<RegisterClass>(RCin2# + !if(!eq (!cast<string>(RCin2), "IntRegs"), "", "128B"))>; +} + +multiclass T_HVX_vmpyacc_VVR <string asmString>: + T_HVX_vmpyacc_both <asmString, VectorRegs, VectorRegs, IntRegs, CVI_VX>; + +multiclass T_HVX_vmpyacc_VWR <string asmString>: + T_HVX_vmpyacc_both <asmString, VectorRegs, VecDblRegs, IntRegs, CVI_VX_DV>; + +multiclass T_HVX_vmpyacc_WVR <string asmString>: + T_HVX_vmpyacc_both <asmString, VecDblRegs, VectorRegs, IntRegs, CVI_VX_DV>; + +multiclass T_HVX_vmpyacc_WWR <string asmString>: + T_HVX_vmpyacc_both <asmString, VecDblRegs, VecDblRegs, IntRegs, CVI_VX_DV>; + +multiclass T_HVX_vmpyacc_VVV <string asmString>: + T_HVX_vmpyacc_both <asmString, VectorRegs, VectorRegs, VectorRegs, CVI_VX_DV>; + +multiclass T_HVX_vmpyacc_WVV <string asmString>: + T_HVX_vmpyacc_both <asmString, VecDblRegs, VectorRegs, VectorRegs, CVI_VX_DV>; + + +defm V6_vtmpyb_acc : + T_HVX_vmpyacc_WWR <"$dst.h += 
vtmpy($src1.b,$src2.b)">, + V6_vtmpyb_acc_enc; +defm V6_vtmpybus_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.ub,$src2.b)">, + V6_vtmpybus_acc_enc; +defm V6_vtmpyhb_acc : + T_HVX_vmpyacc_WWR <"$dst.w += vtmpy($src1.h,$src2.b)">, + V6_vtmpyhb_acc_enc; +defm V6_vdmpyhb_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.b)">, + V6_vdmpyhb_acc_enc; +defm V6_vrmpyub_acc : + T_HVX_vmpyacc_VVR <"$dst.uw += vrmpy($src1.ub,$src2.ub)">, + V6_vrmpyub_acc_enc; +defm V6_vrmpybus_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vrmpy($src1.ub,$src2.b)">, + V6_vrmpybus_acc_enc; +defm V6_vdmpybus_acc : + T_HVX_vmpyacc_VVR <"$dst.h += vdmpy($src1.ub,$src2.b)">, + V6_vdmpybus_acc_enc; +defm V6_vdmpybus_dv_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vdmpy($src1.ub,$src2.b)">, + V6_vdmpybus_dv_acc_enc; +defm V6_vdmpyhsuisat_acc : + T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.uh,#1):sat">, + V6_vdmpyhsuisat_acc_enc; +defm V6_vdmpyhisat_acc : + T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.h):sat">, + V6_vdmpyhisat_acc_enc; +defm V6_vdmpyhb_dv_acc : + T_HVX_vmpyacc_WWR <"$dst.w += vdmpy($src1.h,$src2.b)">, + V6_vdmpyhb_dv_acc_enc; +defm V6_vmpybus_acc : + T_HVX_vmpyacc_WVR <"$dst.h += vmpy($src1.ub,$src2.b)">, + V6_vmpybus_acc_enc; +defm V6_vmpabus_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vmpa($src1.ub,$src2.b)">, + V6_vmpabus_acc_enc; +defm V6_vmpahb_acc : + T_HVX_vmpyacc_WWR <"$dst.w += vmpa($src1.h,$src2.b)">, + V6_vmpahb_acc_enc; +defm V6_vmpyhsat_acc : + T_HVX_vmpyacc_WVR <"$dst.w += vmpy($src1.h,$src2.h):sat">, + V6_vmpyhsat_acc_enc; +defm V6_vmpyuh_acc : + T_HVX_vmpyacc_WVR <"$dst.uw += vmpy($src1.uh,$src2.uh)">, + V6_vmpyuh_acc_enc; +defm V6_vmpyiwb_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vmpyi($src1.w,$src2.b)">, + V6_vmpyiwb_acc_enc; +defm V6_vdsaduh_acc : + T_HVX_vmpyacc_WWR <"$dst.uw += vdsad($src1.uh,$src2.uh)">, + V6_vdsaduh_acc_enc; +defm V6_vmpyihb_acc : + T_HVX_vmpyacc_VVR <"$dst.h += vmpyi($src1.h,$src2.b)">, + V6_vmpyihb_acc_enc; +defm V6_vmpyub_acc : + T_HVX_vmpyacc_WVR <"$dst.uh += vmpy($src1.ub,$src2.ub)">, + V6_vmpyub_acc_enc; + +let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in { +defm V6_vdmpyhsusat_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.uh):sat">, + V6_vdmpyhsusat_acc_enc; +defm V6_vdmpyhsat_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.h):sat">, + V6_vdmpyhsat_acc_enc; +defm V6_vmpyiwh_acc : T_HVX_vmpyacc_VVR + <"$dst.w += vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_acc_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vaslw_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vasl($src1.w,$src2)">, V6_vaslw_acc_enc; +defm V6_vasrw_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vasr($src1.w,$src2)">, V6_vasrw_acc_enc; +} + +defm V6_vdmpyhvsat_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vdmpy($src1.h,$src2.h):sat">, + V6_vdmpyhvsat_acc_enc; +defm V6_vmpybusv_acc : + T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.ub,$src2.b)">, + V6_vmpybusv_acc_enc; +defm V6_vmpybv_acc : + T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.b,$src2.b)">, V6_vmpybv_acc_enc; +defm V6_vmpyhus_acc : + T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.uh)">, V6_vmpyhus_acc_enc; +defm V6_vmpyhv_acc : + T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.h)">, V6_vmpyhv_acc_enc; +defm V6_vmpyiewh_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.h)">, + V6_vmpyiewh_acc_enc; +defm V6_vmpyiewuh_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.uh)">, + V6_vmpyiewuh_acc_enc; +defm V6_vmpyih_acc : + T_HVX_vmpyacc_VVV <"$dst.h += vmpyi($src1.h,$src2.h)">, V6_vmpyih_acc_enc; +defm V6_vmpyowh_rnd_sacc : + 
T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:rnd:sat:shift">, + V6_vmpyowh_rnd_sacc_enc; +defm V6_vmpyowh_sacc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:sat:shift">, + V6_vmpyowh_sacc_enc; +defm V6_vmpyubv_acc : + T_HVX_vmpyacc_WVV <"$dst.uh += vmpy($src1.ub,$src2.ub)">, + V6_vmpyubv_acc_enc; +defm V6_vmpyuhv_acc : + T_HVX_vmpyacc_WVV <"$dst.uw += vmpy($src1.uh,$src2.uh)">, + V6_vmpyuhv_acc_enc; +defm V6_vrmpybusv_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.ub,$src2.b)">, + V6_vrmpybusv_acc_enc; +defm V6_vrmpybv_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.b,$src2.b)">, V6_vrmpybv_acc_enc; +defm V6_vrmpyubv_acc : + T_HVX_vmpyacc_VVV <"$dst.uw += vrmpy($src1.ub,$src2.ub)">, + V6_vrmpyubv_acc_enc; + + +class T_HVX_vcmp <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1 <(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, RCin:$src2), asmString, + [], "$dst = $_src_" > { + let Itinerary = CVI_VA; + let Type = TypeCVI_VA; +} + +multiclass T_HVX_vcmp <string asmString> { + def NAME : T_HVX_vcmp <asmString, VecPredRegs, VectorRegs>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vcmp <asmString, VecPredRegs128B, VectorRegs128B>; +} + +defm V6_veqb_and : + T_HVX_vcmp <"$dst &= vcmp.eq($src1.b,$src2.b)">, V6_veqb_and_enc; +defm V6_veqh_and : + T_HVX_vcmp <"$dst &= vcmp.eq($src1.h,$src2.h)">, V6_veqh_and_enc; +defm V6_veqw_and : + T_HVX_vcmp <"$dst &= vcmp.eq($src1.w,$src2.w)">, V6_veqw_and_enc; +defm V6_vgtb_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_and_enc; +defm V6_vgth_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.h,$src2.h)">, V6_vgth_and_enc; +defm V6_vgtw_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_and_enc; +defm V6_vgtub_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_and_enc; +defm V6_vgtuh_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_and_enc; +defm V6_vgtuw_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_and_enc; +defm V6_veqb_or : + T_HVX_vcmp <"$dst |= vcmp.eq($src1.b,$src2.b)">, V6_veqb_or_enc; +defm V6_veqh_or : + T_HVX_vcmp <"$dst |= vcmp.eq($src1.h,$src2.h)">, V6_veqh_or_enc; +defm V6_veqw_or : + T_HVX_vcmp <"$dst |= vcmp.eq($src1.w,$src2.w)">, V6_veqw_or_enc; +defm V6_vgtb_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_or_enc; +defm V6_vgth_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.h,$src2.h)">, V6_vgth_or_enc; +defm V6_vgtw_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_or_enc; +defm V6_vgtub_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_or_enc; +defm V6_vgtuh_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_or_enc; +defm V6_vgtuw_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_or_enc; +defm V6_veqb_xor : + T_HVX_vcmp <"$dst ^= vcmp.eq($src1.b,$src2.b)">, V6_veqb_xor_enc; +defm V6_veqh_xor : + T_HVX_vcmp <"$dst ^= vcmp.eq($src1.h,$src2.h)">, V6_veqh_xor_enc; +defm V6_veqw_xor : + T_HVX_vcmp <"$dst ^= vcmp.eq($src1.w,$src2.w)">, V6_veqw_xor_enc; +defm V6_vgtb_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_xor_enc; +defm V6_vgth_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.h,$src2.h)">, V6_vgth_xor_enc; +defm V6_vgtw_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_xor_enc; +defm V6_vgtub_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_xor_enc; +defm V6_vgtuh_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_xor_enc; +defm V6_vgtuw_xor : + 
T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_xor_enc; + +defm V6_vminub : + T_HVX_alu_VV <"$dst.ub = vmin($src1.ub,$src2.ub)">, V6_vminub_enc; +defm V6_vminuh : + T_HVX_alu_VV <"$dst.uh = vmin($src1.uh,$src2.uh)">, V6_vminuh_enc; +defm V6_vminh : + T_HVX_alu_VV <"$dst.h = vmin($src1.h,$src2.h)">, V6_vminh_enc; +defm V6_vminw : + T_HVX_alu_VV <"$dst.w = vmin($src1.w,$src2.w)">, V6_vminw_enc; +defm V6_vmaxub : + T_HVX_alu_VV <"$dst.ub = vmax($src1.ub,$src2.ub)">, V6_vmaxub_enc; +defm V6_vmaxuh : + T_HVX_alu_VV <"$dst.uh = vmax($src1.uh,$src2.uh)">, V6_vmaxuh_enc; +defm V6_vmaxh : + T_HVX_alu_VV <"$dst.h = vmax($src1.h,$src2.h)">, V6_vmaxh_enc; +defm V6_vmaxw : + T_HVX_alu_VV <"$dst.w = vmax($src1.w,$src2.w)">, V6_vmaxw_enc; +defm V6_vshuffeb : + T_HVX_alu_VV <"$dst.b = vshuffe($src1.b,$src2.b)">, V6_vshuffeb_enc; +defm V6_vshuffob : + T_HVX_alu_VV <"$dst.b = vshuffo($src1.b,$src2.b)">, V6_vshuffob_enc; +defm V6_vshufeh : + T_HVX_alu_VV <"$dst.h = vshuffe($src1.h,$src2.h)">, V6_vshufeh_enc; +defm V6_vshufoh : + T_HVX_alu_VV <"$dst.h = vshuffo($src1.h,$src2.h)">, V6_vshufoh_enc; + +let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in { +defm V6_vmpyowh_rnd : + T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:rnd:sat">, + V6_vmpyowh_rnd_enc; +defm V6_vmpyiewuh : + T_HVX_alu_VV <"$dst.w = vmpyie($src1.w,$src2.uh)">, V6_vmpyiewuh_enc; +defm V6_vmpyewuh : + T_HVX_alu_VV <"$dst.w = vmpye($src1.w,$src2.uh)">, V6_vmpyewuh_enc; +defm V6_vmpyowh : + T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:sat">, V6_vmpyowh_enc; +defm V6_vmpyiowh : + T_HVX_alu_VV <"$dst.w = vmpyio($src1.w,$src2.h)">, V6_vmpyiowh_enc; +} +let Itinerary = CVI_VX, Type = TypeCVI_VX in +defm V6_vmpyieoh : + T_HVX_alu_VV <"$dst.w = vmpyieo($src1.h,$src2.h)">, V6_vmpyieoh_enc; + +let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in { +defm V6_vshufoeh : + T_HVX_alu_WV <"$dst.h = vshuffoe($src1.h,$src2.h)">, V6_vshufoeh_enc; +defm V6_vshufoeb : + T_HVX_alu_WV <"$dst.b = vshuffoe($src1.b,$src2.b)">, V6_vshufoeb_enc; +} + +let isRegSequence = 1, Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in +defm V6_vcombine : + T_HVX_alu_WV <"$dst = vcombine($src1,$src2)">, V6_vcombine_enc; + +def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, + SDTCisSubVecOfVec<1, 0>]>; + +def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; + +def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs), + (v16i32 VectorRegs:$Vt))), + (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs), + (v32i32 VecDblRegs:$Vt))), + (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; + +let Itinerary = CVI_VINLANESAT, Type = TypeCVI_VINLANESAT in { +defm V6_vsathub : + T_HVX_alu_VV <"$dst.ub = vsat($src1.h,$src2.h)">, V6_vsathub_enc; +defm V6_vsatwh : + T_HVX_alu_VV <"$dst.h = vsat($src1.w,$src2.w)">, V6_vsatwh_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vroundwh : + T_HVX_alu_VV <"$dst.h = vround($src1.w,$src2.w):sat">, V6_vroundwh_enc; +defm V6_vroundwuh : + T_HVX_alu_VV <"$dst.uh = vround($src1.w,$src2.w):sat">, V6_vroundwuh_enc; +defm V6_vroundhb : + T_HVX_alu_VV <"$dst.b = vround($src1.h,$src2.h):sat">, V6_vroundhb_enc; +defm V6_vroundhub : + T_HVX_alu_VV <"$dst.ub = vround($src1.h,$src2.h):sat">, V6_vroundhub_enc; +defm V6_vasrwv : + T_HVX_alu_VV <"$dst.w = vasr($src1.w,$src2.w)">, V6_vasrwv_enc; +defm V6_vlsrwv : + T_HVX_alu_VV <"$dst.w = vlsr($src1.w,$src2.w)">, V6_vlsrwv_enc; +defm V6_vlsrhv : + 
T_HVX_alu_VV <"$dst.h = vlsr($src1.h,$src2.h)">, V6_vlsrhv_enc; +defm V6_vasrhv : + T_HVX_alu_VV <"$dst.h = vasr($src1.h,$src2.h)">, V6_vasrhv_enc; +defm V6_vaslwv : + T_HVX_alu_VV <"$dst.w = vasl($src1.w,$src2.w)">, V6_vaslwv_enc; +defm V6_vaslhv : + T_HVX_alu_VV <"$dst.h = vasl($src1.h,$src2.h)">, V6_vaslhv_enc; +} + +defm V6_vaddb : + T_HVX_alu_VV <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_enc; +defm V6_vaddh : + T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_enc; + +let Itinerary = CVI_VP, Type = TypeCVI_VP in { +defm V6_vdelta : + T_HVX_alu_VV <"$dst = vdelta($src1,$src2)">, V6_vdelta_enc; +defm V6_vrdelta : + T_HVX_alu_VV <"$dst = vrdelta($src1,$src2)">, V6_vrdelta_enc; +defm V6_vdealb4w : + T_HVX_alu_VV <"$dst.b = vdeale($src1.b,$src2.b)">, V6_vdealb4w_enc; +defm V6_vpackeb : + T_HVX_alu_VV <"$dst.b = vpacke($src1.h,$src2.h)">, V6_vpackeb_enc; +defm V6_vpackeh : + T_HVX_alu_VV <"$dst.h = vpacke($src1.w,$src2.w)">, V6_vpackeh_enc; +defm V6_vpackhub_sat : + T_HVX_alu_VV <"$dst.ub = vpack($src1.h,$src2.h):sat">, V6_vpackhub_sat_enc; +defm V6_vpackhb_sat : + T_HVX_alu_VV <"$dst.b = vpack($src1.h,$src2.h):sat">, V6_vpackhb_sat_enc; +defm V6_vpackwuh_sat : + T_HVX_alu_VV <"$dst.uh = vpack($src1.w,$src2.w):sat">, V6_vpackwuh_sat_enc; +defm V6_vpackwh_sat : + T_HVX_alu_VV <"$dst.h = vpack($src1.w,$src2.w):sat">, V6_vpackwh_sat_enc; +defm V6_vpackob : + T_HVX_alu_VV <"$dst.b = vpacko($src1.h,$src2.h)">, V6_vpackob_enc; +defm V6_vpackoh : + T_HVX_alu_VV <"$dst.h = vpacko($src1.w,$src2.w)">, V6_vpackoh_enc; +} + +let hasNewValue = 1, hasSideEffects = 0 in +class T_HVX_condALU <string asmString, RegisterClass RC1, RegisterClass RC2> + : CVI_VA_Resource1 <(outs RC2:$dst), + (ins RC1:$src1, RC2:$_src_, RC2:$src2), asmString, + [], "$dst = $_src_" > { + let Itinerary = CVI_VA; + let Type = TypeCVI_VA; +} + +multiclass T_HVX_condALU <string asmString> { + def NAME : T_HVX_condALU <asmString, VecPredRegs, VectorRegs>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_condALU <asmString, VecPredRegs128B, VectorRegs128B>; +} + +defm V6_vaddbq : T_HVX_condALU <"if ($src1) $dst.b += $src2.b">, + V6_vaddbq_enc; +defm V6_vaddhq : T_HVX_condALU <"if ($src1) $dst.h += $src2.h">, + V6_vaddhq_enc; +defm V6_vaddwq : T_HVX_condALU <"if ($src1) $dst.w += $src2.w">, + V6_vaddwq_enc; +defm V6_vsubbq : T_HVX_condALU <"if ($src1) $dst.b -= $src2.b">, + V6_vsubbq_enc; +defm V6_vsubhq : T_HVX_condALU <"if ($src1) $dst.h -= $src2.h">, + V6_vsubhq_enc; +defm V6_vsubwq : T_HVX_condALU <"if ($src1) $dst.w -= $src2.w">, + V6_vsubwq_enc; +defm V6_vaddbnq : T_HVX_condALU <"if (!$src1) $dst.b += $src2.b">, + V6_vaddbnq_enc; +defm V6_vaddhnq : T_HVX_condALU <"if (!$src1) $dst.h += $src2.h">, + V6_vaddhnq_enc; +defm V6_vaddwnq : T_HVX_condALU <"if (!$src1) $dst.w += $src2.w">, + V6_vaddwnq_enc; +defm V6_vsubbnq : T_HVX_condALU <"if (!$src1) $dst.b -= $src2.b">, + V6_vsubbnq_enc; +defm V6_vsubhnq : T_HVX_condALU <"if (!$src1) $dst.h -= $src2.h">, + V6_vsubhnq_enc; +defm V6_vsubwnq : T_HVX_condALU <"if (!$src1) $dst.w -= $src2.w">, + V6_vsubwnq_enc; + +let hasNewValue = 1 in +class T_HVX_alu_2op <string asmString, InstrItinClass itin, + RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1), + asmString >{ + let Itinerary = itin; + let Type = !cast<IType>("Type"#itin); +} + +multiclass T_HVX_alu_2op <string asmString, RegisterClass RCout, + RegisterClass RCin, InstrItinClass itin> { + def NAME : T_HVX_alu_2op <asmString, itin, RCout, RCin>; + let isCodeGenOnly = 1 in + 
def NAME#_128B : T_HVX_alu_2op <asmString, itin, + !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +let hasNewValue = 1 in +multiclass T_HVX_alu_2op_VV <string asmString>: + T_HVX_alu_2op <asmString, VectorRegs, VectorRegs, CVI_VA>; + +multiclass T_HVX_alu_2op_WV <string asmString>: + T_HVX_alu_2op <asmString, VecDblRegs, VectorRegs, CVI_VA_DV>; + + +defm V6_vabsh : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h)">, + V6_vabsh_enc; +defm V6_vabsw : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w)">, + V6_vabsw_enc; +defm V6_vabsh_sat : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h):sat">, + V6_vabsh_sat_enc; +defm V6_vabsw_sat : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w):sat">, + V6_vabsw_sat_enc; +defm V6_vnot : T_HVX_alu_2op_VV <"$dst = vnot($src1)">, + V6_vnot_enc; +defm V6_vassign : T_HVX_alu_2op_VV <"$dst = $src1">, + V6_vassign_enc; + +defm V6_vzb : T_HVX_alu_2op_WV <"$dst.uh = vzxt($src1.ub)">, + V6_vzb_enc; +defm V6_vzh : T_HVX_alu_2op_WV <"$dst.uw = vzxt($src1.uh)">, + V6_vzh_enc; +defm V6_vsb : T_HVX_alu_2op_WV <"$dst.h = vsxt($src1.b)">, + V6_vsb_enc; +defm V6_vsh : T_HVX_alu_2op_WV <"$dst.w = vsxt($src1.h)">, + V6_vsh_enc; + +let Itinerary = CVI_VP, Type = TypeCVI_VP in { +defm V6_vdealh : T_HVX_alu_2op_VV <"$dst.h = vdeal($src1.h)">, + V6_vdealh_enc; +defm V6_vdealb : T_HVX_alu_2op_VV <"$dst.b = vdeal($src1.b)">, + V6_vdealb_enc; +defm V6_vshuffh : T_HVX_alu_2op_VV <"$dst.h = vshuff($src1.h)">, + V6_vshuffh_enc; +defm V6_vshuffb : T_HVX_alu_2op_VV <"$dst.b = vshuff($src1.b)">, + V6_vshuffb_enc; +} + +let Itinerary = CVI_VP_VS, Type = TypeCVI_VP_VS in { +defm V6_vunpackub : T_HVX_alu_2op_WV <"$dst.uh = vunpack($src1.ub)">, + V6_vunpackub_enc; +defm V6_vunpackuh : T_HVX_alu_2op_WV <"$dst.uw = vunpack($src1.uh)">, + V6_vunpackuh_enc; +defm V6_vunpackb : T_HVX_alu_2op_WV <"$dst.h = vunpack($src1.b)">, + V6_vunpackb_enc; +defm V6_vunpackh : T_HVX_alu_2op_WV <"$dst.w = vunpack($src1.h)">, + V6_vunpackh_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vcl0w : T_HVX_alu_2op_VV <"$dst.uw = vcl0($src1.uw)">, + V6_vcl0w_enc; +defm V6_vcl0h : T_HVX_alu_2op_VV <"$dst.uh = vcl0($src1.uh)">, + V6_vcl0h_enc; +defm V6_vnormamtw : T_HVX_alu_2op_VV <"$dst.w = vnormamt($src1.w)">, + V6_vnormamtw_enc; +defm V6_vnormamth : T_HVX_alu_2op_VV <"$dst.h = vnormamt($src1.h)">, + V6_vnormamth_enc; +defm V6_vpopcounth : T_HVX_alu_2op_VV <"$dst.h = vpopcount($src1.h)">, + V6_vpopcounth_enc; +} + +let isAccumulator = 1, hasNewValue = 1, Itinerary = CVI_VX_DV_LONG, + Type = TypeCVI_VX_DV in +class T_HVX_vmpyacc2 <string asmString, RegisterClass RC> + : CVI_VA_Resource1 <(outs RC:$dst), + (ins RC:$_src_, RC:$src1, IntRegs:$src2, u1Imm:$src3), + asmString, [], "$dst = $_src_" > ; + + +multiclass T_HVX_vmpyacc2 <string asmString> { + def NAME : T_HVX_vmpyacc2 <asmString, VecDblRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vmpyacc2 <asmString, VecDblRegs128B>; +} + +defm V6_vrmpybusi_acc : + T_HVX_vmpyacc2<"$dst.w += vrmpy($src1.ub,$src2.b,#$src3)">, + V6_vrmpybusi_acc_enc; +defm V6_vrsadubi_acc : + T_HVX_vmpyacc2<"$dst.uw += vrsad($src1.ub,$src2.ub,#$src3)">, + V6_vrsadubi_acc_enc; +defm V6_vrmpyubi_acc : + T_HVX_vmpyacc2<"$dst.uw += vrmpy($src1.ub,$src2.ub,#$src3)">, + V6_vrmpyubi_acc_enc; + + +let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV, hasNewValue = 1 in +class T_HVX_vmpy2 <string asmString, RegisterClass RC> + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, IntRegs:$src2, u1Imm:$src3), + asmString>; + + +multiclass T_HVX_vmpy2 <string asmString> { + 
def NAME : T_HVX_vmpy2 <asmString, VecDblRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vmpy2 <asmString, VecDblRegs128B>; +} + +defm V6_vrmpybusi : + T_HVX_vmpy2 <"$dst.w = vrmpy($src1.ub,$src2.b,#$src3)">, V6_vrmpybusi_enc; +defm V6_vrsadubi : + T_HVX_vmpy2 <"$dst.uw = vrsad($src1.ub,$src2.ub,#$src3)">, V6_vrsadubi_enc; +defm V6_vrmpyubi : + T_HVX_vmpy2 <"$dst.uw = vrmpy($src1.ub,$src2.ub,#$src3)">, V6_vrmpyubi_enc; + + +let Itinerary = CVI_VP_VS_LONG_EARLY, Type = TypeCVI_VP_VS, + hasSideEffects = 0, hasNewValue2 = 1, opNewValue2 = 1 in +class T_HVX_perm <string asmString, RegisterClass RC> + : CVI_VA_Resource1 <(outs RC:$_dst1_, RC:$_dst2_), + (ins RC:$src1, RC:$src2, IntRegs:$src3), + asmString, [], "$_dst1_ = $src1, $_dst2_ = $src2" >; + +multiclass T_HVX_perm <string asmString> { + def NAME : T_HVX_perm <asmString, VectorRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_perm <asmString, VectorRegs128B>; +} + +let hasNewValue = 1, opNewValue = 0, hasNewValue2 = 1, opNewValue2 = 1 in { + defm V6_vshuff : T_HVX_perm <"vshuff($src1,$src2,$src3)">, V6_vshuff_enc; + defm V6_vdeal : T_HVX_perm <"vdeal($src1,$src2,$src3)">, V6_vdeal_enc; +} + +// Conditional vector move. +let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_HVX_cmov <bit isPredNot, RegisterClass RC> + : CVI_VA_Resource1 <(outs RC:$dst), (ins PredRegs:$src1, RC:$src2), + "if ("#!if(isPredNot, "!", "")#"$src1) $dst = $src2"> { + let isPredicatedFalse = isPredNot; +} + +multiclass T_HVX_cmov <bit isPredNot = 0> { + def NAME : T_HVX_cmov <isPredNot, VectorRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_cmov <isPredNot, VectorRegs128B>; +} + +defm V6_vcmov : T_HVX_cmov, V6_vcmov_enc; +defm V6_vncmov : T_HVX_cmov<1>, V6_vncmov_enc; + +// Conditional vector combine. 
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, isPredicated = 1, + hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_HVX_ccombine <bit isPredNot, RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1 < (outs RCout:$dst), + (ins PredRegs:$src1, RCin:$src2, RCin:$src3), + "if ("#!if(isPredNot, "!", "")#"$src1) $dst = vcombine($src2,$src3)"> { + let isPredicatedFalse = isPredNot; +} + +multiclass T_HVX_ccombine <bit isPredNot = 0> { + def NAME : T_HVX_ccombine <isPredNot, VecDblRegs, VectorRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_ccombine <isPredNot, VecDblRegs128B, VectorRegs128B>; +} + +defm V6_vccombine : T_HVX_ccombine, V6_vccombine_enc; +defm V6_vnccombine : T_HVX_ccombine<1>, V6_vnccombine_enc; + +let hasNewValue = 1 in +class T_HVX_shift <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VX_DV_Resource1<(outs RCout:$dst), + (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3), + asmString >; + +multiclass T_HVX_shift <string asmString, RegisterClass RCout, + RegisterClass RCin> { + def NAME : T_HVX_shift <asmString, RCout, RCin>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_shift <asmString, !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +multiclass T_HVX_shift_VV <string asmString>: + T_HVX_shift <asmString, VectorRegs, VectorRegs>; + +multiclass T_HVX_shift_WV <string asmString>: + T_HVX_shift <asmString, VecDblRegs, VectorRegs>; + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in { +defm V6_valignb : + T_HVX_shift_VV <"$dst = valign($src1,$src2,$src3)">, V6_valignb_enc; +defm V6_vlalignb : + T_HVX_shift_VV <"$dst = vlalign($src1,$src2,$src3)">, V6_vlalignb_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vasrwh : + T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3)">, V6_vasrwh_enc; +defm V6_vasrwhsat : + T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):sat">, + V6_vasrwhsat_enc; +defm V6_vasrwhrndsat : + T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):rnd:sat">, + V6_vasrwhrndsat_enc; +defm V6_vasrwuhsat : + T_HVX_shift_VV <"$dst.uh = vasr($src1.w,$src2.w,$src3):sat">, + V6_vasrwuhsat_enc; +defm V6_vasrhubsat : + T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):sat">, + V6_vasrhubsat_enc; +defm V6_vasrhubrndsat : + T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):rnd:sat">, + V6_vasrhubrndsat_enc; +defm V6_vasrhbrndsat : + T_HVX_shift_VV <"$dst.b = vasr($src1.h,$src2.h,$src3):rnd:sat">, + V6_vasrhbrndsat_enc; +} + +// Assembler mapped -- alias? 
+//defm V6_vtran2x2vdd : T_HVX_shift_VV <"">, V6_vtran2x2vdd_enc; +let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in { +defm V6_vshuffvdd : + T_HVX_shift_WV <"$dst = vshuff($src1,$src2,$src3)">, V6_vshuffvdd_enc; +defm V6_vdealvdd : + T_HVX_shift_WV <"$dst = vdeal($src1,$src2,$src3)">, V6_vdealvdd_enc; +} + +let hasNewValue = 1, Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in +class T_HVX_unpack <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCout:$_src_, RCin:$src1), + asmString, [], "$dst = $_src_">; + +multiclass T_HVX_unpack <string asmString> { + def NAME : T_HVX_unpack <asmString, VecDblRegs, VectorRegs>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_unpack <asmString, VecDblRegs128B, VectorRegs128B>; +} + +defm V6_vunpackob : T_HVX_unpack <"$dst.h |= vunpacko($src1.b)">, V6_vunpackob_enc; +defm V6_vunpackoh : T_HVX_unpack <"$dst.w |= vunpacko($src1.h)">, V6_vunpackoh_enc; + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1, + hasSideEffects = 0 in +class T_HVX_valign <string asmString, RegisterClass RC> + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2, u3Imm:$src3), + asmString>; + +multiclass T_HVX_valign <string asmString> { + def NAME : T_HVX_valign <asmString, VectorRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_valign <asmString, VectorRegs128B>; +} + +defm V6_valignbi : + T_HVX_valign <"$dst = valign($src1,$src2,#$src3)">, V6_valignbi_enc; +defm V6_vlalignbi : + T_HVX_valign <"$dst = vlalign($src1,$src2,#$src3)">, V6_vlalignbi_enc; + +let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in +class T_HVX_predAlu <string asmString, RegisterClass RC> + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2), + asmString>; + +multiclass T_HVX_predAlu <string asmString> { + def NAME : T_HVX_predAlu <asmString, VecPredRegs>; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_predAlu <asmString, VecPredRegs128B>; +} + +defm V6_pred_and : T_HVX_predAlu <"$dst = and($src1,$src2)">, V6_pred_and_enc; +defm V6_pred_or : T_HVX_predAlu <"$dst = or($src1,$src2)">, V6_pred_or_enc; +defm V6_pred_xor : T_HVX_predAlu <"$dst = xor($src1,$src2)">, V6_pred_xor_enc; +defm V6_pred_or_n : T_HVX_predAlu <"$dst = or($src1,!$src2)">, V6_pred_or_n_enc; +defm V6_pred_and_n : + T_HVX_predAlu <"$dst = and($src1,!$src2)">, V6_pred_and_n_enc; + +let Itinerary = CVI_VA, Type = TypeCVI_VA in +class T_HVX_prednot <RegisterClass RC> + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1), + "$dst = not($src1)">, V6_pred_not_enc; + +def V6_pred_not : T_HVX_prednot <VecPredRegs>; +let isCodeGenOnly = 1 in +def V6_pred_not_128B : T_HVX_prednot <VecPredRegs128B>; + +let Itinerary = CVI_VA, Type = TypeCVI_VA in +class T_HVX_vcmp2 <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2), + asmString >; + +multiclass T_HVX_vcmp2 <string asmString> { + def NAME : T_HVX_vcmp2 <asmString, VecPredRegs, VectorRegs>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vcmp2 <asmString, VecPredRegs128B, VectorRegs128B>; +} + +defm V6_veqb : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.b,$src2.b)">, V6_veqb_enc; +defm V6_veqh : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.h,$src2.h)">, V6_veqh_enc; +defm V6_veqw : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.w,$src2.w)">, V6_veqw_enc; +defm V6_vgtb : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.b,$src2.b)">, V6_vgtb_enc; +defm V6_vgth : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.h,$src2.h)">, V6_vgth_enc; +defm V6_vgtw : T_HVX_vcmp2 <"$dst = 
vcmp.gt($src1.w,$src2.w)">, V6_vgtw_enc; +defm V6_vgtub : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_enc; +defm V6_vgtuh : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_enc; +defm V6_vgtuw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_enc; + +let isAccumulator = 1, hasNewValue = 1, hasSideEffects = 0 in +class T_V6_vandqrt_acc <RegisterClass RCout, RegisterClass RCin> + : CVI_VX_Resource_late<(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, IntRegs:$src2), + "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandqrt_acc_enc; + +def V6_vandqrt_acc : T_V6_vandqrt_acc <VectorRegs, VecPredRegs>; +let isCodeGenOnly = 1 in +def V6_vandqrt_acc_128B : T_V6_vandqrt_acc <VectorRegs128B, VecPredRegs128B>; + +let isAccumulator = 1 in +class T_V6_vandvrt_acc <RegisterClass RCout, RegisterClass RCin> + : CVI_VX_Resource_late<(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, IntRegs:$src2), + "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandvrt_acc_enc; + +def V6_vandvrt_acc : T_V6_vandvrt_acc <VecPredRegs, VectorRegs>; +let isCodeGenOnly = 1 in +def V6_vandvrt_acc_128B : T_V6_vandvrt_acc <VecPredRegs128B, VectorRegs128B>; + +let hasNewValue = 1, hasSideEffects = 0 in +class T_V6_vandqrt <RegisterClass RCout, RegisterClass RCin> + : CVI_VX_Resource_late<(outs RCout:$dst), + (ins RCin:$src1, IntRegs:$src2), + "$dst = vand($src1,$src2)" >, V6_vandqrt_enc; + +def V6_vandqrt : T_V6_vandqrt <VectorRegs, VecPredRegs>; +let isCodeGenOnly = 1 in +def V6_vandqrt_128B : T_V6_vandqrt <VectorRegs128B, VecPredRegs128B>; + +let hasNewValue = 1, hasSideEffects = 0 in +class T_V6_lvsplatw <RegisterClass RC> + : CVI_VX_Resource_late<(outs RC:$dst), (ins IntRegs:$src1), + "$dst = vsplat($src1)" >, V6_lvsplatw_enc; + +def V6_lvsplatw : T_V6_lvsplatw <VectorRegs>; +let isCodeGenOnly = 1 in +def V6_lvsplatw_128B : T_V6_lvsplatw <VectorRegs128B>; + + +let hasNewValue = 1 in +class T_V6_vinsertwr <RegisterClass RC> + : CVI_VX_Resource_late<(outs RC:$dst), (ins RC:$_src_, IntRegs:$src1), + "$dst.w = vinsert($src1)", [], "$dst = $_src_">, + V6_vinsertwr_enc; + +def V6_vinsertwr : T_V6_vinsertwr <VectorRegs>; +let isCodeGenOnly = 1 in +def V6_vinsertwr_128B : T_V6_vinsertwr <VectorRegs128B>; + + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in +class T_V6_pred_scalar2 <RegisterClass RC> + : CVI_VA_Resource1<(outs RC:$dst), (ins IntRegs:$src1), + "$dst = vsetq($src1)">, V6_pred_scalar2_enc; + +def V6_pred_scalar2 : T_V6_pred_scalar2 <VecPredRegs>; +let isCodeGenOnly = 1 in +def V6_pred_scalar2_128B : T_V6_pred_scalar2 <VecPredRegs128B>; + +class T_V6_vandvrt <RegisterClass RCout, RegisterClass RCin> + : CVI_VX_Resource_late<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2), + "$dst = vand($src1,$src2)">, V6_vandvrt_enc; + +def V6_vandvrt : T_V6_vandvrt <VecPredRegs, VectorRegs>; +let isCodeGenOnly = 1 in +def V6_vandvrt_128B : T_V6_vandvrt <VecPredRegs128B, VectorRegs128B>; + +let validSubTargets = HasV60SubT in +class T_HVX_rol <string asmString, RegisterClass RC, Operand ImmOp > + : SInst2 <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2), asmString>; + +class T_HVX_rol_R <string asmString> + : T_HVX_rol <asmString, IntRegs, u5Imm>; +class T_HVX_rol_P <string asmString> + : T_HVX_rol <asmString, DoubleRegs, u6Imm>; + +def S6_rol_i_p : T_HVX_rol_P <"$dst = rol($src1,#$src2)">, S6_rol_i_p_enc; +let hasNewValue = 1, opNewValue = 0 in +def S6_rol_i_r : T_HVX_rol_R <"$dst = rol($src1,#$src2)">, S6_rol_i_r_enc; + +let validSubTargets = HasV60SubT in +class T_HVX_rol_acc 
<string asmString, RegisterClass RC, Operand ImmOp> + : SInst2 <(outs RC:$dst), (ins RC:$_src_, RC:$src1, ImmOp:$src2), + asmString, [], "$dst = $_src_" >; + +class T_HVX_rol_acc_P <string asmString> + : T_HVX_rol_acc <asmString, DoubleRegs, u6Imm>; + +class T_HVX_rol_acc_R <string asmString> + : T_HVX_rol_acc <asmString, IntRegs, u5Imm>; + +def S6_rol_i_p_nac : + T_HVX_rol_acc_P <"$dst -= rol($src1,#$src2)">, S6_rol_i_p_nac_enc; +def S6_rol_i_p_acc : + T_HVX_rol_acc_P <"$dst += rol($src1,#$src2)">, S6_rol_i_p_acc_enc; +def S6_rol_i_p_and : + T_HVX_rol_acc_P <"$dst &= rol($src1,#$src2)">, S6_rol_i_p_and_enc; +def S6_rol_i_p_or : + T_HVX_rol_acc_P <"$dst |= rol($src1,#$src2)">, S6_rol_i_p_or_enc; +def S6_rol_i_p_xacc : + T_HVX_rol_acc_P<"$dst ^= rol($src1,#$src2)">, S6_rol_i_p_xacc_enc; + +let hasNewValue = 1, opNewValue = 0 in { +def S6_rol_i_r_nac : + T_HVX_rol_acc_R <"$dst -= rol($src1,#$src2)">, S6_rol_i_r_nac_enc; +def S6_rol_i_r_acc : + T_HVX_rol_acc_R <"$dst += rol($src1,#$src2)">, S6_rol_i_r_acc_enc; +def S6_rol_i_r_and : + T_HVX_rol_acc_R <"$dst &= rol($src1,#$src2)">, S6_rol_i_r_and_enc; +def S6_rol_i_r_or : + T_HVX_rol_acc_R <"$dst |= rol($src1,#$src2)">, S6_rol_i_r_or_enc; +def S6_rol_i_r_xacc : + T_HVX_rol_acc_R <"$dst ^= rol($src1,#$src2)">, S6_rol_i_r_xacc_enc; +} + +let isSolo = 1, Itinerary = LD_tc_ld_SLOT0, Type = TypeLD in +class T_V6_extractw <RegisterClass RC> + : LD1Inst <(outs IntRegs:$dst), (ins RC:$src1, IntRegs:$src2), + "$dst = vextract($src1,$src2)">, V6_extractw_enc; + +def V6_extractw : T_V6_extractw <VectorRegs>; +let isCodeGenOnly = 1 in +def V6_extractw_128B : T_V6_extractw <VectorRegs128B>; + +let Itinerary = ST_tc_st_SLOT0, validSubTargets = HasV55SubT in +class T_sys0op <string asmString> + : ST1Inst <(outs), (ins), asmString>; + +let isSolo = 1, validSubTargets = HasV55SubT in { +def Y5_l2gunlock : T_sys0op <"l2gunlock">, Y5_l2gunlock_enc; +def Y5_l2gclean : T_sys0op <"l2gclean">, Y5_l2gclean_enc; +def Y5_l2gcleaninv : T_sys0op <"l2gcleaninv">, Y5_l2gcleaninv_enc; +} + +class T_sys1op <string asmString, RegisterClass RC> + : ST1Inst <(outs), (ins RC:$src1), asmString>; + +class T_sys1op_R <string asmString> : T_sys1op <asmString, IntRegs>; +class T_sys1op_P <string asmString> : T_sys1op <asmString, DoubleRegs>; + +let isSoloAX = 1, validSubTargets = HasV55SubT in +def Y5_l2unlocka : T_sys1op_R <"l2unlocka($src1)">, Y5_l2unlocka_enc; + +let isSolo = 1, validSubTargets = HasV60SubT in { +def Y6_l2gcleanpa : T_sys1op_P <"l2gclean($src1)">, Y6_l2gcleanpa_enc; +def Y6_l2gcleaninvpa : T_sys1op_P <"l2gcleaninv($src1)">, Y6_l2gcleaninvpa_enc; +} + +let Itinerary = ST_tc_3stall_SLOT0, isPredicateLate = 1, isSoloAX = 1, + validSubTargets = HasV55SubT in +def Y5_l2locka : ST1Inst <(outs PredRegs:$dst), (ins IntRegs:$src1), + "$dst = l2locka($src1)">, Y5_l2locka_enc; + +// not defined on etc side. why? 
+// defm S2_cabacencbin : _VV <"Rdd=encbin(Rss,$src2,Pu)">, S2_cabacencbin_enc; + +let Defs = [USR_OVF], Itinerary = M_tc_3stall_SLOT23, isPredicateLate = 1, + hasSideEffects = 0, +validSubTargets = HasV55SubT in +def A5_ACS : MInst2 <(outs DoubleRegs:$dst1, PredRegs:$dst2), + (ins DoubleRegs:$_src_, DoubleRegs:$src1, DoubleRegs:$src2), + "$dst1,$dst2 = vacsh($src1,$src2)", [], + "$dst1 = $_src_" >, Requires<[HasV55T]>, A5_ACS_enc; + +let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, hasNewValue = 1, + hasSideEffects = 0 in +class T_HVX_alu2 <string asmString, RegisterClass RCout, RegisterClass RCin1, + RegisterClass RCin2> + : CVI_VA_Resource1<(outs RCout:$dst), + (ins RCin1:$src1, RCin2:$src2, RCin2:$src3), asmString>; + +multiclass T_HVX_alu2 <string asmString, RegisterClass RC > { + def NAME : T_HVX_alu2 <asmString, RC, VecPredRegs, VectorRegs>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_alu2 <asmString, !cast<RegisterClass>(RC#"128B"), + VecPredRegs128B, VectorRegs128B>; +} + +multiclass T_HVX_alu2_V <string asmString> : + T_HVX_alu2 <asmString, VectorRegs>; + +multiclass T_HVX_alu2_W <string asmString> : + T_HVX_alu2 <asmString, VecDblRegs>; + +defm V6_vswap : T_HVX_alu2_W <"$dst = vswap($src1,$src2,$src3)">, V6_vswap_enc; + +let Itinerary = CVI_VA, Type = TypeCVI_VA, hasNewValue = 1, + hasSideEffects = 0 in +defm V6_vmux : T_HVX_alu2_V <"$dst = vmux($src1,$src2,$src3)">, V6_vmux_enc; + +class T_HVX_vlutb <string asmString, RegisterClass RCout, RegisterClass RCin> + : CVI_VA_Resource1<(outs RCout:$dst), + (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3), asmString>; + +multiclass T_HVX_vlutb <string asmString, RegisterClass RCout, + RegisterClass RCin> { + def NAME : T_HVX_vlutb <asmString, RCout, RCin>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vlutb <asmString, !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +multiclass T_HVX_vlutb_V <string asmString> : + T_HVX_vlutb <asmString, VectorRegs, VectorRegs>; + +multiclass T_HVX_vlutb_W <string asmString> : + T_HVX_vlutb <asmString, VecDblRegs, VectorRegs>; + +let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, isAccumulator = 1 in +class T_HVX_vlutb_acc <string asmString, RegisterClass RCout, + RegisterClass RCin> + : CVI_VA_Resource1<(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, RCin:$src2, IntRegsLow8:$src3), + asmString, [], "$dst = $_src_">; + +multiclass T_HVX_vlutb_acc <string asmString, RegisterClass RCout, + RegisterClass RCin> { + def NAME : T_HVX_vlutb_acc <asmString, RCout, RCin>; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vlutb_acc<asmString, + !cast<RegisterClass>(RCout#"128B"), + !cast<RegisterClass>(RCin#"128B")>; +} + +multiclass T_HVX_vlutb_acc_V <string asmString> : + T_HVX_vlutb_acc <asmString, VectorRegs, VectorRegs>; + +multiclass T_HVX_vlutb_acc_W <string asmString> : + T_HVX_vlutb_acc <asmString, VecDblRegs, VectorRegs>; + + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1 in +defm V6_vlutvvb: + T_HVX_vlutb_V <"$dst.b = vlut32($src1.b,$src2.b,$src3)">, V6_vlutvvb_enc; + +let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, hasNewValue = 1 in +defm V6_vlutvwh: + T_HVX_vlutb_W <"$dst.h = vlut16($src1.b,$src2.h,$src3)">, V6_vlutvwh_enc; + +let hasNewValue = 1 in { + defm V6_vlutvvb_oracc: + T_HVX_vlutb_acc_V <"$dst.b |= vlut32($src1.b,$src2.b,$src3)">, + V6_vlutvvb_oracc_enc; + defm V6_vlutvwh_oracc: + T_HVX_vlutb_acc_W <"$dst.h |= vlut16($src1.b,$src2.h,$src3)">, + V6_vlutvwh_oracc_enc; +} + +// It's a fake instruction and should not 
be defined? +def S2_cabacencbin + : SInst2<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3), + "$dst = encbin($src1,$src2,$src3)">, S2_cabacencbin_enc; + +// Vhist instructions +def V6_vhistq + : CVI_HIST_Resource1 <(outs), (ins VecPredRegs:$src1), + "vhist($src1)">, V6_vhistq_enc; + +def V6_vhist + : CVI_HIST_Resource1 <(outs), (ins), + "vhist" >, V6_vhist_enc; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td new file mode 100644 index 0000000..96dd531 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td @@ -0,0 +1,526 @@ +//===- HexagonInstrInfoVector.td - Hexagon Vector Patterns -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon Vector instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +def V2I1: PatLeaf<(v2i1 PredRegs:$R)>; +def V4I1: PatLeaf<(v4i1 PredRegs:$R)>; +def V8I1: PatLeaf<(v8i1 PredRegs:$R)>; +def V4I8: PatLeaf<(v4i8 IntRegs:$R)>; +def V2I16: PatLeaf<(v2i16 IntRegs:$R)>; +def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; +def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; +def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; + + +multiclass bitconvert_32<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a IntRegs:$src))), + (b IntRegs:$src)>; + def : Pat <(a (bitconvert (b IntRegs:$src))), + (a IntRegs:$src)>; +} + +multiclass bitconvert_64<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a DoubleRegs:$src))), + (b DoubleRegs:$src)>; + def : Pat <(a (bitconvert (b DoubleRegs:$src))), + (a DoubleRegs:$src)>; +} + +multiclass bitconvert_vec<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a VectorRegs:$src))), + (b VectorRegs:$src)>; + def : Pat <(a (bitconvert (b VectorRegs:$src))), + (a VectorRegs:$src)>; +} + +multiclass bitconvert_dblvec<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a VecDblRegs:$src))), + (b VecDblRegs:$src)>; + def : Pat <(a (bitconvert (b VecDblRegs:$src))), + (a VecDblRegs:$src)>; +} + +multiclass bitconvert_predvec<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a VecPredRegs:$src))), + (b VectorRegs:$src)>; + def : Pat <(a (bitconvert (b VectorRegs:$src))), + (a VecPredRegs:$src)>; +} + +multiclass bitconvert_dblvec128B<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a VecDblRegs128B:$src))), + (b VecDblRegs128B:$src)>; + def : Pat <(a (bitconvert (b VecDblRegs128B:$src))), + (a VecDblRegs128B:$src)>; +} + +// Bit convert vector types. 
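+// Illustrative note (not part of the original file): a bitconvert between
+// same-sized vector types is a pure reinterpretation of the register bits,
+// which is why every pattern instantiated below folds the cast into a plain
+// register reuse. A C sketch (assuming <string.h> and hypothetical v8i8 /
+// v4i16 typedefs for the 64-bit payload):
+//
+//   v4i16 bitcast_v8i8_to_v4i16(v8i8 x) {
+//     v4i16 y;
+//     memcpy(&y, &x, sizeof y);   // same bits, no instruction needed
+//     return y;
+//   }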
+defm : bitconvert_32<v4i8, i32>; +defm : bitconvert_32<v2i16, i32>; +defm : bitconvert_32<v2i16, v4i8>; + +defm : bitconvert_64<v8i8, i64>; +defm : bitconvert_64<v4i16, i64>; +defm : bitconvert_64<v2i32, i64>; +defm : bitconvert_64<v8i8, v4i16>; +defm : bitconvert_64<v8i8, v2i32>; +defm : bitconvert_64<v4i16, v2i32>; + +defm : bitconvert_vec<v64i8, v16i32>; +defm : bitconvert_vec<v8i64 , v16i32>; +defm : bitconvert_vec<v32i16, v16i32>; + +defm : bitconvert_dblvec<v16i64, v128i8>; +defm : bitconvert_dblvec<v32i32, v128i8>; +defm : bitconvert_dblvec<v64i16, v128i8>; + +defm : bitconvert_dblvec128B<v64i32, v128i16>; +defm : bitconvert_dblvec128B<v256i8, v128i16>; +defm : bitconvert_dblvec128B<v32i64, v128i16>; + +defm : bitconvert_dblvec128B<v64i32, v256i8>; +defm : bitconvert_dblvec128B<v32i64, v256i8>; +defm : bitconvert_dblvec128B<v128i16, v256i8>; + +// Vector shift support. Vector shifting in Hexagon is rather different +// from internal representation of LLVM. +// LLVM assumes all shifts (in vector case) will have the form +// <VT> = SHL/SRA/SRL <VT> by <VT> +// while Hexagon has the following format: +// <VT> = SHL/SRA/SRL <VT> by <IT/i32> +// As a result, special care is needed to guarantee correctness and +// performance. +class vshift_v4i16<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp> + : S_2OpInstImm<Str, MajOp, MinOp, u4Imm, + [(set (v4i16 DoubleRegs:$dst), + (Op (v4i16 DoubleRegs:$src1), u4ImmPred:$src2))]> { + bits<4> src2; + let Inst{11-8} = src2; +} + +class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp> + : S_2OpInstImm<Str, MajOp, MinOp, u5Imm, + [(set (v2i32 DoubleRegs:$dst), + (Op (v2i32 DoubleRegs:$src1), u5ImmPred:$src2))]> { + bits<5> src2; + let Inst{12-8} = src2; +} + +def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), + (A2_svaddh IntRegs:$src1, IntRegs:$src2)>; + +def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), + (A2_svsubh IntRegs:$src1, IntRegs:$src2)>; + +def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>; +def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>; +def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>; + +def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>; +def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>; +def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>; + + +def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>; +def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>; + +// Replicate the low 8-bits from 32-bits input register into each of the +// four bytes of 32-bits destination register. +def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>; + +// Replicate the low 16-bits from 32-bits input register into each of the +// four halfwords of 64-bits destination register. 
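+// Illustrative note (not part of the original file): in plain C the two
+// splat nodes behave roughly like these helpers (assuming <stdint.h>, with
+// uint32_t/uint64_t modelling the 32-bit and 64-bit register contents):
+//
+//   uint32_t vsplatb(uint32_t r) {            // HexagonVSPLATB -> S2_vsplatrb
+//     uint32_t b = r & 0xFF;
+//     return b | (b << 8) | (b << 16) | (b << 24);
+//   }
+//   uint64_t vsplath(uint32_t r) {            // HexagonVSPLATH -> S2_vsplatrh
+//     uint64_t h = r & 0xFFFF;
+//     return h | (h << 16) | (h << 32) | (h << 48);
+//   }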
+def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>; + + +class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type> + : Pat <(Op Type:$Rss, Type:$Rtt), + (MI Type:$Rss, Type:$Rtt)>; + +def: VArith_pat <A2_vaddub, add, V8I8>; +def: VArith_pat <A2_vaddh, add, V4I16>; +def: VArith_pat <A2_vaddw, add, V2I32>; +def: VArith_pat <A2_vsubub, sub, V8I8>; +def: VArith_pat <A2_vsubh, sub, V4I16>; +def: VArith_pat <A2_vsubw, sub, V2I32>; + +def: VArith_pat <A2_and, and, V2I16>; +def: VArith_pat <A2_xor, xor, V2I16>; +def: VArith_pat <A2_or, or, V2I16>; + +def: VArith_pat <A2_andp, and, V8I8>; +def: VArith_pat <A2_andp, and, V4I16>; +def: VArith_pat <A2_andp, and, V2I32>; +def: VArith_pat <A2_orp, or, V8I8>; +def: VArith_pat <A2_orp, or, V4I16>; +def: VArith_pat <A2_orp, or, V2I32>; +def: VArith_pat <A2_xorp, xor, V8I8>; +def: VArith_pat <A2_xorp, xor, V4I16>; +def: VArith_pat <A2_xorp, xor, V2I32>; + +def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c), + (i32 u5ImmPred:$c))))), + (S2_asr_i_vw V2I32:$b, imm:$c)>; +def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c), + (i32 u5ImmPred:$c))))), + (S2_lsr_i_vw V2I32:$b, imm:$c)>; +def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c), + (i32 u5ImmPred:$c))))), + (S2_asl_i_vw V2I32:$b, imm:$c)>; + +def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))), + (S2_asr_i_vh V4I16:$b, imm:$c)>; +def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))), + (S2_lsr_i_vh V4I16:$b, imm:$c)>; +def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))), + (S2_asl_i_vh V4I16:$b, imm:$c)>; + + +def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>; +def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>; + +def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>; +def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>; +def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>; +def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>; +def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>; +def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>; + +def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5ImmPred:$u5)), + (S2_asr_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4ImmPred:$u4)), + (S2_asr_i_vh V4I16:$Rs, imm:$u4)>; +def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5ImmPred:$u5)), + (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4ImmPred:$u4)), + (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>; +def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5ImmPred:$u5)), + (S2_asl_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4ImmPred:$u4)), + (S2_asl_i_vh V4I16:$Rs, imm:$u4)>; + +// Vector shift words by register +def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>; +def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>; +def S2_asl_r_vw : T_S3op_shiftVect < "vaslw", 0b00, 0b10>; +def S2_lsl_r_vw : T_S3op_shiftVect < "vlslw", 0b00, 0b11>; + +// Vector shift halfwords by register +def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>; +def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>; +def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>; +def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>; + +class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag 
Value> + : Pat <(Op Value:$Rs, I32:$Rt), + (MI Value:$Rs, I32:$Rt)>; + +def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>; +def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>; +def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>; +def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>; +def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>; +def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>; + + +def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2, + [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>; +def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2, + [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>; +def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2, + [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>; + +def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>; +def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>; +def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>; +def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>; +def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>; +def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>; +def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>; +def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>; +def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>; + + +class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value> + : Pat <(i1 (Op Value:$Rs, Value:$Rt)), + (MI Value:$Rs, Value:$Rt)>; + +def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>; +def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>; +def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>; + +def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>; +def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>; +def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>; + +def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>; +def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>; +def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>; + + +class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy> + : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)), + (MI InVal:$Rs, InVal:$Rt)>; + +def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>; +def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>; +def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>; + +def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>; +def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>; +def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>; + + +// Hexagon doesn't have a vector multiply with C semantics. +// Instead, generate a pseudo instruction that gets expaneded into two +// scalar MPYI instructions. +// This is expanded by ExpandPostRAPseudos. +let isPseudo = 1 in +def VMULW : PseudoM<(outs DoubleRegs:$Rd), + (ins DoubleRegs:$Rs, DoubleRegs:$Rt), + ".error \"Should never try to emit VMULW\"", + [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>; + +let isPseudo = 1 in +def VMULW_ACC : PseudoM<(outs DoubleRegs:$Rd), + (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt), + ".error \"Should never try to emit VMULW_ACC\"", + [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))], + "$Rd = $Rx">; + +// Adds two v4i8: Hexagon does not have an insn for this one, so we +// use the double add v8i8, and use only the low part of the result. 
+def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))), + (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>; + +// Subtract two v4i8: Hexagon does not have an insn for this one, so we +// use the double sub v8i8, and use only the low part of the result. +def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))), + (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>; + +// +// No 32 bit vector mux. +// +def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)), + (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>; +def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)), + (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>; + +// +// 64-bit vector mux. +// +def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)), + (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>; +def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)), + (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>; +def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)), + (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>; + +// +// No 32 bit vector compare. +// +def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)), + (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>; +def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)), + (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>; +def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)), + (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>; + +def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)), + (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>; +def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)), + (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>; +def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)), + (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>; + + +class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value, + ValueType CmpTy> + : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)), + (InvMI Value:$Rt, Value:$Rs)>; + +// Map from a compare operation to the corresponding instruction with the +// order of operands reversed, e.g. x > y --> cmp.lt(y,x). +def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>; +def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>; +def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>; +def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>; +def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>; +def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>; + +def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>; +def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>; +def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>; +def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>; +def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>; +def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>; + +// Map from vcmpne(Rss) -> !vcmpew(Rss). +// rs != rt -> !(rs == rt). 
+def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)), + (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>; + + +// Truncate: from vector B copy all 'E'ven 'B'yte elements: +// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6]; +def: Pat<(v4i8 (trunc V4I16:$Rs)), + (S2_vtrunehb V4I16:$Rs)>; + +// Truncate: from vector B copy all 'O'dd 'B'yte elements: +// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7]; +// S2_vtrunohb + +// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements: +// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2]; +// S2_vtruneh + +def: Pat<(v2i16 (trunc V2I32:$Rs)), + (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>; + + +def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>; +def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>; + +def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>; +def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>; + +def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; +def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; +def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>; + +// Sign extends a v2i8 into a v2i32. +def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)), + (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>; + +// Sign extends a v2i16 into a v2i32. +def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)), + (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>; + + +// Multiplies two v2i16 and returns a v2i32. We are using here the +// saturating multiply, as hexagon does not provide a non saturating +// vector multiply, and saturation does not impact the result that is +// in double precision of the operands. + +// Multiplies two v2i16 vectors: as Hexagon does not have a multiply +// with the C semantics for this one, this pattern uses the half word +// multiply vmpyh that takes two v2i16 and returns a v2i32. This is +// then truncated to fit this back into a v2i16 and to simulate the +// wrap around semantics for unsigned in C. +def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt), + (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>; + +def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)), + (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)), + (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>; + +// Multiplies two v4i16 vectors. +def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)), + (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)), + (vmpyh (LoReg $Rs), (LoReg $Rt)))>; + +def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt), + (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))), + (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>; + +// Multiplies two v4i8 vectors. +def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), + (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>, + Requires<[HasV5T]>; + +def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), + (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>; + +// Multiplies two v8i8 vectors. 
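+// Illustrative note (not part of the original file): the vector multiply
+// patterns in this block (v2i16, v4i16, v4i8, v8i8) all emulate C
+// wrap-around semantics; for the v8i8 case below, modelled as a uint64_t
+// (assuming <stdint.h>), the intended per-byte result is:
+//
+//   uint64_t mul_v8i8(uint64_t a, uint64_t b) {
+//     uint64_t r = 0;
+//     for (int i = 0; i < 8; ++i) {
+//       uint8_t p = (uint8_t)((a >> (8 * i)) * (b >> (8 * i)));
+//       r |= (uint64_t)p << (8 * i);
+//     }
+//     return r;
+//   }
+//
+// The widening multiplies (vmpyh, M5_vmpybsu) followed by the truncating
+// shuffles (S2_vtrunewh, S2_vtrunehb) keep exactly those low bits.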
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), + (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))), + (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>, + Requires<[HasV5T]>; + +def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), + (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))), + (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>; + + +class shuffler<SDNode Op, string Str> + : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c), + "$a = " # Str # "($b, $c)", + [(set (i64 DoubleRegs:$a), + (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))], + "", S_3op_tc_1_SLOT23>; + +def SDTHexagonBinOp64 : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>; + +def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>; +def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>; +def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>; +def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>; + +class ShufflePat<InstHexagon MI, SDNode Op> + : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)), + (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>; + +// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b +def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>; + +// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b +def: ShufflePat<S2_shuffob, HexagonSHUFFOB>; + +// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h +def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>; + +// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h +def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>; + + +// Truncated store from v4i16 to v4i8. +def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), + [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>; + +// Truncated store from v2i32 to v2i16. +def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), + [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>; + +def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt), + (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs), + (LoReg $Rs))))>; + +def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt), + (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>; + + +// Zero and sign extended load from v2i8 into v2i16. +def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), + [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>; + +def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), + [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>; + +def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)), + (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>; + +def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)), + (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>; + +def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)), + (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>; + +def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)), + (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td new file mode 100644 index 0000000..b207aaf --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td @@ -0,0 +1,1293 @@ +//===-- HexagonIntrinsics.td - Instruction intrinsics ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is populated based on the following specs: +// Hexagon V2 Architecture +// Application-Level Specification +// 80-V9418-8 Rev. B +// March 4, 2008 +//===----------------------------------------------------------------------===// + +class T_I_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID imm:$Is), + (MI imm:$Is)>; + +class T_R_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs), + (MI I32:$Rs)>; + +class T_P_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs), + (MI DoubleRegs:$Rs)>; + +class T_II_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2> + : Pat<(IntID Imm1:$Is, Imm2:$It), + (MI Imm1:$Is, Imm2:$It)>; + +class T_RI_pat <InstHexagon MI, Intrinsic IntID, PatLeaf ImmPred = PatLeaf<(i32 imm)>> + : Pat<(IntID I32:$Rs, ImmPred:$It), + (MI I32:$Rs, ImmPred:$It)>; + +class T_IR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred = PatLeaf<(i32 imm)>> + : Pat<(IntID ImmPred:$Is, I32:$Rt), + (MI ImmPred:$Is, I32:$Rt)>; + +class T_PI_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID I64:$Rs, imm:$It), + (MI DoubleRegs:$Rs, imm:$It)>; + +class T_RP_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID I32:$Rs, I64:$Rt), + (MI I32:$Rs, DoubleRegs:$Rt)>; + +class T_RR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, I32:$Rt), + (MI I32:$Rs, I32:$Rt)>; + +class T_PP_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I64:$Rt), + (MI DoubleRegs:$Rs, DoubleRegs:$Rt)>; + +class T_QII_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2> + : Pat <(IntID (i32 PredRegs:$Ps), Imm1:$Is, Imm2:$It), + (MI PredRegs:$Ps, Imm1:$Is, Imm2:$It)>; + +class T_QRI_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred> + : Pat <(IntID (i32 PredRegs:$Ps), I32:$Rs, ImmPred:$Is), + (MI PredRegs:$Ps, I32:$Rs, ImmPred:$Is)>; + +class T_QIR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred> + : Pat <(IntID (i32 PredRegs:$Ps), ImmPred:$Is, I32:$Rs), + (MI PredRegs:$Ps, ImmPred:$Is, I32:$Rs)>; + +class T_RRI_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, I32:$Rt, imm:$Iu), + (MI I32:$Rs, I32:$Rt, imm:$Iu)>; + +class T_RII_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, imm:$It, imm:$Iu), + (MI I32:$Rs, imm:$It, imm:$Iu)>; + +class T_IRI_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID imm:$It, I32:$Rs, imm:$Iu), + (MI imm:$It, I32:$Rs, imm:$Iu)>; + +class T_IRR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID imm:$Is, I32:$Rs, I32:$Rt), + (MI imm:$Is, I32:$Rs, I32:$Rt)>; + +class T_RIR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, imm:$Is, I32:$Rt), + (MI I32:$Rs, imm:$Is, I32:$Rt)>; + +class T_RRR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, I32:$Rt, I32:$Ru), + (MI I32:$Rs, I32:$Rt, I32:$Ru)>; + +class T_PPI_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I64:$Rt, imm:$Iu), + (MI DoubleRegs:$Rs, DoubleRegs:$Rt, imm:$Iu)>; + +class T_PII_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, imm:$It, imm:$Iu), + (MI DoubleRegs:$Rs, imm:$It, imm:$Iu)>; + +class T_PPP_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I64:$Rt, I64:$Ru), + (MI DoubleRegs:$Rs, DoubleRegs:$Rt, DoubleRegs:$Ru)>; + +class T_PPR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I64:$Rt, I32:$Ru), + (MI DoubleRegs:$Rs, DoubleRegs:$Rt, I32:$Ru)>; + +class T_PRR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID 
I64:$Rs, I32:$Rt, I32:$Ru), + (MI DoubleRegs:$Rs, I32:$Rt, I32:$Ru)>; + +class T_PPQ_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I64:$Rt, (i32 PredRegs:$Ru)), + (MI DoubleRegs:$Rs, DoubleRegs:$Rt, PredRegs:$Ru)>; + +class T_PR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I32:$Rt), + (MI DoubleRegs:$Rs, I32:$Rt)>; + +class T_D_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID (F64:$Rs)), + (MI (F64:$Rs))>; + +class T_DI_pat <InstHexagon MI, Intrinsic IntID, + PatLeaf ImmPred = PatLeaf<(i32 imm)>> + : Pat<(IntID F64:$Rs, ImmPred:$It), + (MI F64:$Rs, ImmPred:$It)>; + +class T_F_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID F32:$Rs), + (MI F32:$Rs)>; + +class T_FI_pat <InstHexagon MI, Intrinsic IntID, + PatLeaf ImmPred = PatLeaf<(i32 imm)>> + : Pat<(IntID F32:$Rs, ImmPred:$It), + (MI F32:$Rs, ImmPred:$It)>; + +class T_FF_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID F32:$Rs, F32:$Rt), + (MI F32:$Rs, F32:$Rt)>; + +class T_DD_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID F64:$Rs, F64:$Rt), + (MI F64:$Rs, F64:$Rt)>; + +class T_FFF_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID F32:$Rs, F32:$Rt, F32:$Ru), + (MI F32:$Rs, F32:$Rt, F32:$Ru)>; + +class T_FFFQ_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID F32:$Rs, F32:$Rt, F32:$Ru, (i32 PredRegs:$Rx)), + (MI F32:$Rs, F32:$Rt, F32:$Ru, PredRegs:$Rx)>; + +//===----------------------------------------------------------------------===// +// MPYS / Multipy signed/unsigned halfwords +//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat] +//===----------------------------------------------------------------------===// + +def : T_RR_pat <M2_mpy_ll_s1, int_hexagon_M2_mpy_ll_s1>; +def : T_RR_pat <M2_mpy_ll_s0, int_hexagon_M2_mpy_ll_s0>; +def : T_RR_pat <M2_mpy_lh_s1, int_hexagon_M2_mpy_lh_s1>; +def : T_RR_pat <M2_mpy_lh_s0, int_hexagon_M2_mpy_lh_s0>; +def : T_RR_pat <M2_mpy_hl_s1, int_hexagon_M2_mpy_hl_s1>; +def : T_RR_pat <M2_mpy_hl_s0, int_hexagon_M2_mpy_hl_s0>; +def : T_RR_pat <M2_mpy_hh_s1, int_hexagon_M2_mpy_hh_s1>; +def : T_RR_pat <M2_mpy_hh_s0, int_hexagon_M2_mpy_hh_s0>; + +def : T_RR_pat <M2_mpyu_ll_s1, int_hexagon_M2_mpyu_ll_s1>; +def : T_RR_pat <M2_mpyu_ll_s0, int_hexagon_M2_mpyu_ll_s0>; +def : T_RR_pat <M2_mpyu_lh_s1, int_hexagon_M2_mpyu_lh_s1>; +def : T_RR_pat <M2_mpyu_lh_s0, int_hexagon_M2_mpyu_lh_s0>; +def : T_RR_pat <M2_mpyu_hl_s1, int_hexagon_M2_mpyu_hl_s1>; +def : T_RR_pat <M2_mpyu_hl_s0, int_hexagon_M2_mpyu_hl_s0>; +def : T_RR_pat <M2_mpyu_hh_s1, int_hexagon_M2_mpyu_hh_s1>; +def : T_RR_pat <M2_mpyu_hh_s0, int_hexagon_M2_mpyu_hh_s0>; + +def : T_RR_pat <M2_mpy_sat_ll_s1, int_hexagon_M2_mpy_sat_ll_s1>; +def : T_RR_pat <M2_mpy_sat_ll_s0, int_hexagon_M2_mpy_sat_ll_s0>; +def : T_RR_pat <M2_mpy_sat_lh_s1, int_hexagon_M2_mpy_sat_lh_s1>; +def : T_RR_pat <M2_mpy_sat_lh_s0, int_hexagon_M2_mpy_sat_lh_s0>; +def : T_RR_pat <M2_mpy_sat_hl_s1, int_hexagon_M2_mpy_sat_hl_s1>; +def : T_RR_pat <M2_mpy_sat_hl_s0, int_hexagon_M2_mpy_sat_hl_s0>; +def : T_RR_pat <M2_mpy_sat_hh_s1, int_hexagon_M2_mpy_sat_hh_s1>; +def : T_RR_pat <M2_mpy_sat_hh_s0, int_hexagon_M2_mpy_sat_hh_s0>; + +def : T_RR_pat <M2_mpy_rnd_ll_s1, int_hexagon_M2_mpy_rnd_ll_s1>; +def : T_RR_pat <M2_mpy_rnd_ll_s0, int_hexagon_M2_mpy_rnd_ll_s0>; +def : T_RR_pat <M2_mpy_rnd_lh_s1, int_hexagon_M2_mpy_rnd_lh_s1>; +def : T_RR_pat <M2_mpy_rnd_lh_s0, int_hexagon_M2_mpy_rnd_lh_s0>; +def : T_RR_pat <M2_mpy_rnd_hl_s1, int_hexagon_M2_mpy_rnd_hl_s1>; +def : T_RR_pat <M2_mpy_rnd_hl_s0, int_hexagon_M2_mpy_rnd_hl_s0>; +def : T_RR_pat 
<M2_mpy_rnd_hh_s1, int_hexagon_M2_mpy_rnd_hh_s1>; +def : T_RR_pat <M2_mpy_rnd_hh_s0, int_hexagon_M2_mpy_rnd_hh_s0>; + +def : T_RR_pat <M2_mpy_sat_rnd_ll_s1, int_hexagon_M2_mpy_sat_rnd_ll_s1>; +def : T_RR_pat <M2_mpy_sat_rnd_ll_s0, int_hexagon_M2_mpy_sat_rnd_ll_s0>; +def : T_RR_pat <M2_mpy_sat_rnd_lh_s1, int_hexagon_M2_mpy_sat_rnd_lh_s1>; +def : T_RR_pat <M2_mpy_sat_rnd_lh_s0, int_hexagon_M2_mpy_sat_rnd_lh_s0>; +def : T_RR_pat <M2_mpy_sat_rnd_hl_s1, int_hexagon_M2_mpy_sat_rnd_hl_s1>; +def : T_RR_pat <M2_mpy_sat_rnd_hl_s0, int_hexagon_M2_mpy_sat_rnd_hl_s0>; +def : T_RR_pat <M2_mpy_sat_rnd_hh_s1, int_hexagon_M2_mpy_sat_rnd_hh_s1>; +def : T_RR_pat <M2_mpy_sat_rnd_hh_s0, int_hexagon_M2_mpy_sat_rnd_hh_s0>; + + +//===----------------------------------------------------------------------===// +// MPYS / Multipy signed/unsigned halfwords and add/subtract the +// result from the accumulator. +//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//===----------------------------------------------------------------------===// + +def : T_RRR_pat <M2_mpy_acc_ll_s1, int_hexagon_M2_mpy_acc_ll_s1>; +def : T_RRR_pat <M2_mpy_acc_ll_s0, int_hexagon_M2_mpy_acc_ll_s0>; +def : T_RRR_pat <M2_mpy_acc_lh_s1, int_hexagon_M2_mpy_acc_lh_s1>; +def : T_RRR_pat <M2_mpy_acc_lh_s0, int_hexagon_M2_mpy_acc_lh_s0>; +def : T_RRR_pat <M2_mpy_acc_hl_s1, int_hexagon_M2_mpy_acc_hl_s1>; +def : T_RRR_pat <M2_mpy_acc_hl_s0, int_hexagon_M2_mpy_acc_hl_s0>; +def : T_RRR_pat <M2_mpy_acc_hh_s1, int_hexagon_M2_mpy_acc_hh_s1>; +def : T_RRR_pat <M2_mpy_acc_hh_s0, int_hexagon_M2_mpy_acc_hh_s0>; + +def : T_RRR_pat <M2_mpyu_acc_ll_s1, int_hexagon_M2_mpyu_acc_ll_s1>; +def : T_RRR_pat <M2_mpyu_acc_ll_s0, int_hexagon_M2_mpyu_acc_ll_s0>; +def : T_RRR_pat <M2_mpyu_acc_lh_s1, int_hexagon_M2_mpyu_acc_lh_s1>; +def : T_RRR_pat <M2_mpyu_acc_lh_s0, int_hexagon_M2_mpyu_acc_lh_s0>; +def : T_RRR_pat <M2_mpyu_acc_hl_s1, int_hexagon_M2_mpyu_acc_hl_s1>; +def : T_RRR_pat <M2_mpyu_acc_hl_s0, int_hexagon_M2_mpyu_acc_hl_s0>; +def : T_RRR_pat <M2_mpyu_acc_hh_s1, int_hexagon_M2_mpyu_acc_hh_s1>; +def : T_RRR_pat <M2_mpyu_acc_hh_s0, int_hexagon_M2_mpyu_acc_hh_s0>; + +def : T_RRR_pat <M2_mpy_nac_ll_s1, int_hexagon_M2_mpy_nac_ll_s1>; +def : T_RRR_pat <M2_mpy_nac_ll_s0, int_hexagon_M2_mpy_nac_ll_s0>; +def : T_RRR_pat <M2_mpy_nac_lh_s1, int_hexagon_M2_mpy_nac_lh_s1>; +def : T_RRR_pat <M2_mpy_nac_lh_s0, int_hexagon_M2_mpy_nac_lh_s0>; +def : T_RRR_pat <M2_mpy_nac_hl_s1, int_hexagon_M2_mpy_nac_hl_s1>; +def : T_RRR_pat <M2_mpy_nac_hl_s0, int_hexagon_M2_mpy_nac_hl_s0>; +def : T_RRR_pat <M2_mpy_nac_hh_s1, int_hexagon_M2_mpy_nac_hh_s1>; +def : T_RRR_pat <M2_mpy_nac_hh_s0, int_hexagon_M2_mpy_nac_hh_s0>; + +def : T_RRR_pat <M2_mpyu_nac_ll_s1, int_hexagon_M2_mpyu_nac_ll_s1>; +def : T_RRR_pat <M2_mpyu_nac_ll_s0, int_hexagon_M2_mpyu_nac_ll_s0>; +def : T_RRR_pat <M2_mpyu_nac_lh_s1, int_hexagon_M2_mpyu_nac_lh_s1>; +def : T_RRR_pat <M2_mpyu_nac_lh_s0, int_hexagon_M2_mpyu_nac_lh_s0>; +def : T_RRR_pat <M2_mpyu_nac_hl_s1, int_hexagon_M2_mpyu_nac_hl_s1>; +def : T_RRR_pat <M2_mpyu_nac_hl_s0, int_hexagon_M2_mpyu_nac_hl_s0>; +def : T_RRR_pat <M2_mpyu_nac_hh_s1, int_hexagon_M2_mpyu_nac_hh_s1>; +def : T_RRR_pat <M2_mpyu_nac_hh_s0, int_hexagon_M2_mpyu_nac_hh_s0>; + +def : T_RRR_pat <M2_mpy_acc_sat_ll_s1, int_hexagon_M2_mpy_acc_sat_ll_s1>; +def : T_RRR_pat <M2_mpy_acc_sat_ll_s0, int_hexagon_M2_mpy_acc_sat_ll_s0>; +def : T_RRR_pat <M2_mpy_acc_sat_lh_s1, int_hexagon_M2_mpy_acc_sat_lh_s1>; +def : T_RRR_pat <M2_mpy_acc_sat_lh_s0, int_hexagon_M2_mpy_acc_sat_lh_s0>; +def : T_RRR_pat <M2_mpy_acc_sat_hl_s1, 
int_hexagon_M2_mpy_acc_sat_hl_s1>; +def : T_RRR_pat <M2_mpy_acc_sat_hl_s0, int_hexagon_M2_mpy_acc_sat_hl_s0>; +def : T_RRR_pat <M2_mpy_acc_sat_hh_s1, int_hexagon_M2_mpy_acc_sat_hh_s1>; +def : T_RRR_pat <M2_mpy_acc_sat_hh_s0, int_hexagon_M2_mpy_acc_sat_hh_s0>; + +def : T_RRR_pat <M2_mpy_nac_sat_ll_s1, int_hexagon_M2_mpy_nac_sat_ll_s1>; +def : T_RRR_pat <M2_mpy_nac_sat_ll_s0, int_hexagon_M2_mpy_nac_sat_ll_s0>; +def : T_RRR_pat <M2_mpy_nac_sat_lh_s1, int_hexagon_M2_mpy_nac_sat_lh_s1>; +def : T_RRR_pat <M2_mpy_nac_sat_lh_s0, int_hexagon_M2_mpy_nac_sat_lh_s0>; +def : T_RRR_pat <M2_mpy_nac_sat_hl_s1, int_hexagon_M2_mpy_nac_sat_hl_s1>; +def : T_RRR_pat <M2_mpy_nac_sat_hl_s0, int_hexagon_M2_mpy_nac_sat_hl_s0>; +def : T_RRR_pat <M2_mpy_nac_sat_hh_s1, int_hexagon_M2_mpy_nac_sat_hh_s1>; +def : T_RRR_pat <M2_mpy_nac_sat_hh_s0, int_hexagon_M2_mpy_nac_sat_hh_s0>; + + +//===----------------------------------------------------------------------===// +// Multiply signed/unsigned halfwords with and without saturation and rounding +// into a 64-bit destination register. +//===----------------------------------------------------------------------===// + +def : T_RR_pat <M2_mpyd_hh_s0, int_hexagon_M2_mpyd_hh_s0>; +def : T_RR_pat <M2_mpyd_hl_s0, int_hexagon_M2_mpyd_hl_s0>; +def : T_RR_pat <M2_mpyd_lh_s0, int_hexagon_M2_mpyd_lh_s0>; +def : T_RR_pat <M2_mpyd_ll_s0, int_hexagon_M2_mpyd_ll_s0>; +def : T_RR_pat <M2_mpyd_hh_s1, int_hexagon_M2_mpyd_hh_s1>; +def : T_RR_pat <M2_mpyd_hl_s1, int_hexagon_M2_mpyd_hl_s1>; +def : T_RR_pat <M2_mpyd_lh_s1, int_hexagon_M2_mpyd_lh_s1>; +def : T_RR_pat <M2_mpyd_ll_s1, int_hexagon_M2_mpyd_ll_s1>; + +def : T_RR_pat <M2_mpyd_rnd_hh_s0, int_hexagon_M2_mpyd_rnd_hh_s0>; +def : T_RR_pat <M2_mpyd_rnd_hl_s0, int_hexagon_M2_mpyd_rnd_hl_s0>; +def : T_RR_pat <M2_mpyd_rnd_lh_s0, int_hexagon_M2_mpyd_rnd_lh_s0>; +def : T_RR_pat <M2_mpyd_rnd_ll_s0, int_hexagon_M2_mpyd_rnd_ll_s0>; +def : T_RR_pat <M2_mpyd_rnd_hh_s1, int_hexagon_M2_mpyd_rnd_hh_s1>; +def : T_RR_pat <M2_mpyd_rnd_hl_s1, int_hexagon_M2_mpyd_rnd_hl_s1>; +def : T_RR_pat <M2_mpyd_rnd_lh_s1, int_hexagon_M2_mpyd_rnd_lh_s1>; +def : T_RR_pat <M2_mpyd_rnd_ll_s1, int_hexagon_M2_mpyd_rnd_ll_s1>; + +def : T_RR_pat <M2_mpyud_hh_s0, int_hexagon_M2_mpyud_hh_s0>; +def : T_RR_pat <M2_mpyud_hl_s0, int_hexagon_M2_mpyud_hl_s0>; +def : T_RR_pat <M2_mpyud_lh_s0, int_hexagon_M2_mpyud_lh_s0>; +def : T_RR_pat <M2_mpyud_ll_s0, int_hexagon_M2_mpyud_ll_s0>; +def : T_RR_pat <M2_mpyud_hh_s1, int_hexagon_M2_mpyud_hh_s1>; +def : T_RR_pat <M2_mpyud_hl_s1, int_hexagon_M2_mpyud_hl_s1>; +def : T_RR_pat <M2_mpyud_lh_s1, int_hexagon_M2_mpyud_lh_s1>; +def : T_RR_pat <M2_mpyud_ll_s1, int_hexagon_M2_mpyud_ll_s1>; + +//===----------------------------------------------------------------------===// +// MPYS / Multiply signed/unsigned halfwords and add/subtract the +// result from the 64-bit destination register. 
+//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//===----------------------------------------------------------------------===// + +def : T_PRR_pat <M2_mpyd_acc_hh_s0, int_hexagon_M2_mpyd_acc_hh_s0>; +def : T_PRR_pat <M2_mpyd_acc_hl_s0, int_hexagon_M2_mpyd_acc_hl_s0>; +def : T_PRR_pat <M2_mpyd_acc_lh_s0, int_hexagon_M2_mpyd_acc_lh_s0>; +def : T_PRR_pat <M2_mpyd_acc_ll_s0, int_hexagon_M2_mpyd_acc_ll_s0>; + +def : T_PRR_pat <M2_mpyd_acc_hh_s1, int_hexagon_M2_mpyd_acc_hh_s1>; +def : T_PRR_pat <M2_mpyd_acc_hl_s1, int_hexagon_M2_mpyd_acc_hl_s1>; +def : T_PRR_pat <M2_mpyd_acc_lh_s1, int_hexagon_M2_mpyd_acc_lh_s1>; +def : T_PRR_pat <M2_mpyd_acc_ll_s1, int_hexagon_M2_mpyd_acc_ll_s1>; + +def : T_PRR_pat <M2_mpyd_nac_hh_s0, int_hexagon_M2_mpyd_nac_hh_s0>; +def : T_PRR_pat <M2_mpyd_nac_hl_s0, int_hexagon_M2_mpyd_nac_hl_s0>; +def : T_PRR_pat <M2_mpyd_nac_lh_s0, int_hexagon_M2_mpyd_nac_lh_s0>; +def : T_PRR_pat <M2_mpyd_nac_ll_s0, int_hexagon_M2_mpyd_nac_ll_s0>; + +def : T_PRR_pat <M2_mpyd_nac_hh_s1, int_hexagon_M2_mpyd_nac_hh_s1>; +def : T_PRR_pat <M2_mpyd_nac_hl_s1, int_hexagon_M2_mpyd_nac_hl_s1>; +def : T_PRR_pat <M2_mpyd_nac_lh_s1, int_hexagon_M2_mpyd_nac_lh_s1>; +def : T_PRR_pat <M2_mpyd_nac_ll_s1, int_hexagon_M2_mpyd_nac_ll_s1>; + +def : T_PRR_pat <M2_mpyud_acc_hh_s0, int_hexagon_M2_mpyud_acc_hh_s0>; +def : T_PRR_pat <M2_mpyud_acc_hl_s0, int_hexagon_M2_mpyud_acc_hl_s0>; +def : T_PRR_pat <M2_mpyud_acc_lh_s0, int_hexagon_M2_mpyud_acc_lh_s0>; +def : T_PRR_pat <M2_mpyud_acc_ll_s0, int_hexagon_M2_mpyud_acc_ll_s0>; + +def : T_PRR_pat <M2_mpyud_acc_hh_s1, int_hexagon_M2_mpyud_acc_hh_s1>; +def : T_PRR_pat <M2_mpyud_acc_hl_s1, int_hexagon_M2_mpyud_acc_hl_s1>; +def : T_PRR_pat <M2_mpyud_acc_lh_s1, int_hexagon_M2_mpyud_acc_lh_s1>; +def : T_PRR_pat <M2_mpyud_acc_ll_s1, int_hexagon_M2_mpyud_acc_ll_s1>; + +def : T_PRR_pat <M2_mpyud_nac_hh_s0, int_hexagon_M2_mpyud_nac_hh_s0>; +def : T_PRR_pat <M2_mpyud_nac_hl_s0, int_hexagon_M2_mpyud_nac_hl_s0>; +def : T_PRR_pat <M2_mpyud_nac_lh_s0, int_hexagon_M2_mpyud_nac_lh_s0>; +def : T_PRR_pat <M2_mpyud_nac_ll_s0, int_hexagon_M2_mpyud_nac_ll_s0>; + +def : T_PRR_pat <M2_mpyud_nac_hh_s1, int_hexagon_M2_mpyud_nac_hh_s1>; +def : T_PRR_pat <M2_mpyud_nac_hl_s1, int_hexagon_M2_mpyud_nac_hl_s1>; +def : T_PRR_pat <M2_mpyud_nac_lh_s1, int_hexagon_M2_mpyud_nac_lh_s1>; +def : T_PRR_pat <M2_mpyud_nac_ll_s1, int_hexagon_M2_mpyud_nac_ll_s1>; + +// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat +def : T_PP_pat <M2_vcmpy_s1_sat_i, int_hexagon_M2_vcmpy_s1_sat_i>; +def : T_PP_pat <M2_vcmpy_s0_sat_i, int_hexagon_M2_vcmpy_s0_sat_i>; + +// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat +def : T_PP_pat <M2_vcmpy_s1_sat_r, int_hexagon_M2_vcmpy_s1_sat_r>; +def : T_PP_pat <M2_vcmpy_s0_sat_r, int_hexagon_M2_vcmpy_s0_sat_r>; + +// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat +def : T_PP_pat <M2_vdmpys_s1, int_hexagon_M2_vdmpys_s1>; +def : T_PP_pat <M2_vdmpys_s0, int_hexagon_M2_vdmpys_s0>; + +// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat +def : T_PP_pat <M2_vmpy2es_s1, int_hexagon_M2_vmpy2es_s1>; +def : T_PP_pat <M2_vmpy2es_s0, int_hexagon_M2_vmpy2es_s0>; + +//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PP_pat <M2_mmpyh_s0, int_hexagon_M2_mmpyh_s0>; +def : T_PP_pat <M2_mmpyh_s1, int_hexagon_M2_mmpyh_s1>; +def : T_PP_pat <M2_mmpyh_rs0, int_hexagon_M2_mmpyh_rs0>; +def : T_PP_pat <M2_mmpyh_rs1, int_hexagon_M2_mmpyh_rs1>; + +//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PP_pat <M2_mmpyl_s0, int_hexagon_M2_mmpyl_s0>; +def : T_PP_pat 
<M2_mmpyl_s1, int_hexagon_M2_mmpyl_s1>; +def : T_PP_pat <M2_mmpyl_rs0, int_hexagon_M2_mmpyl_rs0>; +def : T_PP_pat <M2_mmpyl_rs1, int_hexagon_M2_mmpyl_rs1>; + +//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PP_pat <M2_mmpyuh_s0, int_hexagon_M2_mmpyuh_s0>; +def : T_PP_pat <M2_mmpyuh_s1, int_hexagon_M2_mmpyuh_s1>; +def : T_PP_pat <M2_mmpyuh_rs0, int_hexagon_M2_mmpyuh_rs0>; +def : T_PP_pat <M2_mmpyuh_rs1, int_hexagon_M2_mmpyuh_rs1>; + +//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PP_pat <M2_mmpyul_s0, int_hexagon_M2_mmpyul_s0>; +def : T_PP_pat <M2_mmpyul_s1, int_hexagon_M2_mmpyul_s1>; +def : T_PP_pat <M2_mmpyul_rs0, int_hexagon_M2_mmpyul_rs0>; +def : T_PP_pat <M2_mmpyul_rs1, int_hexagon_M2_mmpyul_rs1>; + +// Vector reduce add unsigned bytes: Rdd32[+]=vrmpybu(Rss32,Rtt32) +def : T_PP_pat <A2_vraddub, int_hexagon_A2_vraddub>; +def : T_PPP_pat <A2_vraddub_acc, int_hexagon_A2_vraddub_acc>; + +// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt) +def : T_PP_pat <A2_vrsadub, int_hexagon_A2_vrsadub>; +def : T_PPP_pat <A2_vrsadub_acc, int_hexagon_A2_vrsadub_acc>; + +// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss) +def : T_PP_pat <M2_vabsdiffh, int_hexagon_M2_vabsdiffh>; + +// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss) +def : T_PP_pat <M2_vabsdiffw, int_hexagon_M2_vabsdiffw>; + +// Vector reduce complex multiply real or imaginary: +// Rdd[+]=vrcmpy[ir](Rss,Rtt[*]) +def : T_PP_pat <M2_vrcmpyi_s0, int_hexagon_M2_vrcmpyi_s0>; +def : T_PP_pat <M2_vrcmpyi_s0c, int_hexagon_M2_vrcmpyi_s0c>; +def : T_PPP_pat <M2_vrcmaci_s0, int_hexagon_M2_vrcmaci_s0>; +def : T_PPP_pat <M2_vrcmaci_s0c, int_hexagon_M2_vrcmaci_s0c>; + +def : T_PP_pat <M2_vrcmpyr_s0, int_hexagon_M2_vrcmpyr_s0>; +def : T_PP_pat <M2_vrcmpyr_s0c, int_hexagon_M2_vrcmpyr_s0c>; +def : T_PPP_pat <M2_vrcmacr_s0, int_hexagon_M2_vrcmacr_s0>; +def : T_PPP_pat <M2_vrcmacr_s0c, int_hexagon_M2_vrcmacr_s0c>; + +// Vector reduce halfwords +// Rdd[+]=vrmpyh(Rss,Rtt) +def : T_PP_pat <M2_vrmpy_s0, int_hexagon_M2_vrmpy_s0>; +def : T_PPP_pat <M2_vrmac_s0, int_hexagon_M2_vrmac_s0>; + +//===----------------------------------------------------------------------===// +// Vector Multipy with accumulation +//===----------------------------------------------------------------------===// + +// Vector multiply word by signed half with accumulation +// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PPP_pat <M2_mmacls_s1, int_hexagon_M2_mmacls_s1>; +def : T_PPP_pat <M2_mmacls_s0, int_hexagon_M2_mmacls_s0>; +def : T_PPP_pat <M2_mmacls_rs1, int_hexagon_M2_mmacls_rs1>; +def : T_PPP_pat <M2_mmacls_rs0, int_hexagon_M2_mmacls_rs0>; +def : T_PPP_pat <M2_mmachs_s1, int_hexagon_M2_mmachs_s1>; +def : T_PPP_pat <M2_mmachs_s0, int_hexagon_M2_mmachs_s0>; +def : T_PPP_pat <M2_mmachs_rs1, int_hexagon_M2_mmachs_rs1>; +def : T_PPP_pat <M2_mmachs_rs0, int_hexagon_M2_mmachs_rs0>; + +// Vector multiply word by unsigned half with accumulation +// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PPP_pat <M2_mmaculs_s1, int_hexagon_M2_mmaculs_s1>; +def : T_PPP_pat <M2_mmaculs_s0, int_hexagon_M2_mmaculs_s0>; +def : T_PPP_pat <M2_mmaculs_rs1, int_hexagon_M2_mmaculs_rs1>; +def : T_PPP_pat <M2_mmaculs_rs0, int_hexagon_M2_mmaculs_rs0>; +def : T_PPP_pat <M2_mmacuhs_s1, int_hexagon_M2_mmacuhs_s1>; +def : T_PPP_pat <M2_mmacuhs_s0, int_hexagon_M2_mmacuhs_s0>; +def : T_PPP_pat <M2_mmacuhs_rs1, int_hexagon_M2_mmacuhs_rs1>; +def : T_PPP_pat <M2_mmacuhs_rs0, int_hexagon_M2_mmacuhs_rs0>; + +// Vector multiply even halfwords with accumulation +// 
Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat] +def : T_PPP_pat <M2_vmac2es, int_hexagon_M2_vmac2es>; +def : T_PPP_pat <M2_vmac2es_s1, int_hexagon_M2_vmac2es_s1>; +def : T_PPP_pat <M2_vmac2es_s0, int_hexagon_M2_vmac2es_s0>; + +// Vector dual multiply with accumulation +// Rxx+=vdmpy(Rss,Rtt)[:sat] +def : T_PPP_pat <M2_vdmacs_s1, int_hexagon_M2_vdmacs_s1>; +def : T_PPP_pat <M2_vdmacs_s0, int_hexagon_M2_vdmacs_s0>; + +// Vector complex multiply real or imaginary with accumulation +// Rxx+=vcmpy[ir](Rss,Rtt):sat +def : T_PPP_pat <M2_vcmac_s0_sat_r, int_hexagon_M2_vcmac_s0_sat_r>; +def : T_PPP_pat <M2_vcmac_s0_sat_i, int_hexagon_M2_vcmac_s0_sat_i>; + +//===----------------------------------------------------------------------===// +// Add/Subtract halfword +// Rd=add(Rt.L,Rs.[HL])[:sat] +// Rd=sub(Rt.L,Rs.[HL])[:sat] +// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<16] +// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<16] +//===----------------------------------------------------------------------===// + +//Rd=add(Rt.L,Rs.[LH]) +def : T_RR_pat <A2_addh_l16_ll, int_hexagon_A2_addh_l16_ll>; +def : T_RR_pat <A2_addh_l16_hl, int_hexagon_A2_addh_l16_hl>; + +//Rd=add(Rt.L,Rs.[LH]):sat +def : T_RR_pat <A2_addh_l16_sat_ll, int_hexagon_A2_addh_l16_sat_ll>; +def : T_RR_pat <A2_addh_l16_sat_hl, int_hexagon_A2_addh_l16_sat_hl>; + +//Rd=sub(Rt.L,Rs.[LH]) +def : T_RR_pat <A2_subh_l16_ll, int_hexagon_A2_subh_l16_ll>; +def : T_RR_pat <A2_subh_l16_hl, int_hexagon_A2_subh_l16_hl>; + +//Rd=sub(Rt.L,Rs.[LH]):sat +def : T_RR_pat <A2_subh_l16_sat_ll, int_hexagon_A2_subh_l16_sat_ll>; +def : T_RR_pat <A2_subh_l16_sat_hl, int_hexagon_A2_subh_l16_sat_hl>; + +//Rd=add(Rt.[LH],Rs.[LH]):<<16 +def : T_RR_pat <A2_addh_h16_ll, int_hexagon_A2_addh_h16_ll>; +def : T_RR_pat <A2_addh_h16_lh, int_hexagon_A2_addh_h16_lh>; +def : T_RR_pat <A2_addh_h16_hl, int_hexagon_A2_addh_h16_hl>; +def : T_RR_pat <A2_addh_h16_hh, int_hexagon_A2_addh_h16_hh>; + +//Rd=sub(Rt.[LH],Rs.[LH]):<<16 +def : T_RR_pat <A2_subh_h16_ll, int_hexagon_A2_subh_h16_ll>; +def : T_RR_pat <A2_subh_h16_lh, int_hexagon_A2_subh_h16_lh>; +def : T_RR_pat <A2_subh_h16_hl, int_hexagon_A2_subh_h16_hl>; +def : T_RR_pat <A2_subh_h16_hh, int_hexagon_A2_subh_h16_hh>; + +//Rd=add(Rt.[LH],Rs.[LH]):sat:<<16 +def : T_RR_pat <A2_addh_h16_sat_ll, int_hexagon_A2_addh_h16_sat_ll>; +def : T_RR_pat <A2_addh_h16_sat_lh, int_hexagon_A2_addh_h16_sat_lh>; +def : T_RR_pat <A2_addh_h16_sat_hl, int_hexagon_A2_addh_h16_sat_hl>; +def : T_RR_pat <A2_addh_h16_sat_hh, int_hexagon_A2_addh_h16_sat_hh>; + +//Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16 +def : T_RR_pat <A2_subh_h16_sat_ll, int_hexagon_A2_subh_h16_sat_ll>; +def : T_RR_pat <A2_subh_h16_sat_lh, int_hexagon_A2_subh_h16_sat_lh>; +def : T_RR_pat <A2_subh_h16_sat_hl, int_hexagon_A2_subh_h16_sat_hl>; +def : T_RR_pat <A2_subh_h16_sat_hh, int_hexagon_A2_subh_h16_sat_hh>; + +// ALU64 / ALU / min max +def : T_RR_pat<A2_max, int_hexagon_A2_max>; +def : T_RR_pat<A2_min, int_hexagon_A2_min>; +def : T_RR_pat<A2_maxu, int_hexagon_A2_maxu>; +def : T_RR_pat<A2_minu, int_hexagon_A2_minu>; + +// Shift and accumulate +def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>; +def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>; +def : T_RRI_pat <S2_asl_i_r_nac, int_hexagon_S2_asl_i_r_nac>; +def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>; +def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>; +def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>; + +def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>; +def : T_RRI_pat <S2_lsr_i_r_and, 
int_hexagon_S2_lsr_i_r_and>; +def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>; +def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>; +def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>; +def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>; +def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>; +def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>; + +def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>; +def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>; +def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>; +def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>; +def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>; +def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>; + +def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>; +def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>; +def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>; +def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>; +def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>; +def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>; +def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>; +def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>; + +def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>; +def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>; +def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>; +def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>; +def : T_RRR_pat <S2_asr_r_r_acc, int_hexagon_S2_asr_r_r_acc>; +def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>; +def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>; +def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>; + +def : T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>; +def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>; +def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>; +def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>; +def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>; +def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>; +def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>; +def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>; + +def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>; +def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>; +def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>; +def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>; +def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>; +def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>; +def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>; +def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>; + +def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>; +def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>; +def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>; +def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>; +def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>; +def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>; +def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>; +def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>; + +def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>; +def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>; +def : T_RRI_pat <S2_asl_i_r_nac, int_hexagon_S2_asl_i_r_nac>; +def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>; 
+def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>; +def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>; + +def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>; +def : T_RRI_pat <S2_lsr_i_r_and, int_hexagon_S2_lsr_i_r_and>; +def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>; +def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>; +def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>; +def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>; +def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>; +def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>; + +def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>; +def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>; +def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>; +def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>; +def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>; +def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>; + +def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>; +def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>; +def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>; +def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>; +def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>; +def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>; +def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>; +def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>; + +def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>; +def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>; +def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>; +def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>; +def : T_RRR_pat <S2_asr_r_r_acc, int_hexagon_S2_asr_r_r_acc>; +def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>; +def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>; +def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>; + +def : T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>; +def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>; +def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>; +def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>; +def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>; +def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>; +def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>; +def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>; + +def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>; +def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>; +def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>; +def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>; +def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>; +def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>; +def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>; +def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>; + +def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>; +def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>; +def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>; +def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>; +def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>; +def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>; +def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>; +def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>; + 
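+// Note on the accumulation suffixes used in the shift patterns above: _acc adds
+// the shifted value into the destination, _nac subtracts it, _and/_or fold it in
+// bitwise, and _xacc xors it in. A rough scalar sketch of a few of these in C
+// (function names are illustrative only, not part of this file):
+//   uint32_t asr_i_r_acc (uint32_t rx, int32_t  rs, unsigned u) { return rx + (rs >> u); }
+//   uint32_t lsr_i_r_nac (uint32_t rx, uint32_t rs, unsigned u) { return rx - (rs >> u); }
+//   uint32_t asl_i_r_xacc(uint32_t rx, uint32_t rs, unsigned u) { return rx ^ (rs << u); }
+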
+/******************************************************************** +* ALU32/ALU * +*********************************************************************/ +def : T_RR_pat<A2_add, int_hexagon_A2_add>; +def : T_RI_pat<A2_addi, int_hexagon_A2_addi>; +def : T_RR_pat<A2_sub, int_hexagon_A2_sub>; +def : T_IR_pat<A2_subri, int_hexagon_A2_subri>; +def : T_RR_pat<A2_and, int_hexagon_A2_and>; +def : T_RI_pat<A2_andir, int_hexagon_A2_andir>; +def : T_RR_pat<A2_or, int_hexagon_A2_or>; +def : T_RI_pat<A2_orir, int_hexagon_A2_orir>; +def : T_RR_pat<A2_xor, int_hexagon_A2_xor>; +def : T_RR_pat<A2_combinew, int_hexagon_A2_combinew>; + +// Assembler mapped from Rd32=not(Rs32) to Rd32=sub(#-1,Rs32) +def : Pat <(int_hexagon_A2_not (I32:$Rs)), + (A2_subri -1, IntRegs:$Rs)>; + +// Assembler mapped from Rd32=neg(Rs32) to Rd32=sub(#0,Rs32) +def : Pat <(int_hexagon_A2_neg IntRegs:$Rs), + (A2_subri 0, IntRegs:$Rs)>; + +// Transfer immediate +def : Pat <(int_hexagon_A2_tfril (I32:$Rs), u16_0ImmPred:$Is), + (A2_tfril IntRegs:$Rs, u16_0ImmPred:$Is)>; +def : Pat <(int_hexagon_A2_tfrih (I32:$Rs), u16_0ImmPred:$Is), + (A2_tfrih IntRegs:$Rs, u16_0ImmPred:$Is)>; + +// Transfer Register/immediate. +def : T_R_pat <A2_tfr, int_hexagon_A2_tfr>; +def : T_I_pat <A2_tfrsi, int_hexagon_A2_tfrsi>; +def : T_I_pat <A2_tfrpi, int_hexagon_A2_tfrpi>; + +// Assembler mapped from Rdd32=Rss32 to Rdd32=combine(Rss.H32,Rss.L32) +def : Pat<(int_hexagon_A2_tfrp DoubleRegs:$src), + (A2_combinew (HiReg DoubleRegs:$src), (LoReg DoubleRegs:$src))>; + +/******************************************************************** +* ALU32/PERM * +*********************************************************************/ +// Combine +def: T_RR_pat<A2_combine_hh, int_hexagon_A2_combine_hh>; +def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>; +def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>; +def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>; + +def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32ImmPred, s8ImmPred>; + +def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))), + (i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>; + +// Mux +def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32ImmPred>; +def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32ImmPred>; +def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32ImmPred, s8ImmPred>; + +// Shift halfword +def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>; +def : T_R_pat<A2_asrh, int_hexagon_A2_asrh>; +def : T_R_pat<A2_asrh, int_hexagon_SI_to_SXTHI_asrh>; + +// Sign/zero extend +def : T_R_pat<A2_sxth, int_hexagon_A2_sxth>; +def : T_R_pat<A2_sxtb, int_hexagon_A2_sxtb>; +def : T_R_pat<A2_zxth, int_hexagon_A2_zxth>; +def : T_R_pat<A2_zxtb, int_hexagon_A2_zxtb>; + +/******************************************************************** +* ALU32/PRED * +*********************************************************************/ +// Compare +def : T_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>; +def : T_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>; +def : T_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>; + +def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32ImmPred>; +def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32ImmPred>; +def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32ImmPred>; + +def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)), + (i32 (C2_cmpgti (I32:$src1), + (DEC_CONST_SIGNED s32ImmPred:$src2)))>; + +def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)), + (i32 (C2_cmpgtui (I32:$src1), + (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>; + +// The 
instruction, Pd=cmp.geu(Rs, #u8) -> Pd=cmp.eq(Rs,Rs) when #u8 == 0. +def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)), + (i32 (C2_cmpeq (I32:$src1), (I32:$src1)))>; + +def : Pat <(i32 (int_hexagon_C2_cmplt (I32:$src1), + (I32:$src2))), + (i32 (C2_cmpgt (I32:$src2), (I32:$src1)))>; + +def : Pat <(i32 (int_hexagon_C2_cmpltu (I32:$src1), + (I32:$src2))), + (i32 (C2_cmpgtu (I32:$src2), (I32:$src1)))>; + +/******************************************************************** +* ALU32/VH * +*********************************************************************/ +// Vector add, subtract, average halfwords +def: T_RR_pat<A2_svaddh, int_hexagon_A2_svaddh>; +def: T_RR_pat<A2_svaddhs, int_hexagon_A2_svaddhs>; +def: T_RR_pat<A2_svadduhs, int_hexagon_A2_svadduhs>; + +def: T_RR_pat<A2_svsubh, int_hexagon_A2_svsubh>; +def: T_RR_pat<A2_svsubhs, int_hexagon_A2_svsubhs>; +def: T_RR_pat<A2_svsubuhs, int_hexagon_A2_svsubuhs>; + +def: T_RR_pat<A2_svavgh, int_hexagon_A2_svavgh>; +def: T_RR_pat<A2_svavghs, int_hexagon_A2_svavghs>; +def: T_RR_pat<A2_svnavgh, int_hexagon_A2_svnavgh>; + +/******************************************************************** +* ALU64/ALU * +*********************************************************************/ +def: T_RR_pat<A2_addsat, int_hexagon_A2_addsat>; +def: T_RR_pat<A2_subsat, int_hexagon_A2_subsat>; +def: T_PP_pat<A2_addp, int_hexagon_A2_addp>; +def: T_PP_pat<A2_subp, int_hexagon_A2_subp>; + +def: T_PP_pat<A2_andp, int_hexagon_A2_andp>; +def: T_PP_pat<A2_orp, int_hexagon_A2_orp>; +def: T_PP_pat<A2_xorp, int_hexagon_A2_xorp>; + +def: T_PP_pat<C2_cmpeqp, int_hexagon_C2_cmpeqp>; +def: T_PP_pat<C2_cmpgtp, int_hexagon_C2_cmpgtp>; +def: T_PP_pat<C2_cmpgtup, int_hexagon_C2_cmpgtup>; + +def: T_PP_pat<S2_parityp, int_hexagon_S2_parityp>; +def: T_RR_pat<S2_packhl, int_hexagon_S2_packhl>; + +/******************************************************************** +* ALU64/VB * +*********************************************************************/ +// ALU64 - Vector add +def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddub>; +def : T_PP_pat <A2_vaddubs, int_hexagon_A2_vaddubs>; +def : T_PP_pat <A2_vaddh, int_hexagon_A2_vaddh>; +def : T_PP_pat <A2_vaddhs, int_hexagon_A2_vaddhs>; +def : T_PP_pat <A2_vadduhs, int_hexagon_A2_vadduhs>; +def : T_PP_pat <A2_vaddw, int_hexagon_A2_vaddw>; +def : T_PP_pat <A2_vaddws, int_hexagon_A2_vaddws>; + +// ALU64 - Vector average +def : T_PP_pat <A2_vavgub, int_hexagon_A2_vavgub>; +def : T_PP_pat <A2_vavgubr, int_hexagon_A2_vavgubr>; +def : T_PP_pat <A2_vavgh, int_hexagon_A2_vavgh>; +def : T_PP_pat <A2_vavghr, int_hexagon_A2_vavghr>; +def : T_PP_pat <A2_vavghcr, int_hexagon_A2_vavghcr>; +def : T_PP_pat <A2_vavguh, int_hexagon_A2_vavguh>; +def : T_PP_pat <A2_vavguhr, int_hexagon_A2_vavguhr>; + +def : T_PP_pat <A2_vavgw, int_hexagon_A2_vavgw>; +def : T_PP_pat <A2_vavgwr, int_hexagon_A2_vavgwr>; +def : T_PP_pat <A2_vavgwcr, int_hexagon_A2_vavgwcr>; +def : T_PP_pat <A2_vavguw, int_hexagon_A2_vavguw>; +def : T_PP_pat <A2_vavguwr, int_hexagon_A2_vavguwr>; + +// ALU64 - Vector negative average +def : T_PP_pat <A2_vnavgh, int_hexagon_A2_vnavgh>; +def : T_PP_pat <A2_vnavghr, int_hexagon_A2_vnavghr>; +def : T_PP_pat <A2_vnavghcr, int_hexagon_A2_vnavghcr>; +def : T_PP_pat <A2_vnavgw, int_hexagon_A2_vnavgw>; +def : T_PP_pat <A2_vnavgwr, int_hexagon_A2_vnavgwr>; +def : T_PP_pat <A2_vnavgwcr, int_hexagon_A2_vnavgwcr>; + +// ALU64 - Vector max +def : T_PP_pat <A2_vmaxh, int_hexagon_A2_vmaxh>; +def : T_PP_pat <A2_vmaxw, int_hexagon_A2_vmaxw>; +def : T_PP_pat <A2_vmaxub, 
int_hexagon_A2_vmaxub>; +def : T_PP_pat <A2_vmaxuh, int_hexagon_A2_vmaxuh>; +def : T_PP_pat <A2_vmaxuw, int_hexagon_A2_vmaxuw>; + +// ALU64 - Vector min +def : T_PP_pat <A2_vminh, int_hexagon_A2_vminh>; +def : T_PP_pat <A2_vminw, int_hexagon_A2_vminw>; +def : T_PP_pat <A2_vminub, int_hexagon_A2_vminub>; +def : T_PP_pat <A2_vminuh, int_hexagon_A2_vminuh>; +def : T_PP_pat <A2_vminuw, int_hexagon_A2_vminuw>; + +// ALU64 - Vector sub +def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubub>; +def : T_PP_pat <A2_vsububs, int_hexagon_A2_vsububs>; +def : T_PP_pat <A2_vsubh, int_hexagon_A2_vsubh>; +def : T_PP_pat <A2_vsubhs, int_hexagon_A2_vsubhs>; +def : T_PP_pat <A2_vsubuhs, int_hexagon_A2_vsubuhs>; +def : T_PP_pat <A2_vsubw, int_hexagon_A2_vsubw>; +def : T_PP_pat <A2_vsubws, int_hexagon_A2_vsubws>; + +// ALU64 - Vector compare bytes +def : T_PP_pat <A2_vcmpbeq, int_hexagon_A2_vcmpbeq>; +def : T_PP_pat <A4_vcmpbgt, int_hexagon_A4_vcmpbgt>; +def : T_PP_pat <A2_vcmpbgtu, int_hexagon_A2_vcmpbgtu>; + +// ALU64 - Vector compare halfwords +def : T_PP_pat <A2_vcmpheq, int_hexagon_A2_vcmpheq>; +def : T_PP_pat <A2_vcmphgt, int_hexagon_A2_vcmphgt>; +def : T_PP_pat <A2_vcmphgtu, int_hexagon_A2_vcmphgtu>; + +// ALU64 - Vector compare words +def : T_PP_pat <A2_vcmpweq, int_hexagon_A2_vcmpweq>; +def : T_PP_pat <A2_vcmpwgt, int_hexagon_A2_vcmpwgt>; +def : T_PP_pat <A2_vcmpwgtu, int_hexagon_A2_vcmpwgtu>; + +// ALU64 / VB / Vector mux. +def : Pat<(int_hexagon_C2_vmux PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt), + (C2_vmux PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt)>; + +// MPY - Multiply and use full result +// Rdd = mpy[u](Rs, Rt) +def : T_RR_pat <M2_dpmpyss_s0, int_hexagon_M2_dpmpyss_s0>; +def : T_RR_pat <M2_dpmpyuu_s0, int_hexagon_M2_dpmpyuu_s0>; + +// Complex multiply real or imaginary +def : T_RR_pat <M2_cmpyi_s0, int_hexagon_M2_cmpyi_s0>; +def : T_RR_pat <M2_cmpyr_s0, int_hexagon_M2_cmpyr_s0>; + +// Complex multiply +def : T_RR_pat <M2_cmpys_s0, int_hexagon_M2_cmpys_s0>; +def : T_RR_pat <M2_cmpysc_s0, int_hexagon_M2_cmpysc_s0>; +def : T_RR_pat <M2_cmpys_s1, int_hexagon_M2_cmpys_s1>; +def : T_RR_pat <M2_cmpysc_s1, int_hexagon_M2_cmpysc_s1>; + +// Vector multiply halfwords +// Rdd=vmpyh(Rs,Rt)[:<<1]:sat +def : T_RR_pat <M2_vmpy2s_s0, int_hexagon_M2_vmpy2s_s0>; +def : T_RR_pat <M2_vmpy2s_s1, int_hexagon_M2_vmpy2s_s1>; + +// Rxx[+-]= mpy[u](Rs,Rt) +def : T_PRR_pat <M2_dpmpyss_acc_s0, int_hexagon_M2_dpmpyss_acc_s0>; +def : T_PRR_pat <M2_dpmpyss_nac_s0, int_hexagon_M2_dpmpyss_nac_s0>; +def : T_PRR_pat <M2_dpmpyuu_acc_s0, int_hexagon_M2_dpmpyuu_acc_s0>; +def : T_PRR_pat <M2_dpmpyuu_nac_s0, int_hexagon_M2_dpmpyuu_nac_s0>; + +// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat +def : T_PRR_pat <M2_cmacs_s0, int_hexagon_M2_cmacs_s0>; +def : T_PRR_pat <M2_cnacs_s0, int_hexagon_M2_cnacs_s0>; +def : T_PRR_pat <M2_cmacs_s1, int_hexagon_M2_cmacs_s1>; +def : T_PRR_pat <M2_cnacs_s1, int_hexagon_M2_cnacs_s1>; + +// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat +def : T_PRR_pat <M2_cmacsc_s0, int_hexagon_M2_cmacsc_s0>; +def : T_PRR_pat <M2_cnacsc_s0, int_hexagon_M2_cnacsc_s0>; +def : T_PRR_pat <M2_cmacsc_s1, int_hexagon_M2_cmacsc_s1>; +def : T_PRR_pat <M2_cnacsc_s1, int_hexagon_M2_cnacsc_s1>; + +// Rxx+=cmpy[ir](Rs,Rt) +def : T_PRR_pat <M2_cmaci_s0, int_hexagon_M2_cmaci_s0>; +def : T_PRR_pat <M2_cmacr_s0, int_hexagon_M2_cmacr_s0>; + +// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat] +def : T_PRR_pat <M2_vmac2, int_hexagon_M2_vmac2>; +def : T_PRR_pat <M2_vmac2s_s0, int_hexagon_M2_vmac2s_s0>; +def : T_PRR_pat <M2_vmac2s_s1, int_hexagon_M2_vmac2s_s1>; + 
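+// Usage sketch for the multiply-accumulate intrinsics above, assuming Clang's
+// __builtin_HEXAGON_* builtin naming (illustrative C, not part of this file):
+//   int a, b, c, d;            // 32-bit multiplicands
+//   long long acc = 0;
+//   acc = __builtin_HEXAGON_M2_dpmpyss_acc_s0(acc, a, b); // acc += (long long)a * b
+//   acc = __builtin_HEXAGON_M2_dpmpyss_nac_s0(acc, c, d); // acc -= (long long)c * d
+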
+/******************************************************************** +* CR * +*********************************************************************/ +class qi_CRInst_qi_pat<InstHexagon Inst, Intrinsic IntID> : + Pat<(i32 (IntID IntRegs:$Rs)), + (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs))))>; + +class qi_CRInst_qiqi_pat<InstHexagon Inst, Intrinsic IntID> : + Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt)), + (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs), (C2_tfrrp IntRegs:$Rt))))>; + +def: qi_CRInst_qi_pat<C2_not, int_hexagon_C2_not>; +def: qi_CRInst_qi_pat<C2_all8, int_hexagon_C2_all8>; +def: qi_CRInst_qi_pat<C2_any8, int_hexagon_C2_any8>; + +def: qi_CRInst_qiqi_pat<C2_and, int_hexagon_C2_and>; +def: qi_CRInst_qiqi_pat<C2_andn, int_hexagon_C2_andn>; +def: qi_CRInst_qiqi_pat<C2_or, int_hexagon_C2_or>; +def: qi_CRInst_qiqi_pat<C2_orn, int_hexagon_C2_orn>; +def: qi_CRInst_qiqi_pat<C2_xor, int_hexagon_C2_xor>; + +// Assembler mapped from Pd4=Ps4 to Pd4=or(Ps4,Ps4) +def : Pat<(int_hexagon_C2_pxfer_map PredRegs:$src), + (C2_pxfer_map PredRegs:$src)>; + +// Multiply 32x32 and use lower result +def : T_RRI_pat <M2_macsip, int_hexagon_M2_macsip>; +def : T_RRI_pat <M2_macsin, int_hexagon_M2_macsin>; +def : T_RRR_pat <M2_maci, int_hexagon_M2_maci>; + +// Subtract and accumulate +def : T_RRR_pat <M2_subacc, int_hexagon_M2_subacc>; + +// Add and accumulate +def : T_RRR_pat <M2_acci, int_hexagon_M2_acci>; +def : T_RRR_pat <M2_nacci, int_hexagon_M2_nacci>; +def : T_RRI_pat <M2_accii, int_hexagon_M2_accii>; +def : T_RRI_pat <M2_naccii, int_hexagon_M2_naccii>; + +// XOR and XOR with destination +def : T_RRR_pat <M2_xor_xacc, int_hexagon_M2_xor_xacc>; + +class MType_R32_pat <Intrinsic IntID, InstHexagon OutputInst> : + Pat <(IntID IntRegs:$src1, IntRegs:$src2), + (OutputInst IntRegs:$src1, IntRegs:$src2)>; + +// Vector dual multiply with round and pack + +def : Pat <(int_hexagon_M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2), + (M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2)>; + +def : Pat <(int_hexagon_M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2), + (M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2)>; + +// Vector multiply halfwords with round and pack + +def : MType_R32_pat <int_hexagon_M2_vmpy2s_s0pack, M2_vmpy2s_s0pack>; +def : MType_R32_pat <int_hexagon_M2_vmpy2s_s1pack, M2_vmpy2s_s1pack>; + +// Multiply and use lower result +def : MType_R32_pat <int_hexagon_M2_mpyi, M2_mpyi>; +def : T_RI_pat<M2_mpysmi, int_hexagon_M2_mpysmi>; + +// Assembler mapped from Rd32=mpyui(Rs32,Rt32) to Rd32=mpyi(Rs32,Rt32) +def : MType_R32_pat <int_hexagon_M2_mpyui, M2_mpyi>; + +// Multiply and use upper result +def : MType_R32_pat <int_hexagon_M2_mpy_up, M2_mpy_up>; +def : MType_R32_pat <int_hexagon_M2_mpyu_up, M2_mpyu_up>; +def : MType_R32_pat <int_hexagon_M2_hmmpyh_rs1, M2_hmmpyh_rs1>; +def : MType_R32_pat <int_hexagon_M2_hmmpyl_rs1, M2_hmmpyl_rs1>; +def : MType_R32_pat <int_hexagon_M2_dpmpyss_rnd_s0, M2_dpmpyss_rnd_s0>; + +// Complex multiply with round and pack +// Rxx32+=cmpy(Rs32,[*]Rt32:<<1]:rnd:sat +def : MType_R32_pat <int_hexagon_M2_cmpyrs_s0, M2_cmpyrs_s0>; +def : MType_R32_pat <int_hexagon_M2_cmpyrs_s1, M2_cmpyrs_s1>; +def : MType_R32_pat <int_hexagon_M2_cmpyrsc_s0, M2_cmpyrsc_s0>; +def : MType_R32_pat <int_hexagon_M2_cmpyrsc_s1, M2_cmpyrsc_s1>; + +/******************************************************************** +* STYPE/ALU * +*********************************************************************/ +def : T_P_pat <A2_absp, int_hexagon_A2_absp>; +def : T_P_pat <A2_negp, int_hexagon_A2_negp>; +def : 
T_P_pat <A2_notp, int_hexagon_A2_notp>; + +/******************************************************************** +* STYPE/BIT * +*********************************************************************/ + +// Count leading/trailing +def: T_R_pat<S2_cl0, int_hexagon_S2_cl0>; +def: T_P_pat<S2_cl0p, int_hexagon_S2_cl0p>; +def: T_R_pat<S2_cl1, int_hexagon_S2_cl1>; +def: T_P_pat<S2_cl1p, int_hexagon_S2_cl1p>; +def: T_R_pat<S2_clb, int_hexagon_S2_clb>; +def: T_P_pat<S2_clbp, int_hexagon_S2_clbp>; +def: T_R_pat<S2_clbnorm, int_hexagon_S2_clbnorm>; +def: T_R_pat<S2_ct0, int_hexagon_S2_ct0>; +def: T_R_pat<S2_ct1, int_hexagon_S2_ct1>; + +// Compare bit mask +def: T_RR_pat<C2_bitsclr, int_hexagon_C2_bitsclr>; +def: T_RI_pat<C2_bitsclri, int_hexagon_C2_bitsclri>; +def: T_RR_pat<C2_bitsset, int_hexagon_C2_bitsset>; + +// Vector shuffle +def : T_PP_pat <S2_shuffeb, int_hexagon_S2_shuffeb>; +def : T_PP_pat <S2_shuffob, int_hexagon_S2_shuffob>; +def : T_PP_pat <S2_shuffeh, int_hexagon_S2_shuffeh>; +def : T_PP_pat <S2_shuffoh, int_hexagon_S2_shuffoh>; + +// Vector truncate +def : T_PP_pat <S2_vtrunewh, int_hexagon_S2_vtrunewh>; +def : T_PP_pat <S2_vtrunowh, int_hexagon_S2_vtrunowh>; + +// Linear feedback-shift Iteration. +def : T_PP_pat <S2_lfsp, int_hexagon_S2_lfsp>; + +// Vector splice +def : T_PPQ_pat <S2_vsplicerb, int_hexagon_S2_vsplicerb>; +def : T_PPI_pat <S2_vspliceib, int_hexagon_S2_vspliceib>; + +// Shift by immediate and add +def : T_RRI_pat<S2_addasl_rrri, int_hexagon_S2_addasl_rrri>; + +// Extract bitfield +def : T_PII_pat<S2_extractup, int_hexagon_S2_extractup>; +def : T_RII_pat<S2_extractu, int_hexagon_S2_extractu>; +def : T_RP_pat <S2_extractu_rp, int_hexagon_S2_extractu_rp>; +def : T_PP_pat <S2_extractup_rp, int_hexagon_S2_extractup_rp>; + +// Insert bitfield +def : Pat <(int_hexagon_S2_insert_rp IntRegs:$src1, IntRegs:$src2, + DoubleRegs:$src3), + (S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3)>; + +def : Pat<(i64 (int_hexagon_S2_insertp_rp (I64:$src1), + (I64:$src2), (I64:$src3))), + (i64 (S2_insertp_rp (I64:$src1), (I64:$src2), + (I64:$src3)))>; + +def : Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2, + u5ImmPred:$src3, u5ImmPred:$src4), + (S2_insert IntRegs:$src1, IntRegs:$src2, + u5ImmPred:$src3, u5ImmPred:$src4)>; + +def : Pat<(i64 (int_hexagon_S2_insertp (I64:$src1), + (I64:$src2), u6ImmPred:$src3, u6ImmPred:$src4)), + (i64 (S2_insertp (I64:$src1), (I64:$src2), + u6ImmPred:$src3, u6ImmPred:$src4))>; + + +// Innterleave/deinterleave +def : T_P_pat <S2_interleave, int_hexagon_S2_interleave>; +def : T_P_pat <S2_deinterleave, int_hexagon_S2_deinterleave>; + +// Set/Clear/Toggle Bit +def: T_RI_pat<S2_setbit_i, int_hexagon_S2_setbit_i>; +def: T_RI_pat<S2_clrbit_i, int_hexagon_S2_clrbit_i>; +def: T_RI_pat<S2_togglebit_i, int_hexagon_S2_togglebit_i>; + +def: T_RR_pat<S2_setbit_r, int_hexagon_S2_setbit_r>; +def: T_RR_pat<S2_clrbit_r, int_hexagon_S2_clrbit_r>; +def: T_RR_pat<S2_togglebit_r, int_hexagon_S2_togglebit_r>; + +// Test Bit +def: T_RI_pat<S2_tstbit_i, int_hexagon_S2_tstbit_i>; +def: T_RR_pat<S2_tstbit_r, int_hexagon_S2_tstbit_r>; + +/******************************************************************** +* STYPE/COMPLEX * +*********************************************************************/ +// Vector Complex conjugate +def : T_P_pat <A2_vconj, int_hexagon_A2_vconj>; + +// Vector Complex rotate +def : T_PR_pat <S2_vcrotate, int_hexagon_S2_vcrotate>; + +/******************************************************************** +* STYPE/PERM * 
+*********************************************************************/ + +// Vector saturate without pack +def : T_P_pat <S2_vsathb_nopack, int_hexagon_S2_vsathb_nopack>; +def : T_P_pat <S2_vsathub_nopack, int_hexagon_S2_vsathub_nopack>; +def : T_P_pat <S2_vsatwh_nopack, int_hexagon_S2_vsatwh_nopack>; +def : T_P_pat <S2_vsatwuh_nopack, int_hexagon_S2_vsatwuh_nopack>; + +/******************************************************************** +* STYPE/PRED * +*********************************************************************/ + +// Predicate transfer +def: Pat<(i32 (int_hexagon_C2_tfrpr (I32:$Rs))), + (i32 (C2_tfrpr (C2_tfrrp (I32:$Rs))))>; +def: Pat<(i32 (int_hexagon_C2_tfrrp (I32:$Rs))), + (i32 (C2_tfrpr (C2_tfrrp (I32:$Rs))))>; + +// Mask generate from predicate +def: Pat<(i64 (int_hexagon_C2_mask (I32:$Rs))), + (i64 (C2_mask (C2_tfrrp (I32:$Rs))))>; + +// Viterbi pack even and odd predicate bits +def: Pat<(i32 (int_hexagon_C2_vitpack (I32:$Rs), (I32:$Rt))), + (i32 (C2_vitpack (C2_tfrrp (I32:$Rs)), + (C2_tfrrp (I32:$Rt))))>; + +/******************************************************************** +* STYPE/SHIFT * +*********************************************************************/ + +def : T_PI_pat <S2_asr_i_p, int_hexagon_S2_asr_i_p>; +def : T_PI_pat <S2_lsr_i_p, int_hexagon_S2_lsr_i_p>; +def : T_PI_pat <S2_asl_i_p, int_hexagon_S2_asl_i_p>; + +def : T_PR_pat <S2_asr_r_p, int_hexagon_S2_asr_r_p>; +def : T_PR_pat <S2_lsr_r_p, int_hexagon_S2_lsr_r_p>; +def : T_PR_pat <S2_asl_r_p, int_hexagon_S2_asl_r_p>; +def : T_PR_pat <S2_lsl_r_p, int_hexagon_S2_lsl_r_p>; + +def : T_RR_pat <S2_asr_r_r, int_hexagon_S2_asr_r_r>; +def : T_RR_pat <S2_lsr_r_r, int_hexagon_S2_lsr_r_r>; +def : T_RR_pat <S2_asl_r_r, int_hexagon_S2_asl_r_r>; +def : T_RR_pat <S2_lsl_r_r, int_hexagon_S2_lsl_r_r>; + +def : T_RR_pat <S2_asr_r_r_sat, int_hexagon_S2_asr_r_r_sat>; +def : T_RR_pat <S2_asl_r_r_sat, int_hexagon_S2_asl_r_r_sat>; + +def : T_R_pat <S2_vsxtbh, int_hexagon_S2_vsxtbh>; +def : T_R_pat <S2_vzxtbh, int_hexagon_S2_vzxtbh>; +def : T_R_pat <S2_vsxthw, int_hexagon_S2_vsxthw>; +def : T_R_pat <S2_vzxthw, int_hexagon_S2_vzxthw>; +def : T_R_pat <S2_vsplatrh, int_hexagon_S2_vsplatrh>; +def : T_R_pat <A2_sxtw, int_hexagon_A2_sxtw>; + +// Vector saturate and pack +def : T_R_pat <S2_svsathb, int_hexagon_S2_svsathb>; +def : T_R_pat <S2_svsathub, int_hexagon_S2_svsathub>; +def : T_P_pat <S2_vsathub, int_hexagon_S2_vsathub>; +def : T_P_pat <S2_vsatwh, int_hexagon_S2_vsatwh>; +def : T_P_pat <S2_vsatwuh, int_hexagon_S2_vsatwuh>; +def : T_P_pat <S2_vsathb, int_hexagon_S2_vsathb>; + +def : T_P_pat <S2_vtrunohb, int_hexagon_S2_vtrunohb>; +def : T_P_pat <S2_vtrunehb, int_hexagon_S2_vtrunehb>; +def : T_P_pat <S2_vrndpackwh, int_hexagon_S2_vrndpackwh>; +def : T_P_pat <S2_vrndpackwhs, int_hexagon_S2_vrndpackwhs>; +def : T_R_pat <S2_brev, int_hexagon_S2_brev>; +def : T_R_pat <S2_vsplatrb, int_hexagon_S2_vsplatrb>; + +def : T_R_pat <A2_abs, int_hexagon_A2_abs>; +def : T_R_pat <A2_abssat, int_hexagon_A2_abssat>; +def : T_R_pat <A2_negsat, int_hexagon_A2_negsat>; + +def : T_R_pat <A2_swiz, int_hexagon_A2_swiz>; + +def : T_P_pat <A2_sat, int_hexagon_A2_sat>; +def : T_R_pat <A2_sath, int_hexagon_A2_sath>; +def : T_R_pat <A2_satuh, int_hexagon_A2_satuh>; +def : T_R_pat <A2_satub, int_hexagon_A2_satub>; +def : T_R_pat <A2_satb, int_hexagon_A2_satb>; + +// Vector arithmetic shift right by immediate with truncate and pack. 
+def : T_PI_pat<S2_asr_i_svw_trun, int_hexagon_S2_asr_i_svw_trun>; + +def : T_RI_pat <S2_asr_i_r, int_hexagon_S2_asr_i_r>; +def : T_RI_pat <S2_lsr_i_r, int_hexagon_S2_lsr_i_r>; +def : T_RI_pat <S2_asl_i_r, int_hexagon_S2_asl_i_r>; +def : T_RI_pat <S2_asr_i_r_rnd, int_hexagon_S2_asr_i_r_rnd>; +def : T_RI_pat <S2_asr_i_r_rnd_goodsyntax, + int_hexagon_S2_asr_i_r_rnd_goodsyntax>; + +// Shift left by immediate with saturation. +def : T_RI_pat <S2_asl_i_r_sat, int_hexagon_S2_asl_i_r_sat>; + +//===----------------------------------------------------------------------===// +// Template 'def pat' to map tableidx[bhwd] intrinsics to :raw instructions. +//===----------------------------------------------------------------------===// +class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst, + SDNodeXForm XformImm> + : Pat <(IntID IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3, u5ImmPred:$src4), + (OutputInst IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3, + (XformImm u5ImmPred:$src4))>; + + +// Table Index : Extract and insert bits. +// Map to the real hardware instructions after subtracting appropriate +// values from the 4th input operand. Please note that subtraction is not +// needed for int_hexagon_S2_tableidxb_goodsyntax. + +def : Pat <(int_hexagon_S2_tableidxb_goodsyntax IntRegs:$src1, IntRegs:$src2, + u4ImmPred:$src3, u5ImmPred:$src4), + (S2_tableidxb IntRegs:$src1, IntRegs:$src2, + u4ImmPred:$src3, u5ImmPred:$src4)>; + +def : S2op_tableidx_pat <int_hexagon_S2_tableidxh_goodsyntax, S2_tableidxh, + DEC_CONST_SIGNED>; +def : S2op_tableidx_pat <int_hexagon_S2_tableidxw_goodsyntax, S2_tableidxw, + DEC2_CONST_SIGNED>; +def : S2op_tableidx_pat <int_hexagon_S2_tableidxd_goodsyntax, S2_tableidxd, + DEC3_CONST_SIGNED>; + +/******************************************************************** +* STYPE/VH * +*********************************************************************/ + +// Vector absolute value halfwords with and without saturation +// Rdd64=vabsh(Rss64)[:sat] +def : T_P_pat <A2_vabsh, int_hexagon_A2_vabsh>; +def : T_P_pat <A2_vabshsat, int_hexagon_A2_vabshsat>; + +// Vector shift halfwords by immediate +// Rdd64=[vaslh/vasrh/vlsrh](Rss64,u4) +def : T_PI_pat <S2_asr_i_vh, int_hexagon_S2_asr_i_vh>; +def : T_PI_pat <S2_lsr_i_vh, int_hexagon_S2_lsr_i_vh>; +def : T_PI_pat <S2_asl_i_vh, int_hexagon_S2_asl_i_vh>; + +// Vector shift halfwords by register +// Rdd64=[vaslw/vasrw/vlslw/vlsrw](Rss64,Rt32) +def : T_PR_pat <S2_asr_r_vh, int_hexagon_S2_asr_r_vh>; +def : T_PR_pat <S2_lsr_r_vh, int_hexagon_S2_lsr_r_vh>; +def : T_PR_pat <S2_asl_r_vh, int_hexagon_S2_asl_r_vh>; +def : T_PR_pat <S2_lsl_r_vh, int_hexagon_S2_lsl_r_vh>; + +/******************************************************************** +* STYPE/VW * +*********************************************************************/ + +// Vector absolute value words with and without saturation +def : T_P_pat <A2_vabsw, int_hexagon_A2_vabsw>; +def : T_P_pat <A2_vabswsat, int_hexagon_A2_vabswsat>; + +// Vector shift words by immediate. +// Rdd64=[vasrw/vlsrw|vaslw](Rss64,u5) +def : T_PI_pat <S2_asr_i_vw, int_hexagon_S2_asr_i_vw>; +def : T_PI_pat <S2_lsr_i_vw, int_hexagon_S2_lsr_i_vw>; +def : T_PI_pat <S2_asl_i_vw, int_hexagon_S2_asl_i_vw>; + +// Vector shift words by register. 
+// Rdd64=[vasrw/vlsrw|vaslw|vlslw](Rss64,Rt32) +def : T_PR_pat <S2_asr_r_vw, int_hexagon_S2_asr_r_vw>; +def : T_PR_pat <S2_lsr_r_vw, int_hexagon_S2_lsr_r_vw>; +def : T_PR_pat <S2_asl_r_vw, int_hexagon_S2_asl_r_vw>; +def : T_PR_pat <S2_lsl_r_vw, int_hexagon_S2_lsl_r_vw>; + +// Vector shift words with truncate and pack + +def : T_PR_pat <S2_asr_r_svw_trun, int_hexagon_S2_asr_r_svw_trun>; + +def : T_R_pat<L2_loadw_locked, int_hexagon_L2_loadw_locked>; +def : T_R_pat<L4_loadd_locked, int_hexagon_L4_loadd_locked>; + +def: Pat<(i32 (int_hexagon_S2_storew_locked (I32:$Rs), (I32:$Rt))), + (i32 (C2_tfrpr (S2_storew_locked (I32:$Rs), (I32:$Rt))))>; +def: Pat<(i32 (int_hexagon_S4_stored_locked (I32:$Rs), (I64:$Rt))), + (i32 (C2_tfrpr (S4_stored_locked (I32:$Rs), (I64:$Rt))))>; + +/******************************************************************** +* ST +*********************************************************************/ + +class T_stb_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Val> + : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru), + (MI I32:$Rs, Val:$Rt, I32:$Ru)>; + +def : T_stb_pat <S2_storerh_pbr_pseudo, int_hexagon_brev_sth, I32>; +def : T_stb_pat <S2_storerb_pbr_pseudo, int_hexagon_brev_stb, I32>; +def : T_stb_pat <S2_storeri_pbr_pseudo, int_hexagon_brev_stw, I32>; +def : T_stb_pat <S2_storerf_pbr_pseudo, int_hexagon_brev_sthhi, I32>; +def : T_stb_pat <S2_storerd_pbr_pseudo, int_hexagon_brev_std, I64>; + +class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val> + : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s), + (MI I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s)>; + +def: T_stc_pat<S2_storerb_pci_pseudo, int_hexagon_circ_stb, s4_0ImmPred, I32>; +def: T_stc_pat<S2_storerh_pci_pseudo, int_hexagon_circ_sth, s4_1ImmPred, I32>; +def: T_stc_pat<S2_storeri_pci_pseudo, int_hexagon_circ_stw, s4_2ImmPred, I32>; +def: T_stc_pat<S2_storerd_pci_pseudo, int_hexagon_circ_std, s4_3ImmPred, I64>; +def: T_stc_pat<S2_storerf_pci_pseudo, int_hexagon_circ_sthhi, s4_1ImmPred, I32>; + +include "HexagonIntrinsicsV3.td" +include "HexagonIntrinsicsV4.td" +include "HexagonIntrinsicsV5.td" +include "HexagonIntrinsicsV60.td" + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td new file mode 100644 index 0000000..4c28b28 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td @@ -0,0 +1,40 @@ +//===-- HexagonIntrinsicsDerived.td - Derived intrinsics ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Multiply 64-bit and use lower result +// +// Optimized with intrinisics accumulates +// +def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2), + (i64 + (A2_combinew + (M2_maci + (M2_maci + (i32 + (EXTRACT_SUBREG + (i64 + (M2_dpmpyuu_s0 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), + subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), + subreg_loreg)))), + subreg_hireg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg))), + (i32 + (EXTRACT_SUBREG + (i64 + (M2_dpmpyuu_s0 + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), + subreg_loreg)))), subreg_loreg))))>; + + + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td new file mode 100644 index 0000000..6152cb0 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td @@ -0,0 +1,27 @@ +//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format. +// +//===----------------------------------------------------------------------===// + +// Vector reduce complex multiply real or imaginary +def : T_PR_pat <M2_vrcmpys_s1, int_hexagon_M2_vrcmpys_s1>; +def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>; +def : T_PR_pat <M2_vrcmpys_s1rp, int_hexagon_M2_vrcmpys_s1rp>; + +// Vector reduce add unsigned halfwords +def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>; + +def: T_RP_pat<A2_addsp, int_hexagon_A2_addsp>; +def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>; +def: T_PP_pat<A2_minp, int_hexagon_A2_minp>; +def: T_PP_pat<A2_minup, int_hexagon_A2_minup>; +def: T_PP_pat<A2_maxp, int_hexagon_A2_maxp>; +def: T_PP_pat<A2_maxup, int_hexagon_A2_maxup>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td new file mode 100644 index 0000000..c80a188 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td @@ -0,0 +1,318 @@ +//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is populated based on the following specs: +// Hexagon V4 Architecture Extensions +// Application-Level Specification +// 80-V9418-12 Rev. 
A +// June 15, 2010 + +// Vector reduce multiply word by signed half (32x16) +//Rdd=vrmpyweh(Rss,Rtt)[:<<1] +def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>; +def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>; + +//Rdd=vrmpywoh(Rss,Rtt)[:<<1] +def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>; +def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>; + +//Rdd+=vrmpyweh(Rss,Rtt)[:<<1] +def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>; +def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>; + +//Rdd=vrmpywoh(Rss,Rtt)[:<<1] +def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>; +def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>; + +// Vector multiply halfwords, signed by unsigned +// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat +def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>; +def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>; + +// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat +def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>; +def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>; + +// Vector polynomial multiply halfwords +// Rdd=vpmpyh(Rs,Rt) +def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>; +// Rxx[^]=vpmpyh(Rs,Rt) +def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>; + +// Polynomial multiply words +// Rdd=pmpyw(Rs,Rt) +def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>; +// Rxx^=pmpyw(Rs,Rt) +def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>; + +//Rxx^=asr(Rss,Rt) +def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>; +//Rxx^=asl(Rss,Rt) +def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>; +//Rxx^=lsr(Rss,Rt) +def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>; +//Rxx^=lsl(Rss,Rt) +def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>; + +// Multiply and use upper result +def : MType_R32_pat <int_hexagon_M2_mpysu_up, M2_mpysu_up>; +def : MType_R32_pat <int_hexagon_M2_mpy_up_s1, M2_mpy_up_s1>; +def : MType_R32_pat <int_hexagon_M2_hmmpyh_s1, M2_hmmpyh_s1>; +def : MType_R32_pat <int_hexagon_M2_hmmpyl_s1, M2_hmmpyl_s1>; +def : MType_R32_pat <int_hexagon_M2_mpy_up_s1_sat, M2_mpy_up_s1_sat>; + +// Vector reduce add unsigned halfwords +def : Pat <(int_hexagon_M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2), + (M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2)>; + +def : T_P_pat <S2_brevp, int_hexagon_S2_brevp>; + +def: T_P_pat <S2_ct0p, int_hexagon_S2_ct0p>; +def: T_P_pat <S2_ct1p, int_hexagon_S2_ct1p>; +def: T_RR_pat<C4_nbitsset, int_hexagon_C4_nbitsset>; +def: T_RR_pat<C4_nbitsclr, int_hexagon_C4_nbitsclr>; +def: T_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>; + + +class vcmpImm_pat <InstHexagon MI, Intrinsic IntID, PatLeaf immPred> : + Pat <(IntID (i64 DoubleRegs:$src1), immPred:$src2), + (MI (i64 DoubleRegs:$src1), immPred:$src2)>; + +def : vcmpImm_pat <A4_vcmpbeqi, int_hexagon_A4_vcmpbeqi, u8ImmPred>; +def : vcmpImm_pat <A4_vcmpbgti, int_hexagon_A4_vcmpbgti, s8ImmPred>; +def : vcmpImm_pat <A4_vcmpbgtui, int_hexagon_A4_vcmpbgtui, u7ImmPred>; + +def : vcmpImm_pat <A4_vcmpheqi, int_hexagon_A4_vcmpheqi, s8ImmPred>; +def : vcmpImm_pat <A4_vcmphgti, int_hexagon_A4_vcmphgti, s8ImmPred>; +def : vcmpImm_pat <A4_vcmphgtui, int_hexagon_A4_vcmphgtui, u7ImmPred>; + +def : vcmpImm_pat <A4_vcmpweqi, int_hexagon_A4_vcmpweqi, s8ImmPred>; +def : vcmpImm_pat <A4_vcmpwgti, int_hexagon_A4_vcmpwgti, s8ImmPred>; +def : vcmpImm_pat <A4_vcmpwgtui, int_hexagon_A4_vcmpwgtui, u7ImmPred>; + +def : T_PP_pat<A4_vcmpbeq_any, int_hexagon_A4_vcmpbeq_any>; + +def : T_RR_pat<A4_cmpbeq, 
int_hexagon_A4_cmpbeq>; +def : T_RR_pat<A4_cmpbgt, int_hexagon_A4_cmpbgt>; +def : T_RR_pat<A4_cmpbgtu, int_hexagon_A4_cmpbgtu>; +def : T_RR_pat<A4_cmpheq, int_hexagon_A4_cmpheq>; +def : T_RR_pat<A4_cmphgt, int_hexagon_A4_cmphgt>; +def : T_RR_pat<A4_cmphgtu, int_hexagon_A4_cmphgtu>; + +def : T_RI_pat<A4_cmpbeqi, int_hexagon_A4_cmpbeqi>; +def : T_RI_pat<A4_cmpbgti, int_hexagon_A4_cmpbgti>; +def : T_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>; + +def : T_RI_pat<A4_cmpheqi, int_hexagon_A4_cmpheqi>; +def : T_RI_pat<A4_cmphgti, int_hexagon_A4_cmphgti>; +def : T_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>; + +def : T_RP_pat <A4_boundscheck, int_hexagon_A4_boundscheck>; + +def : T_PR_pat<A4_tlbmatch, int_hexagon_A4_tlbmatch>; + +def : Pat <(int_hexagon_M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + (M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def : T_IRR_pat <M4_mpyrr_addi, int_hexagon_M4_mpyrr_addi>; +def : T_IRI_pat <M4_mpyri_addi, int_hexagon_M4_mpyri_addi>; +def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>; +def : T_RRI_pat <M4_mpyri_addr, int_hexagon_M4_mpyri_addr>; +// Multiply 32x32 and use upper result +def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>; +def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>; + +// Complex multiply 32x16 +def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>; +def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>; + +def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>; +def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>; + +def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>; +def : T_PP_pat<A4_ornp, int_hexagon_A4_ornp>; + +// Complex add/sub halfwords/words +def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>; +def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>; +def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>; +def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>; + +def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>; +def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>; + +// Extract bitfield +def : T_PP_pat <S4_extractp_rp, int_hexagon_S4_extractp_rp>; +def : T_RP_pat <S4_extract_rp, int_hexagon_S4_extract_rp>; +def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>; +def : T_RII_pat <S4_extract, int_hexagon_S4_extract>; + +// Vector conditional negate +// Rdd=vcnegh(Rss,Rt) +def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>; + +// Shift an immediate left by register amount +def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>; + +// Vector reduce maximum halfwords +def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>; +def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>; + +// Vector reduce maximum words +def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>; +def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>; + +// Vector reduce minimum halfwords +def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>; +def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>; + +// Vector reduce minimum words +def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>; +def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>; + +// Rotate and reduce bytes +def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, + u2ImmPred:$src3), + (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2ImmPred:$src3)>; + +// Rotate and reduce bytes with accumulation +// Rxx+=vrcrotate(Rss,Rt,#u2) +def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3, u2ImmPred:$src4), + (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3, 
u2ImmPred:$src4)>; + +// Vector conditional negate +def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>; + +// Logical xor with xor accumulation +def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>; + +// ALU64 - Vector min/max byte +def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>; +def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>; + +// Shift and add/sub/and/or +def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>; +def : T_IRI_pat <S4_ori_asl_ri, int_hexagon_S4_ori_asl_ri>; +def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>; +def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>; +def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>; +def : T_IRI_pat <S4_ori_lsr_ri, int_hexagon_S4_ori_lsr_ri>; +def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>; +def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>; + +// Split bitfield +def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>; +def : T_RR_pat <A4_bitsplit, int_hexagon_A4_bitsplit>; + +def: T_RR_pat<S4_parity, int_hexagon_S4_parity>; + +def: T_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>; +def: T_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>; + +def: T_RI_pat<S4_clbaddi, int_hexagon_S4_clbaddi>; +def: T_PI_pat<S4_clbpaddi, int_hexagon_S4_clbpaddi>; +def: T_P_pat <S4_clbpnorm, int_hexagon_S4_clbpnorm>; + +/******************************************************************** +* ALU32/ALU * +*********************************************************************/ + +// ALU32 / ALU / Logical Operations. +def: T_RR_pat<A4_andn, int_hexagon_A4_andn>; +def: T_RR_pat<A4_orn, int_hexagon_A4_orn>; + +/******************************************************************** +* ALU32/PERM * +*********************************************************************/ + +// Combine Words Into Doublewords. +def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32ImmPred>; +def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32ImmPred>; + +/******************************************************************** +* ALU32/PRED * +*********************************************************************/ + +// Compare +def : T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32ImmPred>; +def : T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32ImmPred>; +def : T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32ImmPred>; + +def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>; +def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>; + +def: T_RI_pat<A4_rcmpeqi, int_hexagon_A4_rcmpeqi>; +def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>; + +/******************************************************************** +* CR * +*********************************************************************/ + +// CR / Logical Operations On Predicates. 
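+// At the intrinsic level predicate values are modelled as i32, so the class
+// below wraps each i32 operand in C2_tfrrp and the predicate result in
+// C2_tfrpr to move between integer and predicate registers. A rough C-level
+// view, assuming Clang's __builtin_HEXAGON_* naming (illustrative only):
+//   int p = __builtin_HEXAGON_C2_cmpgt(a, b);      // predicate held in an i32
+//   int r = __builtin_HEXAGON_C4_and_and(p, q, s); // maps to C4_and_and below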
+ +class qi_CRInst_qiqiqi_pat<Intrinsic IntID, InstHexagon Inst> : + Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt, IntRegs:$Ru)), + (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs), + (C2_tfrrp IntRegs:$Rt), + (C2_tfrrp IntRegs:$Ru))))>; + +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_and, C4_and_and>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_andn, C4_and_andn>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_or, C4_and_or>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_orn, C4_and_orn>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_and, C4_or_and>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_andn, C4_or_andn>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_or, C4_or_or>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_orn, C4_or_orn>; + +/******************************************************************** +* XTYPE/ALU * +*********************************************************************/ + +// Add And Accumulate. + +def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>; +def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>; + + +// XTYPE / ALU / Logical-logical Words. +def : T_RRR_pat <M4_or_xor, int_hexagon_M4_or_xor>; +def : T_RRR_pat <M4_and_xor, int_hexagon_M4_and_xor>; +def : T_RRR_pat <M4_or_and, int_hexagon_M4_or_and>; +def : T_RRR_pat <M4_and_and, int_hexagon_M4_and_and>; +def : T_RRR_pat <M4_xor_and, int_hexagon_M4_xor_and>; +def : T_RRR_pat <M4_or_or, int_hexagon_M4_or_or>; +def : T_RRR_pat <M4_and_or, int_hexagon_M4_and_or>; +def : T_RRR_pat <M4_xor_or, int_hexagon_M4_xor_or>; +def : T_RRR_pat <M4_or_andn, int_hexagon_M4_or_andn>; +def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>; +def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>; + +def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>; +def : T_RRI_pat <S4_or_andix, int_hexagon_S4_or_andix>; +def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>; + +// Modulo wrap. +def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>; + +// Arithmetic/Convergent round +// Rd=[cround|round](Rs,Rt)[:sat] +// Rd=[cround|round](Rs,#u5)[:sat] +def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>; +def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>; + +def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>; +def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>; + +def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>; +def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>; + +def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td new file mode 100644 index 0000000..60e6b1e --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td @@ -0,0 +1,111 @@ +//===- HexagonIntrinsicsV5.td - V5 Instruction intrinsics --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//Rdd[+]=vrmpybsu(Rss,Rtt) +//Rdd[+]=vrmpybuu(Rss,Rtt) +let Predicates = [HasV5T] in { +def : T_PP_pat <M5_vrmpybsu, int_hexagon_M5_vrmpybsu>; +def : T_PP_pat <M5_vrmpybuu, int_hexagon_M5_vrmpybuu>; + +def : T_PP_pat <M5_vdmpybsu, int_hexagon_M5_vdmpybsu>; + +def : T_PPP_pat <M5_vrmacbsu, int_hexagon_M5_vrmacbsu>; +def : T_PPP_pat <M5_vrmacbuu, int_hexagon_M5_vrmacbuu>; +//Rxx+=vdmpybsu(Rss,Rtt):sat +def : T_PPP_pat <M5_vdmacbsu, int_hexagon_M5_vdmacbsu>; + +// Vector multiply bytes +// Rdd=vmpyb[s]u(Rs,Rt) +def : T_RR_pat <M5_vmpybsu, int_hexagon_M5_vmpybsu>; +def : T_RR_pat <M5_vmpybuu, int_hexagon_M5_vmpybuu>; + +// Rxx+=vmpyb[s]u(Rs,Rt) +def : T_PRR_pat <M5_vmacbsu, int_hexagon_M5_vmacbsu>; +def : T_PRR_pat <M5_vmacbuu, int_hexagon_M5_vmacbuu>; + +// Rd=vaddhub(Rss,Rtt):sat +def : T_PP_pat <A5_vaddhubs, int_hexagon_A5_vaddhubs>; +} + +def : T_FF_pat<F2_sfadd, int_hexagon_F2_sfadd>; +def : T_FF_pat<F2_sfsub, int_hexagon_F2_sfsub>; +def : T_FF_pat<F2_sfmpy, int_hexagon_F2_sfmpy>; +def : T_FF_pat<F2_sfmax, int_hexagon_F2_sfmax>; +def : T_FF_pat<F2_sfmin, int_hexagon_F2_sfmin>; + +def : T_FF_pat<F2_sffixupn, int_hexagon_F2_sffixupn>; +def : T_FF_pat<F2_sffixupd, int_hexagon_F2_sffixupd>; +def : T_F_pat <F2_sffixupr, int_hexagon_F2_sffixupr>; + +def: qi_CRInst_qiqi_pat<C4_fastcorner9, int_hexagon_C4_fastcorner9>; +def: qi_CRInst_qiqi_pat<C4_fastcorner9_not, int_hexagon_C4_fastcorner9_not>; + +def : T_P_pat <S5_popcountp, int_hexagon_S5_popcountp>; +def : T_PI_pat <S5_asrhub_sat, int_hexagon_S5_asrhub_sat>; + +def : T_PI_pat <S2_asr_i_p_rnd, int_hexagon_S2_asr_i_p_rnd>; +def : T_PI_pat <S2_asr_i_p_rnd_goodsyntax, + int_hexagon_S2_asr_i_p_rnd_goodsyntax>; + +def : T_PI_pat <S5_asrhub_rnd_sat_goodsyntax, + int_hexagon_S5_asrhub_rnd_sat_goodsyntax>; + +def : T_PI_pat <S5_vasrhrnd_goodsyntax, int_hexagon_S5_vasrhrnd_goodsyntax>; + +def : T_FFF_pat <F2_sffma, int_hexagon_F2_sffma>; +def : T_FFF_pat <F2_sffms, int_hexagon_F2_sffms>; +def : T_FFF_pat <F2_sffma_lib, int_hexagon_F2_sffma_lib>; +def : T_FFF_pat <F2_sffms_lib, int_hexagon_F2_sffms_lib>; +def : T_FFFQ_pat <F2_sffma_sc, int_hexagon_F2_sffma_sc>; + +// Compare floating-point value +def : T_FF_pat <F2_sfcmpge, int_hexagon_F2_sfcmpge>; +def : T_FF_pat <F2_sfcmpuo, int_hexagon_F2_sfcmpuo>; +def : T_FF_pat <F2_sfcmpeq, int_hexagon_F2_sfcmpeq>; +def : T_FF_pat <F2_sfcmpgt, int_hexagon_F2_sfcmpgt>; + +def : T_DD_pat <F2_dfcmpeq, int_hexagon_F2_dfcmpeq>; +def : T_DD_pat <F2_dfcmpgt, int_hexagon_F2_dfcmpgt>; +def : T_DD_pat <F2_dfcmpge, int_hexagon_F2_dfcmpge>; +def : T_DD_pat <F2_dfcmpuo, int_hexagon_F2_dfcmpuo>; + +// Create floating-point value +def : T_I_pat <F2_sfimm_p, int_hexagon_F2_sfimm_p>; +def : T_I_pat <F2_sfimm_n, int_hexagon_F2_sfimm_n>; +def : T_I_pat <F2_dfimm_p, int_hexagon_F2_dfimm_p>; +def : T_I_pat <F2_dfimm_n, int_hexagon_F2_dfimm_n>; + +def : T_DI_pat <F2_dfclass, int_hexagon_F2_dfclass>; +def : T_FI_pat <F2_sfclass, int_hexagon_F2_sfclass>; +def : T_F_pat <F2_conv_sf2df, int_hexagon_F2_conv_sf2df>; +def : T_D_pat <F2_conv_df2sf, int_hexagon_F2_conv_df2sf>; +def : T_R_pat <F2_conv_uw2sf, int_hexagon_F2_conv_uw2sf>; +def : T_R_pat <F2_conv_uw2df, int_hexagon_F2_conv_uw2df>; +def : T_R_pat <F2_conv_w2sf, int_hexagon_F2_conv_w2sf>; +def : T_R_pat <F2_conv_w2df, int_hexagon_F2_conv_w2df>; +def : T_P_pat <F2_conv_ud2sf, int_hexagon_F2_conv_ud2sf>; +def : T_P_pat <F2_conv_ud2df, int_hexagon_F2_conv_ud2df>; +def : T_P_pat <F2_conv_d2sf, 
int_hexagon_F2_conv_d2sf>; +def : T_P_pat <F2_conv_d2df, int_hexagon_F2_conv_d2df>; +def : T_F_pat <F2_conv_sf2uw, int_hexagon_F2_conv_sf2uw>; +def : T_F_pat <F2_conv_sf2w, int_hexagon_F2_conv_sf2w>; +def : T_F_pat <F2_conv_sf2ud, int_hexagon_F2_conv_sf2ud>; +def : T_F_pat <F2_conv_sf2d, int_hexagon_F2_conv_sf2d>; +def : T_D_pat <F2_conv_df2uw, int_hexagon_F2_conv_df2uw>; +def : T_D_pat <F2_conv_df2w, int_hexagon_F2_conv_df2w>; +def : T_D_pat <F2_conv_df2ud, int_hexagon_F2_conv_df2ud>; +def : T_D_pat <F2_conv_df2d, int_hexagon_F2_conv_df2d>; +def : T_F_pat <F2_conv_sf2uw_chop, int_hexagon_F2_conv_sf2uw_chop>; +def : T_F_pat <F2_conv_sf2w_chop, int_hexagon_F2_conv_sf2w_chop>; +def : T_F_pat <F2_conv_sf2ud_chop, int_hexagon_F2_conv_sf2ud_chop>; +def : T_F_pat <F2_conv_sf2d_chop, int_hexagon_F2_conv_sf2d_chop>; +def : T_D_pat <F2_conv_df2uw_chop, int_hexagon_F2_conv_df2uw_chop>; +def : T_D_pat <F2_conv_df2w_chop, int_hexagon_F2_conv_df2w_chop>; +def : T_D_pat <F2_conv_df2ud_chop, int_hexagon_F2_conv_df2ud_chop>; +def : T_D_pat <F2_conv_df2d_chop, int_hexagon_F2_conv_df2d_chop>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td new file mode 100644 index 0000000..24a3e4d --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td @@ -0,0 +1,836 @@ +//=- HexagonIntrinsicsV60.td - Target Description for Hexagon -*- tablegen *-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V60 Compiler Intrinsics in TableGen format. +// +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1 in { +def HEXAGON_V6_vd0_pseudo : CVI_VA_Resource<(outs VectorRegs:$dst), + (ins ), + "$dst=#0", + [(set VectorRegs:$dst, (int_hexagon_V6_vd0 ))]>; + +def HEXAGON_V6_vd0_pseudo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins ), + "$dst=#0", + [(set VectorRegs128B:$dst, (int_hexagon_V6_vd0_128B ))]>; +} +let isPseudo = 1 in +def HEXAGON_V6_vassignp : CVI_VA_Resource<(outs VecDblRegs:$dst), + (ins VecDblRegs:$src1), + "$dst=vassignp_W($src1)", + [(set VecDblRegs:$dst, (int_hexagon_V6_vassignp VecDblRegs:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_vassignp_128B : CVI_VA_Resource<(outs VecDblRegs128B:$dst), + (ins VecDblRegs128B:$src1), + "$dst=vassignp_W_128B($src1)", + [(set VecDblRegs128B:$dst, (int_hexagon_V6_vassignp_128B + VecDblRegs128B:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_lo : CVI_VA_Resource<(outs VectorRegs:$dst), + (ins VecDblRegs:$src1), + "$dst=lo_W($src1)", + [(set VectorRegs:$dst, (int_hexagon_V6_lo VecDblRegs:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_hi : CVI_VA_Resource<(outs VectorRegs:$dst), + (ins VecDblRegs:$src1), + "$dst=hi_W($src1)", + [(set VectorRegs:$dst, (int_hexagon_V6_hi VecDblRegs:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_lo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins VecDblRegs128B:$src1), + "$dst=lo_W($src1)", + [(set VectorRegs128B:$dst, (int_hexagon_V6_lo_128B VecDblRegs128B:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_hi_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins VecDblRegs128B:$src1), + "$dst=hi_W($src1)", + [(set VectorRegs128B:$dst, (int_hexagon_V6_hi_128B VecDblRegs128B:$src1))]>; + +let AddedComplexity = 100 in { +def : Pat < 
(v16i32 (int_hexagon_V6_lo (v32i32 VecDblRegs:$src1))), + (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_loreg)) >, + Requires<[UseHVXSgl]>; + +def : Pat < (v16i32 (int_hexagon_V6_hi (v32i32 VecDblRegs:$src1))), + (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_hireg)) >, + Requires<[UseHVXSgl]>; + +def : Pat < (v32i32 (int_hexagon_V6_lo_128B (v64i32 VecDblRegs128B:$src1))), + (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1), + subreg_loreg)) >, + Requires<[UseHVXDbl]>; + +def : Pat < (v32i32 (int_hexagon_V6_hi_128B (v64i32 VecDblRegs128B:$src1))), + (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1), + subreg_hireg)) >, + Requires<[UseHVXDbl]>; +} + +def : Pat <(v512i1 (bitconvert (v16i32 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v16i32 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (bitconvert (v32i16 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v32i16 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (bitconvert (v64i8 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v64i8 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (bitconvert (v8i64 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v8i64 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v16i32 (bitconvert (v512i1 VecPredRegs:$src1))), + (v16i32 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v32i16 (bitconvert (v512i1 VecPredRegs:$src1))), + (v32i16 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v64i8 (bitconvert (v512i1 VecPredRegs:$src1))), + (v64i8 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v8i64 (bitconvert (v512i1 VecPredRegs:$src1))), + (v8i64 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v1024i1 (bitconvert (v32i32 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v32i32 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (bitconvert (v64i16 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v64i16 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (bitconvert (v128i8 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v128i8 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (bitconvert (v16i64 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v16i64 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v32i32 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v32i32 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v64i16 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v64i16 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v128i8 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v128i8 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v16i64 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v16i64 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +let AddedComplexity = 140 in { +def : Pat <(store (v512i1 VecPredRegs:$src1), (i32 IntRegs:$addr)), + 
(V6_vS32b_ai IntRegs:$addr, 0, + (v16i32 (V6_vandqrt (v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101))))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (load (i32 IntRegs:$addr))), + (v512i1 (V6_vandvrt + (v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(store (v1024i1 VecPredRegs128B:$src1), (i32 IntRegs:$addr)), + (V6_vS32b_ai_128B IntRegs:$addr, 0, + (v32i32 (V6_vandqrt_128B (v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101))))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (load (i32 IntRegs:$addr))), + (v1024i1 (V6_vandvrt_128B + (v32i32 (V6_vL32b_ai_128B IntRegs:$addr, 0)), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; +} + +multiclass T_R_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID IntRegs:$src1), (MI IntRegs:$src1)>, + Requires<[UseHVXSgl]>; + def: Pat<(!cast<Intrinsic>(IntID#"_128B") IntRegs:$src1), + (!cast<InstHexagon>(MI#"_128B") IntRegs:$src1)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_V_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1), + (MI VectorRegs:$src1)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_Q_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecPredRegs:$src1), + (MI VecPredRegs:$src1)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1), + (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2), + (MI VecDblRegs:$src1, IntRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B")VecDblRegs128B:$src1, IntRegs:$src2), + (!cast<InstHexagon>(MI#"_128B")VecDblRegs128B:$src1, IntRegs:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1, IntRegs:$src2), + (MI VectorRegs:$src1, IntRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B")VectorRegs128B:$src1, IntRegs:$src2), + (!cast<InstHexagon>(MI#"_128B")VectorRegs128B:$src1, IntRegs:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WV_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2), + (MI VecDblRegs:$src1, VectorRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2), + (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WW_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2), + (MI VecDblRegs:$src1, VecDblRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2), + (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VV_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2), + (MI VectorRegs:$src1, VectorRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_QR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID 
VecPredRegs:$src1, IntRegs:$src2), + (MI VecPredRegs:$src1, IntRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1, + IntRegs:$src2), + (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1, + IntRegs:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_QQ_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecPredRegs:$src1, VecPredRegs:$src2), + (MI VecPredRegs:$src1, VecPredRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1, + VecPredRegs128B:$src2), + (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1, + VecPredRegs128B:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WWR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3), + (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3), + (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VVR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3), + (MI VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WVR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, IntRegs:$src3), + (MI VecDblRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3), + (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VWR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1, VecDblRegs:$src2, IntRegs:$src3), + (MI VectorRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VVV_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), + (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WVV_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), + (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3), + (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_QVV_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2, 
VectorRegs:$src3), + (MI VecPredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3), + (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VQR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3), + (MI VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, + VecPredRegs128B:$src2, + IntRegs:$src3), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, + VecPredRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + + +multiclass T_QVR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2, IntRegs:$src3), + (MI VecPredRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3), + (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VVI_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, imm:$src3), + (MI VectorRegs:$src1, VectorRegs:$src2, imm:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, imm:$src3), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, imm:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WRI_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2, imm:$src3), + (MI VecDblRegs:$src1, IntRegs:$src2, imm:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, + IntRegs:$src2, imm:$src3), + (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, + IntRegs:$src2, imm:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WWRI_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3, imm:$src4), + (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3, imm:$src4)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3, imm:$src4), + (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3, imm:$src4)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VVVR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, + IntRegs:$src4), + (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, + IntRegs:$src4)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3, + IntRegs:$src4), + (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3, + IntRegs:$src4)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WVVR_pat <InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, + IntRegs:$src4), + (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, + IntRegs:$src4)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3, + IntRegs:$src4), + 
(!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3, + IntRegs:$src4)>, + Requires<[UseHVXDbl]>; +} + +defm : T_WR_pat<V6_vtmpyb, int_hexagon_V6_vtmpyb>; +defm : T_WR_pat <V6_vtmpybus, int_hexagon_V6_vtmpybus>; +defm : T_VR_pat <V6_vdmpyhb, int_hexagon_V6_vdmpyhb>; +defm : T_VR_pat <V6_vrmpyub, int_hexagon_V6_vrmpyub>; +defm : T_VR_pat <V6_vrmpybus, int_hexagon_V6_vrmpybus>; +defm : T_WR_pat <V6_vdsaduh, int_hexagon_V6_vdsaduh>; +defm : T_VR_pat <V6_vdmpybus, int_hexagon_V6_vdmpybus>; +defm : T_WR_pat <V6_vdmpybus_dv, int_hexagon_V6_vdmpybus_dv>; +defm : T_VR_pat <V6_vdmpyhsusat, int_hexagon_V6_vdmpyhsusat>; +defm : T_WR_pat <V6_vdmpyhsuisat, int_hexagon_V6_vdmpyhsuisat>; +defm : T_VR_pat <V6_vdmpyhsat, int_hexagon_V6_vdmpyhsat>; +defm : T_WR_pat <V6_vdmpyhisat, int_hexagon_V6_vdmpyhisat>; +defm : T_WR_pat <V6_vdmpyhb_dv, int_hexagon_V6_vdmpyhb_dv>; +defm : T_VR_pat <V6_vmpybus, int_hexagon_V6_vmpybus>; +defm : T_WR_pat <V6_vmpabus, int_hexagon_V6_vmpabus>; +defm : T_WR_pat <V6_vmpahb, int_hexagon_V6_vmpahb>; +defm : T_VR_pat <V6_vmpyh, int_hexagon_V6_vmpyh>; +defm : T_VR_pat <V6_vmpyhss, int_hexagon_V6_vmpyhss>; +defm : T_VR_pat <V6_vmpyhsrs, int_hexagon_V6_vmpyhsrs>; +defm : T_VR_pat <V6_vmpyuh, int_hexagon_V6_vmpyuh>; +defm : T_VR_pat <V6_vmpyihb, int_hexagon_V6_vmpyihb>; +defm : T_VR_pat <V6_vror, int_hexagon_V6_vror>; +defm : T_VR_pat <V6_vasrw, int_hexagon_V6_vasrw>; +defm : T_VR_pat <V6_vasrh, int_hexagon_V6_vasrh>; +defm : T_VR_pat <V6_vaslw, int_hexagon_V6_vaslw>; +defm : T_VR_pat <V6_vaslh, int_hexagon_V6_vaslh>; +defm : T_VR_pat <V6_vlsrw, int_hexagon_V6_vlsrw>; +defm : T_VR_pat <V6_vlsrh, int_hexagon_V6_vlsrh>; +defm : T_VR_pat <V6_vmpyiwh, int_hexagon_V6_vmpyiwh>; +defm : T_VR_pat <V6_vmpyiwb, int_hexagon_V6_vmpyiwb>; +defm : T_WR_pat <V6_vtmpyhb, int_hexagon_V6_vtmpyhb>; +defm : T_VR_pat <V6_vmpyub, int_hexagon_V6_vmpyub>; + +defm : T_VV_pat <V6_vrmpyubv, int_hexagon_V6_vrmpyubv>; +defm : T_VV_pat <V6_vrmpybv, int_hexagon_V6_vrmpybv>; +defm : T_VV_pat <V6_vrmpybusv, int_hexagon_V6_vrmpybusv>; +defm : T_VV_pat <V6_vdmpyhvsat, int_hexagon_V6_vdmpyhvsat>; +defm : T_VV_pat <V6_vmpybv, int_hexagon_V6_vmpybv>; +defm : T_VV_pat <V6_vmpyubv, int_hexagon_V6_vmpyubv>; +defm : T_VV_pat <V6_vmpybusv, int_hexagon_V6_vmpybusv>; +defm : T_VV_pat <V6_vmpyhv, int_hexagon_V6_vmpyhv>; +defm : T_VV_pat <V6_vmpyuhv, int_hexagon_V6_vmpyuhv>; +defm : T_VV_pat <V6_vmpyhvsrs, int_hexagon_V6_vmpyhvsrs>; +defm : T_VV_pat <V6_vmpyhus, int_hexagon_V6_vmpyhus>; +defm : T_WW_pat <V6_vmpabusv, int_hexagon_V6_vmpabusv>; +defm : T_VV_pat <V6_vmpyih, int_hexagon_V6_vmpyih>; +defm : T_VV_pat <V6_vand, int_hexagon_V6_vand>; +defm : T_VV_pat <V6_vor, int_hexagon_V6_vor>; +defm : T_VV_pat <V6_vxor, int_hexagon_V6_vxor>; +defm : T_VV_pat <V6_vaddw, int_hexagon_V6_vaddw>; +defm : T_VV_pat <V6_vaddubsat, int_hexagon_V6_vaddubsat>; +defm : T_VV_pat <V6_vadduhsat, int_hexagon_V6_vadduhsat>; +defm : T_VV_pat <V6_vaddhsat, int_hexagon_V6_vaddhsat>; +defm : T_VV_pat <V6_vaddwsat, int_hexagon_V6_vaddwsat>; +defm : T_VV_pat <V6_vsubb, int_hexagon_V6_vsubb>; +defm : T_VV_pat <V6_vsubh, int_hexagon_V6_vsubh>; +defm : T_VV_pat <V6_vsubw, int_hexagon_V6_vsubw>; +defm : T_VV_pat <V6_vsububsat, int_hexagon_V6_vsububsat>; +defm : T_VV_pat <V6_vsubuhsat, int_hexagon_V6_vsubuhsat>; +defm : T_VV_pat <V6_vsubhsat, int_hexagon_V6_vsubhsat>; +defm : T_VV_pat <V6_vsubwsat, int_hexagon_V6_vsubwsat>; +defm : T_WW_pat <V6_vaddb_dv, int_hexagon_V6_vaddb_dv>; +defm : T_WW_pat <V6_vaddh_dv, 
int_hexagon_V6_vaddh_dv>; +defm : T_WW_pat <V6_vaddw_dv, int_hexagon_V6_vaddw_dv>; +defm : T_WW_pat <V6_vaddubsat_dv, int_hexagon_V6_vaddubsat_dv>; +defm : T_WW_pat <V6_vadduhsat_dv, int_hexagon_V6_vadduhsat_dv>; +defm : T_WW_pat <V6_vaddhsat_dv, int_hexagon_V6_vaddhsat_dv>; +defm : T_WW_pat <V6_vaddwsat_dv, int_hexagon_V6_vaddwsat_dv>; +defm : T_WW_pat <V6_vsubb_dv, int_hexagon_V6_vsubb_dv>; +defm : T_WW_pat <V6_vsubh_dv, int_hexagon_V6_vsubh_dv>; +defm : T_WW_pat <V6_vsubw_dv, int_hexagon_V6_vsubw_dv>; +defm : T_WW_pat <V6_vsububsat_dv, int_hexagon_V6_vsububsat_dv>; +defm : T_WW_pat <V6_vsubuhsat_dv, int_hexagon_V6_vsubuhsat_dv>; +defm : T_WW_pat <V6_vsubhsat_dv, int_hexagon_V6_vsubhsat_dv>; +defm : T_WW_pat <V6_vsubwsat_dv, int_hexagon_V6_vsubwsat_dv>; +defm : T_VV_pat <V6_vaddubh, int_hexagon_V6_vaddubh>; +defm : T_VV_pat <V6_vadduhw, int_hexagon_V6_vadduhw>; +defm : T_VV_pat <V6_vaddhw, int_hexagon_V6_vaddhw>; +defm : T_VV_pat <V6_vsububh, int_hexagon_V6_vsububh>; +defm : T_VV_pat <V6_vsubuhw, int_hexagon_V6_vsubuhw>; +defm : T_VV_pat <V6_vsubhw, int_hexagon_V6_vsubhw>; +defm : T_VV_pat <V6_vabsdiffub, int_hexagon_V6_vabsdiffub>; +defm : T_VV_pat <V6_vabsdiffh, int_hexagon_V6_vabsdiffh>; +defm : T_VV_pat <V6_vabsdiffuh, int_hexagon_V6_vabsdiffuh>; +defm : T_VV_pat <V6_vabsdiffw, int_hexagon_V6_vabsdiffw>; +defm : T_VV_pat <V6_vavgub, int_hexagon_V6_vavgub>; +defm : T_VV_pat <V6_vavguh, int_hexagon_V6_vavguh>; +defm : T_VV_pat <V6_vavgh, int_hexagon_V6_vavgh>; +defm : T_VV_pat <V6_vavgw, int_hexagon_V6_vavgw>; +defm : T_VV_pat <V6_vnavgub, int_hexagon_V6_vnavgub>; +defm : T_VV_pat <V6_vnavgh, int_hexagon_V6_vnavgh>; +defm : T_VV_pat <V6_vnavgw, int_hexagon_V6_vnavgw>; +defm : T_VV_pat <V6_vavgubrnd, int_hexagon_V6_vavgubrnd>; +defm : T_VV_pat <V6_vavguhrnd, int_hexagon_V6_vavguhrnd>; +defm : T_VV_pat <V6_vavghrnd, int_hexagon_V6_vavghrnd>; +defm : T_VV_pat <V6_vavgwrnd, int_hexagon_V6_vavgwrnd>; +defm : T_WW_pat <V6_vmpabuuv, int_hexagon_V6_vmpabuuv>; + +defm : T_VVR_pat <V6_vdmpyhb_acc, int_hexagon_V6_vdmpyhb_acc>; +defm : T_VVR_pat <V6_vrmpyub_acc, int_hexagon_V6_vrmpyub_acc>; +defm : T_VVR_pat <V6_vrmpybus_acc, int_hexagon_V6_vrmpybus_acc>; +defm : T_VVR_pat <V6_vdmpybus_acc, int_hexagon_V6_vdmpybus_acc>; +defm : T_VVR_pat <V6_vdmpyhsusat_acc, int_hexagon_V6_vdmpyhsusat_acc>; +defm : T_VVR_pat <V6_vdmpyhsat_acc, int_hexagon_V6_vdmpyhsat_acc>; +defm : T_VVR_pat <V6_vmpyiwb_acc, int_hexagon_V6_vmpyiwb_acc>; +defm : T_VVR_pat <V6_vmpyiwh_acc, int_hexagon_V6_vmpyiwh_acc>; +defm : T_VVR_pat <V6_vmpyihb_acc, int_hexagon_V6_vmpyihb_acc>; +defm : T_VVR_pat <V6_vaslw_acc, int_hexagon_V6_vaslw_acc>; +defm : T_VVR_pat <V6_vasrw_acc, int_hexagon_V6_vasrw_acc>; + +defm : T_VWR_pat <V6_vdmpyhsuisat_acc, int_hexagon_V6_vdmpyhsuisat_acc>; +defm : T_VWR_pat <V6_vdmpyhisat_acc, int_hexagon_V6_vdmpyhisat_acc>; + +defm : T_WVR_pat <V6_vmpybus_acc, int_hexagon_V6_vmpybus_acc>; +defm : T_WVR_pat <V6_vmpyhsat_acc, int_hexagon_V6_vmpyhsat_acc>; +defm : T_WVR_pat <V6_vmpyuh_acc, int_hexagon_V6_vmpyuh_acc>; +defm : T_WVR_pat <V6_vmpyub_acc, int_hexagon_V6_vmpyub_acc>; + +defm : T_WWR_pat <V6_vtmpyb_acc, int_hexagon_V6_vtmpyb_acc>; +defm : T_WWR_pat <V6_vtmpybus_acc, int_hexagon_V6_vtmpybus_acc>; +defm : T_WWR_pat <V6_vtmpyhb_acc, int_hexagon_V6_vtmpyhb_acc>; +defm : T_WWR_pat <V6_vdmpybus_dv_acc, int_hexagon_V6_vdmpybus_dv_acc>; +defm : T_WWR_pat <V6_vdmpyhb_dv_acc, int_hexagon_V6_vdmpyhb_dv_acc>; +defm : T_WWR_pat <V6_vmpabus_acc, int_hexagon_V6_vmpabus_acc>; +defm : T_WWR_pat <V6_vmpahb_acc, 
int_hexagon_V6_vmpahb_acc>; +defm : T_WWR_pat <V6_vdsaduh_acc, int_hexagon_V6_vdsaduh_acc>; + +defm : T_VVV_pat <V6_vdmpyhvsat_acc, int_hexagon_V6_vdmpyhvsat_acc>; +defm : T_WVV_pat <V6_vmpybusv_acc, int_hexagon_V6_vmpybusv_acc>; +defm : T_WVV_pat <V6_vmpybv_acc, int_hexagon_V6_vmpybv_acc>; +defm : T_WVV_pat <V6_vmpyhus_acc, int_hexagon_V6_vmpyhus_acc>; +defm : T_WVV_pat <V6_vmpyhv_acc, int_hexagon_V6_vmpyhv_acc>; +defm : T_VVV_pat <V6_vmpyiewh_acc, int_hexagon_V6_vmpyiewh_acc>; +defm : T_VVV_pat <V6_vmpyiewuh_acc, int_hexagon_V6_vmpyiewuh_acc>; +defm : T_VVV_pat <V6_vmpyih_acc, int_hexagon_V6_vmpyih_acc>; +defm : T_VVV_pat <V6_vmpyowh_rnd_sacc, int_hexagon_V6_vmpyowh_rnd_sacc>; +defm : T_VVV_pat <V6_vmpyowh_sacc, int_hexagon_V6_vmpyowh_sacc>; +defm : T_WVV_pat <V6_vmpyubv_acc, int_hexagon_V6_vmpyubv_acc>; +defm : T_WVV_pat <V6_vmpyuhv_acc, int_hexagon_V6_vmpyuhv_acc>; +defm : T_VVV_pat <V6_vrmpybusv_acc, int_hexagon_V6_vrmpybusv_acc>; +defm : T_VVV_pat <V6_vrmpybv_acc, int_hexagon_V6_vrmpybv_acc>; +defm : T_VVV_pat <V6_vrmpyubv_acc, int_hexagon_V6_vrmpyubv_acc>; + +// Compare instructions +defm : T_QVV_pat <V6_veqb_and, int_hexagon_V6_veqb_and>; +defm : T_QVV_pat <V6_veqh_and, int_hexagon_V6_veqh_and>; +defm : T_QVV_pat <V6_veqw_and, int_hexagon_V6_veqw_and>; +defm : T_QVV_pat <V6_vgtb_and, int_hexagon_V6_vgtb_and>; +defm : T_QVV_pat <V6_vgth_and, int_hexagon_V6_vgth_and>; +defm : T_QVV_pat <V6_vgtw_and, int_hexagon_V6_vgtw_and>; +defm : T_QVV_pat <V6_vgtub_and, int_hexagon_V6_vgtub_and>; +defm : T_QVV_pat <V6_vgtuh_and, int_hexagon_V6_vgtuh_and>; +defm : T_QVV_pat <V6_vgtuw_and, int_hexagon_V6_vgtuw_and>; +defm : T_QVV_pat <V6_veqb_or, int_hexagon_V6_veqb_or>; +defm : T_QVV_pat <V6_veqh_or, int_hexagon_V6_veqh_or>; +defm : T_QVV_pat <V6_veqw_or, int_hexagon_V6_veqw_or>; +defm : T_QVV_pat <V6_vgtb_or, int_hexagon_V6_vgtb_or>; +defm : T_QVV_pat <V6_vgth_or, int_hexagon_V6_vgth_or>; +defm : T_QVV_pat <V6_vgtw_or, int_hexagon_V6_vgtw_or>; +defm : T_QVV_pat <V6_vgtub_or, int_hexagon_V6_vgtub_or>; +defm : T_QVV_pat <V6_vgtuh_or, int_hexagon_V6_vgtuh_or>; +defm : T_QVV_pat <V6_vgtuw_or, int_hexagon_V6_vgtuw_or>; +defm : T_QVV_pat <V6_veqb_xor, int_hexagon_V6_veqb_xor>; +defm : T_QVV_pat <V6_veqh_xor, int_hexagon_V6_veqh_xor>; +defm : T_QVV_pat <V6_veqw_xor, int_hexagon_V6_veqw_xor>; +defm : T_QVV_pat <V6_vgtb_xor, int_hexagon_V6_vgtb_xor>; +defm : T_QVV_pat <V6_vgth_xor, int_hexagon_V6_vgth_xor>; +defm : T_QVV_pat <V6_vgtw_xor, int_hexagon_V6_vgtw_xor>; +defm : T_QVV_pat <V6_vgtub_xor, int_hexagon_V6_vgtub_xor>; +defm : T_QVV_pat <V6_vgtuh_xor, int_hexagon_V6_vgtuh_xor>; +defm : T_QVV_pat <V6_vgtuw_xor, int_hexagon_V6_vgtuw_xor>; + +defm : T_VV_pat <V6_vminub, int_hexagon_V6_vminub>; +defm : T_VV_pat <V6_vminuh, int_hexagon_V6_vminuh>; +defm : T_VV_pat <V6_vminh, int_hexagon_V6_vminh>; +defm : T_VV_pat <V6_vminw, int_hexagon_V6_vminw>; +defm : T_VV_pat <V6_vmaxub, int_hexagon_V6_vmaxub>; +defm : T_VV_pat <V6_vmaxuh, int_hexagon_V6_vmaxuh>; +defm : T_VV_pat <V6_vmaxh, int_hexagon_V6_vmaxh>; +defm : T_VV_pat <V6_vmaxw, int_hexagon_V6_vmaxw>; +defm : T_VV_pat <V6_vdelta, int_hexagon_V6_vdelta>; +defm : T_VV_pat <V6_vrdelta, int_hexagon_V6_vrdelta>; +defm : T_VV_pat <V6_vdealb4w, int_hexagon_V6_vdealb4w>; +defm : T_VV_pat <V6_vmpyowh_rnd, int_hexagon_V6_vmpyowh_rnd>; +defm : T_VV_pat <V6_vshuffeb, int_hexagon_V6_vshuffeb>; +defm : T_VV_pat <V6_vshuffob, int_hexagon_V6_vshuffob>; +defm : T_VV_pat <V6_vshufeh, int_hexagon_V6_vshufeh>; +defm : T_VV_pat <V6_vshufoh, int_hexagon_V6_vshufoh>; +defm : 
T_VV_pat <V6_vshufoeh, int_hexagon_V6_vshufoeh>; +defm : T_VV_pat <V6_vshufoeb, int_hexagon_V6_vshufoeb>; +defm : T_VV_pat <V6_vcombine, int_hexagon_V6_vcombine>; +defm : T_VV_pat <V6_vmpyieoh, int_hexagon_V6_vmpyieoh>; +defm : T_VV_pat <V6_vsathub, int_hexagon_V6_vsathub>; +defm : T_VV_pat <V6_vsatwh, int_hexagon_V6_vsatwh>; +defm : T_VV_pat <V6_vroundwh, int_hexagon_V6_vroundwh>; +defm : T_VV_pat <V6_vroundwuh, int_hexagon_V6_vroundwuh>; +defm : T_VV_pat <V6_vroundhb, int_hexagon_V6_vroundhb>; +defm : T_VV_pat <V6_vroundhub, int_hexagon_V6_vroundhub>; +defm : T_VV_pat <V6_vasrwv, int_hexagon_V6_vasrwv>; +defm : T_VV_pat <V6_vlsrwv, int_hexagon_V6_vlsrwv>; +defm : T_VV_pat <V6_vlsrhv, int_hexagon_V6_vlsrhv>; +defm : T_VV_pat <V6_vasrhv, int_hexagon_V6_vasrhv>; +defm : T_VV_pat <V6_vaslwv, int_hexagon_V6_vaslwv>; +defm : T_VV_pat <V6_vaslhv, int_hexagon_V6_vaslhv>; +defm : T_VV_pat <V6_vaddb, int_hexagon_V6_vaddb>; +defm : T_VV_pat <V6_vaddh, int_hexagon_V6_vaddh>; +defm : T_VV_pat <V6_vmpyiewuh, int_hexagon_V6_vmpyiewuh>; +defm : T_VV_pat <V6_vmpyiowh, int_hexagon_V6_vmpyiowh>; +defm : T_VV_pat <V6_vpackeb, int_hexagon_V6_vpackeb>; +defm : T_VV_pat <V6_vpackeh, int_hexagon_V6_vpackeh>; +defm : T_VV_pat <V6_vpackhub_sat, int_hexagon_V6_vpackhub_sat>; +defm : T_VV_pat <V6_vpackhb_sat, int_hexagon_V6_vpackhb_sat>; +defm : T_VV_pat <V6_vpackwuh_sat, int_hexagon_V6_vpackwuh_sat>; +defm : T_VV_pat <V6_vpackwh_sat, int_hexagon_V6_vpackwh_sat>; +defm : T_VV_pat <V6_vpackob, int_hexagon_V6_vpackob>; +defm : T_VV_pat <V6_vpackoh, int_hexagon_V6_vpackoh>; +defm : T_VV_pat <V6_vmpyewuh, int_hexagon_V6_vmpyewuh>; +defm : T_VV_pat <V6_vmpyowh, int_hexagon_V6_vmpyowh>; + +defm : T_QVV_pat <V6_vaddbq, int_hexagon_V6_vaddbq>; +defm : T_QVV_pat <V6_vaddhq, int_hexagon_V6_vaddhq>; +defm : T_QVV_pat <V6_vaddwq, int_hexagon_V6_vaddwq>; +defm : T_QVV_pat <V6_vaddbnq, int_hexagon_V6_vaddbnq>; +defm : T_QVV_pat <V6_vaddhnq, int_hexagon_V6_vaddhnq>; +defm : T_QVV_pat <V6_vaddwnq, int_hexagon_V6_vaddwnq>; +defm : T_QVV_pat <V6_vsubbq, int_hexagon_V6_vsubbq>; +defm : T_QVV_pat <V6_vsubhq, int_hexagon_V6_vsubhq>; +defm : T_QVV_pat <V6_vsubwq, int_hexagon_V6_vsubwq>; +defm : T_QVV_pat <V6_vsubbnq, int_hexagon_V6_vsubbnq>; +defm : T_QVV_pat <V6_vsubhnq, int_hexagon_V6_vsubhnq>; +defm : T_QVV_pat <V6_vsubwnq, int_hexagon_V6_vsubwnq>; + +defm : T_V_pat <V6_vabsh, int_hexagon_V6_vabsh>; +defm : T_V_pat <V6_vabsw, int_hexagon_V6_vabsw>; +defm : T_V_pat <V6_vabsw_sat, int_hexagon_V6_vabsw_sat>; +defm : T_V_pat <V6_vabsh_sat, int_hexagon_V6_vabsh_sat>; +defm : T_V_pat <V6_vnot, int_hexagon_V6_vnot>; +defm : T_V_pat <V6_vassign, int_hexagon_V6_vassign>; +defm : T_V_pat <V6_vzb, int_hexagon_V6_vzb>; +defm : T_V_pat <V6_vzh, int_hexagon_V6_vzh>; +defm : T_V_pat <V6_vsb, int_hexagon_V6_vsb>; +defm : T_V_pat <V6_vsh, int_hexagon_V6_vsh>; +defm : T_V_pat <V6_vdealh, int_hexagon_V6_vdealh>; +defm : T_V_pat <V6_vdealb, int_hexagon_V6_vdealb>; +defm : T_V_pat <V6_vunpackub, int_hexagon_V6_vunpackub>; +defm : T_V_pat <V6_vunpackuh, int_hexagon_V6_vunpackuh>; +defm : T_V_pat <V6_vunpackb, int_hexagon_V6_vunpackb>; +defm : T_V_pat <V6_vunpackh, int_hexagon_V6_vunpackh>; +defm : T_V_pat <V6_vshuffh, int_hexagon_V6_vshuffh>; +defm : T_V_pat <V6_vshuffb, int_hexagon_V6_vshuffb>; +defm : T_V_pat <V6_vcl0w, int_hexagon_V6_vcl0w>; +defm : T_V_pat <V6_vpopcounth, int_hexagon_V6_vpopcounth>; +defm : T_V_pat <V6_vcl0h, int_hexagon_V6_vcl0h>; +defm : T_V_pat <V6_vnormamtw, int_hexagon_V6_vnormamtw>; +defm : T_V_pat <V6_vnormamth, 
int_hexagon_V6_vnormamth>; + +defm : T_WRI_pat <V6_vrmpybusi, int_hexagon_V6_vrmpybusi>; +defm : T_WRI_pat <V6_vrsadubi, int_hexagon_V6_vrsadubi>; +defm : T_WRI_pat <V6_vrmpyubi, int_hexagon_V6_vrmpyubi>; + +defm : T_WWRI_pat <V6_vrmpybusi_acc, int_hexagon_V6_vrmpybusi_acc>; +defm : T_WWRI_pat <V6_vrsadubi_acc, int_hexagon_V6_vrsadubi_acc>; +defm : T_WWRI_pat <V6_vrmpyubi_acc, int_hexagon_V6_vrmpyubi_acc>; + +// assembler mapped. +//defm : T_V_pat <V6_vtran2x2, int_hexagon_V6_vtran2x2>; +// not present earlier.. need to add intrinsic +defm : T_VVR_pat <V6_valignb, int_hexagon_V6_valignb>; +defm : T_VVR_pat <V6_vlalignb, int_hexagon_V6_vlalignb>; +defm : T_VVR_pat <V6_vasrwh, int_hexagon_V6_vasrwh>; +defm : T_VVR_pat <V6_vasrwhsat, int_hexagon_V6_vasrwhsat>; +defm : T_VVR_pat <V6_vasrwhrndsat, int_hexagon_V6_vasrwhrndsat>; +defm : T_VVR_pat <V6_vasrwuhsat, int_hexagon_V6_vasrwuhsat>; +defm : T_VVR_pat <V6_vasrhubsat, int_hexagon_V6_vasrhubsat>; +defm : T_VVR_pat <V6_vasrhubrndsat, int_hexagon_V6_vasrhubrndsat>; +defm : T_VVR_pat <V6_vasrhbrndsat, int_hexagon_V6_vasrhbrndsat>; + +defm : T_VVR_pat <V6_vshuffvdd, int_hexagon_V6_vshuffvdd>; +defm : T_VVR_pat <V6_vdealvdd, int_hexagon_V6_vdealvdd>; + +defm : T_WV_pat <V6_vunpackob, int_hexagon_V6_vunpackob>; +defm : T_WV_pat <V6_vunpackoh, int_hexagon_V6_vunpackoh>; +defm : T_VVI_pat <V6_valignbi, int_hexagon_V6_valignbi>; +defm : T_VVI_pat <V6_vlalignbi, int_hexagon_V6_vlalignbi>; + +defm : T_QVV_pat <V6_vswap, int_hexagon_V6_vswap>; +defm : T_QVV_pat <V6_vmux, int_hexagon_V6_vmux>; +defm : T_QQ_pat <V6_pred_and, int_hexagon_V6_pred_and>; +defm : T_QQ_pat <V6_pred_or, int_hexagon_V6_pred_or>; +defm : T_Q_pat <V6_pred_not, int_hexagon_V6_pred_not>; +defm : T_QQ_pat <V6_pred_xor, int_hexagon_V6_pred_xor>; +defm : T_QQ_pat <V6_pred_or_n, int_hexagon_V6_pred_or_n>; +defm : T_QQ_pat <V6_pred_and_n, int_hexagon_V6_pred_and_n>; +defm : T_VV_pat <V6_veqb, int_hexagon_V6_veqb>; +defm : T_VV_pat <V6_veqh, int_hexagon_V6_veqh>; +defm : T_VV_pat <V6_veqw, int_hexagon_V6_veqw>; +defm : T_VV_pat <V6_vgtb, int_hexagon_V6_vgtb>; +defm : T_VV_pat <V6_vgth, int_hexagon_V6_vgth>; +defm : T_VV_pat <V6_vgtw, int_hexagon_V6_vgtw>; +defm : T_VV_pat <V6_vgtub, int_hexagon_V6_vgtub>; +defm : T_VV_pat <V6_vgtuh, int_hexagon_V6_vgtuh>; +defm : T_VV_pat <V6_vgtuw, int_hexagon_V6_vgtuw>; + +defm : T_VQR_pat <V6_vandqrt_acc, int_hexagon_V6_vandqrt_acc>; +defm : T_QVR_pat <V6_vandvrt_acc, int_hexagon_V6_vandvrt_acc>; +defm : T_QR_pat <V6_vandqrt, int_hexagon_V6_vandqrt>; +defm : T_R_pat <V6_lvsplatw, int_hexagon_V6_lvsplatw>; +defm : T_R_pat <V6_pred_scalar2, int_hexagon_V6_pred_scalar2>; +defm : T_VR_pat <V6_vandvrt, int_hexagon_V6_vandvrt>; + +defm : T_VVR_pat <V6_vlutvvb, int_hexagon_V6_vlutvvb>; +defm : T_VVR_pat <V6_vlutvwh, int_hexagon_V6_vlutvwh>; +defm : T_VVVR_pat <V6_vlutvvb_oracc, int_hexagon_V6_vlutvvb_oracc>; +defm : T_WVVR_pat <V6_vlutvwh_oracc, int_hexagon_V6_vlutvwh_oracc>; + +defm : T_QVR_pat <V6_vandvrt_acc, int_hexagon_V6_vandvrt_acc>; +def : T_PI_pat <S6_rol_i_p, int_hexagon_S6_rol_i_p>; +def : T_RI_pat <S6_rol_i_r, int_hexagon_S6_rol_i_r>; +def : T_PPI_pat <S6_rol_i_p_nac, int_hexagon_S6_rol_i_p_nac>; +def : T_PPI_pat <S6_rol_i_p_acc, int_hexagon_S6_rol_i_p_acc>; +def : T_PPI_pat <S6_rol_i_p_and, int_hexagon_S6_rol_i_p_and>; +def : T_PPI_pat <S6_rol_i_p_or, int_hexagon_S6_rol_i_p_or>; +def : T_PPI_pat <S6_rol_i_p_xacc, int_hexagon_S6_rol_i_p_xacc>; +def : T_RRI_pat <S6_rol_i_r_nac, int_hexagon_S6_rol_i_r_nac>; +def : T_RRI_pat <S6_rol_i_r_acc, 
int_hexagon_S6_rol_i_r_acc>; +def : T_RRI_pat <S6_rol_i_r_and, int_hexagon_S6_rol_i_r_and>; +def : T_RRI_pat <S6_rol_i_r_or, int_hexagon_S6_rol_i_r_or>; +def : T_RRI_pat <S6_rol_i_r_xacc, int_hexagon_S6_rol_i_r_xacc>; + +defm : T_VR_pat <V6_extractw, int_hexagon_V6_extractw>; +defm : T_VR_pat <V6_vinsertwr, int_hexagon_V6_vinsertwr>; + +def : T_PPQ_pat <S2_cabacencbin, int_hexagon_S2_cabacencbin>; + +def: Pat<(v64i16 (trunc v64i32:$Vdd)), + (v64i16 (V6_vpackwh_sat_128B + (v32i32 (HEXAGON_V6_hi_128B VecDblRegs128B:$Vdd)), + (v32i32 (HEXAGON_V6_lo_128B VecDblRegs128B:$Vdd))))>, + Requires<[UseHVXDbl]>; + + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td b/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td new file mode 100644 index 0000000..0ca95e9 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td @@ -0,0 +1,728 @@ +//=- HexagonIsetDx.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon duplex instructions. +// +//===----------------------------------------------------------------------===// + +// SA1_combine1i: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combine1i: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u2Imm:$u2), + "$Rdd = combine(#1, #$u2)"> { + bits<3> Rdd; + bits<2> u2; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b0; + let Inst{4-3} = 0b01; + let Inst{2-0} = Rdd; + let Inst{6-5} = u2; + } + +// SL2_jumpr31_f: Indirect conditional jump if false. +// SL2_jumpr31_f -> SL2_jumpr31_fnew +let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31_f: SUBInst < + (outs ), + (ins ), + "if (!p0) jumpr r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2-0} = 0b101; + } + +// SL2_deallocframe: Deallocate stack frame. +let Defs = [R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in +def V4_SL2_deallocframe: SUBInst < + (outs ), + (ins ), + "deallocframe"> { + let Inst{12-6} = 0b1111100; + let Inst{2} = 0b0; + } + +// SL2_return_f: Deallocate stack frame and return. +// SL2_return_f -> SL2_return_fnew +let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return_f: SUBInst < + (outs ), + (ins ), + "if (!p0) dealloc_return"> { + let Inst{12-6} = 0b1111101; + let Inst{2-0} = 0b101; + } + +// SA1_combine3i: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combine3i: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u2Imm:$u2), + "$Rdd = combine(#3, #$u2)"> { + bits<3> Rdd; + bits<2> u2; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b0; + let Inst{4-3} = 0b11; + let Inst{2-0} = Rdd; + let Inst{6-5} = u2; + } + +// SS2_storebi0: Store byte. +let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in +def V4_SS2_storebi0: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_0Imm:$u4_0), + "memb($Rs + #$u4_0)=#0"> { + bits<4> Rs; + bits<4> u4_0; + + let Inst{12-8} = 0b10010; + let Inst{7-4} = Rs; + let Inst{3-0} = u4_0; + } + +// SA1_clrtnew: Clear if true. 
+let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_clrtnew: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "if (p0.new) $Rd = #0"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6-4} = 0b100; + let Inst{3-0} = Rd; + } + +// SL2_loadruh_io: Load half. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL2_loadruh_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u3_1Imm:$u3_1), + "$Rd = memuh($Rs + #$u3_1)"> { + bits<4> Rd; + bits<4> Rs; + bits<4> u3_1; + + let Inst{12-11} = 0b01; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{10-8} = u3_1{3-1}; + } + +// SL2_jumpr31_tnew: Indirect conditional jump if true. +let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31_tnew: SUBInst < + (outs ), + (ins ), + "if (p0.new) jumpr:nt r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2-0} = 0b110; + } + +// SA1_addi: Add. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 1, opExtentBits = 7, opExtendable = 2 in +def V4_SA1_addi: SUBInst < + (outs IntRegs:$Rx), + (ins IntRegs:$_src_, s7Ext:$s7), + "$Rx = add($_src_, #$s7)" , + [] , + "$_src_ = $Rx"> { + bits<4> Rx; + bits<7> s7; + + let Inst{12-11} = 0b00; + let Inst{3-0} = Rx; + let Inst{10-4} = s7; + } + +// SL1_loadrub_io: Load byte. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL1_loadrub_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u4_0Imm:$u4_0), + "$Rd = memub($Rs + #$u4_0)"> { + bits<4> Rd; + bits<4> Rs; + bits<4> u4_0; + + let Inst{12} = 0b1; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{11-8} = u4_0; + } + +// SL1_loadri_io: Load word. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL1_loadri_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u4_2Imm:$u4_2), + "$Rd = memw($Rs + #$u4_2)"> { + bits<4> Rd; + bits<4> Rs; + bits<6> u4_2; + + let Inst{12} = 0b0; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{11-8} = u4_2{5-2}; + } + +// SA1_cmpeqi: Compareimmed. +let Defs = [P0], isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_cmpeqi: SUBInst < + (outs ), + (ins IntRegs:$Rs, u2Imm:$u2), + "p0 = cmp.eq($Rs, #$u2)"> { + bits<4> Rs; + bits<2> u2; + + let Inst{12-8} = 0b11001; + let Inst{7-4} = Rs; + let Inst{1-0} = u2; + } + +// SA1_combinerz: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combinerz: SUBInst < + (outs DoubleRegs:$Rdd), + (ins IntRegs:$Rs), + "$Rdd = combine($Rs, #0)"> { + bits<3> Rdd; + bits<4> Rs; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b1; + let Inst{3} = 0b1; + let Inst{2-0} = Rdd; + let Inst{7-4} = Rs; + } + +// SL2_return_t: Deallocate stack frame and return. +// SL2_return_t -> SL2_return_tnew +let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return_t: SUBInst < + (outs ), + (ins ), + "if (p0) dealloc_return"> { + let Inst{12-6} = 0b1111101; + let Inst{2-0} = 0b100; + } + +// SS2_allocframe: Allocate stack frame. 
+let Defs = [R29, R30], Uses = [R30, R31, R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in +def V4_SS2_allocframe: SUBInst < + (outs ), + (ins u5_3Imm:$u5_3), + "allocframe(#$u5_3)"> { + bits<8> u5_3; + + let Inst{12-9} = 0b1110; + let Inst{8-4} = u5_3{7-3}; + } + +// SS2_storeh_io: Store half. +let isCodeGenOnly = 1, mayStore = 1, accessSize = HalfWordAccess in +def V4_SS2_storeh_io: SUBInst < + (outs ), + (ins IntRegs:$Rs, u3_1Imm:$u3_1, IntRegs:$Rt), + "memh($Rs + #$u3_1) = $Rt"> { + bits<4> Rs; + bits<4> u3_1; + bits<4> Rt; + + let Inst{12-11} = 0b00; + let Inst{7-4} = Rs; + let Inst{10-8} = u3_1{3-1}; + let Inst{3-0} = Rt; + } + +// SS2_storewi0: Store word. +let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in +def V4_SS2_storewi0: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_2Imm:$u4_2), + "memw($Rs + #$u4_2)=#0"> { + bits<4> Rs; + bits<6> u4_2; + + let Inst{12-8} = 0b10000; + let Inst{7-4} = Rs; + let Inst{3-0} = u4_2{5-2}; + } + +// SS2_storewi1: Store word. +let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in +def V4_SS2_storewi1: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_2Imm:$u4_2), + "memw($Rs + #$u4_2)=#1"> { + bits<4> Rs; + bits<6> u4_2; + + let Inst{12-8} = 0b10001; + let Inst{7-4} = Rs; + let Inst{3-0} = u4_2{5-2}; + } + +// SL2_jumpr31: Indirect conditional jump if true. +let Defs = [PC], Uses = [R31], isCodeGenOnly = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31: SUBInst < + (outs ), + (ins ), + "jumpr r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2} = 0b0; + } + +// SA1_combinezr: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combinezr: SUBInst < + (outs DoubleRegs:$Rdd), + (ins IntRegs:$Rs), + "$Rdd = combine(#0, $Rs)"> { + bits<3> Rdd; + bits<4> Rs; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b1; + let Inst{3} = 0b0; + let Inst{2-0} = Rdd; + let Inst{7-4} = Rs; + } + +// SL2_loadrh_io: Load half. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL2_loadrh_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u3_1Imm:$u3_1), + "$Rd = memh($Rs + #$u3_1)"> { + bits<4> Rd; + bits<4> Rs; + bits<4> u3_1; + + let Inst{12-11} = 0b00; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{10-8} = u3_1{3-1}; + } + +// SA1_addrx: Add. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_addrx: SUBInst < + (outs IntRegs:$Rx), + (ins IntRegs:$_src_, IntRegs:$Rs), + "$Rx = add($_src_, $Rs)" , + [] , + "$_src_ = $Rx"> { + bits<4> Rx; + bits<4> Rs; + + let Inst{12-8} = 0b11000; + let Inst{3-0} = Rx; + let Inst{7-4} = Rs; + } + +// SA1_setin1: Set to -1. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_setin1: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "$Rd = #-1"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6} = 0b0; + let Inst{3-0} = Rd; + } + +// SA1_sxth: Sxth. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_sxth: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = sxth($Rs)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10100; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SA1_combine0i: Combines. 
+let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combine0i: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u2Imm:$u2), + "$Rdd = combine(#0, #$u2)"> { + bits<3> Rdd; + bits<2> u2; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b0; + let Inst{4-3} = 0b00; + let Inst{2-0} = Rdd; + let Inst{6-5} = u2; + } + +// SA1_combine2i: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combine2i: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u2Imm:$u2), + "$Rdd = combine(#2, #$u2)"> { + bits<3> Rdd; + bits<2> u2; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b0; + let Inst{4-3} = 0b10; + let Inst{2-0} = Rdd; + let Inst{6-5} = u2; + } + +// SA1_sxtb: Sxtb. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_sxtb: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = sxtb($Rs)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10101; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SA1_clrf: Clear if false. +// SA1_clrf -> SA1_clrfnew +let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_clrf: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "if (!p0) $Rd = #0"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6-4} = 0b111; + let Inst{3-0} = Rd; + } + +// SL2_loadrb_io: Load byte. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL2_loadrb_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u3_0Imm:$u3_0), + "$Rd = memb($Rs + #$u3_0)"> { + bits<4> Rd; + bits<4> Rs; + bits<3> u3_0; + + let Inst{12-11} = 0b10; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{10-8} = u3_0; + } + +// SA1_tfr: Tfr. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_tfr: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = $Rs"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10000; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SL2_loadrd_sp: Load dword. +let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in +def V4_SL2_loadrd_sp: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u5_3Imm:$u5_3), + "$Rdd = memd(r29 + #$u5_3)"> { + bits<3> Rdd; + bits<8> u5_3; + + let Inst{12-8} = 0b11110; + let Inst{2-0} = Rdd; + let Inst{7-3} = u5_3{7-3}; + } + +// SA1_and1: And #1. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_and1: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = and($Rs, #1)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10010; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SS2_storebi1: Store byte. +let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in +def V4_SS2_storebi1: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_0Imm:$u4_0), + "memb($Rs + #$u4_0)=#1"> { + bits<4> Rs; + bits<4> u4_0; + + let Inst{12-8} = 0b10011; + let Inst{7-4} = Rs; + let Inst{3-0} = u4_0; + } + +// SA1_inc: Inc. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_inc: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = add($Rs, #1)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10001; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SS2_stored_sp: Store dword. 
+let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in +def V4_SS2_stored_sp: SUBInst < + (outs ), + (ins s6_3Imm:$s6_3, DoubleRegs:$Rtt), + "memd(r29 + #$s6_3) = $Rtt"> { + bits<9> s6_3; + bits<3> Rtt; + + let Inst{12-9} = 0b0101; + let Inst{8-3} = s6_3{8-3}; + let Inst{2-0} = Rtt; + } + +// SS2_storew_sp: Store word. +let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in +def V4_SS2_storew_sp: SUBInst < + (outs ), + (ins u5_2Imm:$u5_2, IntRegs:$Rt), + "memw(r29 + #$u5_2) = $Rt"> { + bits<7> u5_2; + bits<4> Rt; + + let Inst{12-9} = 0b0100; + let Inst{8-4} = u5_2{6-2}; + let Inst{3-0} = Rt; + } + +// SL2_jumpr31_fnew: Indirect conditional jump if false. +let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31_fnew: SUBInst < + (outs ), + (ins ), + "if (!p0.new) jumpr:nt r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2-0} = 0b111; + } + +// SA1_clrt: Clear if true. +// SA1_clrt -> SA1_clrtnew +let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_clrt: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "if (p0) $Rd = #0"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6-4} = 0b110; + let Inst{3-0} = Rd; + } + +// SL2_return: Deallocate stack frame and return. +let Defs = [PC, R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return: SUBInst < + (outs ), + (ins ), + "dealloc_return"> { + let Inst{12-6} = 0b1111101; + let Inst{2} = 0b0; + } + +// SA1_dec: Dec. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_dec: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = add($Rs,#-1)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10011; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SA1_seti: Set immed. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 0, opExtentBits = 6, opExtendable = 1 in +def V4_SA1_seti: SUBInst < + (outs IntRegs:$Rd), + (ins u6Ext:$u6), + "$Rd = #$u6"> { + bits<4> Rd; + bits<6> u6; + + let Inst{12-10} = 0b010; + let Inst{3-0} = Rd; + let Inst{9-4} = u6; + } + +// SL2_jumpr31_t: Indirect conditional jump if true. +// SL2_jumpr31_t -> SL2_jumpr31_tnew +let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31_t: SUBInst < + (outs ), + (ins ), + "if (p0) jumpr r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2-0} = 0b100; + } + +// SA1_clrfnew: Clear if false. +let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_clrfnew: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "if (!p0.new) $Rd = #0"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6-4} = 0b101; + let Inst{3-0} = Rd; + } + +// SS1_storew_io: Store word. +let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in +def V4_SS1_storew_io: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_2Imm:$u4_2, IntRegs:$Rt), + "memw($Rs + #$u4_2) = $Rt"> { + bits<4> Rs; + bits<6> u4_2; + bits<4> Rt; + + let Inst{12} = 0b0; + let Inst{7-4} = Rs; + let Inst{11-8} = u4_2{5-2}; + let Inst{3-0} = Rt; + } + +// SA1_zxtb: Zxtb. 
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_zxtb: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = and($Rs, #255)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10111; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SA1_addsp: Add. +let Uses = [R29], isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_addsp: SUBInst < + (outs IntRegs:$Rd), + (ins u6_2Imm:$u6_2), + "$Rd = add(r29, #$u6_2)"> { + bits<4> Rd; + bits<8> u6_2; + + let Inst{12-10} = 0b011; + let Inst{3-0} = Rd; + let Inst{9-4} = u6_2{7-2}; + } + +// SL2_loadri_sp: Load word. +let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL2_loadri_sp: SUBInst < + (outs IntRegs:$Rd), + (ins u5_2Imm:$u5_2), + "$Rd = memw(r29 + #$u5_2)"> { + bits<4> Rd; + bits<7> u5_2; + + let Inst{12-9} = 0b1110; + let Inst{3-0} = Rd; + let Inst{8-4} = u5_2{6-2}; + } + +// SS1_storeb_io: Store byte. +let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in +def V4_SS1_storeb_io: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_0Imm:$u4_0, IntRegs:$Rt), + "memb($Rs + #$u4_0) = $Rt"> { + bits<4> Rs; + bits<4> u4_0; + bits<4> Rt; + + let Inst{12} = 0b1; + let Inst{7-4} = Rs; + let Inst{11-8} = u4_0; + let Inst{3-0} = Rt; + } + +// SL2_return_tnew: Deallocate stack frame and return. +let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return_tnew: SUBInst < + (outs ), + (ins ), + "if (p0.new) dealloc_return:nt"> { + let Inst{12-6} = 0b1111101; + let Inst{2-0} = 0b110; + } + +// SL2_return_fnew: Deallocate stack frame and return. +let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return_fnew: SUBInst < + (outs ), + (ins ), + "if (!p0.new) dealloc_return:nt"> { + let Inst{12-6} = 0b1111101; + let Inst{2-0} = 0b111; + } + +// SA1_zxth: Zxth. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_zxth: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = zxth($Rs)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10110; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp new file mode 100644 index 0000000..624c0f6 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -0,0 +1,146 @@ +//===- HexagonMCInstLower.cpp - Convert Hexagon MachineInstr to an MCInst -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower Hexagon MachineInstrs to their corresponding +// MCInst records. 
+// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonAsmPrinter.h" +#include "HexagonMachineFunctionInfo.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" + +using namespace llvm; + +namespace llvm { + void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, + MCInst &MCB, HexagonAsmPrinter &AP); +} + +static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, + HexagonAsmPrinter &Printer) { + MCContext &MC = Printer.OutContext; + const MCExpr *ME; + + // Populate the relocation type based on Hexagon target flags + // set on an operand + MCSymbolRefExpr::VariantKind RelocationType; + switch (MO.getTargetFlags()) { + default: + RelocationType = MCSymbolRefExpr::VK_None; + break; + case HexagonII::MO_PCREL: + RelocationType = MCSymbolRefExpr::VK_Hexagon_PCREL; + break; + case HexagonII::MO_GOT: + RelocationType = MCSymbolRefExpr::VK_GOT; + break; + case HexagonII::MO_LO16: + RelocationType = MCSymbolRefExpr::VK_Hexagon_LO16; + break; + case HexagonII::MO_HI16: + RelocationType = MCSymbolRefExpr::VK_Hexagon_HI16; + break; + case HexagonII::MO_GPREL: + RelocationType = MCSymbolRefExpr::VK_Hexagon_GPREL; + break; + } + + ME = MCSymbolRefExpr::create(Symbol, RelocationType, MC); + + if (!MO.isJTI() && MO.getOffset()) + ME = MCBinaryExpr::createAdd(ME, MCConstantExpr::create(MO.getOffset(), MC), + MC); + + return MCOperand::createExpr(ME); +} + +// Create an MCInst from a MachineInstr +void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, + MCInst &MCB, HexagonAsmPrinter &AP) { + if (MI->getOpcode() == Hexagon::ENDLOOP0) { + HexagonMCInstrInfo::setInnerLoop(MCB); + return; + } + if (MI->getOpcode() == Hexagon::ENDLOOP1) { + HexagonMCInstrInfo::setOuterLoop(MCB); + return; + } + MCInst *MCI = new (AP.OutContext) MCInst; + MCI->setOpcode(MI->getOpcode()); + assert(MCI->getOpcode() == static_cast<unsigned>(MI->getOpcode()) && + "MCI opcode should have been set on construction"); + bool MustExtend = false; + + for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCO; + if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended) + MustExtend = true; + + switch (MO.getType()) { + default: + MI->dump(); + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) continue; + MCO = MCOperand::createReg(MO.getReg()); + break; + case MachineOperand::MO_FPImmediate: { + APFloat Val = MO.getFPImm()->getValueAPF(); + // FP immediates are used only when setting GPRs, so they may be dealt + // with like regular immediates from this point on. 
+ MCO = MCOperand::createExpr( + MCConstantExpr::create(*Val.bitcastToAPInt().getRawData(), + AP.OutContext)); + break; + } + case MachineOperand::MO_Immediate: + MCO = MCOperand::createExpr( + MCConstantExpr::create(MO.getImm(), AP.OutContext)); + break; + case MachineOperand::MO_MachineBasicBlock: + MCO = MCOperand::createExpr + (MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), + AP.OutContext)); + break; + case MachineOperand::MO_GlobalAddress: + MCO = GetSymbolRef(MO, AP.getSymbol(MO.getGlobal()), AP); + break; + case MachineOperand::MO_ExternalSymbol: + MCO = GetSymbolRef(MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), + AP); + break; + case MachineOperand::MO_JumpTableIndex: + MCO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP); + break; + case MachineOperand::MO_BlockAddress: + MCO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP); + break; + } + + MCI->addOperand(MCO); + } + AP.HexagonProcessInstruction(*MCI, *MI); + HexagonMCInstrInfo::extendIfNeeded(AP.OutContext, MCII, MCB, *MCI, + MustExtend); + MCB.addOperand(MCOperand::createInst(MCI)); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp new file mode 100644 index 0000000..9579c8b --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp @@ -0,0 +1,16 @@ +//= HexagonMachineFunctionInfo.cpp - Hexagon machine function info *- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HexagonMachineFunctionInfo.h" + +using namespace llvm; + +// pin vtable to this file +void HexagonMachineFunctionInfo::anchor() {} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h new file mode 100644 index 0000000..7672358 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -0,0 +1,85 @@ +//=- HexagonMachineFunctionInfo.h - Hexagon machine function info -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" +#include <map> + +namespace llvm { + + namespace Hexagon { + const unsigned int StartPacket = 0x1; + const unsigned int EndPacket = 0x2; + } + + +/// Hexagon target-specific information for each MachineFunction. +class HexagonMachineFunctionInfo : public MachineFunctionInfo { + // SRetReturnReg - Some subtargets require that sret lowering includes + // returning the value of the returned struct in a register. This field + // holds the virtual register into which the sret argument is passed. 
+ unsigned SRetReturnReg; + unsigned StackAlignBaseReg; + std::vector<MachineInstr*> AllocaAdjustInsts; + int VarArgsFrameIndex; + bool HasClobberLR; + bool HasEHReturn; + std::map<const MachineInstr*, unsigned> PacketInfo; + virtual void anchor(); + +public: + HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseReg(0), + HasClobberLR(0), HasEHReturn(false) {} + + HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0), + StackAlignBaseReg(0), + HasClobberLR(0), + HasEHReturn(false) {} + + unsigned getSRetReturnReg() const { return SRetReturnReg; } + void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } + + void addAllocaAdjustInst(MachineInstr* MI) { + AllocaAdjustInsts.push_back(MI); + } + const std::vector<MachineInstr*>& getAllocaAdjustInsts() { + return AllocaAdjustInsts; + } + + void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; } + int getVarArgsFrameIndex() { return VarArgsFrameIndex; } + + void setStartPacket(MachineInstr* MI) { + PacketInfo[MI] |= Hexagon::StartPacket; + } + void setEndPacket(MachineInstr* MI) { + PacketInfo[MI] |= Hexagon::EndPacket; + } + bool isStartPacket(const MachineInstr* MI) const { + return (PacketInfo.count(MI) && + (PacketInfo.find(MI)->second & Hexagon::StartPacket)); + } + bool isEndPacket(const MachineInstr* MI) const { + return (PacketInfo.count(MI) && + (PacketInfo.find(MI)->second & Hexagon::EndPacket)); + } + void setHasClobberLR(bool v) { HasClobberLR = v; } + bool hasClobberLR() const { return HasClobberLR; } + + bool hasEHReturn() const { return HasEHReturn; }; + void setHasEHReturn(bool H = true) { HasEHReturn = H; }; + + void setStackAlignBaseVReg(unsigned R) { StackAlignBaseReg = R; } + unsigned getStackAlignBaseVReg() const { return StackAlignBaseReg; } +}; +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp new file mode 100644 index 0000000..7a52d68 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -0,0 +1,699 @@ +//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// MachineScheduler schedules machine instructions after phi elimination. It +// preserves LiveIntervals so it can be invoked before register allocation. +// +//===----------------------------------------------------------------------===// + +#include "HexagonMachineScheduler.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/IR/Function.h" + +using namespace llvm; + +#define DEBUG_TYPE "misched" + +/// Platform-specific modifications to DAG. +void VLIWMachineScheduler::postprocessDAG() { + SUnit* LastSequentialCall = nullptr; + // Currently we only catch the situation when compare gets scheduled + // before preceding call. + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { + // Remember the call. + if (SUnits[su].getInstr()->isCall()) + LastSequentialCall = &(SUnits[su]); + // Look for a compare that defines a predicate. + else if (SUnits[su].getInstr()->isCompare() && LastSequentialCall) + SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier)); + } +} + +/// Check if scheduling of this SU is possible +/// in the current packet. 
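[editor's note] Hexagon::StartPacket (0x1) and Hexagon::EndPacket (0x2) above are single-bit flags OR-ed into PacketInfo per instruction by the setStartPacket/setEndPacket helpers. A minimal stand-alone model of that scheme, with plain ints standing in for MachineInstr objects and a local map standing in for the member:

// Sketch of the PacketInfo flag scheme: 0x1 marks a packet start, 0x2 an end,
// and a single-instruction packet carries both bits.
#include <cstdio>
#include <map>

int main() {
  const unsigned StartPacket = 0x1, EndPacket = 0x2;  // mirrors Hexagon::StartPacket/EndPacket
  int A, B;                                           // stand-ins for MachineInstr objects
  std::map<const int *, unsigned> PacketInfo;

  PacketInfo[&A] |= StartPacket;                      // first instruction of a packet
  PacketInfo[&B] |= StartPacket | EndPacket;          // packet containing only B

  bool AStarts = PacketInfo.count(&A) && (PacketInfo[&A] & StartPacket);
  bool AEnds   = PacketInfo.count(&A) && (PacketInfo[&A] & EndPacket);
  std::printf("A: start=%d end=%d  B: flags=0x%x\n", AStarts, AEnds, PacketInfo[&B]);
}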
+/// It is _not_ precise (statefull), it is more like +/// another heuristic. Many corner cases are figured +/// empirically. +bool VLIWResourceModel::isResourceAvailable(SUnit *SU) { + if (!SU || !SU->getInstr()) + return false; + + // First see if the pipeline could receive this instruction + // in the current cycle. + switch (SU->getInstr()->getOpcode()) { + default: + if (!ResourcesModel->canReserveResources(SU->getInstr())) + return false; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + break; + } + + // Now see if there are no other dependencies to instructions already + // in the packet. + for (unsigned i = 0, e = Packet.size(); i != e; ++i) { + if (Packet[i]->Succs.size() == 0) + continue; + for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), + E = Packet[i]->Succs.end(); I != E; ++I) { + // Since we do not add pseudos to packets, might as well + // ignore order dependencies. + if (I->isCtrl()) + continue; + + if (I->getSUnit() == SU) + return false; + } + } + return true; +} + +/// Keep track of available resources. +bool VLIWResourceModel::reserveResources(SUnit *SU) { + bool startNewCycle = false; + // Artificially reset state. + if (!SU) { + ResourcesModel->clearResources(); + Packet.clear(); + TotalPackets++; + return false; + } + // If this SU does not fit in the packet + // start a new one. + if (!isResourceAvailable(SU)) { + ResourcesModel->clearResources(); + Packet.clear(); + TotalPackets++; + startNewCycle = true; + } + + switch (SU->getInstr()->getOpcode()) { + default: + ResourcesModel->reserveResources(SU->getInstr()); + break; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::CFI_INSTRUCTION: + case TargetOpcode::EH_LABEL: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + break; + } + Packet.push_back(SU); + +#ifndef NDEBUG + DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n"); + for (unsigned i = 0, e = Packet.size(); i != e; ++i) { + DEBUG(dbgs() << "\t[" << i << "] SU("); + DEBUG(dbgs() << Packet[i]->NodeNum << ")\t"); + DEBUG(Packet[i]->getInstr()->dump()); + } +#endif + + // If packet is now full, reset the state so in the next cycle + // we start fresh. + if (Packet.size() >= SchedModel->getIssueWidth()) { + ResourcesModel->clearResources(); + Packet.clear(); + TotalPackets++; + startNewCycle = true; + } + + return startNewCycle; +} + +/// schedule - Called back from MachineScheduler::runOnMachineFunction +/// after setting up the current scheduling region. [RegionBegin, RegionEnd) +/// only includes instructions that have DAG nodes, not scheduling boundaries. +void VLIWMachineScheduler::schedule() { + DEBUG(dbgs() + << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber() + << " " << BB->getName() + << " in_func " << BB->getParent()->getFunction()->getName() + << " at loop depth " << MLI->getLoopDepth(BB) + << " \n"); + + buildDAGWithRegPressure(); + + // Postprocess the DAG to add platform-specific artificial dependencies. + postprocessDAG(); + + SmallVector<SUnit*, 8> TopRoots, BotRoots; + findRootsAndBiasEdges(TopRoots, BotRoots); + + // Initialize the strategy before modifying the DAG. 
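[editor's note] reserveResources() above starts a new packet either when the DFA rejects the instruction or when the packet reaches the issue width. A simplified, self-contained model of that control flow, keeping only the issue-width check (the DFA path through canReserveResources/reserveResources is omitted, and SUnit pointers are replaced by ints):

// Simplified packet model: reserve() returns true when the current packet is
// flushed, mirroring the clear-packet/bump-TotalPackets pattern above.
#include <cstdio>
#include <vector>

struct SimplePacketModel {
  unsigned IssueWidth;
  unsigned TotalPackets = 0;
  std::vector<int> Packet;                 // ints stand in for SUnit*

  explicit SimplePacketModel(unsigned W) : IssueWidth(W) {}

  bool reserve(int SU) {
    Packet.push_back(SU);
    if (Packet.size() >= IssueWidth) {     // packet full: flush and signal a new cycle
      Packet.clear();
      ++TotalPackets;
      return true;
    }
    return false;
  }
};

int main() {
  SimplePacketModel M(/*IssueWidth=*/4);   // Hexagon issues up to four instructions per packet
  for (int SU = 0; SU < 6; ++SU)
    std::printf("SU%d -> newCycle=%d\n", SU, M.reserve(SU));
  std::printf("packets so far: %u\n", M.TotalPackets);
}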
+ SchedImpl->initialize(this); + + // To view Height/Depth correctly, they should be accessed at least once. + // + // FIXME: SUnit::dumpAll always recompute depth and height now. The max + // depth/height could be computed directly from the roots and leaves. + DEBUG(unsigned maxH = 0; + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + if (SUnits[su].getHeight() > maxH) + maxH = SUnits[su].getHeight(); + dbgs() << "Max Height " << maxH << "\n";); + DEBUG(unsigned maxD = 0; + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + if (SUnits[su].getDepth() > maxD) + maxD = SUnits[su].getDepth(); + dbgs() << "Max Depth " << maxD << "\n";); + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + initQueues(TopRoots, BotRoots); + + bool IsTopNode = false; + while (true) { + DEBUG(dbgs() << "** VLIWMachineScheduler::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + + if (!checkSchedLimit()) + break; + + scheduleMI(SU, IsTopNode); + + updateQueues(SU, IsTopNode); + + // Notify the scheduling strategy after updating the DAG. + SchedImpl->schedNode(SU, IsTopNode); + } + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); + + placeDebugValues(); +} + +void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { + DAG = static_cast<VLIWMachineScheduler*>(dag); + SchedModel = DAG->getSchedModel(); + + Top.init(DAG, SchedModel); + Bot.init(DAG, SchedModel); + + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or + // are disabled, then these HazardRecs will be disabled. + const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); + const TargetSubtargetInfo &STI = DAG->MF.getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + delete Top.HazardRec; + delete Bot.HazardRec; + Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); + Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); + + delete Top.ResourceModel; + delete Bot.ResourceModel; + Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); + Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); + + assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); +} + +void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { + if (SU->isScheduled) + return; + + for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; + unsigned MinLatency = I->getLatency(); +#ifndef NDEBUG + Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); +#endif + if (SU->TopReadyCycle < PredReadyCycle + MinLatency) + SU->TopReadyCycle = PredReadyCycle + MinLatency; + } + Top.releaseNode(SU, SU->TopReadyCycle); +} + +void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { + if (SU->isScheduled) + return; + + assert(SU->getInstr() && "Scheduled SUnit must have instr"); + + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; + unsigned MinLatency = I->getLatency(); +#ifndef NDEBUG + Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); +#endif + if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) + SU->BotReadyCycle = SuccReadyCycle + MinLatency; + } + Bot.releaseNode(SU, SU->BotReadyCycle); +} + +/// Does this SU have a hazard within the current instruction group. 
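[editor's note] releaseTopNode()/releaseBottomNode() above push a node's ready cycle to the maximum over its edges of the neighbor's ready cycle plus the edge latency. A tiny numeric illustration with made-up edge values and no LLVM types:

// Ready-cycle propagation as in releaseTopNode(): a node cannot issue before
// every predecessor's ready cycle plus the latency of the connecting edge.
#include <algorithm>
#include <cstdio>

int main() {
  struct PredEdge { unsigned PredReadyCycle, Latency; };
  const PredEdge Preds[] = {{0, 1}, {2, 3}, {4, 1}};   // example edges

  unsigned TopReadyCycle = 0;
  for (const PredEdge &E : Preds)
    TopReadyCycle = std::max(TopReadyCycle, E.PredReadyCycle + E.Latency);

  std::printf("TopReadyCycle = %u\n", TopReadyCycle);  // 5
}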
+/// +/// The scheduler supports two modes of hazard recognition. The first is the +/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that +/// supports highly complicated in-order reservation tables +/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. +/// +/// The second is a streamlined mechanism that checks for hazards based on +/// simple counters that the scheduler itself maintains. It explicitly checks +/// for instruction dispatch limitations, including the number of micro-ops that +/// can dispatch per cycle. +/// +/// TODO: Also check whether the SU must start a new group. +bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) { + if (HazardRec->isEnabled()) + return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; + + unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); + if (IssueCount + uops > SchedModel->getIssueWidth()) + return true; + + return false; +} + +void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(SUnit *SU, + unsigned ReadyCycle) { + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + // Check for interlocks first. For the purpose of other heuristics, an + // instruction that cannot issue appears as if it's not in the ReadyQueue. + if (ReadyCycle > CurrCycle || checkHazard(SU)) + + Pending.push(SU); + else + Available.push(SU); +} + +/// Move the boundary of scheduled code by one cycle. +void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { + unsigned Width = SchedModel->getIssueWidth(); + IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; + + assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); + unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); + + if (!HazardRec->isEnabled()) { + // Bypass HazardRec virtual calls. + CurrCycle = NextCycle; + } else { + // Bypass getHazardType calls in case of long latency. + for (; CurrCycle != NextCycle; ++CurrCycle) { + if (isTop()) + HazardRec->AdvanceCycle(); + else + HazardRec->RecedeCycle(); + } + } + CheckPending = true; + + DEBUG(dbgs() << "*** " << Available.getName() << " cycle " + << CurrCycle << '\n'); +} + +/// Move the boundary of scheduled code by one SUnit. +void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) { + bool startNewCycle = false; + + // Update the reservation table. + if (HazardRec->isEnabled()) { + if (!isTop() && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + HazardRec->EmitInstruction(SU); + } + + // Update DFA model. + startNewCycle = ResourceModel->reserveResources(SU); + + // Check the instruction group dispatch limit. + // TODO: Check if this SU must end a dispatch group. + IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); + if (startNewCycle) { + DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); + bumpCycle(); + } + else + DEBUG(dbgs() << "*** IssueCount " << IssueCount + << " at cycle " << CurrCycle << '\n'); +} + +/// Release pending ready nodes in to the available queue. This makes them +/// visible to heuristics. +void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() { + // If the available queue is empty, it is safe to reset MinReadyCycle. + if (Available.empty()) + MinReadyCycle = UINT_MAX; + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. 
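[editor's note] bumpCycle() above carries any micro-ops issued beyond the width over into the next cycle and then advances at least one cycle, jumping directly to MinReadyCycle when everything pending is still further away. The same arithmetic on example numbers:

// The cycle-advance arithmetic used by bumpCycle().
#include <algorithm>
#include <cstdio>

int main() {
  unsigned Width = 4;                      // issue width (example value)
  unsigned IssueCount = 6, CurrCycle = 10, MinReadyCycle = 13;

  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
  unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);

  std::printf("IssueCount=%u NextCycle=%u\n", IssueCount, NextCycle); // 2 and 13
}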
+ for (unsigned i = 0, e = Pending.size(); i != e; ++i) { + SUnit *SU = *(Pending.begin()+i); + unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; + + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + if (ReadyCycle > CurrCycle) + continue; + + if (checkHazard(SU)) + continue; + + Available.push(SU); + Pending.remove(Pending.begin()+i); + --i; --e; + } + CheckPending = false; +} + +/// Remove SU from the ready set for this boundary. +void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) { + if (Available.isInQueue(SU)) + Available.remove(Available.find(SU)); + else { + assert(Pending.isInQueue(SU) && "bad ready count"); + Pending.remove(Pending.find(SU)); + } +} + +/// If this queue only has one ready candidate, return it. As a side effect, +/// advance the cycle until at least one node is ready. If multiple instructions +/// are ready, return NULL. +SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() { + if (CheckPending) + releasePending(); + + for (unsigned i = 0; Available.empty(); ++i) { + assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && + "permanent hazard"); (void)i; + ResourceModel->reserveResources(nullptr); + bumpCycle(); + releasePending(); + } + if (Available.size() == 1) + return *Available.begin(); + return nullptr; +} + +#ifndef NDEBUG +void ConvergingVLIWScheduler::traceCandidate(const char *Label, + const ReadyQueue &Q, + SUnit *SU, PressureChange P) { + dbgs() << Label << " " << Q.getName() << " "; + if (P.isValid()) + dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":" + << P.getUnitInc() << " "; + else + dbgs() << " "; + SU->dump(DAG); +} +#endif + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +static SUnit *getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = nullptr; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return nullptr; + OnlyAvailablePred = &Pred; + } + } + return OnlyAvailablePred; +} + +/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor +/// of SU, return it, otherwise return null. +static SUnit *getSingleUnscheduledSucc(SUnit *SU) { + SUnit *OnlyAvailableSucc = nullptr; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + SUnit &Succ = *I->getSUnit(); + if (!Succ.isScheduled) { + // We found an available, but not scheduled, successor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ) + return nullptr; + OnlyAvailableSucc = &Succ; + } + } + return OnlyAvailableSucc; +} + +// Constants used to denote relative importance of +// heuristic components for cost computation. +static const unsigned PriorityOne = 200; +static const unsigned PriorityTwo = 50; +static const unsigned ScaleTwo = 10; +static const unsigned FactorOne = 2; + +/// Single point to compute overall scheduling cost. +/// TODO: More heuristics will be used soon. +int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, + SchedCandidate &Candidate, + RegPressureDelta &Delta, + bool verbose) { + // Initial trivial priority. 
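[editor's note] The PriorityOne/PriorityTwo/ScaleTwo/FactorOne constants above weight the cost computed by SchedulingCost(), whose body follows. A stand-alone restatement of that formula for the top-queue direction, evaluated on made-up inputs, shows how the pieces combine; it mirrors the code below rather than replacing it:

// Worked example of the SchedulingCost() formula (top-queue direction).
#include <cstdio>

int main() {
  const unsigned PriorityOne = 200, PriorityTwo = 50, ScaleTwo = 10, FactorOne = 2;

  // Example inputs for one candidate SUnit.
  bool     IsScheduleHigh     = false;
  unsigned Height             = 7;      // critical-path height
  bool     ResourceAvailable  = true;   // fits in the current packet
  unsigned NumNodesBlocking   = 3;      // nodes this SU solely unblocks
  int      ExcessUnitInc      = 1;      // register-pressure deltas
  int      CriticalMaxUnitInc = 0;

  int ResCount = 1;
  if (IsScheduleHigh)    ResCount += PriorityOne;
  ResCount += Height * ScaleTwo;                 // prefer the critical path
  if (ResourceAvailable) ResCount <<= FactorOne;
  ResCount += NumNodesBlocking * ScaleTwo;       // prefer nodes that unblock others
  ResCount -= ExcessUnitInc * PriorityTwo;       // penalize register pressure
  ResCount -= CriticalMaxUnitInc * PriorityTwo;

  std::printf("cost = %d\n", ResCount);          // ((1 + 70) << 2) + 30 - 50 = 264
}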
+ int ResCount = 1; + + // Do not waste time on a node that is already scheduled. + if (!SU || SU->isScheduled) + return ResCount; + + // Forced priority is high. + if (SU->isScheduleHigh) + ResCount += PriorityOne; + + // Critical path first. + if (Q.getID() == TopQID) { + ResCount += (SU->getHeight() * ScaleTwo); + + // If resources are available for it, multiply the + // chance of scheduling. + if (Top.ResourceModel->isResourceAvailable(SU)) + ResCount <<= FactorOne; + } else { + ResCount += (SU->getDepth() * ScaleTwo); + + // If resources are available for it, multiply the + // chance of scheduling. + if (Bot.ResourceModel->isResourceAvailable(SU)) + ResCount <<= FactorOne; + } + + unsigned NumNodesBlocking = 0; + if (Q.getID() == TopQID) { + // How many SUs does it block from scheduling? + // Look at all of the successors of this node. + // Count the number of nodes that + // this node is the sole unscheduled node for. + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (getSingleUnscheduledPred(I->getSUnit()) == SU) + ++NumNodesBlocking; + } else { + // How many unscheduled predecessors block this node? + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (getSingleUnscheduledSucc(I->getSUnit()) == SU) + ++NumNodesBlocking; + } + ResCount += (NumNodesBlocking * ScaleTwo); + + // Factor in reg pressure as a heuristic. + ResCount -= (Delta.Excess.getUnitInc()*PriorityTwo); + ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityTwo); + + DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")"); + + return ResCount; +} + +/// Pick the best candidate from the top queue. +/// +/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during +/// DAG building. To adjust for the current scheduling location we need to +/// maintain the number of vreg uses remaining to be top-scheduled. +ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler:: +pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, + SchedCandidate &Candidate) { + DEBUG(Q.dump()); + + // getMaxPressureDelta temporarily modifies the tracker. + RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); + + // BestSU remains NULL if no top candidates beat the best existing candidate. + CandResult FoundCandidate = NoCand; + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + RegPressureDelta RPDelta; + TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + + int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false); + + // Initialize the candidate if needed. + if (!Candidate.SU) { + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = NodeOrder; + continue; + } + + // Best cost. + if (CurrentCost > Candidate.SCost) { + DEBUG(traceCandidate("CCAND", Q, *I)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + continue; + } + + // Fall through to original instruction order. + // Only consider node order if Candidate was chosen from this Q. + if (FoundCandidate == NoCand) + continue; + } + return FoundCandidate; +} + +/// Pick the best candidate node from either the top or bottom queue. +SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { + // Schedule as far as possible in the direction of no choice. 
This is most + // efficient, but also provides the best heuristics for CriticalPSets. + if (SUnit *SU = Bot.pickOnlyChoice()) { + IsTopNode = false; + return SU; + } + if (SUnit *SU = Top.pickOnlyChoice()) { + IsTopNode = true; + return SU; + } + SchedCandidate BotCand; + // Prefer bottom scheduling when heuristics are silent. + CandResult BotResult = pickNodeFromQueue(Bot.Available, + DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + + // If either Q has a single candidate that provides the least increase in + // Excess pressure, we can immediately schedule from that Q. + // + // RegionCriticalPSets summarizes the pressure within the scheduled region and + // affects picking from either Q. If scheduling in one direction must + // increase pressure for one of the excess PSets, then schedule in that + // direction first to provide more freedom in the other direction. + if (BotResult == SingleExcess || BotResult == SingleCritical) { + IsTopNode = false; + return BotCand.SU; + } + // Check if the top Q has a better candidate. + SchedCandidate TopCand; + CandResult TopResult = pickNodeFromQueue(Top.Available, + DAG->getTopRPTracker(), TopCand); + assert(TopResult != NoCand && "failed to find the first candidate"); + + if (TopResult == SingleExcess || TopResult == SingleCritical) { + IsTopNode = true; + return TopCand.SU; + } + // If either Q has a single candidate that minimizes pressure above the + // original region's pressure pick it. + if (BotResult == SingleMax) { + IsTopNode = false; + return BotCand.SU; + } + if (TopResult == SingleMax) { + IsTopNode = true; + return TopCand.SU; + } + if (TopCand.SCost > BotCand.SCost) { + IsTopNode = true; + return TopCand.SU; + } + // Otherwise prefer the bottom candidate in node order. + IsTopNode = false; + return BotCand.SU; +} + +/// Pick the best node to balance the schedule. Implements MachineSchedStrategy. +SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) { + assert(Top.Available.empty() && Top.Pending.empty() && + Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); + return nullptr; + } + SUnit *SU; + if (llvm::ForceTopDown) { + SU = Top.pickOnlyChoice(); + if (!SU) { + SchedCandidate TopCand; + CandResult TopResult = + pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand); + assert(TopResult != NoCand && "failed to find the first candidate"); + (void)TopResult; + SU = TopCand.SU; + } + IsTopNode = true; + } else if (llvm::ForceBottomUp) { + SU = Bot.pickOnlyChoice(); + if (!SU) { + SchedCandidate BotCand; + CandResult BotResult = + pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + (void)BotResult; + SU = BotCand.SU; + } + IsTopNode = false; + } else { + SU = pickNodeBidrectional(IsTopNode); + } + if (SU->isTopReady()) + Top.removeReady(SU); + if (SU->isBottomReady()) + Bot.removeReady(SU); + + DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") + << " Scheduling Instruction in cycle " + << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n'; + SU->dump(DAG)); + return SU; +} + +/// Update the scheduler's state after scheduling a node. This is the same node +/// that was just returned by pickNode(). However, VLIWMachineScheduler needs +/// to update it's state based on the current cycle before MachineSchedStrategy +/// does. 
+void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { + if (IsTopNode) { + SU->TopReadyCycle = Top.CurrCycle; + Top.bumpNode(SU); + } else { + SU->BotReadyCycle = Bot.CurrCycle; + Bot.bumpNode(SU); + } +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h new file mode 100644 index 0000000..6034344 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -0,0 +1,244 @@ +//===-- HexagonMachineScheduler.h - Custom Hexagon MI scheduler. ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Custom Hexagon MI scheduler. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H + +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +namespace llvm { +//===----------------------------------------------------------------------===// +// ConvergingVLIWScheduler - Implementation of the standard +// MachineSchedStrategy. +//===----------------------------------------------------------------------===// + +class VLIWResourceModel { + /// ResourcesModel - Represents VLIW state. + /// Not limited to VLIW targets per say, but assumes + /// definition of DFA by a target. + DFAPacketizer *ResourcesModel; + + const TargetSchedModel *SchedModel; + + /// Local packet/bundle model. Purely + /// internal to the MI schedulre at the time. + std::vector<SUnit*> Packet; + + /// Total packets created. + unsigned TotalPackets; + +public: + VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM) + : SchedModel(SM), TotalPackets(0) { + ResourcesModel = STI.getInstrInfo()->CreateTargetScheduleState(STI); + + // This hard requirement could be relaxed, + // but for now do not let it proceed. + assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + Packet.resize(SchedModel->getIssueWidth()); + Packet.clear(); + ResourcesModel->clearResources(); + } + + ~VLIWResourceModel() { + delete ResourcesModel; + } + + void resetPacketState() { + Packet.clear(); + } + + void resetDFA() { + ResourcesModel->clearResources(); + } + + void reset() { + Packet.clear(); + ResourcesModel->clearResources(); + } + + bool isResourceAvailable(SUnit *SU); + bool reserveResources(SUnit *SU); + unsigned getTotalPackets() const { return TotalPackets; } +}; + +/// Extend the standard ScheduleDAGMI to provide more context and override the +/// top-level schedule() driver. 
+class VLIWMachineScheduler : public ScheduleDAGMILive { +public: + VLIWMachineScheduler(MachineSchedContext *C, + std::unique_ptr<MachineSchedStrategy> S) + : ScheduleDAGMILive(C, std::move(S)) {} + + /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's + /// time to do some work. + void schedule() override; + /// Perform platform-specific DAG postprocessing. + void postprocessDAG(); +}; + +/// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics +/// to balance the schedule. +class ConvergingVLIWScheduler : public MachineSchedStrategy { + + /// Store the state used by ConvergingVLIWScheduler heuristics, required + /// for the lifetime of one invocation of pickNode(). + struct SchedCandidate { + // The best SUnit candidate. + SUnit *SU; + + // Register pressure values for the best candidate. + RegPressureDelta RPDelta; + + // Best scheduling cost. + int SCost; + + SchedCandidate(): SU(nullptr), SCost(0) {} + }; + /// Represent the type of SchedCandidate found within a single queue. + enum CandResult { + NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure, + BestCost}; + + /// Each Scheduling boundary is associated with ready queues. It tracks the + /// current cycle in whichever direction at has moved, and maintains the state + /// of "hazards" and other interlocks at the current cycle. + struct VLIWSchedBoundary { + VLIWMachineScheduler *DAG; + const TargetSchedModel *SchedModel; + + ReadyQueue Available; + ReadyQueue Pending; + bool CheckPending; + + ScheduleHazardRecognizer *HazardRec; + VLIWResourceModel *ResourceModel; + + unsigned CurrCycle; + unsigned IssueCount; + + /// MinReadyCycle - Cycle of the soonest available instruction. + unsigned MinReadyCycle; + + // Remember the greatest min operand latency. + unsigned MaxMinLatency; + + /// Pending queues extend the ready queues with the same ID and the + /// PendingFlag set. + VLIWSchedBoundary(unsigned ID, const Twine &Name): + DAG(nullptr), SchedModel(nullptr), Available(ID, Name+".A"), + Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P"), + CheckPending(false), HazardRec(nullptr), ResourceModel(nullptr), + CurrCycle(0), IssueCount(0), + MinReadyCycle(UINT_MAX), MaxMinLatency(0) {} + + ~VLIWSchedBoundary() { + delete ResourceModel; + delete HazardRec; + } + + void init(VLIWMachineScheduler *dag, const TargetSchedModel *smodel) { + DAG = dag; + SchedModel = smodel; + } + + bool isTop() const { + return Available.getID() == ConvergingVLIWScheduler::TopQID; + } + + bool checkHazard(SUnit *SU); + + void releaseNode(SUnit *SU, unsigned ReadyCycle); + + void bumpCycle(); + + void bumpNode(SUnit *SU); + + void releasePending(); + + void removeReady(SUnit *SU); + + SUnit *pickOnlyChoice(); + }; + + VLIWMachineScheduler *DAG; + const TargetSchedModel *SchedModel; + + // State of the top and bottom scheduled instruction boundaries. 
+ VLIWSchedBoundary Top; + VLIWSchedBoundary Bot; + +public: + /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) + enum { + TopQID = 1, + BotQID = 2, + LogMaxQID = 2 + }; + + ConvergingVLIWScheduler() + : DAG(nullptr), SchedModel(nullptr), Top(TopQID, "TopQ"), + Bot(BotQID, "BotQ") {} + + void initialize(ScheduleDAGMI *dag) override; + + SUnit *pickNode(bool &IsTopNode) override; + + void schedNode(SUnit *SU, bool IsTopNode) override; + + void releaseTopNode(SUnit *SU) override; + + void releaseBottomNode(SUnit *SU) override; + + unsigned ReportPackets() { + return Top.ResourceModel->getTotalPackets() + + Bot.ResourceModel->getTotalPackets(); + } + +protected: + SUnit *pickNodeBidrectional(bool &IsTopNode); + + int SchedulingCost(ReadyQueue &Q, + SUnit *SU, SchedCandidate &Candidate, + RegPressureDelta &Delta, bool verbose); + + CandResult pickNodeFromQueue(ReadyQueue &Q, + const RegPressureTracker &RPTracker, + SchedCandidate &Candidate); +#ifndef NDEBUG + void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU, + PressureChange P = PressureChange()); +#endif +}; + +} // namespace + + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp new file mode 100644 index 0000000..20c4ab1 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -0,0 +1,689 @@ +//===----- HexagonNewValueJump.cpp - Hexagon Backend New Value Jump -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements NewValueJump pass in Hexagon. +// Ideally, we should merge this as a Peephole pass prior to register +// allocation, but because we have a spill in between the feeder and new value +// jump instructions, we are forced to write after register allocation. +// Having said that, we should re-attempt to pull this earlier at some point +// in future. + +// The basic approach looks for sequence of predicated jump, compare instruciton +// that genereates the predicate and, the feeder to the predicate. Once it finds +// all, it collapses compare and jump instruction into a new valu jump +// intstructions. 
+// +// +//===----------------------------------------------------------------------===// +#include "llvm/PassSupport.h" +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <map> +using namespace llvm; + +#define DEBUG_TYPE "hexagon-nvj" + +STATISTIC(NumNVJGenerated, "Number of New Value Jump Instructions created"); + +static cl::opt<int> +DbgNVJCount("nvj-count", cl::init(-1), cl::Hidden, cl::desc( + "Maximum number of predicated jumps to be converted to New Value Jump")); + +static cl::opt<bool> DisableNewValueJumps("disable-nvjump", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Disable New Value Jumps")); + +namespace llvm { + FunctionPass *createHexagonNewValueJump(); + void initializeHexagonNewValueJumpPass(PassRegistry&); +} + + +namespace { + struct HexagonNewValueJump : public MachineFunctionPass { + const HexagonInstrInfo *QII; + const HexagonRegisterInfo *QRI; + + public: + static char ID; + + HexagonNewValueJump() : MachineFunctionPass(ID) { + initializeHexagonNewValueJumpPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineBranchProbabilityInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const override { + return "Hexagon NewValueJump"; + } + + bool runOnMachineFunction(MachineFunction &Fn) override; + + private: + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + + bool isNewValueJumpCandidate(const MachineInstr *MI) const; + }; + +} // end of anonymous namespace + +char HexagonNewValueJump::ID = 0; + +INITIALIZE_PASS_BEGIN(HexagonNewValueJump, "hexagon-nvj", + "Hexagon NewValueJump", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_END(HexagonNewValueJump, "hexagon-nvj", + "Hexagon NewValueJump", false, false) + + +// We have identified this II could be feeder to NVJ, +// verify that it can be. +static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII, + const TargetRegisterInfo *TRI, + MachineBasicBlock::iterator II, + MachineBasicBlock::iterator end, + MachineBasicBlock::iterator skip, + MachineFunction &MF) { + + // Predicated instruction can not be feeder to NVJ. + if (QII->isPredicated(II)) + return false; + + // Bail out if feederReg is a paired register (double regs in + // our case). One would think that we can check to see if a given + // register cmpReg1 or cmpReg2 is a sub register of feederReg + // using -- if (QRI->isSubRegister(feederReg, cmpReg1) logic + // before the callsite of this function + // But we can not as it comes in the following fashion. 
+ // %D0<def> = Hexagon_S2_lsr_r_p %D0<kill>, %R2<kill> + // %R0<def> = KILL %R0, %D0<imp-use,kill> + // %P0<def> = CMPEQri %R0<kill>, 0 + // Hence, we need to check if it's a KILL instruction. + if (II->getOpcode() == TargetOpcode::KILL) + return false; + + + // Make sure there there is no 'def' or 'use' of any of the uses of + // feeder insn between it's definition, this MI and jump, jmpInst + // skipping compare, cmpInst. + // Here's the example. + // r21=memub(r22+r24<<#0) + // p0 = cmp.eq(r21, #0) + // r4=memub(r3+r21<<#0) + // if (p0.new) jump:t .LBB29_45 + // Without this check, it will be converted into + // r4=memub(r3+r21<<#0) + // r21=memub(r22+r24<<#0) + // p0 = cmp.eq(r21, #0) + // if (p0.new) jump:t .LBB29_45 + // and result WAR hazards if converted to New Value Jump. + + for (unsigned i = 0; i < II->getNumOperands(); ++i) { + if (II->getOperand(i).isReg() && + (II->getOperand(i).isUse() || II->getOperand(i).isDef())) { + MachineBasicBlock::iterator localII = II; + ++localII; + unsigned Reg = II->getOperand(i).getReg(); + for (MachineBasicBlock::iterator localBegin = localII; + localBegin != end; ++localBegin) { + if (localBegin == skip ) continue; + // Check for Subregisters too. + if (localBegin->modifiesRegister(Reg, TRI) || + localBegin->readsRegister(Reg, TRI)) + return false; + } + } + } + return true; +} + +// These are the common checks that need to performed +// to determine if +// 1. compare instruction can be moved before jump. +// 2. feeder to the compare instruction can be moved before jump. +static bool commonChecksToProhibitNewValueJump(bool afterRA, + MachineBasicBlock::iterator MII) { + + // If store in path, bail out. + if (MII->getDesc().mayStore()) + return false; + + // if call in path, bail out. + if (MII->getOpcode() == Hexagon::J2_call) + return false; + + // if NVJ is running prior to RA, do the following checks. + if (!afterRA) { + // The following Target Opcode instructions are spurious + // to new value jump. If they are in the path, bail out. + // KILL sets kill flag on the opcode. It also sets up a + // single register, out of pair. + // %D0<def> = Hexagon_S2_lsr_r_p %D0<kill>, %R2<kill> + // %R0<def> = KILL %R0, %D0<imp-use,kill> + // %P0<def> = CMPEQri %R0<kill>, 0 + // PHI can be anything after RA. + // COPY can remateriaze things in between feeder, compare and nvj. + if (MII->getOpcode() == TargetOpcode::KILL || + MII->getOpcode() == TargetOpcode::PHI || + MII->getOpcode() == TargetOpcode::COPY) + return false; + + // The following pseudo Hexagon instructions sets "use" and "def" + // of registers by individual passes in the backend. At this time, + // we don't know the scope of usage and definitions of these + // instructions. + if (MII->getOpcode() == Hexagon::LDriw_pred || + MII->getOpcode() == Hexagon::STriw_pred) + return false; + } + + return true; +} + +static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, + const TargetRegisterInfo *TRI, + MachineBasicBlock::iterator II, + unsigned pReg, + bool secondReg, + bool optLocation, + MachineBasicBlock::iterator end, + MachineFunction &MF) { + + MachineInstr *MI = II; + + // If the second operand of the compare is an imm, make sure it's in the + // range specified by the arch. + if (!secondReg) { + int64_t v = MI->getOperand(2).getImm(); + + if (!(isUInt<5>(v) || + ((MI->getOpcode() == Hexagon::C2_cmpeqi || + MI->getOpcode() == Hexagon::C2_cmpgti) && + (v == -1)))) + return false; + } + + unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning. 
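[editor's note] The immediate range test in canCompareBeNewValueJump() above admits an unsigned 5-bit value, plus -1 for cmp.eq/cmp.gt, which have dedicated "n1" new-value jump encodings. The same predicate in standard C++ (the wrapper name is invented; the u5 test is the plain-C++ equivalent of llvm::isUInt<5>):

// Immediate admissibility for a new-value compare-and-jump.
#include <cstdint>
#include <cstdio>

static bool fitsNewValueCmpImm(int64_t V, bool IsEqOrGt) {
  bool IsU5 = V >= 0 && V <= 31;          // equivalent to isUInt<5>(V)
  return IsU5 || (IsEqOrGt && V == -1);
}

int main() {
  std::printf("%d %d %d\n",
              fitsNewValueCmpImm(31, false),   // 1: fits u5
              fitsNewValueCmpImm(-1, true),    // 1: -1 with cmp.eq/cmp.gt
              fitsNewValueCmpImm(32, true));   // 0: out of range
}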
+ cmpReg1 = MI->getOperand(1).getReg(); + + if (secondReg) { + cmpOp2 = MI->getOperand(2).getReg(); + + // Make sure that that second register is not from COPY + // At machine code level, we don't need this, but if we decide + // to move new value jump prior to RA, we would be needing this. + MachineRegisterInfo &MRI = MF.getRegInfo(); + if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) { + MachineInstr *def = MRI.getVRegDef(cmpOp2); + if (def->getOpcode() == TargetOpcode::COPY) + return false; + } + } + + // Walk the instructions after the compare (predicate def) to the jump, + // and satisfy the following conditions. + ++II ; + for (MachineBasicBlock::iterator localII = II; localII != end; + ++localII) { + + // Check 1. + // If "common" checks fail, bail out. + if (!commonChecksToProhibitNewValueJump(optLocation, localII)) + return false; + + // Check 2. + // If there is a def or use of predicate (result of compare), bail out. + if (localII->modifiesRegister(pReg, TRI) || + localII->readsRegister(pReg, TRI)) + return false; + + // Check 3. + // If there is a def of any of the use of the compare (operands of compare), + // bail out. + // Eg. + // p0 = cmp.eq(r2, r0) + // r2 = r4 + // if (p0.new) jump:t .LBB28_3 + if (localII->modifiesRegister(cmpReg1, TRI) || + (secondReg && localII->modifiesRegister(cmpOp2, TRI))) + return false; + } + return true; +} + + +// Given a compare operator, return a matching New Value Jump compare operator. +// Make sure that MI here is included in isNewValueJumpCandidate. +static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, + bool secondRegNewified, + MachineBasicBlock *jmpTarget, + const MachineBranchProbabilityInfo + *MBPI) { + bool taken = false; + MachineBasicBlock *Src = MI->getParent(); + const BranchProbability Prediction = + MBPI->getEdgeProbability(Src, jmpTarget); + + if (Prediction >= BranchProbability(1,2)) + taken = true; + + switch (MI->getOpcode()) { + case Hexagon::C2_cmpeq: + return taken ? Hexagon::J4_cmpeq_t_jumpnv_t + : Hexagon::J4_cmpeq_t_jumpnv_nt; + + case Hexagon::C2_cmpeqi: { + if (reg >= 0) + return taken ? Hexagon::J4_cmpeqi_t_jumpnv_t + : Hexagon::J4_cmpeqi_t_jumpnv_nt; + else + return taken ? Hexagon::J4_cmpeqn1_t_jumpnv_t + : Hexagon::J4_cmpeqn1_t_jumpnv_nt; + } + + case Hexagon::C2_cmpgt: { + if (secondRegNewified) + return taken ? Hexagon::J4_cmplt_t_jumpnv_t + : Hexagon::J4_cmplt_t_jumpnv_nt; + else + return taken ? Hexagon::J4_cmpgt_t_jumpnv_t + : Hexagon::J4_cmpgt_t_jumpnv_nt; + } + + case Hexagon::C2_cmpgti: { + if (reg >= 0) + return taken ? Hexagon::J4_cmpgti_t_jumpnv_t + : Hexagon::J4_cmpgti_t_jumpnv_nt; + else + return taken ? Hexagon::J4_cmpgtn1_t_jumpnv_t + : Hexagon::J4_cmpgtn1_t_jumpnv_nt; + } + + case Hexagon::C2_cmpgtu: { + if (secondRegNewified) + return taken ? Hexagon::J4_cmpltu_t_jumpnv_t + : Hexagon::J4_cmpltu_t_jumpnv_nt; + else + return taken ? Hexagon::J4_cmpgtu_t_jumpnv_t + : Hexagon::J4_cmpgtu_t_jumpnv_nt; + } + + case Hexagon::C2_cmpgtui: + return taken ? Hexagon::J4_cmpgtui_t_jumpnv_t + : Hexagon::J4_cmpgtui_t_jumpnv_nt; + + case Hexagon::C4_cmpneq: + return taken ? Hexagon::J4_cmpeq_f_jumpnv_t + : Hexagon::J4_cmpeq_f_jumpnv_nt; + + case Hexagon::C4_cmplte: + if (secondRegNewified) + return taken ? Hexagon::J4_cmplt_f_jumpnv_t + : Hexagon::J4_cmplt_f_jumpnv_nt; + return taken ? Hexagon::J4_cmpgt_f_jumpnv_t + : Hexagon::J4_cmpgt_f_jumpnv_nt; + + case Hexagon::C4_cmplteu: + if (secondRegNewified) + return taken ? 
Hexagon::J4_cmpltu_f_jumpnv_t + : Hexagon::J4_cmpltu_f_jumpnv_nt; + return taken ? Hexagon::J4_cmpgtu_f_jumpnv_t + : Hexagon::J4_cmpgtu_f_jumpnv_nt; + + default: + llvm_unreachable("Could not find matching New Value Jump instruction."); + } + // return *some value* to avoid compiler warning + return 0; +} + +bool HexagonNewValueJump::isNewValueJumpCandidate(const MachineInstr *MI) + const { + switch (MI->getOpcode()) { + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtui: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: + return true; + + default: + return false; + } +} + + +bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { + + DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n" + << "********** Function: " + << MF.getName() << "\n"); + + // If we move NewValueJump before register allocation we'll need live variable + // analysis here too. + + QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo()); + QRI = static_cast<const HexagonRegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + + if (DisableNewValueJumps) { + return false; + } + + int nvjCount = DbgNVJCount; + int nvjGenerated = 0; + + // Loop through all the bb's of the function + for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock *MBB = &*MBBb; + + DEBUG(dbgs() << "** dumping bb ** " + << MBB->getNumber() << "\n"); + DEBUG(MBB->dump()); + DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n"); + bool foundJump = false; + bool foundCompare = false; + bool invertPredicate = false; + unsigned predReg = 0; // predicate reg of the jump. + unsigned cmpReg1 = 0; + int cmpOp2 = 0; + bool MO1IsKill = false; + bool MO2IsKill = false; + MachineBasicBlock::iterator jmpPos; + MachineBasicBlock::iterator cmpPos; + MachineInstr *cmpInstr = nullptr, *jmpInstr = nullptr; + MachineBasicBlock *jmpTarget = nullptr; + bool afterRA = false; + bool isSecondOpReg = false; + bool isSecondOpNewified = false; + // Traverse the basic block - bottom up + for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin(); + MII != E;) { + MachineInstr *MI = --MII; + if (MI->isDebugValue()) { + continue; + } + + if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated)) + break; + + DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n"); + + if (!foundJump && + (MI->getOpcode() == Hexagon::J2_jumpt || + MI->getOpcode() == Hexagon::J2_jumpf || + MI->getOpcode() == Hexagon::J2_jumptnewpt || + MI->getOpcode() == Hexagon::J2_jumptnew || + MI->getOpcode() == Hexagon::J2_jumpfnewpt || + MI->getOpcode() == Hexagon::J2_jumpfnew)) { + // This is where you would insert your compare and + // instr that feeds compare + jmpPos = MII; + jmpInstr = MI; + predReg = MI->getOperand(0).getReg(); + afterRA = TargetRegisterInfo::isPhysicalRegister(predReg); + + // If ifconverter had not messed up with the kill flags of the + // operands, the following check on the kill flag would suffice. + // if(!jmpInstr->getOperand(0).isKill()) break; + + // This predicate register is live out out of BB + // this would only work if we can actually use Live + // variable analysis on phy regs - but LLVM does not + // provide LV analysis on phys regs. 
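[editor's note] getNewValueJumpOpcode() above decides between the ":t" (taken) and ":nt" (not-taken) jump variants by comparing the edge's branch probability against 1/2. A tiny sketch of that selection with a plain numerator/denominator standing in for BranchProbability:

// Taken/not-taken hint selection as in getNewValueJumpOpcode().
#include <cstdio>

static const char *pickHint(unsigned Num, unsigned Den) {
  bool Taken = 2 * Num >= Den;            // Prediction >= BranchProbability(1, 2)
  return Taken ? "jumpnv_t" : "jumpnv_nt";
}

int main() {
  std::printf("%s %s\n", pickHint(3, 4), pickHint(1, 4)); // jumpnv_t jumpnv_nt
}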
+ //if(LVs.isLiveOut(predReg, *MBB)) break; + + // Get all the successors of this block - which will always + // be 2. Check if the predicate register is live in in those + // successor. If yes, we can not delete the predicate - + // I am doing this only because LLVM does not provide LiveOut + // at the BB level. + bool predLive = false; + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SIE = MBB->succ_end(); SI != SIE; ++SI) { + MachineBasicBlock* succMBB = *SI; + if (succMBB->isLiveIn(predReg)) { + predLive = true; + } + } + if (predLive) + break; + + jmpTarget = MI->getOperand(1).getMBB(); + foundJump = true; + if (MI->getOpcode() == Hexagon::J2_jumpf || + MI->getOpcode() == Hexagon::J2_jumpfnewpt || + MI->getOpcode() == Hexagon::J2_jumpfnew) { + invertPredicate = true; + } + continue; + } + + // No new value jump if there is a barrier. A barrier has to be in its + // own packet. A barrier has zero operands. We conservatively bail out + // here if we see any instruction with zero operands. + if (foundJump && MI->getNumOperands() == 0) + break; + + if (foundJump && + !foundCompare && + MI->getOperand(0).isReg() && + MI->getOperand(0).getReg() == predReg) { + + // Not all compares can be new value compare. Arch Spec: 7.6.1.1 + if (isNewValueJumpCandidate(MI)) { + + assert((MI->getDesc().isCompare()) && + "Only compare instruction can be collapsed into New Value Jump"); + isSecondOpReg = MI->getOperand(2).isReg(); + + if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg, + afterRA, jmpPos, MF)) + break; + + cmpInstr = MI; + cmpPos = MII; + foundCompare = true; + + // We need cmpReg1 and cmpOp2(imm or reg) while building + // new value jump instruction. + cmpReg1 = MI->getOperand(1).getReg(); + if (MI->getOperand(1).isKill()) + MO1IsKill = true; + + if (isSecondOpReg) { + cmpOp2 = MI->getOperand(2).getReg(); + if (MI->getOperand(2).isKill()) + MO2IsKill = true; + } else + cmpOp2 = MI->getOperand(2).getImm(); + continue; + } + } + + if (foundCompare && foundJump) { + + // If "common" checks fail, bail out on this BB. + if (!commonChecksToProhibitNewValueJump(afterRA, MII)) + break; + + bool foundFeeder = false; + MachineBasicBlock::iterator feederPos = MII; + if (MI->getOperand(0).isReg() && + MI->getOperand(0).isDef() && + (MI->getOperand(0).getReg() == cmpReg1 || + (isSecondOpReg && + MI->getOperand(0).getReg() == (unsigned) cmpOp2))) { + + unsigned feederReg = MI->getOperand(0).getReg(); + + // First try to see if we can get the feeder from the first operand + // of the compare. If we can not, and if secondOpReg is true + // (second operand of the compare is also register), try that one. + // TODO: Try to come up with some heuristic to figure out which + // feeder would benefit. + + if (feederReg == cmpReg1) { + if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) { + if (!isSecondOpReg) + break; + else + continue; + } else + foundFeeder = true; + } + + if (!foundFeeder && + isSecondOpReg && + feederReg == (unsigned) cmpOp2) + if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) + break; + + if (isSecondOpReg) { + // In case of CMPLT, or CMPLTU, or EQ with the second register + // to newify, swap the operands. 
+ if (cmpInstr->getOpcode() == Hexagon::C2_cmpeq && + feederReg == (unsigned) cmpOp2) { + unsigned tmp = cmpReg1; + bool tmpIsKill = MO1IsKill; + cmpReg1 = cmpOp2; + MO1IsKill = MO2IsKill; + cmpOp2 = tmp; + MO2IsKill = tmpIsKill; + } + + // Now we have swapped the operands, all we need to check is, + // if the second operand (after swap) is the feeder. + // And if it is, make a note. + if (feederReg == (unsigned)cmpOp2) + isSecondOpNewified = true; + } + + // Now that we are moving feeder close the jump, + // make sure we are respecting the kill values of + // the operands of the feeder. + + bool updatedIsKill = false; + for (unsigned i = 0; i < MI->getNumOperands(); i++) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isUse()) { + unsigned feederReg = MO.getReg(); + for (MachineBasicBlock::iterator localII = feederPos, + end = jmpPos; localII != end; localII++) { + MachineInstr *localMI = localII; + for (unsigned j = 0; j < localMI->getNumOperands(); j++) { + MachineOperand &localMO = localMI->getOperand(j); + if (localMO.isReg() && localMO.isUse() && + localMO.isKill() && feederReg == localMO.getReg()) { + // We found that there is kill of a use register + // Set up a kill flag on the register + localMO.setIsKill(false); + MO.setIsKill(); + updatedIsKill = true; + break; + } + } + if (updatedIsKill) break; + } + } + if (updatedIsKill) break; + } + + MBB->splice(jmpPos, MI->getParent(), MI); + MBB->splice(jmpPos, MI->getParent(), cmpInstr); + DebugLoc dl = MI->getDebugLoc(); + MachineInstr *NewMI; + + assert((isNewValueJumpCandidate(cmpInstr)) && + "This compare is not a New Value Jump candidate."); + unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2, + isSecondOpNewified, + jmpTarget, MBPI); + if (invertPredicate) + opc = QII->getInvertedPredicatedOpcode(opc); + + if (isSecondOpReg) + NewMI = BuildMI(*MBB, jmpPos, dl, + QII->get(opc)) + .addReg(cmpReg1, getKillRegState(MO1IsKill)) + .addReg(cmpOp2, getKillRegState(MO2IsKill)) + .addMBB(jmpTarget); + + else if ((cmpInstr->getOpcode() == Hexagon::C2_cmpeqi || + cmpInstr->getOpcode() == Hexagon::C2_cmpgti) && + cmpOp2 == -1 ) + // Corresponding new-value compare jump instructions don't have the + // operand for -1 immediate value. + NewMI = BuildMI(*MBB, jmpPos, dl, + QII->get(opc)) + .addReg(cmpReg1, getKillRegState(MO1IsKill)) + .addMBB(jmpTarget); + + else + NewMI = BuildMI(*MBB, jmpPos, dl, + QII->get(opc)) + .addReg(cmpReg1, getKillRegState(MO1IsKill)) + .addImm(cmpOp2) + .addMBB(jmpTarget); + + assert(NewMI && "New Value Jump Instruction Not created!"); + (void)NewMI; + if (cmpInstr->getOperand(0).isReg() && + cmpInstr->getOperand(0).isKill()) + cmpInstr->getOperand(0).setIsKill(false); + if (cmpInstr->getOperand(1).isReg() && + cmpInstr->getOperand(1).isKill()) + cmpInstr->getOperand(1).setIsKill(false); + cmpInstr->eraseFromParent(); + jmpInstr->eraseFromParent(); + ++nvjGenerated; + ++NumNVJGenerated; + break; + } + } + } + } + + return true; + +} + +FunctionPass *llvm::createHexagonNewValueJump() { + return new HexagonNewValueJump(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td new file mode 100644 index 0000000..fbd29cd --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td @@ -0,0 +1,603 @@ +//===- HexagonImmediates.td - Hexagon immediate processing -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illnois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +def s32ImmOperand : AsmOperandClass { let Name = "s32Imm"; } +def s8ImmOperand : AsmOperandClass { let Name = "s8Imm"; } +def s8Imm64Operand : AsmOperandClass { let Name = "s8Imm64"; } +def s6ImmOperand : AsmOperandClass { let Name = "s6Imm"; } +def s4ImmOperand : AsmOperandClass { let Name = "s4Imm"; } +def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; } +def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; } +def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; } +def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; } +def s4_6ImmOperand : AsmOperandClass { let Name = "s4_6Imm"; } +def s3_6ImmOperand : AsmOperandClass { let Name = "s3_6Imm"; } +def u64ImmOperand : AsmOperandClass { let Name = "u64Imm"; } +def u32ImmOperand : AsmOperandClass { let Name = "u32Imm"; } +def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; } +def u16ImmOperand : AsmOperandClass { let Name = "u16Imm"; } +def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; } +def u16_1ImmOperand : AsmOperandClass { let Name = "u16_1Imm"; } +def u16_2ImmOperand : AsmOperandClass { let Name = "u16_2Imm"; } +def u16_3ImmOperand : AsmOperandClass { let Name = "u16_3Imm"; } +def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; } +def u10ImmOperand : AsmOperandClass { let Name = "u10Imm"; } +def u9ImmOperand : AsmOperandClass { let Name = "u9Imm"; } +def u8ImmOperand : AsmOperandClass { let Name = "u8Imm"; } +def u7ImmOperand : AsmOperandClass { let Name = "u7Imm"; } +def u6ImmOperand : AsmOperandClass { let Name = "u6Imm"; } +def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; } +def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; } +def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; } +def u6_3ImmOperand : AsmOperandClass { let Name = "u6_3Imm"; } +def u5ImmOperand : AsmOperandClass { let Name = "u5Imm"; } +def u4ImmOperand : AsmOperandClass { let Name = "u4Imm"; } +def u3ImmOperand : AsmOperandClass { let Name = "u3Imm"; } +def u2ImmOperand : AsmOperandClass { let Name = "u2Imm"; } +def u1ImmOperand : AsmOperandClass { let Name = "u1Imm"; } +def n8ImmOperand : AsmOperandClass { let Name = "n8Imm"; } +// Immediate operands. 
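+// Each AsmOperandClass above is referenced by a ParserMatchClass below; that
+// is how the generated assembly matcher ties an instruction's immediate field
+// to its operand class, while DecoderMethod names the routine the
+// disassembler calls to extract the value.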
+ +let OperandType = "OPERAND_IMMEDIATE", + DecoderMethod = "unsignedImmDecoder" in { + def s32Imm : Operand<i32> { let ParserMatchClass = s32ImmOperand; + let DecoderMethod = "s32ImmDecoder"; } + def s8Imm : Operand<i32> { let ParserMatchClass = s8ImmOperand; + let DecoderMethod = "s8ImmDecoder"; } + def s8Imm64 : Operand<i64> { let ParserMatchClass = s8Imm64Operand; + let DecoderMethod = "s8ImmDecoder"; } + def s6Imm : Operand<i32> { let ParserMatchClass = s6ImmOperand; + let DecoderMethod = "s6_0ImmDecoder"; } + def s6_3Imm : Operand<i32>; + def s4Imm : Operand<i32> { let ParserMatchClass = s4ImmOperand; + let DecoderMethod = "s4_0ImmDecoder"; } + def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; + let DecoderMethod = "s4_0ImmDecoder"; } + def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand; + let DecoderMethod = "s4_1ImmDecoder"; } + def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand; + let DecoderMethod = "s4_2ImmDecoder"; } + def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand; + let DecoderMethod = "s4_3ImmDecoder"; } + def u64Imm : Operand<i64> { let ParserMatchClass = u64ImmOperand; } + def u32Imm : Operand<i32> { let ParserMatchClass = u32ImmOperand; } + def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; } + def u16Imm : Operand<i32> { let ParserMatchClass = u16ImmOperand; } + def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; } + def u16_1Imm : Operand<i32> { let ParserMatchClass = u16_1ImmOperand; } + def u16_2Imm : Operand<i32> { let ParserMatchClass = u16_2ImmOperand; } + def u16_3Imm : Operand<i32> { let ParserMatchClass = u16_3ImmOperand; } + def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; } + def u10Imm : Operand<i32> { let ParserMatchClass = u10ImmOperand; } + def u9Imm : Operand<i32> { let ParserMatchClass = u9ImmOperand; } + def u8Imm : Operand<i32> { let ParserMatchClass = u8ImmOperand; } + def u7Imm : Operand<i32> { let ParserMatchClass = u7ImmOperand; } + def u6Imm : Operand<i32> { let ParserMatchClass = u6ImmOperand; } + def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; } + def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; } + def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; } + def u6_3Imm : Operand<i32> { let ParserMatchClass = u6_3ImmOperand; } + def u5Imm : Operand<i32> { let ParserMatchClass = u5ImmOperand; } + def u5_0Imm : Operand<i32>; + def u5_1Imm : Operand<i32>; + def u5_2Imm : Operand<i32>; + def u5_3Imm : Operand<i32>; + def u4Imm : Operand<i32> { let ParserMatchClass = u4ImmOperand; } + def u4_0Imm : Operand<i32>; + def u4_1Imm : Operand<i32>; + def u4_2Imm : Operand<i32>; + def u4_3Imm : Operand<i32>; + def u3Imm : Operand<i32> { let ParserMatchClass = u3ImmOperand; } + def u3_0Imm : Operand<i32>; + def u3_1Imm : Operand<i32>; + def u3_2Imm : Operand<i32>; + def u3_3Imm : Operand<i32>; + def u2Imm : Operand<i32> { let ParserMatchClass = u2ImmOperand; } + def u1Imm : Operand<i32> { let ParserMatchClass = u1ImmOperand; } + def n8Imm : Operand<i32> { let ParserMatchClass = n8ImmOperand; } +} + +let OperandType = "OPERAND_IMMEDIATE" in { + def s4_6Imm : Operand<i32> { let ParserMatchClass = s4_6ImmOperand; + let PrintMethod = "prints4_6ImmOperand"; + let DecoderMethod = "s4_6ImmDecoder";} + def s4_7Imm : Operand<i32> { let PrintMethod = "prints4_7ImmOperand"; + let DecoderMethod = "s4_6ImmDecoder";} + def s3_6Imm : Operand<i32> { let ParserMatchClass = s3_6ImmOperand; + let 
PrintMethod = "prints3_6ImmOperand"; + let DecoderMethod = "s3_6ImmDecoder";} + def s3_7Imm : Operand<i32> { let PrintMethod = "prints3_7ImmOperand"; + let DecoderMethod = "s3_6ImmDecoder";} +} + +// +// Immediate predicates +// +def s32ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<32>(v); +}]>; + +def s32_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<32>(v); +}]>; + +def s31_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<31,1>(v); +}]>; + +def s30_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<30,2>(v); +}]>; + +def s29_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<29,3>(v); +}]>; + +def s16ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<16>(v); +}]>; + +def s11_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<11>(v); +}]>; + +def s11_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<11,1>(v); +}]>; + +def s11_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<11,2>(v); +}]>; + +def s11_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<11,3>(v); +}]>; + +def s10ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<10>(v); +}]>; + +def s8ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<8>(v); +}]>; + +def s8Imm64Pred : PatLeaf<(i64 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<8>(v); +}]>; + +def s6ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<6>(v); +}]>; + +def s4_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<4>(v); +}]>; + +def s4_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<4,1>(v); +}]>; + +def s4_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<4,2>(v); +}]>; + +def s4_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<4,3>(v); +}]>; + +def u64ImmPred : PatLeaf<(i64 imm), [{ + // Adding "N ||" to suppress gcc unused warning. 
+ return (N || true); +}]>; + +def u32ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<32>(v); +}]>; + +def u32_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<32>(v); +}]>; + +def u31_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<31,1>(v); +}]>; + +def u30_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<30,2>(v); +}]>; + +def u29_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<29,3>(v); +}]>; + +def u26_6ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<26,6>(v); +}]>; + +def u16_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<16>(v); +}]>; + +def u16_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<16,1>(v); +}]>; + +def u16_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<16,2>(v); +}]>; + +def u11_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<11,3>(v); +}]>; + +def u10ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<10>(v); +}]>; + +def u9ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<9>(v); +}]>; + +def u8ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<8>(v); +}]>; + +def u7StrictPosImmPred : ImmLeaf<i32, [{ + // u7StrictPosImmPred predicate - True if the immediate fits in an 7-bit + // unsigned field and is strictly greater than 0. + return isUInt<7>(Imm) && Imm > 0; +}]>; + +def u7ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<7>(v); +}]>; + +def u6ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<6>(v); +}]>; + +def u6_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<6>(v); +}]>; + +def u6_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<6,1>(v); +}]>; + +def u6_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<6,2>(v); +}]>; + +def u6_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<6,3>(v); +}]>; + +def u5ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<5>(v); +}]>; + +def u4ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<4>(v); +}]>; + +def u3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<3>(v); +}]>; + +def u2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<2>(v); +}]>; + +def u1ImmPred : PatLeaf<(i1 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<1>(v); +}]>; + +def u1ImmPred32 : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<1>(v); +}]>; + +def m5BImmPred : PatLeaf<(i32 imm), [{ + // m5BImmPred predicate - True if the (char) number is in range -1 .. -31 + // and will fit in a 5 bit field when made positive, for use in memops. 
+ // this is specific to the zero extending of a negative by CombineInstr + int8_t v = (int8_t)N->getSExtValue(); + return (-31 <= v && v <= -1); +}]>; + +def m5HImmPred : PatLeaf<(i32 imm), [{ + // m5HImmPred predicate - True if the (short) number is in range -1 .. -31 + // and will fit in a 5 bit field when made positive, for use in memops. + // this is specific to the zero extending of a negative by CombineInstr + int16_t v = (int16_t)N->getSExtValue(); + return (-31 <= v && v <= -1); +}]>; + +def m5ImmPred : PatLeaf<(i32 imm), [{ + // m5ImmPred predicate - True if the number is in range -1 .. -31 + // and will fit in a 5 bit field when made positive, for use in memops. + int64_t v = (int64_t)N->getSExtValue(); + return (-31 <= v && v <= -1); +}]>; + +//InN means negative integers in [-(2^N - 1), 0] +def n8ImmPred : PatLeaf<(i32 imm), [{ + // n8ImmPred predicate - True if the immediate fits in a 8-bit signed + // field. + int64_t v = (int64_t)N->getSExtValue(); + return (-255 <= v && v <= 0); +}]>; + +def nOneImmPred : PatLeaf<(i32 imm), [{ + // nOneImmPred predicate - True if the immediate is -1. + int64_t v = (int64_t)N->getSExtValue(); + return (-1 == v); +}]>; + +def Set5ImmPred : PatLeaf<(i32 imm), [{ + // Set5ImmPred predicate - True if the number is in the series of values. + // [ 2^0, 2^1, ... 2^31 ] + // For use in setbit immediate. + uint32_t v = (int32_t)N->getSExtValue(); + // Constrain to 32 bits, and then check for single bit. + return ImmIsSingleBit(v); +}]>; + +def Clr5ImmPred : PatLeaf<(i32 imm), [{ + // Clr5ImmPred predicate - True if the number is in the series of + // bit negated values. + // [ 2^0, 2^1, ... 2^31 ] + // For use in clrbit immediate. + // Note: we are bit NOTing the value. + uint32_t v = ~ (int32_t)N->getSExtValue(); + // Constrain to 32 bits, and then check for single bit. + return ImmIsSingleBit(v); +}]>; + +def SetClr5ImmPred : PatLeaf<(i32 imm), [{ + // True if the immediate is in range 0..31. + int32_t v = (int32_t)N->getSExtValue(); + return (v >= 0 && v <= 31); +}]>; + +def Set4ImmPred : PatLeaf<(i32 imm), [{ + // Set4ImmPred predicate - True if the number is in the series of values: + // [ 2^0, 2^1, ... 2^15 ]. + // For use in setbit immediate. + uint16_t v = (int16_t)N->getSExtValue(); + // Constrain to 16 bits, and then check for single bit. + return ImmIsSingleBit(v); +}]>; + +def Clr4ImmPred : PatLeaf<(i32 imm), [{ + // Clr4ImmPred predicate - True if the number is in the series of + // bit negated values: + // [ 2^0, 2^1, ... 2^15 ]. + // For use in setbit and clrbit immediate. + uint16_t v = ~ (int16_t)N->getSExtValue(); + // Constrain to 16 bits, and then check for single bit. + return ImmIsSingleBit(v); +}]>; + +def SetClr4ImmPred : PatLeaf<(i32 imm), [{ + // True if the immediate is in the range 0..15. + int16_t v = (int16_t)N->getSExtValue(); + return (v >= 0 && v <= 15); +}]>; + +def Set3ImmPred : PatLeaf<(i32 imm), [{ + // True if the number is in the series of values: [ 2^0, 2^1, ... 2^7 ]. + // For use in setbit immediate. + uint8_t v = (int8_t)N->getSExtValue(); + // Constrain to 8 bits, and then check for single bit. + return ImmIsSingleBit(v); +}]>; + +def Clr3ImmPred : PatLeaf<(i32 imm), [{ + // True if the number is in the series of bit negated values: [ 2^0, 2^1, ... 2^7 ]. + // For use in setbit and clrbit immediate. + uint8_t v = ~ (int8_t)N->getSExtValue(); + // Constrain to 8 bits, and then check for single bit. 
+ return ImmIsSingleBit(v); +}]>; + +def SetClr3ImmPred : PatLeaf<(i32 imm), [{ + // True if the immediate is in the range 0..7. + int8_t v = (int8_t)N->getSExtValue(); + return (v >= 0 && v <= 7); +}]>; + + +// Extendable immediate operands. +def f32ExtOperand : AsmOperandClass { let Name = "f32Ext"; } +def s16ExtOperand : AsmOperandClass { let Name = "s16Ext"; } +def s12ExtOperand : AsmOperandClass { let Name = "s12Ext"; } +def s10ExtOperand : AsmOperandClass { let Name = "s10Ext"; } +def s9ExtOperand : AsmOperandClass { let Name = "s9Ext"; } +def s8ExtOperand : AsmOperandClass { let Name = "s8Ext"; } +def s7ExtOperand : AsmOperandClass { let Name = "s7Ext"; } +def s6ExtOperand : AsmOperandClass { let Name = "s6Ext"; } +def s11_0ExtOperand : AsmOperandClass { let Name = "s11_0Ext"; } +def s11_1ExtOperand : AsmOperandClass { let Name = "s11_1Ext"; } +def s11_2ExtOperand : AsmOperandClass { let Name = "s11_2Ext"; } +def s11_3ExtOperand : AsmOperandClass { let Name = "s11_3Ext"; } +def u6ExtOperand : AsmOperandClass { let Name = "u6Ext"; } +def u7ExtOperand : AsmOperandClass { let Name = "u7Ext"; } +def u8ExtOperand : AsmOperandClass { let Name = "u8Ext"; } +def u9ExtOperand : AsmOperandClass { let Name = "u9Ext"; } +def u10ExtOperand : AsmOperandClass { let Name = "u10Ext"; } +def u6_0ExtOperand : AsmOperandClass { let Name = "u6_0Ext"; } +def u6_1ExtOperand : AsmOperandClass { let Name = "u6_1Ext"; } +def u6_2ExtOperand : AsmOperandClass { let Name = "u6_2Ext"; } +def u6_3ExtOperand : AsmOperandClass { let Name = "u6_3Ext"; } +def u32MustExtOperand : AsmOperandClass { let Name = "u32MustExt"; } + + + +let OperandType = "OPERAND_IMMEDIATE", PrintMethod = "printExtOperand", + DecoderMethod = "unsignedImmDecoder" in { + def f32Ext : Operand<f32> { let ParserMatchClass = f32ExtOperand; } + def s16Ext : Operand<i32> { let ParserMatchClass = s16ExtOperand; + let DecoderMethod = "s16ImmDecoder"; } + def s12Ext : Operand<i32> { let ParserMatchClass = s12ExtOperand; + let DecoderMethod = "s12ImmDecoder"; } + def s11_0Ext : Operand<i32> { let ParserMatchClass = s11_0ExtOperand; + let DecoderMethod = "s11_0ImmDecoder"; } + def s11_1Ext : Operand<i32> { let ParserMatchClass = s11_1ExtOperand; + let DecoderMethod = "s11_1ImmDecoder"; } + def s11_2Ext : Operand<i32> { let ParserMatchClass = s11_2ExtOperand; + let DecoderMethod = "s11_2ImmDecoder"; } + def s11_3Ext : Operand<i32> { let ParserMatchClass = s11_3ExtOperand; + let DecoderMethod = "s11_3ImmDecoder"; } + def s10Ext : Operand<i32> { let ParserMatchClass = s10ExtOperand; + let DecoderMethod = "s10ImmDecoder"; } + def s9Ext : Operand<i32> { let ParserMatchClass = s9ExtOperand; + let DecoderMethod = "s90ImmDecoder"; } + def s8Ext : Operand<i32> { let ParserMatchClass = s8ExtOperand; + let DecoderMethod = "s8ImmDecoder"; } + def s7Ext : Operand<i32> { let ParserMatchClass = s7ExtOperand; } + def s6Ext : Operand<i32> { let ParserMatchClass = s6ExtOperand; + let DecoderMethod = "s6_0ImmDecoder"; } + def u6Ext : Operand<i32> { let ParserMatchClass = u6ExtOperand; } + def u7Ext : Operand<i32> { let ParserMatchClass = u7ExtOperand; } + def u8Ext : Operand<i32> { let ParserMatchClass = u8ExtOperand; } + def u9Ext : Operand<i32> { let ParserMatchClass = u9ExtOperand; } + def u10Ext : Operand<i32> { let ParserMatchClass = u10ExtOperand; } + def u6_0Ext : Operand<i32> { let ParserMatchClass = u6_0ExtOperand; } + def u6_1Ext : Operand<i32> { let ParserMatchClass = u6_1ExtOperand; } + def u6_2Ext : Operand<i32> { let ParserMatchClass = u6_2ExtOperand; } + 
def u6_3Ext : Operand<i32> { let ParserMatchClass = u6_3ExtOperand; } + def u32MustExt : Operand<i32> { let ParserMatchClass = u32MustExtOperand; } +} + + +def s4_7ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 10-bit sign extended field and + // is 128-byte aligned. + return isShiftedInt<4,7>(v); + return false; +}]>; + +def s3_7ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 9-bit sign extended field and + // is 128-byte aligned. + return isShiftedInt<3,7>(v); + return false; +}]>; + +def s4_6ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 10-bit sign extended field and + // is 64-byte aligned. + return isShiftedInt<4,6>(v); + return false; +}]>; + +def s3_6ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 9-bit sign extended field and + // is 64-byte aligned. + return isShiftedInt<3,6>(v); + return false; +}]>; + + +// This complex pattern exists only to create a machine instruction operand +// of type "frame index". There doesn't seem to be a way to do that directly +// in the patterns. +def AddrFI : ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>; + +// These complex patterns are not strictly necessary, since global address +// folding will happen during DAG combining. For distinguishing between GA +// and GP, pat frags with HexagonCONST32 and HexagonCONST32_GP can be used. +def AddrGA : ComplexPattern<i32, 1, "SelectAddrGA", [], []>; +def AddrGP : ComplexPattern<i32, 1, "SelectAddrGP", [], []>; + +// Address operands. + +let PrintMethod = "printGlobalOperand" in { + def globaladdress : Operand<i32>; + def globaladdressExt : Operand<i32>; +} + +let PrintMethod = "printJumpTable" in +def jumptablebase : Operand<i32>; + +def brtarget : Operand<OtherVT> { + let DecoderMethod = "brtargetDecoder"; + let PrintMethod = "printBrtarget"; +} +def brtargetExt : Operand<OtherVT> { + let DecoderMethod = "brtargetDecoder"; + let PrintMethod = "printBrtarget"; +} +def calltarget : Operand<i32> { + let DecoderMethod = "brtargetDecoder"; + let PrintMethod = "printBrtarget"; +} + +def bblabel : Operand<i32>; +def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">; + +// Return true if for a 32 to 64-bit sign-extended load. +def is_sext_i32 : PatLeaf<(i64 DoubleRegs:$src1), [{ + LoadSDNode *LD = dyn_cast<LoadSDNode>(N); + if (!LD) + return false; + return LD->getExtensionType() == ISD::SEXTLOAD && + LD->getMemoryVT().getScalarType() == MVT::i32; +}]>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp new file mode 100644 index 0000000..1723771 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp @@ -0,0 +1,150 @@ +//===- HexagonOptimizeSZextends.cpp - Remove unnecessary argument extends -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Pass that removes sign extends for function parameters. 
These parameters +// are already sign extended by the caller per Hexagon's ABI +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" + +#include "Hexagon.h" + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonOptimizeSZextends(); + void initializeHexagonOptimizeSZextendsPass(PassRegistry&); +} + +namespace { + struct HexagonOptimizeSZextends : public FunctionPass { + public: + static char ID; + HexagonOptimizeSZextends() : FunctionPass(ID) { + initializeHexagonOptimizeSZextendsPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override; + + const char *getPassName() const override { + return "Remove sign extends"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineFunctionAnalysis>(); + AU.addPreserved<MachineFunctionAnalysis>(); + AU.addPreserved<StackProtector>(); + FunctionPass::getAnalysisUsage(AU); + } + + bool intrinsicAlreadySextended(Intrinsic::ID IntID); + }; +} + +char HexagonOptimizeSZextends::ID = 0; + +INITIALIZE_PASS(HexagonOptimizeSZextends, "reargs", + "Remove Sign and Zero Extends for Args", false, false) + +bool HexagonOptimizeSZextends::intrinsicAlreadySextended(Intrinsic::ID IntID) { + switch(IntID) { + case llvm::Intrinsic::hexagon_A2_addh_l16_sat_ll: + return true; + default: + break; + } + return false; +} + +bool HexagonOptimizeSZextends::runOnFunction(Function &F) { + unsigned Idx = 1; + // Try to optimize sign extends in formal parameters. It's relying on + // callee already sign extending the values. I'm not sure if our ABI + // requires callee to sign extend though. + for (auto &Arg : F.args()) { + if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) { + if (!isa<PointerType>(Arg.getType())) { + for (auto UI = Arg.use_begin(); UI != Arg.use_end();) { + if (isa<SExtInst>(*UI)) { + Instruction* Use = cast<Instruction>(*UI); + SExtInst* SI = new SExtInst(&Arg, Use->getType()); + assert (EVT::getEVT(SI->getType()) == + (EVT::getEVT(Use->getType()))); + ++UI; + Use->replaceAllUsesWith(SI); + Instruction* First = &F.getEntryBlock().front(); + SI->insertBefore(First); + Use->eraseFromParent(); + } else { + ++UI; + } + } + } + } + ++Idx; + } + + // Try to remove redundant sext operations on Hexagon. The hardware + // already sign extends many 16 bit intrinsic operations to 32 bits. + // For example: + // %34 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32 %x, i32 %y) + // %sext233 = shl i32 %34, 16 + // %conv52 = ashr exact i32 %sext233, 16 + for (auto &B : F) { + for (auto &I : B) { + // Look for arithmetic shift right by 16. + BinaryOperator *Ashr = dyn_cast<BinaryOperator>(&I); + if (!(Ashr && Ashr->getOpcode() == Instruction::AShr)) + continue; + Value *AshrOp1 = Ashr->getOperand(1); + ConstantInt *C = dyn_cast<ConstantInt>(AshrOp1); + // Right shifted by 16. + if (!(C && C->getSExtValue() == 16)) + continue; + + // The first operand of Ashr comes from logical shift left. + Instruction *Shl = dyn_cast<Instruction>(Ashr->getOperand(0)); + if (!(Shl && Shl->getOpcode() == Instruction::Shl)) + continue; + Value *Intr = Shl->getOperand(0); + Value *ShlOp1 = Shl->getOperand(1); + C = dyn_cast<ConstantInt>(ShlOp1); + // Left shifted by 16. 
+ if (!(C && C->getSExtValue() == 16)) + continue; + + // The first operand of Shl comes from an intrinsic. + if (IntrinsicInst *I = dyn_cast<IntrinsicInst>(Intr)) { + if (!intrinsicAlreadySextended(I->getIntrinsicID())) + continue; + // All is well. Replace all uses of AShr with I. + for (auto UI = Ashr->user_begin(), UE = Ashr->user_end(); + UI != UE; ++UI) { + const Use &TheUse = UI.getUse(); + if (Instruction *J = dyn_cast<Instruction>(TheUse.getUser())) { + J->replaceUsesOfWith(Ashr, I); + } + } + } + } + } + + return true; +} + + +FunctionPass *llvm::createHexagonOptimizeSZextends() { + return new HexagonOptimizeSZextends(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp new file mode 100644 index 0000000..e68ff85 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp @@ -0,0 +1,338 @@ +//===-- HexagonPeephole.cpp - Hexagon Peephole Optimiztions ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// This peephole pass optimizes in the following cases. +// 1. Optimizes redundant sign extends for the following case +// Transform the following pattern +// %vreg170<def> = SXTW %vreg166 +// ... +// %vreg176<def> = COPY %vreg170:subreg_loreg +// +// Into +// %vreg176<def> = COPY vreg166 +// +// 2. Optimizes redundant negation of predicates. +// %vreg15<def> = CMPGTrr %vreg6, %vreg2 +// ... +// %vreg16<def> = NOT_p %vreg15<kill> +// ... +// JMP_c %vreg16<kill>, <BB#1>, %PC<imp-def,dead> +// +// Into +// %vreg15<def> = CMPGTrr %vreg6, %vreg2; +// ... +// JMP_cNot %vreg15<kill>, <BB#1>, %PC<imp-def,dead>; +// +// Note: The peephole pass makes the instrucstions like +// %vreg170<def> = SXTW %vreg166 or %vreg16<def> = NOT_p %vreg15<kill> +// redundant and relies on some form of dead removal instructions, like +// DCE or DIE to actually eliminate them. 
+ + +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-peephole" + +static cl::opt<bool> DisableHexagonPeephole("disable-hexagon-peephole", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Peephole Optimization")); + +static cl::opt<bool> DisablePNotP("disable-hexagon-pnotp", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Optimization of PNotP")); + +static cl::opt<bool> DisableOptSZExt("disable-hexagon-optszext", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Optimization of Sign/Zero Extends")); + +static cl::opt<bool> DisableOptExtTo64("disable-hexagon-opt-ext-to-64", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Optimization of extensions to i64.")); + +namespace llvm { + FunctionPass *createHexagonPeephole(); + void initializeHexagonPeepholePass(PassRegistry&); +} + +namespace { + struct HexagonPeephole : public MachineFunctionPass { + const HexagonInstrInfo *QII; + const HexagonRegisterInfo *QRI; + const MachineRegisterInfo *MRI; + + public: + static char ID; + HexagonPeephole() : MachineFunctionPass(ID) { + initializeHexagonPeepholePass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "Hexagon optimize redundant zero and size extends"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + void ChangeOpInto(MachineOperand &Dst, MachineOperand &Src); + }; +} + +char HexagonPeephole::ID = 0; + +INITIALIZE_PASS(HexagonPeephole, "hexagon-peephole", "Hexagon Peephole", + false, false) + +bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { + QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo()); + QRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + MRI = &MF.getRegInfo(); + + DenseMap<unsigned, unsigned> PeepholeMap; + DenseMap<unsigned, std::pair<unsigned, unsigned> > PeepholeDoubleRegsMap; + + if (DisableHexagonPeephole) return false; + + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock *MBB = &*MBBb; + PeepholeMap.clear(); + PeepholeDoubleRegsMap.clear(); + + // Traverse the basic block. 
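+    // Each case below either records a def-to-source mapping (SXTW,
+    // A4_combineir, S2_lsr_i_p by 32, C2_not) or consumes an earlier mapping
+    // to rewrite the current COPY or predicated instruction in place.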
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); + ++MII) { + MachineInstr *MI = MII; + // Look for sign extends: + // %vreg170<def> = SXTW %vreg166 + if (!DisableOptSZExt && MI->getOpcode() == Hexagon::A2_sxtw) { + assert (MI->getNumOperands() == 2); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src.getReg(); + // Just handle virtual registers. + if (TargetRegisterInfo::isVirtualRegister(DstReg) && + TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Map the following: + // %vreg170<def> = SXTW %vreg166 + // PeepholeMap[170] = vreg166 + PeepholeMap[DstReg] = SrcReg; + } + } + + // Look for %vreg170<def> = COMBINE_ir_V4 (0, %vreg169) + // %vreg170:DoublRegs, %vreg169:IntRegs + if (!DisableOptExtTo64 && + MI->getOpcode () == Hexagon::A4_combineir) { + assert (MI->getNumOperands() == 3); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src1 = MI->getOperand(1); + MachineOperand &Src2 = MI->getOperand(2); + if (Src1.getImm() != 0) + continue; + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src2.getReg(); + PeepholeMap[DstReg] = SrcReg; + } + + // Look for this sequence below + // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32 + // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg. + // and convert into + // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg. + if (MI->getOpcode() == Hexagon::S2_lsr_i_p) { + assert(MI->getNumOperands() == 3); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src1 = MI->getOperand(1); + MachineOperand &Src2 = MI->getOperand(2); + if (Src2.getImm() != 32) + continue; + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src1.getReg(); + PeepholeDoubleRegsMap[DstReg] = + std::make_pair(*&SrcReg, Hexagon::subreg_hireg); + } + + // Look for P=NOT(P). + if (!DisablePNotP && + (MI->getOpcode() == Hexagon::C2_not)) { + assert (MI->getNumOperands() == 2); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src.getReg(); + // Just handle virtual registers. + if (TargetRegisterInfo::isVirtualRegister(DstReg) && + TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Map the following: + // %vreg170<def> = NOT_xx %vreg166 + // PeepholeMap[170] = vreg166 + PeepholeMap[DstReg] = SrcReg; + } + } + + // Look for copy: + // %vreg176<def> = COPY %vreg170:subreg_loreg + if (!DisableOptSZExt && MI->isCopy()) { + assert (MI->getNumOperands() == 2); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + + // Make sure we are copying the lower 32 bits. + if (Src.getSubReg() != Hexagon::subreg_loreg) + continue; + + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src.getReg(); + if (TargetRegisterInfo::isVirtualRegister(DstReg) && + TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Try to find in the map. + if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) { + // Change the 1st operand. 
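+            // The low sub-register copy is redirected to the original 32-bit
+            // source recorded in PeepholeMap.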
+ MI->RemoveOperand(1); + MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false)); + } else { + DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI = + PeepholeDoubleRegsMap.find(SrcReg); + if (DI != PeepholeDoubleRegsMap.end()) { + std::pair<unsigned,unsigned> PeepholeSrc = DI->second; + MI->RemoveOperand(1); + MI->addOperand(MachineOperand::CreateReg(PeepholeSrc.first, + false /*isDef*/, + false /*isImp*/, + false /*isKill*/, + false /*isDead*/, + false /*isUndef*/, + false /*isEarlyClobber*/, + PeepholeSrc.second)); + } + } + } + } + + // Look for Predicated instructions. + if (!DisablePNotP) { + bool Done = false; + if (QII->isPredicated(MI)) { + MachineOperand &Op0 = MI->getOperand(0); + unsigned Reg0 = Op0.getReg(); + const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0); + if (RC0->getID() == Hexagon::PredRegsRegClassID) { + // Handle instructions that have a prediate register in op0 + // (most cases of predicable instructions). + if (TargetRegisterInfo::isVirtualRegister(Reg0)) { + // Try to find in the map. + if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) { + // Change the 1st operand and, flip the opcode. + MI->getOperand(0).setReg(PeepholeSrc); + int NewOp = QII->getInvertedPredicatedOpcode(MI->getOpcode()); + MI->setDesc(QII->get(NewOp)); + Done = true; + } + } + } + } + + if (!Done) { + // Handle special instructions. + unsigned Op = MI->getOpcode(); + unsigned NewOp = 0; + unsigned PR = 1, S1 = 2, S2 = 3; // Operand indices. + + switch (Op) { + case Hexagon::C2_mux: + case Hexagon::C2_muxii: + NewOp = Op; + break; + case Hexagon::C2_muxri: + NewOp = Hexagon::C2_muxir; + break; + case Hexagon::C2_muxir: + NewOp = Hexagon::C2_muxri; + break; + } + if (NewOp) { + unsigned PSrc = MI->getOperand(PR).getReg(); + if (unsigned POrig = PeepholeMap.lookup(PSrc)) { + MI->getOperand(PR).setReg(POrig); + MI->setDesc(QII->get(NewOp)); + // Swap operands S1 and S2. 
+ MachineOperand Op1 = MI->getOperand(S1); + MachineOperand Op2 = MI->getOperand(S2); + ChangeOpInto(MI->getOperand(S1), Op2); + ChangeOpInto(MI->getOperand(S2), Op1); + } + } // if (NewOp) + } // if (!Done) + + } // if (!DisablePNotP) + + } // Instruction + } // Basic Block + return true; +} + +void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) { + assert (&Dst != &Src && "Cannot duplicate into itself"); + switch (Dst.getType()) { + case MachineOperand::MO_Register: + if (Src.isReg()) { + Dst.setReg(Src.getReg()); + } else if (Src.isImm()) { + Dst.ChangeToImmediate(Src.getImm()); + } else { + llvm_unreachable("Unexpected src operand type"); + } + break; + + case MachineOperand::MO_Immediate: + if (Src.isImm()) { + Dst.setImm(Src.getImm()); + } else if (Src.isReg()) { + Dst.ChangeToRegister(Src.getReg(), Src.isDef(), Src.isImplicit(), + Src.isKill(), Src.isDead(), Src.isUndef(), + Src.isDebug()); + } else { + llvm_unreachable("Unexpected src operand type"); + } + break; + + default: + llvm_unreachable("Unexpected dst operand type"); + break; + } +} + +FunctionPass *llvm::createHexagonPeephole() { + return new HexagonPeephole(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp new file mode 100644 index 0000000..61c0589 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -0,0 +1,203 @@ +//===-- HexagonRegisterInfo.cpp - Hexagon Register Information ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetRegisterInfo +// class. 
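+// Besides the callee-saved/reserved register queries, it implements frame
+// index elimination, rewriting frame-index pseudo operands into a base
+// register plus immediate offset chosen by the frame lowering.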
+// +//===----------------------------------------------------------------------===// + +#include "HexagonRegisterInfo.h" +#include "Hexagon.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" + +using namespace llvm; + +HexagonRegisterInfo::HexagonRegisterInfo() + : HexagonGenRegisterInfo(Hexagon::R31) {} + + +bool HexagonRegisterInfo::isEHReturnCalleeSaveReg(unsigned R) const { + return R == Hexagon::R0 || R == Hexagon::R1 || R == Hexagon::R2 || + R == Hexagon::R3 || R == Hexagon::D0 || R == Hexagon::D1; +} + +bool HexagonRegisterInfo::isCalleeSaveReg(unsigned Reg) const { + return Hexagon::R16 <= Reg && Reg <= Hexagon::R27; +} + + +const MCPhysReg * +HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF) const { + static const MCPhysReg CallerSavedRegsV4[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, + Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, + Hexagon::R15, 0 + }; + + auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget()); + switch (HST.getHexagonArchVersion()) { + case HexagonSubtarget::V4: + case HexagonSubtarget::V5: + case HexagonSubtarget::V55: + case HexagonSubtarget::V60: + return CallerSavedRegsV4; + } + llvm_unreachable( + "Callee saved registers requested for unknown archtecture version"); +} + + +const MCPhysReg * +HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + static const MCPhysReg CalleeSavedRegsV3[] = { + Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, + Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, + Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 + }; + + switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) { + case HexagonSubtarget::V4: + case HexagonSubtarget::V5: + case HexagonSubtarget::V55: + case HexagonSubtarget::V60: + return CalleeSavedRegsV3; + } + llvm_unreachable("Callee saved registers requested for unknown architecture " + "version"); +} + +BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) + const { + BitVector Reserved(getNumRegs()); + Reserved.set(HEXAGON_RESERVED_REG_1); + Reserved.set(HEXAGON_RESERVED_REG_2); + Reserved.set(Hexagon::R29); + Reserved.set(Hexagon::R30); + Reserved.set(Hexagon::R31); + Reserved.set(Hexagon::PC); + Reserved.set(Hexagon::D15); + Reserved.set(Hexagon::LC0); + Reserved.set(Hexagon::LC1); + Reserved.set(Hexagon::SA0); + Reserved.set(Hexagon::SA1); + return Reserved; +} + + +void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOp, + RegScavenger *RS) const { + // + // Hexagon_TODO: Do we need to enforce this for Hexagon? 
+ assert(SPAdj == 0 && "Unexpected"); + + MachineInstr &MI = *II; + MachineBasicBlock &MB = *MI.getParent(); + MachineFunction &MF = *MB.getParent(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + auto &HII = *HST.getInstrInfo(); + auto &HFI = *HST.getFrameLowering(); + + unsigned BP = 0; + int FI = MI.getOperand(FIOp).getIndex(); + // Select the base pointer (BP) and calculate the actual offset from BP + // to the beginning of the object at index FI. + int Offset = HFI.getFrameIndexReference(MF, FI, BP); + // Add the offset from the instruction. + int RealOffset = Offset + MI.getOperand(FIOp+1).getImm(); + + unsigned Opc = MI.getOpcode(); + switch (Opc) { + case Hexagon::TFR_FIA: + MI.setDesc(HII.get(Hexagon::A2_addi)); + MI.getOperand(FIOp).ChangeToImmediate(RealOffset); + MI.RemoveOperand(FIOp+1); + return; + case Hexagon::TFR_FI: + // Set up the instruction for updating below. + MI.setDesc(HII.get(Hexagon::A2_addi)); + break; + } + + if (HII.isValidOffset(Opc, RealOffset)) { + MI.getOperand(FIOp).ChangeToRegister(BP, false); + MI.getOperand(FIOp+1).ChangeToImmediate(RealOffset); + return; + } + +#ifndef NDEBUG + const Function *F = MF.getFunction(); + dbgs() << "In function "; + if (F) dbgs() << F->getName(); + else dbgs() << "<?>"; + dbgs() << ", BB#" << MB.getNumber() << "\n" << MI; +#endif + llvm_unreachable("Unhandled instruction"); +} + + +unsigned HexagonRegisterInfo::getRARegister() const { + return Hexagon::R31; +} + + +unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction + &MF) const { + const HexagonFrameLowering *TFI = getFrameLowering(MF); + if (TFI->hasFP(MF)) + return getFrameRegister(); + return getStackRegister(); +} + + +unsigned HexagonRegisterInfo::getFrameRegister() const { + return Hexagon::R30; +} + + +unsigned HexagonRegisterInfo::getStackRegister() const { + return Hexagon::R29; +} + + +bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) + const { + return MF.getSubtarget<HexagonSubtarget>().getFrameLowering()->hasFP(MF); +} + + +unsigned HexagonRegisterInfo::getFirstCallerSavedNonParamReg() const { + return Hexagon::R6; +} + + +#define GET_REGINFO_TARGET_DESC +#include "HexagonGenRegisterInfo.inc" diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h new file mode 100644 index 0000000..db7e0f2 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -0,0 +1,89 @@ +//==- HexagonRegisterInfo.h - Hexagon Register Information Impl --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONREGISTERINFO_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONREGISTERINFO_H + +#include "llvm/MC/MachineLocation.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "HexagonGenRegisterInfo.inc" + +// +// We try not to hard code the reserved registers in our code, +// so the following two macros were defined. However, there +// are still a few places that R11 and R10 are hard wired. +// See below. If, in the future, we decided to change the reserved +// register. 
Don't forget changing the following places. +// +// 1. the "Defs" set of STriw_pred in HexagonInstrInfo.td +// 2. the "Defs" set of LDri_pred in HexagonInstrInfo.td +// 3. the definition of "IntRegs" in HexagonRegisterInfo.td +// 4. the definition of "DoubleRegs" in HexagonRegisterInfo.td +// +#define HEXAGON_RESERVED_REG_1 Hexagon::R10 +#define HEXAGON_RESERVED_REG_2 Hexagon::R11 + +namespace llvm { +class HexagonRegisterInfo : public HexagonGenRegisterInfo { +public: + HexagonRegisterInfo(); + + /// Code Generation virtual methods... + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) + const override; + + + BitVector getReservedRegs(const MachineFunction &MF) const override; + + void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; + + /// Returns true since we may need scavenging for a temporary register + /// when generating hardware loop instructions. + bool requiresRegisterScavenging(const MachineFunction &MF) const override { + return true; + } + + /// Returns true. Spill code for predicate registers might need an extra + /// register. + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { + return true; + } + + /// Returns true if the frame pointer is valid. + bool useFPForScavengingIndex(const MachineFunction &MF) const override; + + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override { + return true; + } + + // Debug information queries. + unsigned getRARegister() const; + unsigned getFrameRegister(const MachineFunction &MF) const override; + unsigned getFrameRegister() const; + unsigned getStackRegister() const; + + const MCPhysReg *getCallerSavedRegs(const MachineFunction *MF) const; + + unsigned getFirstCallerSavedNonParamReg() const; + + bool isEHReturnCalleeSaveReg(unsigned Reg) const; + bool isCalleeSaveReg(unsigned Reg) const; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td new file mode 100644 index 0000000..81629dc --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -0,0 +1,270 @@ +//===-- HexagonRegisterInfo.td - Hexagon Register defs -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the Hexagon register file. +//===----------------------------------------------------------------------===// + +let Namespace = "Hexagon" in { + + class HexagonReg<bits<5> num, string n, list<string> alt = [], + list<Register> alias = []> : Register<n> { + field bits<5> Num; + let Aliases = alias; + let HWEncoding{4-0} = num; + } + + class HexagonDoubleReg<bits<5> num, string n, list<Register> subregs, + list<string> alt = []> : + RegisterWithSubRegs<n, subregs> { + field bits<5> Num; + + let AltNames = alt; + let HWEncoding{4-0} = num; + } + + // Registers are identified with 5-bit ID numbers. + // Ri - 32-bit integer registers. + class Ri<bits<5> num, string n, list<string> alt = []> : HexagonReg<num, n, alt> { + let Num = num; + } + + // Rf - 32-bit floating-point registers. 
+ class Rf<bits<5> num, string n> : HexagonReg<num, n> { + let Num = num; + } + + + // Rd - 64-bit registers. + class Rd<bits<5> num, string n, list<Register> subregs> : + HexagonDoubleReg<num, n, subregs> { + let Num = num; + let SubRegs = subregs; + } + + // Rp - predicate registers + class Rp<bits<5> num, string n> : HexagonReg<num, n> { + let Num = num; + } + + + // Rq - vector predicate registers + class Rq<bits<3> num, string n> : Register<n, []> { + let HWEncoding{2-0} = num; + } + + // Rc - control registers + class Rc<bits<5> num, string n, + list<string> alt = [], list<Register> alias = []> : + HexagonReg<num, n, alt, alias> { + let Num = num; + } + + // Rcc - 64-bit control registers. + class Rcc<bits<5> num, string n, list<Register> subregs, + list<string> alt = []> : + HexagonDoubleReg<num, n, subregs, alt> { + let Num = num; + let SubRegs = subregs; + } + + // Mx - address modifier registers + class Mx<bits<1> num, string n> : HexagonReg<{0b0000, num}, n> { + let Num = !cast<bits<5>>(num); + } + + def subreg_loreg : SubRegIndex<32>; + def subreg_hireg : SubRegIndex<32, 32>; + def subreg_overflow : SubRegIndex<1, 0>; + + // Integer registers. + foreach i = 0-28 in { + def R#i : Ri<i, "r"#i>, DwarfRegNum<[i]>; + } + + def R29 : Ri<29, "r29", ["sp"]>, DwarfRegNum<[29]>; + def R30 : Ri<30, "r30", ["fp"]>, DwarfRegNum<[30]>; + def R31 : Ri<31, "r31", ["lr"]>, DwarfRegNum<[31]>; + + // Aliases of the R* registers used to hold 64-bit int values (doubles). + let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { + def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>; + def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>; + def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>; + def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>; + def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>; + def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>; + def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>; + def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>; + def D8 : Rd<16, "r17:16", [R16, R17]>, DwarfRegNum<[48]>; + def D9 : Rd<18, "r19:18", [R18, R19]>, DwarfRegNum<[50]>; + def D10 : Rd<20, "r21:20", [R20, R21]>, DwarfRegNum<[52]>; + def D11 : Rd<22, "r23:22", [R22, R23]>, DwarfRegNum<[54]>; + def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>; + def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>; + def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>; + def D15 : Rd<30, "r31:30", [R30, R31]>, DwarfRegNum<[62]>; + } + + // Predicate registers. + def P0 : Rp<0, "p0">, DwarfRegNum<[63]>; + def P1 : Rp<1, "p1">, DwarfRegNum<[64]>; + def P2 : Rp<2, "p2">, DwarfRegNum<[65]>; + def P3 : Rp<3, "p3">, DwarfRegNum<[66]>; + + // Modifier registers. + // C6 and C7 can also be M0 and M1, but register names must be unique, even + // if belonging to different register classes. + def M0 : Mx<0, "m0">, DwarfRegNum<[72]>; + def M1 : Mx<1, "m1">, DwarfRegNum<[73]>; + + // Fake register to represent USR.OVF bit. Artihmetic/saturating instruc- + // tions modify this bit, and multiple such instructions are allowed in the + // same packet. We need to ignore output dependencies on this bit, but not + // on the entire USR. + def USR_OVF : Rc<?, "usr.ovf">; + + // Control registers. 
+ def SA0 : Rc<0, "sa0", ["c0"]>, DwarfRegNum<[67]>; + def LC0 : Rc<1, "lc0", ["c1"]>, DwarfRegNum<[68]>; + def SA1 : Rc<2, "sa1", ["c2"]>, DwarfRegNum<[69]>; + def LC1 : Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>; + def P3_0 : Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>, + DwarfRegNum<[71]>; + def C5 : Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>; // future use + def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[73]>; + def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[74]>; + + def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[75]> { + let SubRegIndices = [subreg_overflow]; + let SubRegs = [USR_OVF]; + } + def PC : Rc<9, "pc">, DwarfRegNum<[76]>; + def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>; + def GP : Rc<11, "gp">, DwarfRegNum<[78]>; + def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>; + def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>; + def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>; + def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[82]>; +} + + // Control registers pairs. + let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { + def C1_0 : Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>; + def C3_2 : Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>; + def C7_6 : Rcc<6, "c7:6", [C6, C7], ["m1:0"]>, DwarfRegNum<[72]>; + def C9_8 : Rcc<8, "c9:8", [USR, PC]>, DwarfRegNum<[74]>; + def C11_10 : Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>; + def CS : Rcc<12, "c13:12", [CS0, CS1], ["cs1:0"]>, DwarfRegNum<[78]>; + def UPC : Rcc<14, "c15:14", [UPCL, UPCH]>, DwarfRegNum<[80]>; + } + + foreach i = 0-31 in { + def V#i : Ri<i, "v"#i>, DwarfRegNum<[!add(i, 99)]>; + } + + // Aliases of the V* registers used to hold double vec values. + let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { + def W0 : Rd< 0, "v1:0", [V0, V1]>, DwarfRegNum<[99]>; + def W1 : Rd< 2, "v3:2", [V2, V3]>, DwarfRegNum<[101]>; + def W2 : Rd< 4, "v5:4", [V4, V5]>, DwarfRegNum<[103]>; + def W3 : Rd< 6, "v7:6", [V6, V7]>, DwarfRegNum<[105]>; + def W4 : Rd< 8, "v9:8", [V8, V9]>, DwarfRegNum<[107]>; + def W5 : Rd<10, "v11:10", [V10, V11]>, DwarfRegNum<[109]>; + def W6 : Rd<12, "v13:12", [V12, V13]>, DwarfRegNum<[111]>; + def W7 : Rd<14, "v15:14", [V14, V15]>, DwarfRegNum<[113]>; + def W8 : Rd<16, "v17:16", [V16, V17]>, DwarfRegNum<[115]>; + def W9 : Rd<18, "v19:18", [V18, V19]>, DwarfRegNum<[117]>; + def W10 : Rd<20, "v21:20", [V20, V21]>, DwarfRegNum<[119]>; + def W11 : Rd<22, "v23:22", [V22, V23]>, DwarfRegNum<[121]>; + def W12 : Rd<24, "v25:24", [V24, V25]>, DwarfRegNum<[123]>; + def W13 : Rd<26, "v27:26", [V26, V27]>, DwarfRegNum<[125]>; + def W14 : Rd<28, "v29:28", [V28, V29]>, DwarfRegNum<[127]>; + def W15 : Rd<30, "v31:30", [V30, V31]>, DwarfRegNum<[129]>; + } + + // Vector Predicate registers. + def Q0 : Rq<0, "q0">, DwarfRegNum<[131]>; + def Q1 : Rq<1, "q1">, DwarfRegNum<[132]>; + def Q2 : Rq<2, "q2">, DwarfRegNum<[133]>; + def Q3 : Rq<3, "q3">, DwarfRegNum<[134]>; + +// Register classes. +// +// FIXME: the register order should be defined in terms of the preferred +// allocation order... +// +def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32, + (add (sequence "R%u", 0, 9), + (sequence "R%u", 12, 28), + R10, R11, R29, R30, R31)> { +} + +// Registers are listed in reverse order for allocation preference reasons. 
+def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32, + (add R7, R6, R5, R4, R3, R2, R1, R0)> ; + +def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64, + (add (sequence "D%u", 0, 4), + (sequence "D%u", 6, 13), D5, D14, D15)>; + +def VectorRegs : RegisterClass<"Hexagon", [v64i8, v32i16, v16i32, v8i64], 512, + (add (sequence "V%u", 0, 31))>; + +def VecDblRegs : RegisterClass<"Hexagon", + [v128i8, v64i16, v32i32, v16i64], 1024, + (add (sequence "W%u", 0, 15))>; + +def VectorRegs128B : RegisterClass<"Hexagon", + [v128i8, v64i16, v32i32, v16i64], 1024, + (add (sequence "V%u", 0, 31))>; + +def VecDblRegs128B : RegisterClass<"Hexagon", + [v256i8,v128i16,v64i32,v32i64], 2048, + (add (sequence "W%u", 0, 15))>; + +def VecPredRegs : RegisterClass<"Hexagon", [v512i1], 512, + (add (sequence "Q%u", 0, 3))>; + +def VecPredRegs128B : RegisterClass<"Hexagon", [v1024i1], 1024, + (add (sequence "Q%u", 0, 3))>; + +def PredRegs : RegisterClass<"Hexagon", + [i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32, + (add (sequence "P%u", 0, 3))> +{ + let Size = 32; +} + +let Size = 32 in +def ModRegs : RegisterClass<"Hexagon", [i32], 32, (add M0, M1)>; + +let Size = 32, isAllocatable = 0 in +def CtrRegs : RegisterClass<"Hexagon", [i32], 32, + (add LC0, SA0, LC1, SA1, + P3_0, + M0, M1, C6, C7, CS0, CS1, UPCL, UPCH, + USR, USR_OVF, UGP, GP, PC)>; + +let Size = 64, isAllocatable = 0 in +def CtrRegs64 : RegisterClass<"Hexagon", [i64], 64, + (add C1_0, C3_2, C7_6, C9_8, C11_10, CS, UPC)>; + +def VolatileV3 { + list<Register> Regs = [D0, D1, D2, D3, D4, D5, D6, D7, + R28, R31, + P0, P1, P2, P3, + M0, M1, + LC0, LC1, SA0, SA1, USR, USR_OVF]; +} + +def PositiveHalfWord : PatLeaf<(i32 IntRegs:$a), +[{ + return isPositiveHalfWord(N); +}]>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td new file mode 100644 index 0000000..6e4987b --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td @@ -0,0 +1,24 @@ +//===- HexagonSchedule.td - Hexagon Scheduling Definitions -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// V4 Machine Info + +//===----------------------------------------------------------------------===// + +include "HexagonScheduleV4.td" + +// V55 Machine Info + +include "HexagonScheduleV55.td" + +//===----------------------------------------------------------------------===// +// V60 Machine Info - +//===----------------------------------------------------------------------===// + +include "HexagonScheduleV60.td" + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td new file mode 100644 index 0000000..67af147 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td @@ -0,0 +1,206 @@ +//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// There are four SLOTS (four parallel pipelines) in Hexagon V4 machine. +// This file describes that machine information. 
+
+//
+// |===========|==================================================|
+// | PIPELINE | Instruction Classes |
+// |===========|==================================================|
+// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM |
+// |-----------|--------------------------------------------------|
+// | SLOT1 | LD ST ALU32 |
+// |-----------|--------------------------------------------------|
+// | SLOT2 | XTYPE ALU32 J JR |
+// |-----------|--------------------------------------------------|
+// | SLOT3 | XTYPE ALU32 J CR |
+// |===========|==================================================|
+
+// Functional Units.
+def SLOT0 : FuncUnit;
+def SLOT1 : FuncUnit;
+def SLOT2 : FuncUnit;
+def SLOT3 : FuncUnit;
+// Endloop is a pseudo instruction that is encoded with 2 bits in a packet
+// rather than taking an execution slot. This special unit is needed
+// to schedule an ENDLOOP with 4 other instructions.
+def SLOT_ENDLOOP: FuncUnit;
+
+// Itinerary classes.
+def PSEUDO : InstrItinClass;
+def PSEUDOM : InstrItinClass;
+// ALU64/M/S Instruction classes of V2 are collectively known as XTYPE in V4.
+def DUPLEX : InstrItinClass;
+def PREFIX : InstrItinClass;
+def COMPOUND_CJ_ARCHDEPSLOT : InstrItinClass;
+def COMPOUND : InstrItinClass;
+
+def ALU32_2op_tc_1_SLOT0123 : InstrItinClass;
+def ALU32_2op_tc_2early_SLOT0123 : InstrItinClass;
+def ALU32_3op_tc_2early_SLOT0123 : InstrItinClass;
+def ALU32_3op_tc_1_SLOT0123 : InstrItinClass;
+def ALU32_3op_tc_2_SLOT0123 : InstrItinClass;
+def ALU32_ADDI_tc_1_SLOT0123 : InstrItinClass;
+def ALU64_tc_1_SLOT23 : InstrItinClass;
+def ALU64_tc_1or2_SLOT23 : InstrItinClass;
+def ALU64_tc_2_SLOT23 : InstrItinClass;
+def ALU64_tc_2early_SLOT23 : InstrItinClass;
+def ALU64_tc_3x_SLOT23 : InstrItinClass;
+def CR_tc_2_SLOT3 : InstrItinClass;
+def CR_tc_2early_SLOT23 : InstrItinClass;
+def CR_tc_2early_SLOT3 : InstrItinClass;
+def CR_tc_3x_SLOT23 : InstrItinClass;
+def CR_tc_3x_SLOT3 : InstrItinClass;
+def J_tc_2early_SLOT23 : InstrItinClass;
+def J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT : InstrItinClass;
+def J_tc_2early_SLOT2 : InstrItinClass;
+def LD_tc_ld_SLOT01 : InstrItinClass;
+def LD_tc_ld_SLOT0 : InstrItinClass;
+def LD_tc_3or4stall_SLOT0 : InstrItinClass;
+def M_tc_1_SLOT23 : InstrItinClass;
+def M_tc_1or2_SLOT23 : InstrItinClass;
+def M_tc_2_SLOT23 : InstrItinClass;
+def M_tc_3_SLOT23 : InstrItinClass;
+def M_tc_3x_SLOT23 : InstrItinClass;
+def M_tc_3or4x_SLOT23 : InstrItinClass;
+def ST_tc_st_SLOT01 : InstrItinClass;
+def ST_tc_st_SLOT0 : InstrItinClass;
+def ST_tc_ld_SLOT0 : InstrItinClass;
+def ST_tc_3stall_SLOT0 : InstrItinClass;
+def S_2op_tc_1_SLOT23 : InstrItinClass;
+def S_2op_tc_2_SLOT23 : InstrItinClass;
+def S_2op_tc_2early_SLOT23 : InstrItinClass;
+def S_2op_tc_3or4x_SLOT23 : InstrItinClass;
+def S_3op_tc_1_SLOT23 : InstrItinClass;
+def S_3op_tc_1or2_SLOT23 : InstrItinClass;
+def S_3op_tc_2_SLOT23 : InstrItinClass;
+def S_3op_tc_2early_SLOT23 : InstrItinClass;
+def S_3op_tc_3_SLOT23 : InstrItinClass;
+def S_3op_tc_3x_SLOT23 : InstrItinClass;
+def NCJ_tc_3or4stall_SLOT0 : InstrItinClass;
+def V2LDST_tc_ld_SLOT01 : InstrItinClass;
+def V2LDST_tc_st_SLOT0 : InstrItinClass;
+def V2LDST_tc_st_SLOT01 : InstrItinClass;
+def V4LDST_tc_ld_SLOT01 : InstrItinClass;
+def V4LDST_tc_st_SLOT0 : InstrItinClass;
+def V4LDST_tc_st_SLOT01 : InstrItinClass;
+def J_tc_2early_SLOT0123 : InstrItinClass;
+def EXTENDER_tc_1_SLOT0123 : InstrItinClass;
+def S_3op_tc_3stall_SLOT23 : InstrItinClass;
+
+
+def HexagonItinerariesV4 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2,
SLOT3, SLOT_ENDLOOP], [], [ + // ALU32 + InstrItinData<ALU32_2op_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_2op_tc_2early_SLOT0123, + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_3op_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_3op_tc_2early_SLOT0123, + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_3op_tc_2_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_ADDI_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // ALU64 + InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_1or2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + + // CR -> System + InstrItinData<CR_tc_2_SLOT3 , [InstrStage<1, [SLOT3]>]>, + InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<1, [SLOT3]>]>, + InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<1, [SLOT3]>]>, + + // Jump (conditional/unconditional/return etc) + // CR + InstrItinData<CR_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + // J + InstrItinData<J_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + // JR + InstrItinData<J_tc_2early_SLOT2 , [InstrStage<1, [SLOT2]>]>, + + //Load + InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<LD_tc_3or4stall_SLOT0 , [InstrStage<1, [SLOT0]>]>, + + // M + InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_1or2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + + // Store + // ST + InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + // ST0 + InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>]>, + + // S + InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_1or2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + + // SYS + InstrItinData<ST_tc_3stall_SLOT0 , [InstrStage<1, [SLOT0]>]>, + + // New Value Compare Jump + InstrItinData<NCJ_tc_3or4stall_SLOT0 , [InstrStage<1, [SLOT0]>]>, + + // Mem ops - MEM_V4 + InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, 
[SLOT0]>]>,
+ InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+ InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>]>,
+
+ // ENDLOOP
+ InstrItinData<J_tc_2early_SLOT0123 , [InstrStage<1, [SLOT_ENDLOOP]>]>,
+
+ // Extender/PREFIX
+ InstrItinData<EXTENDER_tc_1_SLOT0123,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [SLOT2, SLOT3]>]>
+ ]>;
+
+def HexagonModelV4 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV4;
+ let LoadLatency = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V4 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td
new file mode 100644
index 0000000..d9ad25d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td
@@ -0,0 +1,170 @@
+//=-HexagonScheduleV55.td - HexagonV55 Scheduling Definitions --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// There are four SLOTS (four parallel pipelines) in Hexagon V55 machine.
+// This file describes that machine information.
+
+//
+// |===========|==================================================|
+// | PIPELINE | Instruction Classes |
+// |===========|==================================================|
+// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM |
+// |-----------|--------------------------------------------------|
+// | SLOT1 | LD ST ALU32 |
+// |-----------|--------------------------------------------------|
+// | SLOT2 | XTYPE ALU32 J JR |
+// |-----------|--------------------------------------------------|
+// | SLOT3 | XTYPE ALU32 J CR |
+// |===========|==================================================|
+
+def CJ_tc_1_SLOT23 : InstrItinClass;
+def CJ_tc_2early_SLOT23 : InstrItinClass;
+def COPROC_VMEM_vtc_long_SLOT01 : InstrItinClass;
+def COPROC_VX_vtc_long_SLOT23 : InstrItinClass;
+def COPROC_VX_vtc_SLOT23 : InstrItinClass;
+def J_tc_3stall_SLOT2 : InstrItinClass;
+def MAPPING_tc_1_SLOT0123 : InstrItinClass;
+def M_tc_3stall_SLOT23 : InstrItinClass;
+def SUBINSN_tc_1_SLOT01 : InstrItinClass;
+def SUBINSN_tc_2early_SLOT0 : InstrItinClass;
+def SUBINSN_tc_2early_SLOT01 : InstrItinClass;
+def SUBINSN_tc_3stall_SLOT0 : InstrItinClass;
+def SUBINSN_tc_ld_SLOT0 : InstrItinClass;
+def SUBINSN_tc_ld_SLOT01 : InstrItinClass;
+def SUBINSN_tc_st_SLOT01 : InstrItinClass;
+
+def HexagonItinerariesV55 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [
+ // ALU32
+ InstrItinData<ALU32_2op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_2op_tc_2early_SLOT0123,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2_SLOT0123 ,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2early_SLOT0123,
[InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_ADDI_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // ALU64 + InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + + // CR -> System + InstrItinData<CR_tc_2_SLOT3 , [InstrStage<2, [SLOT3]>]>, + InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<2, [SLOT3]>]>, + InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<3, [SLOT3]>]>, + + // Jump (conditional/unconditional/return etc) + InstrItinData<CR_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<CJ_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<CJ_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<J_tc_2early_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>, + + // JR + InstrItinData<J_tc_2early_SLOT2 , [InstrStage<2, [SLOT2]>]>, + InstrItinData<J_tc_3stall_SLOT2 , [InstrStage<3, [SLOT2]>]>, + + // Extender + InstrItinData<EXTENDER_tc_1_SLOT0123, + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // Load + InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + + // M + InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>, + + // Store + InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + + // Subinsn + InstrItinData<SUBINSN_tc_2early_SLOT0, [InstrStage<2, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_1_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_2early_SLOT01, + [InstrStage<2, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + + // S + InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + + // New Value Compare Jump + 
InstrItinData<NCJ_tc_3or4stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + + // Mem ops + InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<2, [SLOT0, SLOT1]>]>, + InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + + // Endloop + InstrItinData<J_tc_2early_SLOT0123, [InstrStage<2, [SLOT_ENDLOOP]>]>, + + // Vector + InstrItinData<COPROC_VMEM_vtc_long_SLOT01, + [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<COPROC_VX_vtc_long_SLOT23 , + [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<COPROC_VX_vtc_SLOT23 , + [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<MAPPING_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // Misc + InstrItinData<COMPOUND_CJ_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>]>, + InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [SLOT2, SLOT3]>]> + + ]>; + +def HexagonModelV55 : SchedMachineModel { + // Max issue per cycle == bundle width. + let IssueWidth = 4; + let Itineraries = HexagonItinerariesV55; + let LoadLatency = 1; +} + +//===----------------------------------------------------------------------===// +// Hexagon V4 Resource Definitions - +//===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td new file mode 100644 index 0000000..2ccff82 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td @@ -0,0 +1,310 @@ +//=-HexagonScheduleV60.td - HexagonV60 Scheduling Definitions *- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// CVI pipes from the "Hexagon Multimedia Co-Processor Extensions Arch Spec". +def CVI_ST : FuncUnit; +def CVI_XLANE : FuncUnit; +def CVI_SHIFT : FuncUnit; +def CVI_MPY0 : FuncUnit; +def CVI_MPY1 : FuncUnit; +def CVI_LD : FuncUnit; + +// Combined functional units. +def CVI_XLSHF : FuncUnit; +def CVI_MPY01 : FuncUnit; +def CVI_ALL : FuncUnit; + +// Combined functional unit data. +def HexagonComboFuncsV60 : + ComboFuncUnits<[ + ComboFuncData<CVI_XLSHF , [CVI_XLANE, CVI_SHIFT]>, + ComboFuncData<CVI_MPY01 , [CVI_MPY0, CVI_MPY1]>, + ComboFuncData<CVI_ALL , [CVI_ST, CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1, CVI_LD]> + ]>; + +// Note: When adding additional vector scheduling classes, add the +// corresponding methods to the class HexagonInstrInfo. 
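+// In the itinerary entries further below, the CVI_VX classes are tied to the
+// multiply units, CVI_VP to CVI_XLANE, CVI_VS to CVI_SHIFT, and CVI_VA may
+// use any one of the four single vector units.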
+def CVI_VA : InstrItinClass; +def CVI_VA_DV : InstrItinClass; +def CVI_VX_LONG : InstrItinClass; +def CVI_VX_LATE : InstrItinClass; +def CVI_VX : InstrItinClass; +def CVI_VX_DV_LONG : InstrItinClass; +def CVI_VX_DV : InstrItinClass; +def CVI_VX_DV_SLOT2 : InstrItinClass; +def CVI_VP : InstrItinClass; +def CVI_VP_LONG : InstrItinClass; +def CVI_VP_VS_EARLY : InstrItinClass; +def CVI_VP_VS_LONG_EARLY : InstrItinClass; +def CVI_VP_VS_LONG : InstrItinClass; +def CVI_VP_VS : InstrItinClass; +def CVI_VP_DV : InstrItinClass; +def CVI_VS : InstrItinClass; +def CVI_VINLANESAT : InstrItinClass; +def CVI_VM_LD : InstrItinClass; +def CVI_VM_TMP_LD : InstrItinClass; +def CVI_VM_CUR_LD : InstrItinClass; +def CVI_VM_VP_LDU : InstrItinClass; +def CVI_VM_ST : InstrItinClass; +def CVI_VM_NEW_ST : InstrItinClass; +def CVI_VM_STU : InstrItinClass; +def CVI_HIST : InstrItinClass; +def CVI_VA_EXT : InstrItinClass; + +// There are four SLOTS (four parallel pipelines) in Hexagon V60 machine. +// This file describes that machine information. +// +// |===========|==================================================| +// | PIPELINE | Instruction Classes | +// |===========|==================================================| +// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM | +// |-----------|--------------------------------------------------| +// | SLOT1 | LD ST ALU32 | +// |-----------|--------------------------------------------------| +// | SLOT2 | XTYPE ALU32 J JR | +// |-----------|--------------------------------------------------| +// | SLOT3 | XTYPE ALU32 J CR | +// |===========|==================================================| +// +// +// In addition to using the above SLOTS, there are also six vector pipelines +// in the CVI co-processor in the Hexagon V60 machine. +// +// |=========| |=========| |=========| |=========| |=========| |=========| +// SLOT | CVI_LD | |CVI_MPY3 | |CVI_MPY2 | |CVI_SHIFT| |CVI_XLANE| | CVI_ST | +// ==== |=========| |=========| |=========| |=========| |=========| |=========| +// S0-3 | | | CVI_VA | | CVI_VA | | CVI_VA | | CVI_VA | | | +// S2-3 | | | CVI_VX | | CVI_VX | | | | | | | +// S0-3 | | | | | | | | | CVI_VP | | | +// S0-3 | | | | | | | CVI_VS | | | | | +// S0-1 |(CVI_LD) | | CVI_LD | | CVI_LD | | CVI_LD | | CVI_LD | | | +// S0-1 |(C*TMP_LD) | | | | | | | | | | +// S01 |(C*_LDU) | | | | | | | | C*_LDU | | | +// S0 | | | CVI_ST | | CVI_ST | | CVI_ST | | CVI_ST | |(CVI_ST) | +// S0 | | | | | | | | | | |(C*TMP_ST) +// S01 | | | | | | | | | VSTU | |(C*_STU) | +// |=========| |=========| |=========| |=========| |=========| |=========| +// |=====================| |=====================| +// | CVI_MPY2 & CVI_MPY3 | |CVI_XLANE & CVI_SHIFT| +// |=====================| |=====================| +// S0-3 | CVI_VA_DV | | CVI_VA_DV | +// S0-3 | | | CVI_VP_DV | +// S2-3 | CVI_VX_DV | | | +// |=====================| |=====================| +// |=====================================================================| +// S0-3 | CVI_HIST Histogram | +// S0123| CVI_VA_EXT Extract | +// |=====================================================================| + +def HexagonItinerariesV60 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL], [], [ + // ALU32 + InstrItinData<ALU32_2op_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_2op_tc_2early_SLOT0123, + [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_3op_tc_1_SLOT0123 , + [InstrStage<1, 
[SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_3op_tc_2_SLOT0123 , + [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_3op_tc_2early_SLOT0123, + [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ALU32_ADDI_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // ALU64 + InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + + // CR -> System + InstrItinData<CR_tc_2_SLOT3 , [InstrStage<2, [SLOT3]>]>, + InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<2, [SLOT3]>]>, + InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<3, [SLOT3]>]>, + + // Jump (conditional/unconditional/return etc) + InstrItinData<CR_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<CJ_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<CJ_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<J_tc_2early_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>, + + // JR + InstrItinData<J_tc_2early_SLOT2 , [InstrStage<2, [SLOT2]>]>, + InstrItinData<J_tc_3stall_SLOT2 , [InstrStage<3, [SLOT2]>]>, + + // Extender + InstrItinData<EXTENDER_tc_1_SLOT0123, [InstrStage<1, + [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // Load + InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<4, [SLOT0]>]>, + InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + + // M + InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<4, [SLOT2, SLOT3]>]>, + InstrItinData<M_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>, + + // Store + InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + + // Subinsn + InstrItinData<SUBINSN_tc_2early_SLOT0, [InstrStage<2, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>, + InstrItinData<SUBINSN_tc_1_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_2early_SLOT01, + [InstrStage<2, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<SUBINSN_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + + // S + InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_2op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + // The S_2op_tc_3x_SLOT23 slots are 4 cycles on v60. 
+ InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<4, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>, + + // New Value Compare Jump + InstrItinData<NCJ_tc_3or4stall_SLOT0, [InstrStage<4, [SLOT0]>]>, + + // Mem ops + InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<2, [SLOT0, SLOT1]>]>, + InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>, + InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>, + + // Endloop + InstrItinData<J_tc_2early_SLOT0123, [InstrStage<2, [SLOT_ENDLOOP]>]>, + + // Vector + InstrItinData<COPROC_VMEM_vtc_long_SLOT01, + [InstrStage<3, [SLOT0, SLOT1]>]>, + InstrItinData<COPROC_VX_vtc_long_SLOT23 , + [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<COPROC_VX_vtc_SLOT23 , + [InstrStage<3, [SLOT2, SLOT3]>]>, + InstrItinData<MAPPING_tc_1_SLOT0123 , + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + + // Duplex and Compound + InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>]>, + InstrItinData<COMPOUND_CJ_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>]>, + // Misc + InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<PSEUDOM , [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [SLOT2, SLOT3]>]>, + + // Latest CVI spec definitions. 
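+    // Each CVI entry below reserves a packet slot and, in the same cycle
+    // (the trailing 0 time increment), one or more CVI functional units;
+    // CVI_VA, for example, pairs any of SLOT0-3 with a single vector unit.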
+ InstrItinData<CVI_VA,[InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLANE,CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VA_DV, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF, CVI_MPY01]>]>, + InstrItinData<CVI_VX_LONG, [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VX_LATE, [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VX,[InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VX_DV_LONG, + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>]>, + InstrItinData<CVI_VX_DV, + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>]>, + InstrItinData<CVI_VX_DV_SLOT2, + [InstrStage<1, [SLOT2], 0>, + InstrStage<1, [CVI_MPY01]>]>, + InstrItinData<CVI_VP, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData<CVI_VP_LONG, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData<CVI_VP_VS_EARLY, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData<CVI_VP_VS_LONG, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData<CVI_VP_VS, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData<CVI_VP_VS_LONG_EARLY, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData<CVI_VP_DV , [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData<CVI_VS, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>]>, + InstrItinData<CVI_VINLANESAT, + [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>]>, + InstrItinData<CVI_VM_LD , [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VM_TMP_LD,[InstrStage<1,[SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD]>]>, + InstrItinData<CVI_VM_CUR_LD,[InstrStage<1,[SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VM_VP_LDU,[InstrStage<1,[SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData<CVI_VM_ST , [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData<CVI_VM_NEW_ST,[InstrStage<1,[SLOT0], 0>, + InstrStage<1, [CVI_ST]>]>, + InstrItinData<CVI_VM_STU , [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData<CVI_HIST , [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>, + InstrStage<1, [CVI_ALL]>]> + ]>; + +def HexagonModelV60 : SchedMachineModel { + // Max issue per cycle == bundle width. 
+ let IssueWidth = 4; + let Itineraries = HexagonItinerariesV60; + let LoadLatency = 1; +} + +//===----------------------------------------------------------------------===// +// Hexagon V60 Resource Definitions - +//===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td new file mode 100644 index 0000000..d8feb89 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td @@ -0,0 +1,121 @@ +//===-- HexagoSelectCCInfo.td - Selectcc mappings ----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +// +// selectcc mappings. +// +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETEQ)), + (i32 (MUX_rr (i1 (CMPEQrr IntRegs:$lhs, IntRegs:$rhs)), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETNE)), + (i32 (MUX_rr (i1 (NOT_p (CMPEQrr IntRegs:$lhs, IntRegs:$rhs))), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETGT)), + (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, IntRegs:$rhs)), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETUGT)), + (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs)), + IntRegs:$tval, IntRegs:$fval))>; + + + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETULT)), + (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs, + (ADD_ri IntRegs:$rhs, -1)))), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETLT)), + (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs, + (ADD_ri IntRegs:$rhs, -1)))), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETLE)), + (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs, IntRegs:$rhs))), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETULE)), + (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs))), + IntRegs:$tval, IntRegs:$fval))>; + + +// +// selectcc mappings for greater-equal-to Rs => greater-than Rs-1. +// +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETGE)), + (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))), + IntRegs:$tval, IntRegs:$fval))>; + +def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETUGE)), + (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))), + IntRegs:$tval, IntRegs:$fval))>; + + + +// +// selectcc mappings for predicate comparisons. 
+// +// Convert Rd = selectcc(p0, p1, true_val, false_val, SETEQ) into: +// pt = not(p1 xor p2) +// Rd = mux(pt, true_val, false_val) +// and similarly for SETNE +// +def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETNE)), + (i32 (MUX_rr (i1 (XOR_pp PredRegs:$lhs, PredRegs:$rhs)), IntRegs:$tval, + IntRegs:$fval))>; + +def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval, + IntRegs:$fval, SETEQ)), + (i32 (MUX_rr (i1 (NOT_p (XOR_pp PredRegs:$lhs, PredRegs:$rhs))), + IntRegs:$tval, IntRegs:$fval))>; + + +// +// selectcc mappings for 64-bit operands are messy. Hexagon does not have a +// MUX64 o, use this: +// selectcc(Rss, Rdd, tval, fval, cond) -> +// combine(mux(cmp_cond(Rss, Rdd), tval.hi, fval.hi), +// mux(cmp_cond(Rss, Rdd), tval.lo, fval.lo)) + +// setgt-64. +def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval, + DoubleRegs:$fval, SETGT)), + (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs), + (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)), + (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs), + (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>; + + +// setlt-64 -> setgt-64. +def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval, + DoubleRegs:$fval, SETLT)), + (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, + (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))), + (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)), + (MUX_rr (CMPGT64rr DoubleRegs:$lhs, + (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))), + (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp new file mode 100644 index 0000000..239dbda --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -0,0 +1,63 @@ +//===-- HexagonSelectionDAGInfo.cpp - Hexagon SelectionDAG Info -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the HexagonSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/SelectionDAG.h" +using namespace llvm; + +#define DEBUG_TYPE "hexagon-selectiondag-info" + +SDValue +HexagonSelectionDAGInfo:: +EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, + SDValue Dst, SDValue Src, SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (AlwaysInline || (Align & 0x3) != 0 || !ConstantSize) + return SDValue(); + + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (SizeVal < 32 || (SizeVal % 8) != 0) + return SDValue(); + + // Special case aligned memcpys with size >= 32 bytes and a multiple of 8. 
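+  // Rather than expanding the copy inline, the code below emits a call to the
+  // runtime helper named in SpecialMemcpyName, passing (Dst, Src, Size) and
+  // discarding the result, conceptually:
+  //   __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes(dst, src, size);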
+ // + const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + Entry.Node = Src; + Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + + const char *SpecialMemcpyName = + "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes"; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), + Type::getVoidTy(*DAG.getContext()), + DAG.getTargetExternalSymbol( + SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0) + .setDiscardResult(); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + return CallResult.second; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h new file mode 100644 index 0000000..80ac5d7 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h @@ -0,0 +1,35 @@ +//===-- HexagonSelectionDAGInfo.h - Hexagon SelectionDAG Info ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Hexagon subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONSELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; +}; + +} + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp new file mode 100644 index 0000000..10fe606 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -0,0 +1,168 @@ +//=== HexagonSplitConst32AndConst64.cpp - split CONST32/Const64 into HI/LO ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// When the compiler is invoked with no small data, for instance, with the -G0 +// command line option, then all CONST32_* opcodes should be broken down into +// appropriate LO and HI instructions. This splitting is done by this pass. +// The only reason this is not done in the DAG lowering itself is that there +// is no simple way of getting the register allocator to allot the same hard +// register to the result of LO and HI instructions. This pass is always +// scheduled after register allocation. 
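+// For illustration (hypothetical operands): a CONST32 of a block address is
+// rewritten below into an LO/HI pair, roughly
+//   r2.l = #lo(@addr)
+//   r2.h = #hi(@addr)
+// while CONST32/CONST64 immediates are materialized with A2_tfrsi transfers
+// into the destination register or its low/high sub-registers.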
+// +//===----------------------------------------------------------------------===// + +#include "HexagonMachineFunctionInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonTargetObjectFile.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <map> + +using namespace llvm; + +#define DEBUG_TYPE "xfer" + +namespace llvm { + FunctionPass *createHexagonSplitConst32AndConst64(); + void initializeHexagonSplitConst32AndConst64Pass(PassRegistry&); +} + +namespace { + +class HexagonSplitConst32AndConst64 : public MachineFunctionPass { + public: + static char ID; + HexagonSplitConst32AndConst64() : MachineFunctionPass(ID) {} + + const char *getPassName() const override { + return "Hexagon Split Const32s and Const64s"; + } + bool runOnMachineFunction(MachineFunction &Fn) override; +}; + + +char HexagonSplitConst32AndConst64::ID = 0; + + +bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { + + const HexagonTargetObjectFile &TLOF = + *static_cast<const HexagonTargetObjectFile *>( + Fn.getTarget().getObjFileLowering()); + if (TLOF.IsSmallDataEnabled()) + return true; + + const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); + + // Loop over all of the basic blocks + for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock *MBB = &*MBBb; + // Traverse the basic block + MachineBasicBlock::iterator MII = MBB->begin(); + MachineBasicBlock::iterator MIE = MBB->end (); + while (MII != MIE) { + MachineInstr *MI = MII; + int Opc = MI->getOpcode(); + if (Opc == Hexagon::CONST32_Int_Real && + MI->getOperand(1).isBlockAddress()) { + int DestReg = MI->getOperand(0).getReg(); + MachineOperand &Symbol = MI->getOperand (1); + + BuildMI (*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::LO), DestReg).addOperand(Symbol); + BuildMI (*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::HI), DestReg).addOperand(Symbol); + // MBB->erase returns the iterator to the next instruction, which is the + // one we want to process next + MII = MBB->erase (MI); + continue; + } + + else if (Opc == Hexagon::CONST32_Int_Real || + Opc == Hexagon::CONST32_Float_Real) { + int DestReg = MI->getOperand(0).getReg(); + + // We have to convert an FP immediate into its corresponding integer + // representation + int64_t ImmValue; + if (Opc == Hexagon::CONST32_Float_Real) { + APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF(); + ImmValue = *Val.bitcastToAPInt().getRawData(); + } + else + ImmValue = MI->getOperand(1).getImm(); + + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::A2_tfrsi), DestReg).addImm(ImmValue); + MII = MBB->erase (MI); + continue; + } + else if (Opc == Hexagon::CONST64_Int_Real || + Opc == 
Hexagon::CONST64_Float_Real) { + int DestReg = MI->getOperand(0).getReg(); + + // We have to convert an FP immediate into its corresponding integer + // representation + int64_t ImmValue; + if (Opc == Hexagon::CONST64_Float_Real) { + APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF(); + ImmValue = *Val.bitcastToAPInt().getRawData(); + } + else + ImmValue = MI->getOperand(1).getImm(); + + unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::subreg_loreg); + unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::subreg_hireg); + + int32_t LowWord = (ImmValue & 0xFFFFFFFF); + int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF; + + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::A2_tfrsi), DestLo).addImm(LowWord); + BuildMI (*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::A2_tfrsi), DestHi).addImm(HighWord); + MII = MBB->erase (MI); + continue; + } + ++MII; + } + } + + return true; +} + +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass * +llvm::createHexagonSplitConst32AndConst64() { + return new HexagonSplitConst32AndConst64(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp new file mode 100644 index 0000000..d4e95b0d --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -0,0 +1,1209 @@ +//===--- HexagonSplitDouble.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hsdr" + +#include "HexagonRegisterInfo.h" +#include "HexagonTargetMachine.h" + +#include "llvm/Pass.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include <map> +#include <set> +#include <vector> + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonSplitDoubleRegs(); + void initializeHexagonSplitDoubleRegsPass(PassRegistry&); +} + +namespace { + static cl::opt<int> MaxHSDR("max-hsdr", cl::Hidden, cl::init(-1), + cl::desc("Maximum number of split partitions")); + static cl::opt<bool> MemRefsFixed("hsdr-no-mem", cl::Hidden, cl::init(true), + cl::desc("Do not split loads or stores")); + + class HexagonSplitDoubleRegs : public MachineFunctionPass { + public: + static char ID; + HexagonSplitDoubleRegs() : MachineFunctionPass(ID), TRI(nullptr), + TII(nullptr) { + initializeHexagonSplitDoubleRegsPass(*PassRegistry::getPassRegistry()); + } + const char *getPassName() const override { + return "Hexagon Split Double Registers"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + static const TargetRegisterClass *const DoubleRC; + + const HexagonRegisterInfo *TRI; + const HexagonInstrInfo *TII; + const 
MachineLoopInfo *MLI; + MachineRegisterInfo *MRI; + + typedef std::set<unsigned> USet; + typedef std::map<unsigned,USet> UUSetMap; + typedef std::pair<unsigned,unsigned> UUPair; + typedef std::map<unsigned,UUPair> UUPairMap; + typedef std::map<const MachineLoop*,USet> LoopRegMap; + + bool isInduction(unsigned Reg, LoopRegMap &IRM) const; + bool isVolatileInstr(const MachineInstr *MI) const; + bool isFixedInstr(const MachineInstr *MI) const; + void partitionRegisters(UUSetMap &P2Rs); + int32_t profit(const MachineInstr *MI) const; + bool isProfitable(const USet &Part, LoopRegMap &IRM) const; + + void collectIndRegsForLoop(const MachineLoop *L, USet &Rs); + void collectIndRegs(LoopRegMap &IRM); + + void createHalfInstr(unsigned Opc, MachineInstr *MI, + const UUPairMap &PairMap, unsigned SubR); + void splitMemRef(MachineInstr *MI, const UUPairMap &PairMap); + void splitImmediate(MachineInstr *MI, const UUPairMap &PairMap); + void splitCombine(MachineInstr *MI, const UUPairMap &PairMap); + void splitExt(MachineInstr *MI, const UUPairMap &PairMap); + void splitShift(MachineInstr *MI, const UUPairMap &PairMap); + void splitAslOr(MachineInstr *MI, const UUPairMap &PairMap); + bool splitInstr(MachineInstr *MI, const UUPairMap &PairMap); + void replaceSubregUses(MachineInstr *MI, const UUPairMap &PairMap); + void collapseRegPairs(MachineInstr *MI, const UUPairMap &PairMap); + bool splitPartition(const USet &Part); + + static int Counter; + static void dump_partition(raw_ostream&, const USet&, + const TargetRegisterInfo&); + }; + char HexagonSplitDoubleRegs::ID; + int HexagonSplitDoubleRegs::Counter = 0; + const TargetRegisterClass *const HexagonSplitDoubleRegs::DoubleRC + = &Hexagon::DoubleRegsRegClass; +} + +INITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double", + "Hexagon Split Double Registers", false, false) + + +static inline uint32_t getRegState(const MachineOperand &R) { + assert(R.isReg()); + return getDefRegState(R.isDef()) | + getImplRegState(R.isImplicit()) | + getKillRegState(R.isKill()) | + getDeadRegState(R.isDead()) | + getUndefRegState(R.isUndef()) | + getInternalReadRegState(R.isInternalRead()) | + (R.isDebug() ? RegState::Debug : 0); +} + + +void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os, + const USet &Part, const TargetRegisterInfo &TRI) { + dbgs() << '{'; + for (auto I : Part) + dbgs() << ' ' << PrintReg(I, &TRI); + dbgs() << " }"; +} + + +bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const { + for (auto I : IRM) { + const USet &Rs = I.second; + if (Rs.find(Reg) != Rs.end()) + return true; + } + return false; +} + + +bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const { + for (auto &I : MI->memoperands()) + if (I->isVolatile()) + return true; + return false; +} + + +bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const { + if (MI->mayLoad() || MI->mayStore()) + if (MemRefsFixed || isVolatileInstr(MI)) + return true; + if (MI->isDebugValue()) + return false; + + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: + return true; + + case TargetOpcode::PHI: + case TargetOpcode::COPY: + break; + + case Hexagon::L2_loadrd_io: + // Not handling stack stores (only reg-based addresses). + if (MI->getOperand(1).isReg()) + break; + return true; + case Hexagon::S2_storerd_io: + // Not handling stack stores (only reg-based addresses). 
+ if (MI->getOperand(0).isReg()) + break; + return true; + case Hexagon::L2_loadrd_pi: + case Hexagon::S2_storerd_pi: + + case Hexagon::A2_tfrpi: + case Hexagon::A2_combineii: + case Hexagon::A4_combineir: + case Hexagon::A4_combineii: + case Hexagon::A4_combineri: + case Hexagon::A2_combinew: + case Hexagon::CONST64_Int_Real: + + case Hexagon::A2_sxtw: + + case Hexagon::A2_andp: + case Hexagon::A2_orp: + case Hexagon::A2_xorp: + case Hexagon::S2_asl_i_p_or: + case Hexagon::S2_asl_i_p: + case Hexagon::S2_asr_i_p: + case Hexagon::S2_lsr_i_p: + break; + } + + for (auto &Op : MI->operands()) { + if (!Op.isReg()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + return true; + } + return false; +} + + +void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { + typedef std::map<unsigned,unsigned> UUMap; + typedef std::vector<unsigned> UVect; + + unsigned NumRegs = MRI->getNumVirtRegs(); + BitVector DoubleRegs(NumRegs); + for (unsigned i = 0; i < NumRegs; ++i) { + unsigned R = TargetRegisterInfo::index2VirtReg(i); + if (MRI->getRegClass(R) == DoubleRC) + DoubleRegs.set(i); + } + + BitVector FixedRegs(NumRegs); + for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { + unsigned R = TargetRegisterInfo::index2VirtReg(x); + MachineInstr *DefI = MRI->getVRegDef(R); + // In some cases a register may exist, but never be defined or used. + // It should never appear anywhere, but mark it as "fixed", just to be + // safe. + if (!DefI || isFixedInstr(DefI)) + FixedRegs.set(x); + } + + UUSetMap AssocMap; + for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { + if (FixedRegs[x]) + continue; + unsigned R = TargetRegisterInfo::index2VirtReg(x); + DEBUG(dbgs() << PrintReg(R, TRI) << " ~~"); + USet &Asc = AssocMap[R]; + for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end(); + U != Z; ++U) { + MachineOperand &Op = *U; + MachineInstr *UseI = Op.getParent(); + if (isFixedInstr(UseI)) + continue; + for (unsigned i = 0, n = UseI->getNumOperands(); i < n; ++i) { + MachineOperand &MO = UseI->getOperand(i); + // Skip non-registers or registers with subregisters. + if (&MO == &Op || !MO.isReg() || MO.getSubReg()) + continue; + unsigned T = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(T)) { + FixedRegs.set(x); + continue; + } + if (MRI->getRegClass(T) != DoubleRC) + continue; + unsigned u = TargetRegisterInfo::virtReg2Index(T); + if (FixedRegs[u]) + continue; + DEBUG(dbgs() << ' ' << PrintReg(T, TRI)); + Asc.insert(T); + // Make it symmetric. + AssocMap[T].insert(R); + } + } + DEBUG(dbgs() << '\n'); + } + + UUMap R2P; + unsigned NextP = 1; + USet Visited; + for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { + unsigned R = TargetRegisterInfo::index2VirtReg(x); + if (Visited.count(R)) + continue; + // Create a new partition for R. + unsigned ThisP = FixedRegs[x] ? 0 : NextP++; + UVect WorkQ; + WorkQ.push_back(R); + for (unsigned i = 0; i < WorkQ.size(); ++i) { + unsigned T = WorkQ[i]; + if (Visited.count(T)) + continue; + R2P[T] = ThisP; + Visited.insert(T); + // Add all registers associated with T. 
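+      // This is a breadth-first walk over AssocMap, so every double register
+      // reachable from R lands in the same partition; partition number 0 is
+      // reserved for registers marked as fixed.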
+ USet &Asc = AssocMap[T]; + for (USet::iterator J = Asc.begin(), F = Asc.end(); J != F; ++J) + WorkQ.push_back(*J); + } + } + + for (auto I : R2P) + P2Rs[I.second].insert(I.first); +} + + +static inline int32_t profitImm(unsigned Lo, unsigned Hi) { + int32_t P = 0; + bool LoZ1 = false, HiZ1 = false; + if (Lo == 0 || Lo == 0xFFFFFFFF) + P += 10, LoZ1 = true; + if (Hi == 0 || Hi == 0xFFFFFFFF) + P += 10, HiZ1 = true; + if (!LoZ1 && !HiZ1 && Lo == Hi) + P += 3; + return P; +} + + +int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { + unsigned ImmX = 0; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case TargetOpcode::PHI: + for (const auto &Op : MI->operands()) + if (!Op.getSubReg()) + return 0; + return 10; + case TargetOpcode::COPY: + if (MI->getOperand(1).getSubReg() != 0) + return 10; + return 0; + + case Hexagon::L2_loadrd_io: + case Hexagon::S2_storerd_io: + return -1; + case Hexagon::L2_loadrd_pi: + case Hexagon::S2_storerd_pi: + return 2; + + case Hexagon::A2_tfrpi: + case Hexagon::CONST64_Int_Real: { + uint64_t D = MI->getOperand(1).getImm(); + unsigned Lo = D & 0xFFFFFFFFULL; + unsigned Hi = D >> 32; + return profitImm(Lo, Hi); + } + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + return profitImm(MI->getOperand(1).getImm(), + MI->getOperand(2).getImm()); + case Hexagon::A4_combineri: + ImmX++; + case Hexagon::A4_combineir: { + ImmX++; + int64_t V = MI->getOperand(ImmX).getImm(); + if (V == 0 || V == -1) + return 10; + // Fall through into A2_combinew. + } + case Hexagon::A2_combinew: + return 2; + + case Hexagon::A2_sxtw: + return 3; + + case Hexagon::A2_andp: + case Hexagon::A2_orp: + case Hexagon::A2_xorp: + return 1; + + case Hexagon::S2_asl_i_p_or: { + unsigned S = MI->getOperand(3).getImm(); + if (S == 0 || S == 32) + return 10; + return -1; + } + case Hexagon::S2_asl_i_p: + case Hexagon::S2_asr_i_p: + case Hexagon::S2_lsr_i_p: + unsigned S = MI->getOperand(2).getImm(); + if (S == 0 || S == 32) + return 10; + if (S == 16) + return 5; + if (S == 48) + return 7; + return -10; + } + + return 0; +} + + +bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM) + const { + unsigned FixedNum = 0, SplitNum = 0, LoopPhiNum = 0; + int32_t TotalP = 0; + + for (unsigned DR : Part) { + MachineInstr *DefI = MRI->getVRegDef(DR); + int32_t P = profit(DefI); + if (P == INT_MIN) + return false; + TotalP += P; + // Reduce the profitability of splitting induction registers. + if (isInduction(DR, IRM)) + TotalP -= 30; + + for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end(); + U != W; ++U) { + MachineInstr *UseI = U->getParent(); + if (isFixedInstr(UseI)) { + FixedNum++; + // Calculate the cost of generating REG_SEQUENCE instructions. + for (auto &Op : UseI->operands()) { + if (Op.isReg() && Part.count(Op.getReg())) + if (Op.getSubReg()) + TotalP -= 2; + } + continue; + } + // If a register from this partition is used in a fixed instruction, + // and there is also a register in this partition that is used in + // a loop phi node, then decrease the splitting profit as this can + // confuse the modulo scheduler. + if (UseI->isPHI()) { + const MachineBasicBlock *PB = UseI->getParent(); + const MachineLoop *L = MLI->getLoopFor(PB); + if (L && L->getHeader() == PB) + LoopPhiNum++; + } + // Splittable instruction. 
+ SplitNum++; + int32_t P = profit(UseI); + if (P == INT_MIN) + return false; + TotalP += P; + } + } + + if (FixedNum > 0 && LoopPhiNum > 0) + TotalP -= 20*LoopPhiNum; + + DEBUG(dbgs() << "Partition profit: " << TotalP << '\n'); + return TotalP > 0; +} + + +void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, + USet &Rs) { + const MachineBasicBlock *HB = L->getHeader(); + const MachineBasicBlock *LB = L->getLoopLatch(); + if (!HB || !LB) + return; + + // Examine the latch branch. Expect it to be a conditional branch to + // the header (either "br-cond header" or "br-cond exit; br header"). + MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock *TmpLB = const_cast<MachineBasicBlock*>(LB); + SmallVector<MachineOperand,2> Cond; + bool BadLB = TII->AnalyzeBranch(*TmpLB, TB, FB, Cond, false); + // Only analyzable conditional branches. HII::AnalyzeBranch will put + // the branch opcode as the first element of Cond, and the predicate + // operand as the second. + if (BadLB || Cond.size() != 2) + return; + // Only simple jump-conditional (with or without negation). + if (!TII->PredOpcodeHasJMP_c(Cond[0].getImm())) + return; + // Must go to the header. + if (TB != HB && FB != HB) + return; + assert(Cond[1].isReg() && "Unexpected Cond vector from AnalyzeBranch"); + // Expect a predicate register. + unsigned PR = Cond[1].getReg(); + assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass); + + // Get the registers on which the loop controlling compare instruction + // depends. + unsigned CmpR1 = 0, CmpR2 = 0; + const MachineInstr *CmpI = MRI->getVRegDef(PR); + while (CmpI->getOpcode() == Hexagon::C2_not) + CmpI = MRI->getVRegDef(CmpI->getOperand(1).getReg()); + + int Mask = 0, Val = 0; + bool OkCI = TII->analyzeCompare(CmpI, CmpR1, CmpR2, Mask, Val); + if (!OkCI) + return; + // Eliminate non-double input registers. + if (CmpR1 && MRI->getRegClass(CmpR1) != DoubleRC) + CmpR1 = 0; + if (CmpR2 && MRI->getRegClass(CmpR2) != DoubleRC) + CmpR2 = 0; + if (!CmpR1 && !CmpR2) + return; + + // Now examine the top of the loop: the phi nodes that could poten- + // tially define loop induction registers. The registers defined by + // such a phi node would be used in a 64-bit add, which then would + // be used in the loop compare instruction. + + // Get the set of all double registers defined by phi nodes in the + // loop header. + typedef std::vector<unsigned> UVect; + UVect DP; + for (auto &MI : *HB) { + if (!MI.isPHI()) + break; + const MachineOperand &MD = MI.getOperand(0); + unsigned R = MD.getReg(); + if (MRI->getRegClass(R) == DoubleRC) + DP.push_back(R); + } + if (DP.empty()) + return; + + auto NoIndOp = [this, CmpR1, CmpR2] (unsigned R) -> bool { + for (auto I = MRI->use_nodbg_begin(R), E = MRI->use_nodbg_end(); + I != E; ++I) { + const MachineInstr *UseI = I->getParent(); + if (UseI->getOpcode() != Hexagon::A2_addp) + continue; + // Get the output from the add. If it is one of the inputs to the + // loop-controlling compare instruction, then R is likely an induc- + // tion register. 
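+      // (NoIndOp returns false when R feeds the loop-controlling compare
+      // through an A2_addp, so the remove_if below keeps those likely
+      // induction registers at the front of DP.)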
+ unsigned T = UseI->getOperand(0).getReg(); + if (T == CmpR1 || T == CmpR2) + return false; + } + return true; + }; + UVect::iterator End = std::remove_if(DP.begin(), DP.end(), NoIndOp); + Rs.insert(DP.begin(), End); + Rs.insert(CmpR1); + Rs.insert(CmpR2); + + DEBUG({ + dbgs() << "For loop at BB#" << HB->getNumber() << " ind regs: "; + dump_partition(dbgs(), Rs, *TRI); + dbgs() << '\n'; + }); +} + + +void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) { + typedef std::vector<MachineLoop*> LoopVector; + LoopVector WorkQ; + + for (auto I : *MLI) + WorkQ.push_back(I); + for (unsigned i = 0; i < WorkQ.size(); ++i) { + for (auto I : *WorkQ[i]) + WorkQ.push_back(I); + } + + USet Rs; + for (unsigned i = 0, n = WorkQ.size(); i < n; ++i) { + MachineLoop *L = WorkQ[i]; + Rs.clear(); + collectIndRegsForLoop(L, Rs); + if (!Rs.empty()) + IRM.insert(std::make_pair(L, Rs)); + } +} + + +void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI, + const UUPairMap &PairMap, unsigned SubR) { + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + MachineInstr *NewI = BuildMI(B, MI, DL, TII->get(Opc)); + + for (auto &Op : MI->operands()) { + if (!Op.isReg()) { + NewI->addOperand(Op); + continue; + } + // For register operands, set the subregister. + unsigned R = Op.getReg(); + unsigned SR = Op.getSubReg(); + bool isVirtReg = TargetRegisterInfo::isVirtualRegister(R); + bool isKill = Op.isKill(); + if (isVirtReg && MRI->getRegClass(R) == DoubleRC) { + isKill = false; + UUPairMap::const_iterator F = PairMap.find(R); + if (F == PairMap.end()) { + SR = SubR; + } else { + const UUPair &P = F->second; + R = (SubR == Hexagon::subreg_loreg) ? P.first : P.second; + SR = 0; + } + } + auto CO = MachineOperand::CreateReg(R, Op.isDef(), Op.isImplicit(), isKill, + Op.isDead(), Op.isUndef(), Op.isEarlyClobber(), SR, Op.isDebug(), + Op.isInternalRead()); + NewI->addOperand(CO); + } +} + + +void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI, + const UUPairMap &PairMap) { + bool Load = MI->mayLoad(); + unsigned OrigOpc = MI->getOpcode(); + bool PostInc = (OrigOpc == Hexagon::L2_loadrd_pi || + OrigOpc == Hexagon::S2_storerd_pi); + MachineInstr *LowI, *HighI; + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + // Index of the base-address-register operand. + unsigned AdrX = PostInc ? (Load ? 2 : 1) + : (Load ? 1 : 0); + MachineOperand &AdrOp = MI->getOperand(AdrX); + unsigned RSA = getRegState(AdrOp); + MachineOperand &ValOp = Load ? MI->getOperand(0) + : (PostInc ? MI->getOperand(3) + : MI->getOperand(2)); + UUPairMap::const_iterator F = PairMap.find(ValOp.getReg()); + assert(F != PairMap.end()); + + if (Load) { + const UUPair &P = F->second; + int64_t Off = PostInc ? 0 : MI->getOperand(2).getImm(); + LowI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.first) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off); + HighI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.second) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off+4); + } else { + const UUPair &P = F->second; + int64_t Off = PostInc ? 
0 : MI->getOperand(1).getImm(); + LowI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io)) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off) + .addReg(P.first); + HighI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io)) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off+4) + .addReg(P.second); + } + + if (PostInc) { + // Create the increment of the address register. + int64_t Inc = Load ? MI->getOperand(3).getImm() + : MI->getOperand(2).getImm(); + MachineOperand &UpdOp = Load ? MI->getOperand(1) : MI->getOperand(0); + const TargetRegisterClass *RC = MRI->getRegClass(UpdOp.getReg()); + unsigned NewR = MRI->createVirtualRegister(RC); + assert(!UpdOp.getSubReg() && "Def operand with subreg"); + BuildMI(B, MI, DL, TII->get(Hexagon::A2_addi), NewR) + .addReg(AdrOp.getReg(), RSA) + .addImm(Inc); + MRI->replaceRegWith(UpdOp.getReg(), NewR); + // The original instruction will be deleted later. + } + + // Generate a new pair of memory-operands. + MachineFunction &MF = *B.getParent(); + for (auto &MO : MI->memoperands()) { + const MachinePointerInfo &Ptr = MO->getPointerInfo(); + unsigned F = MO->getFlags(); + int A = MO->getAlignment(); + + auto *Tmp1 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, A); + LowI->addMemOperand(MF, Tmp1); + auto *Tmp2 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, std::min(A, 4)); + HighI->addMemOperand(MF, Tmp2); + } +} + + +void HexagonSplitDoubleRegs::splitImmediate(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + assert(Op0.isReg() && Op1.isImm()); + uint64_t V = Op1.getImm(); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + + // The operand to A2_tfrsi can only have 32 significant bits. Immediate + // values in MachineOperand are stored as 64-bit integers, and so the + // value -1 may be represented either as 64-bit -1, or 4294967295. Both + // will have the 32 higher bits truncated in the end, but -1 will remain + // as -1, while the latter may appear to be a large unsigned value + // requiring a constant extender. The casting to int32_t will select the + // former representation. (The same reasoning applies to all 32-bit + // values.) 
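+  // For illustration (an example derived from the code below): the 64-bit
+  // immediate 0xFFFFFFFF00000005 becomes "A2_tfrsi P.first, #5" and
+  // "A2_tfrsi P.second, #-1"; the int32_t cast keeps the high word as -1
+  // rather than as the constant-extended value 4294967295.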
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first) + .addImm(int32_t(V & 0xFFFFFFFFULL)); + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second) + .addImm(int32_t(V >> 32)); +} + + +void HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + assert(Op0.isReg()); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + + if (Op1.isImm()) { + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second) + .addImm(Op1.getImm()); + } else if (Op1.isReg()) { + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.second) + .addReg(Op1.getReg(), getRegState(Op1), Op1.getSubReg()); + } else + llvm_unreachable("Unexpected operand"); + + if (Op2.isImm()) { + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first) + .addImm(Op2.getImm()); + } else if (Op2.isReg()) { + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first) + .addReg(Op2.getReg(), getRegState(Op2), Op2.getSubReg()); + } else + llvm_unreachable("Unexpected operand"); +} + + +void HexagonSplitDoubleRegs::splitExt(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + assert(Op0.isReg() && Op1.isReg()); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + unsigned RS = getRegState(Op1); + + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first) + .addReg(Op1.getReg(), RS & ~RegState::Kill, Op1.getSubReg()); + BuildMI(B, MI, DL, TII->get(Hexagon::S2_asr_i_r), P.second) + .addReg(Op1.getReg(), RS, Op1.getSubReg()) + .addImm(31); +} + + +void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + assert(Op0.isReg() && Op1.isReg() && Op2.isImm()); + int64_t Sh64 = Op2.getImm(); + assert(Sh64 >= 0 && Sh64 < 64); + unsigned S = Sh64; + + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + unsigned LoR = P.first; + unsigned HiR = P.second; + using namespace Hexagon; + + unsigned Opc = MI->getOpcode(); + bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p); + bool Left = !Right; + bool Signed = (Opc == S2_asr_i_p); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned RS = getRegState(Op1); + unsigned ShiftOpc = Left ? S2_asl_i_r + : (Signed ? S2_asr_i_r : S2_lsr_i_r); + unsigned LoSR = subreg_loreg; + unsigned HiSR = subreg_hireg; + + if (S == 0) { + // No shift, subregister copy. 
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), HiR) + .addReg(Op1.getReg(), RS, HiSR); + } else if (S < 32) { + const TargetRegisterClass *IntRC = &IntRegsRegClass; + unsigned TmpR = MRI->createVirtualRegister(IntRC); + // Expansion: + // Shift left: DR = shl R, #s + // LoR = shl R.lo, #s + // TmpR = extractu R.lo, #s, #32-s + // HiR = or (TmpR, asl(R.hi, #s)) + // Shift right: DR = shr R, #s + // HiR = shr R.hi, #s + // TmpR = shr R.lo, #s + // LoR = insert TmpR, R.hi, #s, #32-s + + // Shift left: + // LoR = shl R.lo, #s + // Shift right: + // TmpR = shr R.lo, #s + + // Make a special case for A2_aslh and A2_asrh (they are predicable as + // opposed to S2_asl_i_r/S2_asr_i_r). + if (S == 16 && Left) + BuildMI(B, MI, DL, TII->get(A2_aslh), LoR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + else if (S == 16 && Signed) + BuildMI(B, MI, DL, TII->get(A2_asrh), TmpR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + else + BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? LoR : TmpR)) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR) + .addImm(S); + + if (Left) { + // TmpR = extractu R.lo, #s, #32-s + BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR) + .addImm(S) + .addImm(32-S); + // HiR = or (TmpR, asl(R.hi, #s)) + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) + .addReg(TmpR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(S); + } else { + // HiR = shr R.hi, #s + BuildMI(B, MI, DL, TII->get(ShiftOpc), HiR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR) + .addImm(S); + // LoR = insert TmpR, R.hi, #s, #32-s + BuildMI(B, MI, DL, TII->get(S2_insert), LoR) + .addReg(TmpR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(S) + .addImm(32-S); + } + } else if (S == 32) { + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), (Left ? HiR : LoR)) + .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR)); + if (!Signed) + BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR)) + .addImm(0); + else // Must be right shift. + BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(31); + } else if (S < 64) { + S -= 32; + if (S == 16 && Left) + BuildMI(B, MI, DL, TII->get(A2_aslh), HiR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + else if (S == 16 && Signed) + BuildMI(B, MI, DL, TII->get(A2_asrh), LoR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR); + else + BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? HiR : LoR)) + .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR)) + .addImm(S); + + if (Signed) + BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(31); + else + BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? 
LoR : HiR)) + .addImm(0); + } +} + + +void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op3 = MI->getOperand(3); + assert(Op0.isReg() && Op1.isReg() && Op2.isReg() && Op3.isImm()); + int64_t Sh64 = Op3.getImm(); + assert(Sh64 >= 0 && Sh64 < 64); + unsigned S = Sh64; + + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + unsigned LoR = P.first; + unsigned HiR = P.second; + using namespace Hexagon; + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned RS1 = getRegState(Op1); + unsigned RS2 = getRegState(Op2); + const TargetRegisterClass *IntRC = &IntRegsRegClass; + + unsigned LoSR = subreg_loreg; + unsigned HiSR = subreg_hireg; + + // Op0 = S2_asl_i_p_or Op1, Op2, Op3 + // means: Op0 = or (Op1, asl(Op2, Op3)) + + // Expansion of + // DR = or (R1, asl(R2, #s)) + // + // LoR = or (R1.lo, asl(R2.lo, #s)) + // Tmp1 = extractu R2.lo, #s, #32-s + // Tmp2 = or R1.hi, Tmp1 + // HiR = or (Tmp2, asl(R2.hi, #s)) + + if (S == 0) { + // DR = or (R1, asl(R2, #0)) + // -> or (R1, R2) + // i.e. LoR = or R1.lo, R2.lo + // HiR = or R1.hi, R2.hi + BuildMI(B, MI, DL, TII->get(A2_or), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR) + .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(A2_or), HiR) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(Op2.getReg(), RS2, HiSR); + } else if (S < 32) { + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR) + .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR) + .addImm(S); + unsigned TmpR1 = MRI->createVirtualRegister(IntRC); + BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR1) + .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR) + .addImm(S) + .addImm(32-S); + unsigned TmpR2 = MRI->createVirtualRegister(IntRC); + BuildMI(B, MI, DL, TII->get(A2_or), TmpR2) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(TmpR1); + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) + .addReg(TmpR2) + .addReg(Op2.getReg(), RS2, HiSR) + .addImm(S); + } else if (S == 32) { + // DR = or (R1, asl(R2, #32)) + // -> or R1, R2.lo + // LoR = R1.lo + // HiR = or R1.hi, R2.lo + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(A2_or), HiR) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(Op2.getReg(), RS2, LoSR); + } else if (S < 64) { + // DR = or (R1, asl(R2, #s)) + // + // LoR = R1:lo + // HiR = or (R1:hi, asl(R2:lo, #s-32)) + S -= 32; + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(Op2.getReg(), RS2, LoSR) + .addImm(S); + } +} + + +bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI, + const UUPairMap &PairMap) { + DEBUG(dbgs() << "Splitting: " << *MI); + bool Split = false; + unsigned Opc = MI->getOpcode(); + using namespace Hexagon; + + switch (Opc) { + case TargetOpcode::PHI: + case TargetOpcode::COPY: { + unsigned DstR = MI->getOperand(0).getReg(); + if (MRI->getRegClass(DstR) == DoubleRC) { + createHalfInstr(Opc, MI, PairMap, subreg_loreg); + createHalfInstr(Opc, MI, PairMap, subreg_hireg); + Split = true; + } + break; + } + case A2_andp: + createHalfInstr(A2_and, MI, PairMap, 
subreg_loreg); + createHalfInstr(A2_and, MI, PairMap, subreg_hireg); + Split = true; + break; + case A2_orp: + createHalfInstr(A2_or, MI, PairMap, subreg_loreg); + createHalfInstr(A2_or, MI, PairMap, subreg_hireg); + Split = true; + break; + case A2_xorp: + createHalfInstr(A2_xor, MI, PairMap, subreg_loreg); + createHalfInstr(A2_xor, MI, PairMap, subreg_hireg); + Split = true; + break; + + case L2_loadrd_io: + case L2_loadrd_pi: + case S2_storerd_io: + case S2_storerd_pi: + splitMemRef(MI, PairMap); + Split = true; + break; + + case A2_tfrpi: + case CONST64_Int_Real: + splitImmediate(MI, PairMap); + Split = true; + break; + + case A2_combineii: + case A4_combineir: + case A4_combineii: + case A4_combineri: + case A2_combinew: + splitCombine(MI, PairMap); + Split = true; + break; + + case A2_sxtw: + splitExt(MI, PairMap); + Split = true; + break; + + case S2_asl_i_p: + case S2_asr_i_p: + case S2_lsr_i_p: + splitShift(MI, PairMap); + Split = true; + break; + + case S2_asl_i_p_or: + splitAslOr(MI, PairMap); + Split = true; + break; + + default: + llvm_unreachable("Instruction not splitable"); + return false; + } + + return Split; +} + + +void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI, + const UUPairMap &PairMap) { + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || !Op.getSubReg()) + continue; + unsigned R = Op.getReg(); + UUPairMap::const_iterator F = PairMap.find(R); + if (F == PairMap.end()) + continue; + const UUPair &P = F->second; + switch (Op.getSubReg()) { + case Hexagon::subreg_loreg: + Op.setReg(P.first); + break; + case Hexagon::subreg_hireg: + Op.setReg(P.second); + break; + } + Op.setSubReg(0); + } +} + + +void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + if (MRI->getRegClass(R) != DoubleRC || Op.getSubReg()) + continue; + UUPairMap::const_iterator F = PairMap.find(R); + if (F == PairMap.end()) + continue; + const UUPair &Pr = F->second; + unsigned NewDR = MRI->createVirtualRegister(DoubleRC); + BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR) + .addReg(Pr.first) + .addImm(Hexagon::subreg_loreg) + .addReg(Pr.second) + .addImm(Hexagon::subreg_hireg); + Op.setReg(NewDR); + } +} + + +bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) { + const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; + typedef std::set<MachineInstr*> MISet; + bool Changed = false; + + DEBUG(dbgs() << "Splitting partition: "; dump_partition(dbgs(), Part, *TRI); + dbgs() << '\n'); + + UUPairMap PairMap; + + MISet SplitIns; + for (unsigned DR : Part) { + MachineInstr *DefI = MRI->getVRegDef(DR); + SplitIns.insert(DefI); + + // Collect all instructions, including fixed ones. We won't split them, + // but we need to visit them again to insert the REG_SEQUENCE instructions. 
+ for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end(); + U != W; ++U) + SplitIns.insert(U->getParent()); + + unsigned LoR = MRI->createVirtualRegister(IntRC); + unsigned HiR = MRI->createVirtualRegister(IntRC); + DEBUG(dbgs() << "Created mapping: " << PrintReg(DR, TRI) << " -> " + << PrintReg(HiR, TRI) << ':' << PrintReg(LoR, TRI) << '\n'); + PairMap.insert(std::make_pair(DR, UUPair(LoR, HiR))); + } + + MISet Erase; + for (auto MI : SplitIns) { + if (isFixedInstr(MI)) { + collapseRegPairs(MI, PairMap); + } else { + bool Done = splitInstr(MI, PairMap); + if (Done) + Erase.insert(MI); + Changed |= Done; + } + } + + for (unsigned DR : Part) { + // Before erasing "double" instructions, revisit all uses of the double + // registers in this partition, and replace all uses of them with subre- + // gisters, with the corresponding single registers. + MISet Uses; + for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end(); + U != W; ++U) + Uses.insert(U->getParent()); + for (auto M : Uses) + replaceSubregUses(M, PairMap); + } + + for (auto MI : Erase) { + MachineBasicBlock *B = MI->getParent(); + B->erase(MI); + } + + return Changed; +} + + +bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "Splitting double registers in function: " + << MF.getName() << '\n'); + + auto &ST = MF.getSubtarget<HexagonSubtarget>(); + TRI = ST.getRegisterInfo(); + TII = ST.getInstrInfo(); + MRI = &MF.getRegInfo(); + MLI = &getAnalysis<MachineLoopInfo>(); + + UUSetMap P2Rs; + LoopRegMap IRM; + + collectIndRegs(IRM); + partitionRegisters(P2Rs); + + DEBUG({ + dbgs() << "Register partitioning: (partition #0 is fixed)\n"; + for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) { + dbgs() << '#' << I->first << " -> "; + dump_partition(dbgs(), I->second, *TRI); + dbgs() << '\n'; + } + }); + + bool Changed = false; + int Limit = MaxHSDR; + + for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) { + if (I->first == 0) + continue; + if (Limit >= 0 && Counter >= Limit) + break; + USet &Part = I->second; + DEBUG(dbgs() << "Calculating profit for partition #" << I->first << '\n'); + if (!isProfitable(Part, IRM)) + continue; + Counter++; + Changed |= splitPartition(Part); + } + + return Changed; +} + +FunctionPass *llvm::createHexagonSplitDoubleRegs() { + return new HexagonSplitDoubleRegs(); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp new file mode 100644 index 0000000..b5339ff --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp @@ -0,0 +1,616 @@ +//===--- HexagonStoreWidening.cpp------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Replace sequences of "narrow" stores to adjacent memory locations with +// a fewer "wide" stores that have the same effect. +// For example, replace: +// S4_storeirb_io %vreg100, 0, 0 ; store-immediate-byte +// S4_storeirb_io %vreg100, 1, 0 ; store-immediate-byte +// with +// S4_storeirh_io %vreg100, 0, 0 ; store-immediate-halfword +// The above is the general idea. The actual cases handled by the code +// may be a bit more complex. +// The purpose of this pass is to reduce the number of outstanding stores, +// or as one could say, "reduce store queue pressure". 
Also, wide stores +// mean fewer stores, and since there are only two memory instructions allowed +// per packet, it also means fewer packets, and ultimately fewer cycles. +//===---------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-widen-stores" + +#include "HexagonTargetMachine.h" + +#include "llvm/PassSupport.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +#include <algorithm> + + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonStoreWidening(); + void initializeHexagonStoreWideningPass(PassRegistry&); +} + +namespace { + struct HexagonStoreWidening : public MachineFunctionPass { + const HexagonInstrInfo *TII; + const HexagonRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + AliasAnalysis *AA; + MachineFunction *MF; + + public: + static char ID; + HexagonStoreWidening() : MachineFunctionPass(ID) { + initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "Hexagon Store Widening"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static bool handledStoreType(const MachineInstr *MI); + + private: + static const int MaxWideSize = 4; + + typedef std::vector<MachineInstr*> InstrGroup; + typedef std::vector<InstrGroup> InstrGroupList; + + bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO); + bool instrAliased(InstrGroup &Stores, const MachineInstr *MI); + void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin, + InstrGroup::iterator End, InstrGroup &Group); + void createStoreGroups(MachineBasicBlock &MBB, + InstrGroupList &StoreGroups); + bool processBasicBlock(MachineBasicBlock &MBB); + bool processStoreGroup(InstrGroup &Group); + bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End, + InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize); + bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); + bool replaceStores(InstrGroup &OG, InstrGroup &NG); + bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2); + }; + +} // namespace + + +namespace { + +// Some local helper functions... 
+unsigned getBaseAddressRegister(const MachineInstr *MI) {
+  const MachineOperand &MO = MI->getOperand(0);
+  assert(MO.isReg() && "Expecting register operand");
+  return MO.getReg();
+}
+
+int64_t getStoreOffset(const MachineInstr *MI) {
+  unsigned OpC = MI->getOpcode();
+  assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode");
+
+  switch (OpC) {
+    case Hexagon::S4_storeirb_io:
+    case Hexagon::S4_storeirh_io:
+    case Hexagon::S4_storeiri_io: {
+      const MachineOperand &MO = MI->getOperand(1);
+      assert(MO.isImm() && "Expecting immediate offset");
+      return MO.getImm();
+    }
+  }
+  dbgs() << *MI;
+  llvm_unreachable("Store offset calculation missing for a handled opcode");
+  return 0;
+}
+
+const MachineMemOperand &getStoreTarget(const MachineInstr *MI) {
+  assert(!MI->memoperands_empty() && "Expecting memory operands");
+  return **MI->memoperands_begin();
+}
+
+} // namespace
+
+
+char HexagonStoreWidening::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
+                "Hexagon Store Widening", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
+                "Hexagon Store Widening", false, false)
+
+
+// Filtering function: any stores whose opcodes are not "approved" of by
+// this function will not be subjected to widening.
+inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) {
+  // For now, only handle stores of immediate values.
+  // Also, reject stores to stack slots.
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
+    case Hexagon::S4_storeirb_io:
+    case Hexagon::S4_storeirh_io:
+    case Hexagon::S4_storeiri_io:
+      // Base address must be a register. (Implement FI later.)
+      return MI->getOperand(0).isReg();
+    default:
+      return false;
+  }
+}
+
+
+// Check if the machine memory operand MMO is aliased with any of the
+// stores in the store group Stores.
+bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
+      const MachineMemOperand &MMO) {
+  if (!MMO.getValue())
+    return true;
+
+  MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo());
+
+  for (auto SI : Stores) {
+    const MachineMemOperand &SMO = getStoreTarget(SI);
+    if (!SMO.getValue())
+      return true;
+
+    MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo());
+    if (AA->alias(L, SL))
+      return true;
+  }
+
+  return false;
+}
+
+
+// Check if the machine instruction MI accesses any storage aliased with
+// any store in the group Stores.
+bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
+      const MachineInstr *MI) {
+  for (auto &I : MI->memoperands())
+    if (instrAliased(Stores, *I))
+      return true;
+  return false;
+}
+
+
+// Inspect a machine basic block, and generate store groups out of stores
+// encountered in the block.
+//
+// A store group is a group of stores that use the same base register,
+// and which can be reordered within that group without altering the
+// semantics of the program. A single store group could be widened as
+// a whole, if there existed a single store instruction with the same
+// semantics as the entire group. In many cases, a single store group
+// may need more than one wide store.
+void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB,
+      InstrGroupList &StoreGroups) {
+  InstrGroup AllInsns;
+
+  // Copy all instruction pointers from the basic block to a temporary
+  // list. This will allow operating on the list, and modifying its
+  // elements without affecting the basic block.
+ for (auto &I : MBB) + AllInsns.push_back(&I); + + // Traverse all instructions in the AllInsns list, and if we encounter + // a store, then try to create a store group starting at that instruction + // i.e. a sequence of independent stores that can be widened. + for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) { + MachineInstr *MI = *I; + // Skip null pointers (processed instructions). + if (!MI || !handledStoreType(MI)) + continue; + + // Found a store. Try to create a store group. + InstrGroup G; + createStoreGroup(MI, I+1, E, G); + if (G.size() > 1) + StoreGroups.push_back(G); + } +} + + +// Create a single store group. The stores need to be independent between +// themselves, and also there cannot be other instructions between them +// that could read or modify storage being stored into. +void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore, + InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) { + assert(handledStoreType(BaseStore) && "Unexpected instruction"); + unsigned BaseReg = getBaseAddressRegister(BaseStore); + InstrGroup Other; + + Group.push_back(BaseStore); + + for (auto I = Begin; I != End; ++I) { + MachineInstr *MI = *I; + if (!MI) + continue; + + if (handledStoreType(MI)) { + // If this store instruction is aliased with anything already in the + // group, terminate the group now. + if (instrAliased(Group, getStoreTarget(MI))) + return; + // If this store is aliased to any of the memory instructions we have + // seen so far (that are not a part of this group), terminate the group. + if (instrAliased(Other, getStoreTarget(MI))) + return; + + unsigned BR = getBaseAddressRegister(MI); + if (BR == BaseReg) { + Group.push_back(MI); + *I = 0; + continue; + } + } + + // Assume calls are aliased to everything. + if (MI->isCall() || MI->hasUnmodeledSideEffects()) + return; + + if (MI->mayLoad() || MI->mayStore()) { + if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI)) + return; + Other.push_back(MI); + } + } // for +} + + +// Check if store instructions S1 and S2 are adjacent. More precisely, +// S2 has to access memory immediately following that accessed by S1. +bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1, + const MachineInstr *S2) { + if (!handledStoreType(S1) || !handledStoreType(S2)) + return false; + + const MachineMemOperand &S1MO = getStoreTarget(S1); + + // Currently only handling immediate stores. + int Off1 = S1->getOperand(1).getImm(); + int Off2 = S2->getOperand(1).getImm(); + + return (Off1 >= 0) ? Off1+S1MO.getSize() == unsigned(Off2) + : int(Off1+S1MO.getSize()) == Off2; +} + + +/// Given a sequence of adjacent stores, and a maximum size of a single wide +/// store, pick a group of stores that can be replaced by a single store +/// of size not exceeding MaxSize. The selected sequence will be recorded +/// in OG ("old group" of instructions). +/// OG should be empty on entry, and should be left empty if the function +/// fails. 
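+/// For illustration (derived from the selection logic below): three adjacent
+/// byte-sized store-immediates at offsets 0, 1 and 2 add up to three bytes,
+/// but only the leading power-of-2 prefix is kept, so the first two stores
+/// are selected and later widened into one halfword store, provided the
+/// first store's memory operand is at least halfword-aligned.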
+bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
+      InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize,
+      unsigned MaxSize) {
+  assert(Begin != End && "No instructions to analyze");
+  assert(OG.empty() && "Old group not empty on entry");
+
+  if (std::distance(Begin, End) <= 1)
+    return false;
+
+  MachineInstr *FirstMI = *Begin;
+  assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
+  const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI);
+  unsigned Alignment = FirstMMO.getAlignment();
+  unsigned SizeAccum = FirstMMO.getSize();
+  unsigned FirstOffset = getStoreOffset(FirstMI);
+
+  // The initial value of SizeAccum should always be a power of 2.
+  assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");
+
+  // If the size of the first store equals to or exceeds the limit, do nothing.
+  if (SizeAccum >= MaxSize)
+    return false;
+
+  // If the size of the first store is greater than or equal to the alignment
+  // of the stored-to address, then the store cannot be made any wider.
+  if (SizeAccum >= Alignment)
+    return false;
+
+  // The offset of a store will put restrictions on how wide the store can be.
+  // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0.
+  // If the first store already exhausts the offset limits, quit. Test this
+  // by checking if the next wider size would exceed the limit.
+  if ((2*SizeAccum-1) & FirstOffset)
+    return false;
+
+  OG.push_back(FirstMI);
+  MachineInstr *S1 = FirstMI, *S2 = *(Begin+1);
+  InstrGroup::iterator I = Begin+1;
+
+  // Pow2Num will be the largest number of elements in OG such that the sum
+  // of sizes of stores 0...Pow2Num-1 will be a power of 2.
+  unsigned Pow2Num = 1;
+  unsigned Pow2Size = SizeAccum;
+
+  // Be greedy: keep accumulating stores as long as they are to adjacent
+  // memory locations, and as long as the total number of bytes stored
+  // does not exceed the limit (MaxSize).
+  // Keep track of when the total size covered is a power of 2, since
+  // this is a size a single store can cover.
+  while (I != End) {
+    S2 = *I;
+    // Stores are sorted, so if S1 and S2 are not adjacent, there won't be
+    // any other store to fill the "hole".
+    if (!storesAreAdjacent(S1, S2))
+      break;
+
+    unsigned S2Size = getStoreTarget(S2).getSize();
+    if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
+      break;
+
+    OG.push_back(S2);
+    SizeAccum += S2Size;
+    if (isPowerOf2_32(SizeAccum)) {
+      Pow2Num = OG.size();
+      Pow2Size = SizeAccum;
+    }
+    if ((2*Pow2Size-1) & FirstOffset)
+      break;
+
+    S1 = S2;
+    ++I;
+  }
+
+  // The stores don't add up to anything that can be widened. Clean up.
+  if (Pow2Num <= 1) {
+    OG.clear();
+    return false;
+  }
+
+  // Only leave the stores being widened.
+  OG.resize(Pow2Num);
+  TotalSize = Pow2Size;
+  return true;
+}
+
+
+/// Given an "old group" OG of stores, create a "new group" NG of instructions
+/// to replace them. Ideally, NG would only have a single instruction in it,
+/// but that may only be possible for store-immediate.
+bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
+      unsigned TotalSize) {
+  // XXX Current limitations:
+  // - only expect stores of immediate values in OG,
+  // - only handle a TotalSize of up to 4.
+
+  if (TotalSize > 4)
+    return false;
+
+  unsigned Acc = 0; // Value accumulator.
+  unsigned Shift = 0;
+
+  for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) {
+    MachineInstr *MI = *I;
+    const MachineMemOperand &MMO = getStoreTarget(MI);
+    MachineOperand &SO = MI->getOperand(2); // Source.
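+    // For example (derived from this loop): widening byte stores of #0x12 at
+    // offset 0 and #0x34 at offset 1 accumulates Acc = 0x3412, which is then
+    // emitted below as a single S4_storeirh_io with immediate 0x3412.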
+ assert(SO.isImm() && "Expecting an immediate operand"); + + unsigned NBits = MMO.getSize()*8; + unsigned Mask = (0xFFFFFFFFU >> (32-NBits)); + unsigned Val = (SO.getImm() & Mask) << Shift; + Acc |= Val; + Shift += NBits; + } + + + MachineInstr *FirstSt = OG.front(); + DebugLoc DL = OG.back()->getDebugLoc(); + const MachineMemOperand &OldM = getStoreTarget(FirstSt); + MachineMemOperand *NewM = + MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(), + TotalSize, OldM.getAlignment(), + OldM.getAAInfo()); + + if (Acc < 0x10000) { + // Create mem[hw] = #Acc + unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io : + (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0; + assert(WOpc && "Unexpected size"); + + int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc); + const MCInstrDesc &StD = TII->get(WOpc); + MachineOperand &MR = FirstSt->getOperand(0); + int64_t Off = FirstSt->getOperand(1).getImm(); + MachineInstr *StI = BuildMI(*MF, DL, StD) + .addReg(MR.getReg(), getKillRegState(MR.isKill())) + .addImm(Off) + .addImm(Val); + StI->addMemOperand(*MF, NewM); + NG.push_back(StI); + } else { + // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg + const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); + const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); + unsigned VReg = MF->getRegInfo().createVirtualRegister(RC); + MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg) + .addImm(int(Acc)); + NG.push_back(TfrI); + + unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io : + (TotalSize == 4) ? Hexagon::S2_storeri_io : 0; + assert(WOpc && "Unexpected size"); + + const MCInstrDesc &StD = TII->get(WOpc); + MachineOperand &MR = FirstSt->getOperand(0); + int64_t Off = FirstSt->getOperand(1).getImm(); + MachineInstr *StI = BuildMI(*MF, DL, StD) + .addReg(MR.getReg(), getKillRegState(MR.isKill())) + .addImm(Off) + .addReg(VReg, RegState::Kill); + StI->addMemOperand(*MF, NewM); + NG.push_back(StI); + } + + return true; +} + + +// Replace instructions from the old group OG with instructions from the +// new group NG. Conceptually, remove all instructions in OG, and then +// insert all instructions in NG, starting at where the first instruction +// from OG was (in the order in which they appeared in the basic block). +// (The ordering in OG does not have to match the order in the basic block.) +bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) { + DEBUG({ + dbgs() << "Replacing:\n"; + for (auto I : OG) + dbgs() << " " << *I; + dbgs() << "with\n"; + for (auto I : NG) + dbgs() << " " << *I; + }); + + MachineBasicBlock *MBB = OG.back()->getParent(); + MachineBasicBlock::iterator InsertAt = MBB->end(); + + // Need to establish the insertion point. The best one is right before + // the first store in the OG, but in the order in which the stores occur + // in the program list. Since the ordering in OG does not correspond + // to the order in the program list, we need to do some work to find + // the insertion point. + + // Create a set of all instructions in OG (for quick lookup). + SmallPtrSet<MachineInstr*, 4> InstrSet; + for (auto I : OG) + InstrSet.insert(I); + + // Traverse the block, until we hit an instruction from OG. + for (auto &I : *MBB) { + if (InstrSet.count(&I)) { + InsertAt = I; + break; + } + } + + assert((InsertAt != MBB->end()) && "Cannot locate any store from the group"); + + bool AtBBStart = false; + + // InsertAt points at the first instruction that will be removed. 
We need + // to move it out of the way, so it remains valid after removing all the + // old stores, and so we are able to recover it back to the proper insertion + // position. + if (InsertAt != MBB->begin()) + --InsertAt; + else + AtBBStart = true; + + for (auto I : OG) + I->eraseFromParent(); + + if (!AtBBStart) + ++InsertAt; + else + InsertAt = MBB->begin(); + + for (auto I : NG) + MBB->insert(InsertAt, I); + + return true; +} + + +// Break up the group into smaller groups, each of which can be replaced by +// a single wide store. Widen each such smaller group and replace the old +// instructions with the widened ones. +bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) { + bool Changed = false; + InstrGroup::iterator I = Group.begin(), E = Group.end(); + InstrGroup OG, NG; // Old and new groups. + unsigned CollectedSize; + + while (I != E) { + OG.clear(); + NG.clear(); + + bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) && + createWideStores(OG, NG, CollectedSize) && + replaceStores(OG, NG); + if (!Succ) + continue; + + assert(OG.size() > 1 && "Created invalid group"); + assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements"); + I += OG.size()-1; + + Changed = true; + } + + return Changed; +} + + +// Process a single basic block: create the store groups, and replace them +// with the widened stores, if possible. Processing of each basic block +// is independent from processing of any other basic block. This transfor- +// mation could be stopped after having processed any basic block without +// any ill effects (other than not having performed widening in the unpro- +// cessed blocks). Also, the basic blocks can be processed in any order. +bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { + InstrGroupList SGs; + bool Changed = false; + + createStoreGroups(MBB, SGs); + + auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool { + return getStoreOffset(A) < getStoreOffset(B); + }; + for (auto &G : SGs) { + assert(G.size() > 1 && "Store group with fewer than 2 elements"); + std::sort(G.begin(), G.end(), Less); + + Changed |= processStoreGroup(G); + } + + return Changed; +} + + +bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) { + MF = &MFn; + auto &ST = MFn.getSubtarget<HexagonSubtarget>(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + MRI = &MFn.getRegInfo(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + + bool Changed = false; + + for (auto &B : MFn) + Changed |= processBasicBlock(B); + + return Changed; +} + + +FunctionPass *llvm::createHexagonStoreWidening() { + return new HexagonStoreWidening(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp new file mode 100644 index 0000000..aa0efd4 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -0,0 +1,125 @@ +//===-- HexagonSubtarget.cpp - Hexagon Subtarget Information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Hexagon specific subclass of TargetSubtarget. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonSubtarget.h" +#include "Hexagon.h" +#include "HexagonRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include <map> + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-subtarget" + +#define GET_SUBTARGETINFO_CTOR +#define GET_SUBTARGETINFO_TARGET_DESC +#include "HexagonGenSubtargetInfo.inc" + +static cl::opt<bool> EnableMemOps("enable-hexagon-memops", + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true), + cl::desc("Generate V4 MEMOP in code generation for Hexagon target")); + +static cl::opt<bool> DisableMemOps("disable-hexagon-memops", + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false), + cl::desc("Do not generate V4 MEMOP in code generation for Hexagon target")); + +static cl::opt<bool> EnableIEEERndNear("enable-hexagon-ieee-rnd-near", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Generate non-chopped conversion from fp to int.")); + +static cl::opt<bool> EnableBSBSched("enable-bsb-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + +static cl::opt<bool> EnableHexagonHVXDouble("enable-hexagon-hvx-double", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Hexagon Double Vector eXtensions")); + +static cl::opt<bool> EnableHexagonHVX("enable-hexagon-hvx", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Hexagon Vector eXtensions")); + +static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon MI Scheduling")); + +void HexagonSubtarget::initializeEnvironment() { + UseMemOps = false; + ModeIEEERndNear = false; + UseBSBScheduling = false; +} + +HexagonSubtarget & +HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { + CPUString = HEXAGON_MC::selectHexagonCPU(getTargetTriple(), CPU); + + static std::map<StringRef, HexagonArchEnum> CpuTable { + { "hexagonv4", V4 }, + { "hexagonv5", V5 }, + { "hexagonv55", V55 }, + { "hexagonv60", V60 }, + }; + + auto foundIt = CpuTable.find(CPUString); + if (foundIt != CpuTable.end()) + HexagonArchVersion = foundIt->second; + else + llvm_unreachable("Unrecognized Hexagon processor version"); + + UseHVXOps = false; + UseHVXDblOps = false; + ParseSubtargetFeatures(CPUString, FS); + + if (EnableHexagonHVX.getPosition()) + UseHVXOps = EnableHexagonHVX; + if (EnableHexagonHVXDouble.getPosition()) + UseHVXDblOps = EnableHexagonHVXDouble; + + return *this; +} + +HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, + StringRef FS, const TargetMachine &TM) + : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU), + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), + FrameLowering() { + + initializeEnvironment(); + + // Initialize scheduling itinerary for the specified CPU. + InstrItins = getInstrItineraryForCPU(CPUString); + + // UseMemOps on by default unless disabled explicitly + if (DisableMemOps) + UseMemOps = false; + else if (EnableMemOps) + UseMemOps = true; + else + UseMemOps = false; + + if (EnableIEEERndNear) + ModeIEEERndNear = true; + else + ModeIEEERndNear = false; + + UseBSBScheduling = hasV60TOps() && EnableBSBSched; +} + +// Pin the vtable to this file. 
+void HexagonSubtarget::anchor() {} + +bool HexagonSubtarget::enableMachineScheduler() const { + if (DisableHexagonMISched.getNumOccurrences()) + return !DisableHexagonMISched; + return true; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h new file mode 100644 index 0000000..c7ae139 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -0,0 +1,121 @@ +//===-- HexagonSubtarget.h - Define Subtarget for the Hexagon ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Hexagon specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H + +#include "HexagonFrameLowering.h" +#include "HexagonISelLowering.h" +#include "HexagonInstrInfo.h" +#include "HexagonSelectionDAGInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <string> + +#define GET_SUBTARGETINFO_HEADER +#include "HexagonGenSubtargetInfo.inc" + +#define Hexagon_SMALL_DATA_THRESHOLD 8 +#define Hexagon_SLOTS 4 + +namespace llvm { + +class HexagonSubtarget : public HexagonGenSubtargetInfo { + virtual void anchor(); + + bool UseMemOps, UseHVXOps, UseHVXDblOps; + bool ModeIEEERndNear; + +public: + enum HexagonArchEnum { + V4, V5, V55, V60 + }; + + HexagonArchEnum HexagonArchVersion; + /// True if the target should use Back-Skip-Back scheduling. This is the + /// default for V60. + bool UseBSBScheduling; + +private: + std::string CPUString; + HexagonInstrInfo InstrInfo; + HexagonTargetLowering TLInfo; + HexagonSelectionDAGInfo TSInfo; + HexagonFrameLowering FrameLowering; + InstrItineraryData InstrItins; + void initializeEnvironment(); + +public: + HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, + const TargetMachine &TM); + + /// getInstrItins - Return the instruction itineraries based on subtarget + /// selection. + const InstrItineraryData *getInstrItineraryData() const override { + return &InstrItins; + } + const HexagonInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const HexagonRegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + const HexagonTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const HexagonFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const HexagonSelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + + HexagonSubtarget &initializeSubtargetDependencies(StringRef CPU, + StringRef FS); + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. 
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + bool useMemOps() const { return UseMemOps; } + bool hasV5TOps() const { return getHexagonArchVersion() >= V5; } + bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; } + bool hasV55TOps() const { return getHexagonArchVersion() >= V55; } + bool hasV55TOpsOnly() const { return getHexagonArchVersion() == V55; } + bool hasV60TOps() const { return getHexagonArchVersion() >= V60; } + bool hasV60TOpsOnly() const { return getHexagonArchVersion() == V60; } + bool modeIEEERndNear() const { return ModeIEEERndNear; } + bool useHVXOps() const { return UseHVXOps; } + bool useHVXDblOps() const { return UseHVXOps && UseHVXDblOps; } + bool useHVXSglOps() const { return UseHVXOps && !UseHVXDblOps; } + + bool useBSBScheduling() const { return UseBSBScheduling; } + bool enableMachineScheduler() const override; + // Always use the TargetLowering default scheduler. + // FIXME: This will use the vliw scheduler which is probably just hurting + // compiler time and will be removed eventually anyway. + bool enableMachineSchedDefaultSched() const override { return false; } + + const std::string &getCPUString () const { return CPUString; } + + // Threshold for small data section + unsigned getSmallDataThreshold() const { + return Hexagon_SMALL_DATA_THRESHOLD; + } + const HexagonArchEnum &getHexagonArchVersion() const { + return HexagonArchVersion; + } +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp new file mode 100644 index 0000000..9dccd69 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -0,0 +1,299 @@ +//===-- HexagonTargetMachine.cpp - Define TargetMachine for Hexagon -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements the info about Hexagon target spec. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetMachine.h" +#include "Hexagon.h" +#include "HexagonISelLowering.h" +#include "HexagonMachineScheduler.h" +#include "HexagonTargetObjectFile.h" +#include "HexagonTargetTransformInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops", + cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); + +static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon CFG Optimization")); + +static cl::opt<bool> DisableStoreWidening("disable-store-widen", + cl::Hidden, cl::init(false), cl::desc("Disable store widening")); + +static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets", + cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Early expansion of MUX")); + +static cl::opt<bool> EnableEarlyIf("hexagon-eif", cl::init(true), cl::Hidden, + cl::ZeroOrMore, cl::desc("Enable early if-conversion")); + +static cl::opt<bool> EnableGenInsert("hexagon-insert", cl::init(true), + cl::Hidden, cl::desc("Generate \"insert\" instructions")); + +static cl::opt<bool> EnableCommGEP("hexagon-commgep", cl::init(true), + cl::Hidden, cl::ZeroOrMore, cl::desc("Enable commoning of GEP instructions")); + +static cl::opt<bool> EnableGenExtract("hexagon-extract", cl::init(true), + cl::Hidden, cl::desc("Generate \"extract\" instructions")); + +static cl::opt<bool> EnableGenMux("hexagon-mux", cl::init(true), cl::Hidden, + cl::desc("Enable converting conditional transfers into MUX instructions")); + +static cl::opt<bool> EnableGenPred("hexagon-gen-pred", cl::init(true), + cl::Hidden, cl::desc("Enable conversion of arithmetic operations to " + "predicate instructions")); + +static cl::opt<bool> DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden, + cl::desc("Disable splitting double registers")); + +static cl::opt<bool> EnableBitSimplify("hexagon-bit", cl::init(true), + cl::Hidden, cl::desc("Bit simplification")); + +static cl::opt<bool> EnableLoopResched("hexagon-loop-resched", cl::init(true), + cl::Hidden, cl::desc("Loop rescheduling")); + +/// HexagonTargetMachineModule - Note that this is used on hosts that +/// cannot link in a library unless there are references into the +/// library. In particular, it seems that it is not possible to get +/// things to work on Win32 without this. Though it is unused, do not +/// remove it. +extern "C" int HexagonTargetMachineModule; +int HexagonTargetMachineModule = 0; + +extern "C" void LLVMInitializeHexagonTarget() { + // Register the target. 
+ RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget); +} + +static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) { + return new VLIWMachineScheduler(C, make_unique<ConvergingVLIWScheduler>()); +} + +static MachineSchedRegistry +SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", + createVLIWMachineSched); + +namespace llvm { + FunctionPass *createHexagonBitSimplify(); + FunctionPass *createHexagonCallFrameInformation(); + FunctionPass *createHexagonCFGOptimizer(); + FunctionPass *createHexagonCommonGEP(); + FunctionPass *createHexagonCopyToCombine(); + FunctionPass *createHexagonEarlyIfConversion(); + FunctionPass *createHexagonExpandCondsets(); + FunctionPass *createHexagonExpandPredSpillCode(); + FunctionPass *createHexagonFixupHwLoops(); + FunctionPass *createHexagonGenExtract(); + FunctionPass *createHexagonGenInsert(); + FunctionPass *createHexagonGenMux(); + FunctionPass *createHexagonGenPredicate(); + FunctionPass *createHexagonHardwareLoops(); + FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, + CodeGenOpt::Level OptLevel); + FunctionPass *createHexagonLoopRescheduling(); + FunctionPass *createHexagonNewValueJump(); + FunctionPass *createHexagonOptimizeSZextends(); + FunctionPass *createHexagonPacketizer(); + FunctionPass *createHexagonPeephole(); + FunctionPass *createHexagonSplitConst32AndConst64(); + FunctionPass *createHexagonSplitDoubleRegs(); + FunctionPass *createHexagonStoreWidening(); +} // end namespace llvm; + +/// HexagonTargetMachine ctor - Create an ILP32 architecture model. +/// + +/// Hexagon_TODO: Do I need an aggregate alignment? +/// +HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, "e-m:e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-" + "i1:8:8-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-" + "n16:32", TT, CPU, FS, Options, RM, CM, OL), + TLOF(make_unique<HexagonTargetObjectFile>()) { + initAsmInfo(); +} + +const HexagonSubtarget * +HexagonTargetMachine::getSubtargetImpl(const Function &F) const { + AttributeSet FnAttrs = F.getAttributes(); + Attribute CPUAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu"); + Attribute FSAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features"); + + std::string CPU = !CPUAttr.hasAttribute(Attribute::None) + ? CPUAttr.getValueAsString().str() + : TargetCPU; + std::string FS = !FSAttr.hasAttribute(Attribute::None) + ? FSAttr.getValueAsString().str() + : TargetFS; + + auto &I = SubtargetMap[CPU + FS]; + if (!I) { + // This needs to be done before we create a new subtarget since any + // creation will depend on the TM and the code generation flags on the + // function that reside in TargetOptions. + resetTargetOptions(F); + I = llvm::make_unique<HexagonSubtarget>(TargetTriple, CPU, FS, *this); + } + return I.get(); +} + +TargetIRAnalysis HexagonTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](const Function &F) { + return TargetTransformInfo(HexagonTTIImpl(this, F)); + }); +} + + +HexagonTargetMachine::~HexagonTargetMachine() {} + +namespace { +/// Hexagon Code Generator Pass Configuration Options. 
+class HexagonPassConfig : public TargetPassConfig { +public: + HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) { + bool NoOpt = (TM->getOptLevel() == CodeGenOpt::None); + if (!NoOpt) { + if (EnableExpandCondsets) { + Pass *Exp = createHexagonExpandCondsets(); + insertPass(&RegisterCoalescerID, IdentifyingPassPtr(Exp)); + } + } + } + + HexagonTargetMachine &getHexagonTargetMachine() const { + return getTM<HexagonTargetMachine>(); + } + + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override { + return createVLIWMachineSched(C); + } + + void addIRPasses() override; + bool addInstSelector() override; + void addPreRegAlloc() override; + void addPostRegAlloc() override; + void addPreSched2() override; + void addPreEmitPass() override; +}; +} // namespace + +TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { + return new HexagonPassConfig(this, PM); +} + +void HexagonPassConfig::addIRPasses() { + TargetPassConfig::addIRPasses(); + bool NoOpt = (getOptLevel() == CodeGenOpt::None); + + addPass(createAtomicExpandPass(TM)); + if (!NoOpt) { + if (EnableCommGEP) + addPass(createHexagonCommonGEP()); + // Replace certain combinations of shifts and ands with extracts. + if (EnableGenExtract) + addPass(createHexagonGenExtract()); + } +} + +bool HexagonPassConfig::addInstSelector() { + HexagonTargetMachine &TM = getHexagonTargetMachine(); + bool NoOpt = (getOptLevel() == CodeGenOpt::None); + + if (!NoOpt) + addPass(createHexagonOptimizeSZextends()); + + addPass(createHexagonISelDag(TM, getOptLevel())); + + if (!NoOpt) { + // Create logical operations on predicate registers. + if (EnableGenPred) + addPass(createHexagonGenPredicate(), false); + // Rotate loops to expose bit-simplification opportunities. + if (EnableLoopResched) + addPass(createHexagonLoopRescheduling(), false); + // Split double registers. + if (!DisableHSDR) + addPass(createHexagonSplitDoubleRegs()); + // Bit simplification. + if (EnableBitSimplify) + addPass(createHexagonBitSimplify(), false); + addPass(createHexagonPeephole()); + printAndVerify("After hexagon peephole pass"); + if (EnableGenInsert) + addPass(createHexagonGenInsert(), false); + if (EnableEarlyIf) + addPass(createHexagonEarlyIfConversion(), false); + } + + return false; +} + +void HexagonPassConfig::addPreRegAlloc() { + if (getOptLevel() != CodeGenOpt::None) { + if (!DisableStoreWidening) + addPass(createHexagonStoreWidening(), false); + if (!DisableHardwareLoops) + addPass(createHexagonHardwareLoops(), false); + } +} + +void HexagonPassConfig::addPostRegAlloc() { + if (getOptLevel() != CodeGenOpt::None) + if (!DisableHexagonCFGOpt) + addPass(createHexagonCFGOptimizer(), false); +} + +void HexagonPassConfig::addPreSched2() { + addPass(createHexagonCopyToCombine(), false); + if (getOptLevel() != CodeGenOpt::None) + addPass(&IfConverterID, false); + addPass(createHexagonSplitConst32AndConst64()); +} + +void HexagonPassConfig::addPreEmitPass() { + bool NoOpt = (getOptLevel() == CodeGenOpt::None); + + if (!NoOpt) + addPass(createHexagonNewValueJump(), false); + + // Expand Spill code for predicate registers. + addPass(createHexagonExpandPredSpillCode(), false); + + // Create Packets. + if (!NoOpt) { + if (!DisableHardwareLoops) + addPass(createHexagonFixupHwLoops(), false); + // Generate MUX from pairs of conditional transfers. 
+ if (EnableGenMux) + addPass(createHexagonGenMux(), false); + + addPass(createHexagonPacketizer(), false); + } + + // Add CFI instructions if necessary. + addPass(createHexagonCallFrameInformation(), false); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h new file mode 100644 index 0000000..968814b --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h @@ -0,0 +1,50 @@ +//=-- HexagonTargetMachine.h - Define TargetMachine for Hexagon ---*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Hexagon specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETMACHINE_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETMACHINE_H + +#include "HexagonInstrInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetObjectFile.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class Module; + +class HexagonTargetMachine : public LLVMTargetMachine { + std::unique_ptr<TargetLoweringObjectFile> TLOF; + mutable StringMap<std::unique_ptr<HexagonSubtarget>> SubtargetMap; + +public: + HexagonTargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + ~HexagonTargetMachine() override; + const HexagonSubtarget *getSubtargetImpl(const Function &F) const override; + + static unsigned getModuleMatchQuality(const Module &M); + + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + TargetIRAnalysis getTargetIRAnalysis() override; + + HexagonTargetObjectFile *getObjFileLowering() const override { + return static_cast<HexagonTargetObjectFile*>(TLOF.get()); + } +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp new file mode 100644 index 0000000..ccca620 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -0,0 +1,98 @@ +//===-- HexagonTargetObjectFile.cpp - Hexagon asm properties --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the HexagonTargetAsmInfo properties. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetObjectFile.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ELF.h" + +using namespace llvm; + +static cl::opt<int> SmallDataThreshold("hexagon-small-data-threshold", + cl::init(8), cl::Hidden, + cl::desc("The maximum size of an object in the sdata section")); + +void HexagonTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); + + SmallDataSection = getContext().getELFSection( + ".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); + SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS, + ELF::SHF_WRITE | ELF::SHF_ALLOC); +} + +// sdata/sbss support taken largely from the MIPS Backend. +static bool IsInSmallSection(uint64_t Size) { + return Size > 0 && Size <= (uint64_t)SmallDataThreshold; +} + +bool HexagonTargetObjectFile::IsSmallDataEnabled () const { + return SmallDataThreshold > 0; +} + +/// IsGlobalInSmallSection - Return true if this global value should be +/// placed into small data/bss section. +bool HexagonTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM) const { + // If the primary definition of this global value is outside the current + // translation unit or the global value is available for inspection but not + // emission, then do nothing. + if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) + return false; + + // Otherwise, Check if GV should be in sdata/sbss, when normally it would end + // up in getKindForGlobal(GV, TM). + return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM)); +} + +/// IsGlobalInSmallSection - Return true if this global value should be +/// placed into small data/bss section. +bool HexagonTargetObjectFile:: +IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, + SectionKind Kind) const { + // Only global variables, not functions. + const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV); + if (!GVA) + return false; + + if (Kind.isBSS() || Kind.isData() || Kind.isCommon()) { + Type *Ty = GV->getType()->getElementType(); + return IsInSmallSection( + GV->getParent()->getDataLayout().getTypeAllocSize(Ty)); + } + + return false; +} + +MCSection * +HexagonTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + + // Handle Small Section classification here. + if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind)) + return SmallBSSSection; + if (Kind.isData() && IsGlobalInSmallSection(GV, TM, Kind)) + return SmallDataSection; + + // Otherwise, we work the same as ELF. 
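+  // Illustrative sizing example, assuming the default threshold of 8 from
+  // -hexagon-small-data-threshold above: a 4-byte "int" global lands in
+  // .sbss (or .sdata if initialized), while a 16-byte array exceeds the
+  // threshold and falls through to the ordinary ELF selection below.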
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h new file mode 100644 index 0000000..da0eeeb --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h @@ -0,0 +1,41 @@ +//===-- HexagonTargetAsmInfo.h - Hexagon asm properties --------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/MC/MCSectionELF.h" + +namespace llvm { + + class HexagonTargetObjectFile : public TargetLoweringObjectFileELF { + MCSectionELF *SmallDataSection; + MCSectionELF *SmallBSSSection; + + public: + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + + /// IsGlobalInSmallSection - Return true if this global address should be + /// placed into small data/bss section. + bool IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM, + SectionKind Kind) const; + bool IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM) const; + + bool IsSmallDataEnabled () const; + MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const override; + }; + +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h new file mode 100644 index 0000000..e19c404 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h @@ -0,0 +1,31 @@ +//===-- HexagonTargetStreamer.h - Hexagon Target Streamer ------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONTARGETSTREAMER_H +#define HEXAGONTARGETSTREAMER_H + +#include "llvm/MC/MCStreamer.h" + +namespace llvm { +class HexagonTargetStreamer : public MCTargetStreamer { +public: + HexagonTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0){}; + virtual void emitFAlign(unsigned Size, unsigned MaxBytesToEmit){}; + virtual void EmitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, + unsigned AccessGranularity){}; + virtual void EmitLocalCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlign, + unsigned AccessGranularity){}; +}; +} + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp new file mode 100644 index 0000000..a05443e --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -0,0 +1,38 @@ +//===-- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// Hexagon target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetTransformInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagontti" + +TargetTransformInfo::PopcntSupportKind +HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const { + // Return Fast Hardware support as every input < 64 bits will be promoted + // to 64 bits. + return TargetTransformInfo::PSK_FastHardware; +} + +// The Hexagon target can unroll loops with run-time trip counts. +void HexagonTTIImpl::getUnrollingPreferences(Loop *L, + TTI::UnrollingPreferences &UP) { + UP.Runtime = UP.Partial = true; +} + +unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const { + return vector ? 0 : 32; +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h new file mode 100644 index 0000000..71ae17a --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -0,0 +1,70 @@ +//===-- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// Hexagon target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H + +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> { + typedef BasicTTIImplBase<HexagonTTIImpl> BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const HexagonSubtarget *ST; + const HexagonTargetLowering *TLI; + + const HexagonSubtarget *getST() const { return ST; } + const HexagonTargetLowering *getTLI() const { return TLI; } + +public: + explicit HexagonTTIImpl(const HexagonTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + // Provide value semantics. MSVC requires that we spell all of these out. + HexagonTTIImpl(const HexagonTTIImpl &Arg) + : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} + HexagonTTIImpl(HexagonTTIImpl &&Arg) + : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} + + /// \name Scalar TTI Implementations + /// @{ + + TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; + + // The Hexagon target can unroll loops with run-time trip counts. 
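+  // As implemented in HexagonTargetTransformInfo.cpp above, this enables
+  // both runtime and partial unrolling (UP.Runtime = UP.Partial = true).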
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + unsigned getNumberOfRegisters(bool vector) const; + + /// @} +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp new file mode 100644 index 0000000..8185054 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -0,0 +1,1601 @@ +//===----- HexagonPacketizer.cpp - vliw packetizer ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a simple VLIW packetizer using DFA. The packetizer works on +// machine basic blocks. For each instruction I in BB, the packetizer consults +// the DFA to see if machine resources are available to execute I. If so, the +// packetizer checks if I depends on any instruction J in the current packet. +// If no dependency is found, I is added to current packet and machine resource +// is marked as taken. If any dependency is found, a target API call is made to +// prune the dependence. +// +//===----------------------------------------------------------------------===// +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonVLIWPacketizer.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include <map> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "packets" + +static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon packetizer pass")); + +static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles", + cl::ZeroOrMore, cl::Hidden, cl::init(true), + cl::desc("Allow non-solo packetization of volatile memory references")); + +static cl::opt<bool> EnableGenAllInsnClass("enable-gen-insn", cl::init(false), + cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC")); + +static cl::opt<bool> DisableVecDblNVStores("disable-vecdbl-nv-stores", + cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Disable vector double new-value-stores")); + +extern cl::opt<bool> ScheduleInlineAsm; + +namespace llvm { + FunctionPass *createHexagonPacketizer(); + void initializeHexagonPacketizerPass(PassRegistry&); +} + + +namespace { + class HexagonPacketizer : public MachineFunctionPass { + public: + static char ID; + HexagonPacketizer() : MachineFunctionPass(ID) { + initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + const char *getPassName() const override { + 
return "Hexagon Packetizer"; + } + bool runOnMachineFunction(MachineFunction &Fn) override; + + private: + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + }; + + char HexagonPacketizer::ID = 0; +} + +INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer", + false, false) + + +HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, + MachineLoopInfo &MLI, AliasAnalysis *AA, + const MachineBranchProbabilityInfo *MBPI) + : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) { + HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); +} + +// Check if FirstI modifies a register that SecondI reads. +static bool hasWriteToReadDep(const MachineInstr *FirstI, + const MachineInstr *SecondI, const TargetRegisterInfo *TRI) { + for (auto &MO : FirstI->operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned R = MO.getReg(); + if (SecondI->readsRegister(R, TRI)) + return true; + } + return false; +} + + +static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI, + MachineBasicBlock::iterator BundleIt, bool Before) { + MachineBasicBlock::instr_iterator InsertPt; + if (Before) + InsertPt = BundleIt.getInstrIterator(); + else + InsertPt = std::next(BundleIt).getInstrIterator(); + + MachineBasicBlock &B = *MI->getParent(); + // The instruction should at least be bundled with the preceding instruction + // (there will always be one, i.e. BUNDLE, if nothing else). + assert(MI->isBundledWithPred()); + if (MI->isBundledWithSucc()) { + MI->clearFlag(MachineInstr::BundledSucc); + MI->clearFlag(MachineInstr::BundledPred); + } else { + // If it's not bundled with the successor (i.e. it is the last one + // in the bundle), then we can simply unbundle it from the predecessor, + // which will take care of updating the predecessor's flag. + MI->unbundleFromPred(); + } + B.splice(InsertPt, &B, MI); + + // Get the size of the bundle without asserting. + MachineBasicBlock::const_instr_iterator I(BundleIt); + MachineBasicBlock::const_instr_iterator E = B.instr_end(); + unsigned Size = 0; + for (++I; I != E && I->isBundledWithPred(); ++I) + ++Size; + + // If there are still two or more instructions, then there is nothing + // else to be done. + if (Size > 1) + return BundleIt; + + // Otherwise, extract the single instruction out and delete the bundle. + MachineBasicBlock::iterator NextIt = std::next(BundleIt); + MachineInstr *SingleI = BundleIt->getNextNode(); + SingleI->unbundleFromPred(); + assert(!SingleI->isBundledWithSucc()); + BundleIt->eraseFromParent(); + return NextIt; +} + + +bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { + if (DisablePacketizer) + return false; + + HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + auto &MLI = getAnalysis<MachineLoopInfo>(); + auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + + if (EnableGenAllInsnClass) + HII->genAllInsnTimingClasses(MF); + + // Instantiate the packetizer. + HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI); + + // DFA state table should not be empty. 
+ assert(Packetizer.getResourceTracker() && "Empty DFA table!"); + + // + // Loop over all basic blocks and remove KILL pseudo-instructions + // These instructions confuse the dependence analysis. Consider: + // D0 = ... (Insn 0) + // R0 = KILL R0, D0 (Insn 1) + // R0 = ... (Insn 2) + // Here, Insn 1 will result in the dependence graph not emitting an output + // dependence between Insn 0 and Insn 2. This can lead to incorrect + // packetization + // + for (auto &MB : MF) { + auto End = MB.end(); + auto MI = MB.begin(); + while (MI != End) { + auto NextI = std::next(MI); + if (MI->isKill()) { + MB.erase(MI); + End = MB.end(); + } + MI = NextI; + } + } + + // Loop over all of the basic blocks. + for (auto &MB : MF) { + auto Begin = MB.begin(), End = MB.end(); + while (Begin != End) { + // First the first non-boundary starting from the end of the last + // scheduling region. + MachineBasicBlock::iterator RB = Begin; + while (RB != End && HII->isSchedulingBoundary(RB, &MB, MF)) + ++RB; + // First the first boundary starting from the beginning of the new + // region. + MachineBasicBlock::iterator RE = RB; + while (RE != End && !HII->isSchedulingBoundary(RE, &MB, MF)) + ++RE; + // Add the scheduling boundary if it's not block end. + if (RE != End) + ++RE; + // If RB == End, then RE == End. + if (RB != End) + Packetizer.PacketizeMIs(&MB, RB, RE); + + Begin = RE; + } + } + + Packetizer.unpacketizeSoloInstrs(MF); + return true; +} + + +// Reserve resources for a constant extender. Trigger an assertion if the +// reservation fails. +void HexagonPacketizerList::reserveResourcesForConstExt() { + if (!tryAllocateResourcesForConstExt(true)) + llvm_unreachable("Resources not available"); +} + +bool HexagonPacketizerList::canReserveResourcesForConstExt() { + return tryAllocateResourcesForConstExt(false); +} + +// Allocate resources (i.e. 4 bytes) for constant extender. If succeeded, +// return true, otherwise, return false. +bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) { + auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc()); + bool Avail = ResourceTracker->canReserveResources(ExtMI); + if (Reserve && Avail) + ResourceTracker->reserveResources(ExtMI); + MF.DeleteMachineInstr(ExtMI); + return Avail; +} + + +bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI, + SDep::Kind DepType, unsigned DepReg) { + // Check for LR dependence. + if (DepReg == HRI->getRARegister()) + return true; + + if (HII->isDeallocRet(MI)) + if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister()) + return true; + + // Check if this is a predicate dependence. + const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg); + if (RC == &Hexagon::PredRegsRegClass) + return true; + + // Assumes that the first operand of the CALLr is the function address. 
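+  // Illustrative sketch: for an indirect call such as "callr r2", a packet
+  // member that defines r2 creates a true data dependence on the call target,
+  // so that dependence is not ignored for packetization purposes.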
+ if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) { + MachineOperand MO = MI->getOperand(0); + if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) + return true; + } + + return false; +} + +static bool isRegDependence(const SDep::Kind DepType) { + return DepType == SDep::Data || DepType == SDep::Anti || + DepType == SDep::Output; +} + +static bool isDirectJump(const MachineInstr* MI) { + return MI->getOpcode() == Hexagon::J2_jump; +} + +static bool isSchedBarrier(const MachineInstr* MI) { + switch (MI->getOpcode()) { + case Hexagon::Y2_barrier: + return true; + } + return false; +} + +static bool isControlFlow(const MachineInstr* MI) { + return (MI->getDesc().isTerminator() || MI->getDesc().isCall()); +} + + +/// Returns true if the instruction modifies a callee-saved register. +static bool doesModifyCalleeSavedReg(const MachineInstr *MI, + const TargetRegisterInfo *TRI) { + const MachineFunction &MF = *MI->getParent()->getParent(); + for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) + if (MI->modifiesRegister(*CSR, TRI)) + return true; + return false; +} + +// TODO: MI->isIndirectBranch() and IsRegisterJump(MI) +// Returns true if an instruction can be promoted to .new predicate or +// new-value store. +bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) { + return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI); +} + +// Promote an instructiont to its .cur form. +// At this time, we have already made a call to canPromoteToDotCur and made +// sure that it can *indeed* be promoted. +bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI, + SDep::Kind DepType, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC) { + assert(DepType == SDep::Data); + int CurOpcode = HII->getDotCurOp(MI); + MI->setDesc(HII->get(CurOpcode)); + return true; +} + +void HexagonPacketizerList::cleanUpDotCur() { + MachineInstr *MI = NULL; + for (auto BI : CurrentPacketMIs) { + DEBUG(dbgs() << "Cleanup packet has "; BI->dump();); + if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) { + MI = BI; + continue; + } + if (MI) { + for (auto &MO : BI->operands()) + if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg()) + return; + } + } + if (!MI) + return; + // We did not find a use of the CUR, so de-cur it. + MI->setDesc(HII->get(Hexagon::V6_vL32b_ai)); + DEBUG(dbgs() << "Demoted CUR "; MI->dump();); +} + +// Check to see if an instruction can be dot cur. +bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI, + const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass *RC) { + if (!HII->isV60VectorInstruction(MI)) + return false; + if (!HII->isV60VectorInstruction(MII)) + return false; + + // Already a dot new instruction. + if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI)) + return false; + + if (!HII->mayBeCurLoad(MI)) + return false; + + // The "cur value" cannot come from inline asm. + if (PacketSU->getInstr()->isInlineAsm()) + return false; + + // Make sure candidate instruction uses cur. 
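+  // Illustrative sketch of the intended pairing (HVX syntax simplified):
+  //   { v0.cur = vmem(r0++#1)
+  //     v1 = vand(v0, v2) }
+  // The .cur load only helps if its destination (v0) is read by another
+  // instruction in the same packet, which is what is checked below.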
+ DEBUG(dbgs() << "Can we DOT Cur Vector MI\n"; + MI->dump(); + dbgs() << "in packet\n";); + MachineInstr *MJ = MII; + DEBUG(dbgs() << "Checking CUR against "; MJ->dump();); + unsigned DestReg = MI->getOperand(0).getReg(); + bool FoundMatch = false; + for (auto &MO : MJ->operands()) + if (MO.isReg() && MO.getReg() == DestReg) + FoundMatch = true; + if (!FoundMatch) + return false; + + // Check for existing uses of a vector register within the packet which + // would be affected by converting a vector load into .cur formt. + for (auto BI : CurrentPacketMIs) { + DEBUG(dbgs() << "packet has "; BI->dump();); + if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo())) + return false; + } + + DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump();); + // We can convert the opcode into a .cur. + return true; +} + +// Promote an instruction to its .new form. At this time, we have already +// made a call to canPromoteToDotNew and made sure that it can *indeed* be +// promoted. +bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI, + SDep::Kind DepType, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC) { + assert (DepType == SDep::Data); + int NewOpcode; + if (RC == &Hexagon::PredRegsRegClass) + NewOpcode = HII->getDotNewPredOp(MI, MBPI); + else + NewOpcode = HII->getDotNewOp(MI); + MI->setDesc(HII->get(NewOpcode)); + return true; +} + +bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) { + int NewOpcode = HII->getDotOldOp(MI->getOpcode()); + MI->setDesc(HII->get(NewOpcode)); + return true; +} + +enum PredicateKind { + PK_False, + PK_True, + PK_Unknown +}; + +/// Returns true if an instruction is predicated on p0 and false if it's +/// predicated on !p0. +static PredicateKind getPredicateSense(const MachineInstr *MI, + const HexagonInstrInfo *HII) { + if (!HII->isPredicated(MI)) + return PK_Unknown; + if (HII->isPredicatedTrue(MI)) + return PK_True; + return PK_False; +} + +static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI, + const HexagonInstrInfo *HII) { + assert(HII->isPostIncrement(MI) && "Not a post increment operation."); +#ifndef NDEBUG + // Post Increment means duplicates. Use dense map to find duplicates in the + // list. Caution: Densemap initializes with the minimum of 64 buckets, + // whereas there are at most 5 operands in the post increment. + DenseSet<unsigned> DefRegsSet; + for (auto &MO : MI->operands()) + if (MO.isReg() && MO.isDef()) + DefRegsSet.insert(MO.getReg()); + + for (auto &MO : MI->operands()) + if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg())) + return MO; +#else + if (MI->mayLoad()) { + const MachineOperand &Op1 = MI->getOperand(1); + // The 2nd operand is always the post increment operand in load. + assert(Op1.isReg() && "Post increment operand has be to a register."); + return Op1; + } + if (MI->getDesc().mayStore()) { + const MachineOperand &Op0 = MI->getOperand(0); + // The 1st operand is always the post increment operand in store. + assert(Op0.isReg() && "Post increment operand has be to a register."); + return Op0; + } +#endif + // we should never come here. + llvm_unreachable("mayLoad or mayStore not set for Post Increment operation"); +} + +// Get the value being stored. +static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) { + // value being stored is always the last operand. 
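+  // E.g. in "memw(r1+#0) = r2" the stored value r2 is the final operand; the
+  // same holds for post-increment forms such as "memw(r1++#4) = r2".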
+ return MI->getOperand(MI->getNumOperands()-1); +} + +static bool isLoadAbsSet(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::L4_loadrd_ap: + case Hexagon::L4_loadrb_ap: + case Hexagon::L4_loadrh_ap: + case Hexagon::L4_loadrub_ap: + case Hexagon::L4_loadruh_ap: + case Hexagon::L4_loadri_ap: + return true; + } + return false; +} + +static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) { + assert(isLoadAbsSet(MI)); + return MI->getOperand(1); +} + + +// Can be new value store? +// Following restrictions are to be respected in convert a store into +// a new value store. +// 1. If an instruction uses auto-increment, its address register cannot +// be a new-value register. Arch Spec 5.4.2.1 +// 2. If an instruction uses absolute-set addressing mode, its address +// register cannot be a new-value register. Arch Spec 5.4.2.1. +// 3. If an instruction produces a 64-bit result, its registers cannot be used +// as new-value registers. Arch Spec 5.4.2.2. +// 4. If the instruction that sets the new-value register is conditional, then +// the instruction that uses the new-value register must also be conditional, +// and both must always have their predicates evaluate identically. +// Arch Spec 5.4.2.3. +// 5. There is an implied restriction that a packet cannot have another store, +// if there is a new value store in the packet. Corollary: if there is +// already a store in a packet, there can not be a new value store. +// Arch Spec: 3.4.4.2 +bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, + const MachineInstr *PacketMI, unsigned DepReg) { + // Make sure we are looking at the store, that can be promoted. + if (!HII->mayBeNewStore(MI)) + return false; + + // Make sure there is dependency and can be new value'd. + const MachineOperand &Val = getStoreValueOperand(MI); + if (Val.isReg() && Val.getReg() != DepReg) + return false; + + const MCInstrDesc& MCID = PacketMI->getDesc(); + + // First operand is always the result. + const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF); + // Double regs can not feed into new value store: PRM section: 5.4.2.2. + if (PacketRC == &Hexagon::DoubleRegsRegClass) + return false; + + // New-value stores are of class NV (slot 0), dual stores require class ST + // in slot 0 (PRM 5.5). + for (auto I : CurrentPacketMIs) { + SUnit *PacketSU = MIToSUnit.find(I)->second; + if (PacketSU->getInstr()->mayStore()) + return false; + } + + // Make sure it's NOT the post increment register that we are going to + // new value. + if (HII->isPostIncrement(MI) && + getPostIncrementOperand(MI, HII).getReg() == DepReg) { + return false; + } + + if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() && + getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) { + // If source is post_inc, or absolute-set addressing, it can not feed + // into new value store + // r3 = memw(r2++#4) + // memw(r30 + #-1404) = r2.new -> can not be new value store + // arch spec section: 5.4.2.1. + return false; + } + + if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg) + return false; + + // If the source that feeds the store is predicated, new value store must + // also be predicated. + if (HII->isPredicated(PacketMI)) { + if (!HII->isPredicated(MI)) + return false; + + // Check to make sure that they both will have their predicates + // evaluate identically. 
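+  // Illustrative sketch: a producer/store pair such as
+  //   if (p0) r2 = add(r3, #4)
+  //   if (p0) memw(r1+#0) = r2.new
+  // satisfies the constraints below, whereas pairing it with "if (!p0) ..."
+  // or with a p1-predicated store would not.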
+ unsigned predRegNumSrc = 0; + unsigned predRegNumDst = 0; + const TargetRegisterClass* predRegClass = nullptr; + + // Get predicate register used in the source instruction. + for (auto &MO : PacketMI->operands()) { + if (!MO.isReg()) + continue; + predRegNumSrc = MO.getReg(); + predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc); + if (predRegClass == &Hexagon::PredRegsRegClass) + break; + } + assert((predRegClass == &Hexagon::PredRegsRegClass) && + "predicate register not found in a predicated PacketMI instruction"); + + // Get predicate register used in new-value store instruction. + for (auto &MO : MI->operands()) { + if (!MO.isReg()) + continue; + predRegNumDst = MO.getReg(); + predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst); + if (predRegClass == &Hexagon::PredRegsRegClass) + break; + } + assert((predRegClass == &Hexagon::PredRegsRegClass) && + "predicate register not found in a predicated MI instruction"); + + // New-value register producer and user (store) need to satisfy these + // constraints: + // 1) Both instructions should be predicated on the same register. + // 2) If producer of the new-value register is .new predicated then store + // should also be .new predicated and if producer is not .new predicated + // then store should not be .new predicated. + // 3) Both new-value register producer and user should have same predicate + // sense, i.e, either both should be negated or both should be non-negated. + if (predRegNumDst != predRegNumSrc || + HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) || + getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII)) + return false; + } + + // Make sure that other than the new-value register no other store instruction + // register has been modified in the same packet. Predicate registers can be + // modified by they should not be modified between the producer and the store + // instruction as it will make them both conditional on different values. + // We already know this to be true for all the instructions before and + // including PacketMI. Howerver, we need to perform the check for the + // remaining instructions in the packet. + + unsigned StartCheck = 0; + + for (auto I : CurrentPacketMIs) { + SUnit *TempSU = MIToSUnit.find(I)->second; + MachineInstr* TempMI = TempSU->getInstr(); + + // Following condition is true for all the instructions until PacketMI is + // reached (StartCheck is set to 0 before the for loop). + // StartCheck flag is 1 for all the instructions after PacketMI. + if (TempMI != PacketMI && !StartCheck) // Start processing only after + continue; // encountering PacketMI. + + StartCheck = 1; + if (TempMI == PacketMI) // We don't want to check PacketMI for dependence. + continue; + + for (auto &MO : MI->operands()) + if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI)) + return false; + } + + // Make sure that for non-POST_INC stores: + // 1. The only use of reg is DepReg and no other registers. + // This handles V4 base+index registers. + // The following store can not be dot new. + // Eg. r0 = add(r0, #3) + // memw(r1+r0<<#2) = r0 + if (!HII->isPostIncrement(MI)) { + for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.isReg() && MO.getReg() == DepReg) + return false; + } + } + + // If data definition is because of implicit definition of the register, + // do not newify the store. Eg. 
+ // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def> + // S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343] + for (auto &MO : PacketMI->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isImplicit()) + continue; + unsigned R = MO.getReg(); + if (R == DepReg || HRI->isSuperRegister(DepReg, R)) + return false; + } + + // Handle imp-use of super reg case. There is a target independent side + // change that should prevent this situation but I am handling it for + // just-in-case. For example, we cannot newify R2 in the following case: + // %R3<def> = A2_tfrsi 0; + // S2_storeri_io %R0<kill>, 0, %R2<kill>, %D1<imp-use,kill>; + for (auto &MO : MI->operands()) { + if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg) + return false; + } + + // Can be dot new store. + return true; +} + +// Can this MI to promoted to either new value store or new value jump. +bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI, + const SUnit *PacketSU, unsigned DepReg, + MachineBasicBlock::iterator &MII) { + if (!HII->mayBeNewStore(MI)) + return false; + + // Check to see the store can be new value'ed. + MachineInstr *PacketMI = PacketSU->getInstr(); + if (canPromoteToNewValueStore(MI, PacketMI, DepReg)) + return true; + + // Check to see the compare/jump can be new value'ed. + // This is done as a pass on its own. Don't need to check it here. + return false; +} + +static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) { + for (auto &MO : I->operands()) + if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit()) + return true; + return false; +} + +// Check to see if an instruction can be dot new +// There are three kinds. +// 1. dot new on predicate - V2/V3/V4 +// 2. dot new on stores NV/ST - V4 +// 3. dot new on jump NV/J - V4 -- This is generated in a pass. +bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI, + const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC) { + // Already a dot new instruction. + if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI)) + return false; + + if (!isNewifiable(MI)) + return false; + + const MachineInstr *PI = PacketSU->getInstr(); + + // The "new value" cannot come from inline asm. + if (PI->isInlineAsm()) + return false; + + // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no + // sense. + if (PI->isImplicitDef()) + return false; + + // If dependency is trough an implicitly defined register, we should not + // newify the use. + if (isImplicitDependency(PI, DepReg)) + return false; + + const MCInstrDesc& MCID = PI->getDesc(); + const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF); + if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass) + return false; + + // predicate .new + // bug 5670: until that is fixed + // TODO: MI->isIndirectBranch() and IsRegisterJump(MI) + if (RC == &Hexagon::PredRegsRegClass) + if (HII->isCondInst(MI) || MI->isReturn()) + return HII->predCanBeUsedAsDotNew(PI, DepReg); + + if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI)) + return false; + + // Create a dot new machine instruction to see if resources can be + // allocated. If not, bail out now. 
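+  // Illustrative sketch: e.g. promoting S2_storeri_io to its new-value form
+  // (S2_storerinew_io) only succeeds if an NV store resource (slot 0) is
+  // still free in the packet being built; the dummy instruction created
+  // below probes exactly that.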
+ int NewOpcode = HII->getDotNewOp(MI); + const MCInstrDesc &D = HII->get(NewOpcode); + MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc()); + bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI); + MF.DeleteMachineInstr(NewMI); + if (!ResourcesAvailable) + return false; + + // New Value Store only. New Value Jump generated as a separate pass. + if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII)) + return false; + + return true; +} + +// Go through the packet instructions and search for an anti dependency between +// them and DepReg from MI. Consider this case: +// Trying to add +// a) %R1<def> = TFRI_cdNotPt %P3, 2 +// to this packet: +// { +// b) %P0<def> = C2_or %P3<kill>, %P0<kill> +// c) %P3<def> = C2_tfrrp %R23 +// d) %R1<def> = C2_cmovenewit %P3, 4 +// } +// The P3 from a) and d) will be complements after +// a)'s P3 is converted to .new form +// Anti-dep between c) and b) is irrelevant for this case +bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI, + unsigned DepReg) { + SUnit *PacketSUDep = MIToSUnit.find(MI)->second; + + for (auto I : CurrentPacketMIs) { + // We only care for dependencies to predicated instructions + if (!HII->isPredicated(I)) + continue; + + // Scheduling Unit for current insn in the packet + SUnit *PacketSU = MIToSUnit.find(I)->second; + + // Look at dependencies between current members of the packet and + // predicate defining instruction MI. Make sure that dependency is + // on the exact register we care about. + if (PacketSU->isSucc(PacketSUDep)) { + for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { + auto &Dep = PacketSU->Succs[i]; + if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti && + Dep.getReg() == DepReg) + return true; + } + } + } + + return false; +} + + +/// Gets the predicate register of a predicated instruction. +static unsigned getPredicatedRegister(MachineInstr *MI, + const HexagonInstrInfo *QII) { + /// We use the following rule: The first predicate register that is a use is + /// the predicate register of a predicated instruction. + assert(QII->isPredicated(MI) && "Must be predicated instruction"); + + for (auto &Op : MI->operands()) { + if (Op.isReg() && Op.getReg() && Op.isUse() && + Hexagon::PredRegsRegClass.contains(Op.getReg())) + return Op.getReg(); + } + + llvm_unreachable("Unknown instruction operand layout"); + return 0; +} + +// Given two predicated instructions, this function detects whether +// the predicates are complements. +bool HexagonPacketizerList::arePredicatesComplements(MachineInstr *MI1, + MachineInstr *MI2) { + // If we don't know the predicate sense of the instructions bail out early, we + // need it later. + if (getPredicateSense(MI1, HII) == PK_Unknown || + getPredicateSense(MI2, HII) == PK_Unknown) + return false; + + // Scheduling unit for candidate. + SUnit *SU = MIToSUnit[MI1]; + + // One corner case deals with the following scenario: + // Trying to add + // a) %R24<def> = A2_tfrt %P0, %R25 + // to this packet: + // { + // b) %R25<def> = A2_tfrf %P0, %R24 + // c) %P0<def> = C2_cmpeqi %R26, 1 + // } + // + // On general check a) and b) are complements, but presence of c) will + // convert a) to .new form, and then it is not a complement. + // We attempt to detect it by analyzing existing dependencies in the packet. + + // Analyze relationships between all existing members of the packet. + // Look for Anti dependecy on the same predicate reg as used in the + // candidate. 
+ for (auto I : CurrentPacketMIs) { + // Scheduling Unit for current insn in the packet. + SUnit *PacketSU = MIToSUnit.find(I)->second; + + // If this instruction in the packet is succeeded by the candidate... + if (PacketSU->isSucc(SU)) { + for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { + auto Dep = PacketSU->Succs[i]; + // The corner case exist when there is true data dependency between + // candidate and one of current packet members, this dep is on + // predicate reg, and there already exist anti dep on the same pred in + // the packet. + if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data && + Hexagon::PredRegsRegClass.contains(Dep.getReg())) { + // Here I know that I is predicate setting instruction with true + // data dep to candidate on the register we care about - c) in the + // above example. Now I need to see if there is an anti dependency + // from c) to any other instruction in the same packet on the pred + // reg of interest. + if (restrictingDepExistInPacket(I, Dep.getReg())) + return false; + } + } + } + } + + // If the above case does not apply, check regular complement condition. + // Check that the predicate register is the same and that the predicate + // sense is different We also need to differentiate .old vs. .new: !p0 + // is not complementary to p0.new. + unsigned PReg1 = getPredicatedRegister(MI1, HII); + unsigned PReg2 = getPredicatedRegister(MI2, HII); + return PReg1 == PReg2 && + Hexagon::PredRegsRegClass.contains(PReg1) && + Hexagon::PredRegsRegClass.contains(PReg2) && + getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) && + HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2); +} + +// Initialize packetizer flags. +void HexagonPacketizerList::initPacketizerState() { + Dependence = false; + PromotedToDotNew = false; + GlueToNewValueJump = false; + GlueAllocframeStore = false; + FoundSequentialDependence = false; +} + +// Ignore bundling of pseudo instructions. +bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr *MI, + const MachineBasicBlock*) { + if (MI->isDebugValue()) + return true; + + if (MI->isCFIInstruction()) + return false; + + // We must print out inline assembly. + if (MI->isInlineAsm()) + return false; + + if (MI->isImplicitDef()) + return false; + + // We check if MI has any functional units mapped to it. If it doesn't, + // we ignore the instruction. + const MCInstrDesc& TID = MI->getDesc(); + auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass()); + unsigned FuncUnits = IS->getUnits(); + return !FuncUnits; +} + +bool HexagonPacketizerList::isSoloInstruction(const MachineInstr *MI) { + if (MI->isEHLabel() || MI->isCFIInstruction()) + return true; + + // Consider inline asm to not be a solo instruction by default. + // Inline asm will be put in a packet temporarily, but then it will be + // removed, and placed outside of the packet (before or after, depending + // on dependencies). This is to reduce the impact of inline asm as a + // "packet splitting" instruction. + if (MI->isInlineAsm() && !ScheduleInlineAsm) + return true; + + // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints: + // trap, pause, barrier, icinva, isync, and syncht are solo instructions. + // They must not be grouped with other instructions in a packet. + if (isSchedBarrier(MI)) + return true; + + if (HII->isSolo(MI)) + return true; + + if (MI->getOpcode() == Hexagon::A2_nop) + return true; + + return false; +} + + +// Quick check if instructions MI and MJ cannot coexist in the same packet. 
+// Limit the tests to be "one-way", e.g. "if MI->isBranch and MJ->isInlineAsm", +// but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm". +// For full test call this function twice: +// cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI) +// Doing the test only one way saves the amount of code in this function, +// since every test would need to be repeated with the MI and MJ reversed. +static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ, + const HexagonInstrInfo &HII) { + const MachineFunction *MF = MI->getParent()->getParent(); + if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() && + HII.isHVXMemWithAIndirect(MI, MJ)) + return true; + + // An inline asm cannot be together with a branch, because we may not be + // able to remove the asm out after packetizing (i.e. if the asm must be + // moved past the bundle). Similarly, two asms cannot be together to avoid + // complications when determining their relative order outside of a bundle. + if (MI->isInlineAsm()) + return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() || + MJ->isCall() || MJ->isTerminator(); + + // "False" really means that the quick check failed to determine if + // I and J cannot coexist. + return false; +} + + +// Full, symmetric check. +bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI, + const MachineInstr *MJ) { + return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII); +} + +void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) { + for (auto &B : MF) { + MachineBasicBlock::iterator BundleIt; + MachineBasicBlock::instr_iterator NextI; + for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) { + NextI = std::next(I); + MachineInstr *MI = &*I; + if (MI->isBundle()) + BundleIt = I; + if (!MI->isInsideBundle()) + continue; + + // Decide on where to insert the instruction that we are pulling out. + // Debug instructions always go before the bundle, but the placement of + // INLINE_ASM depends on potential dependencies. By default, try to + // put it before the bundle, but if the asm writes to a register that + // other instructions in the bundle read, then we need to place it + // after the bundle (to preserve the bundle semantics). + bool InsertBeforeBundle; + if (MI->isInlineAsm()) + InsertBeforeBundle = !hasWriteToReadDep(MI, BundleIt, HRI); + else if (MI->isDebugValue()) + InsertBeforeBundle = true; + else + continue; + + BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle); + } + } +} + +// Check if a given instruction is of class "system". +static bool isSystemInstr(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::Y2_barrier: + case Hexagon::Y2_dcfetchbo: + return true; + } + return false; +} + +bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I, + const MachineInstr *J) { + // The dependence graph may not include edges between dead definitions, + // so without extra checks, we could end up packetizing two instruction + // defining the same (dead) register. 
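+  // Illustrative sketch: two instructions that both leave a dead definition
+  // of the same register, e.g.
+  //   r0 = add(r1, #1)    // dead def of r0
+  //   r0 = add(r2, #1)    // dead def of r0
+  // must still not share a packet; the only exception below is USR.OVF, the
+  // sticky overflow bit, which several instructions may define in one packet.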
+ if (I->isCall() || J->isCall()) + return false; + if (HII->isPredicated(I) || HII->isPredicated(J)) + return false; + + BitVector DeadDefs(Hexagon::NUM_TARGET_REGS); + for (auto &MO : I->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isDead()) + continue; + DeadDefs[MO.getReg()] = true; + } + + for (auto &MO : J->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isDead()) + continue; + unsigned R = MO.getReg(); + if (R != Hexagon::USR_OVF && DeadDefs[R]) + return true; + } + return false; +} + +bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I, + const MachineInstr *J) { + // A save callee-save register function call can only be in a packet + // with instructions that don't write to the callee-save registers. + if ((HII->isSaveCalleeSavedRegsCall(I) && + doesModifyCalleeSavedReg(J, HRI)) || + (HII->isSaveCalleeSavedRegsCall(J) && + doesModifyCalleeSavedReg(I, HRI))) + return true; + + // Two control flow instructions cannot go in the same packet. + if (isControlFlow(I) && isControlFlow(J)) + return true; + + // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot + // contain a speculative indirect jump, + // a new-value compare jump or a dealloc_return. + auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool { + if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI)) + return true; + if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI)) + return true; + return false; + }; + + if (HII->isLoopN(I) && isBadForLoopN(J)) + return true; + if (HII->isLoopN(J) && isBadForLoopN(I)) + return true; + + // dealloc_return cannot appear in the same packet as a conditional or + // unconditional jump. + return HII->isDeallocRet(I) && + (J->isBranch() || J->isCall() || J->isBarrier()); +} + +bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I, + const MachineInstr *J) { + bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J); + bool StoreI = I->mayStore(), StoreJ = J->mayStore(); + if ((SysI && StoreJ) || (SysJ && StoreI)) + return true; + + if (StoreI && StoreJ) { + if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I)) + return true; + } else { + // A memop cannot be in the same packet with another memop or a store. + // Two stores can be together, but here I and J cannot both be stores. + bool MopStI = HII->isMemOp(I) || StoreI; + bool MopStJ = HII->isMemOp(J) || StoreJ; + if (MopStI && MopStJ) + return true; + } + + return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J)); +} + +// SUI is the current instruction that is out side of the current packet. +// SUJ is the current instruction inside the current packet against which that +// SUI will be packetized. +bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { + MachineInstr *I = SUI->getInstr(); + MachineInstr *J = SUJ->getInstr(); + assert(I && J && "Unable to packetize null instruction!"); + + // Clear IgnoreDepMIs when Packet starts. + if (CurrentPacketMIs.size() == 1) + IgnoreDepMIs.clear(); + + MachineBasicBlock::iterator II = I; + const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); + + // Solo instructions cannot go in the packet. + assert(!isSoloInstruction(I) && "Unexpected solo instr!"); + + if (cannotCoexist(I, J)) + return false; + + Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J); + if (Dependence) + return false; + + // V4 allows dual stores. It does not allow second store, if the first + // store is not in SLOT0. 
New value store, new value jump, dealloc_return + // and memop always take SLOT0. Arch spec 3.4.4.2. + Dependence = hasV4SpecificDependence(I, J); + if (Dependence) + return false; + + // If an instruction feeds new value jump, glue it. + MachineBasicBlock::iterator NextMII = I; + ++NextMII; + if (NextMII != I->getParent()->end() && HII->isNewValueJump(NextMII)) { + MachineInstr *NextMI = NextMII; + + bool secondRegMatch = false; + const MachineOperand &NOp0 = NextMI->getOperand(0); + const MachineOperand &NOp1 = NextMI->getOperand(1); + + if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg()) + secondRegMatch = true; + + for (auto I : CurrentPacketMIs) { + SUnit *PacketSU = MIToSUnit.find(I)->second; + MachineInstr *PI = PacketSU->getInstr(); + // NVJ can not be part of the dual jump - Arch Spec: section 7.8. + if (PI->isCall()) { + Dependence = true; + break; + } + // Validate: + // 1. Packet does not have a store in it. + // 2. If the first operand of the nvj is newified, and the second + // operand is also a reg, it (second reg) is not defined in + // the same packet. + // 3. If the second operand of the nvj is newified, (which means + // first operand is also a reg), first reg is not defined in + // the same packet. + if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() || + HII->isLoopN(PI)) { + Dependence = true; + break; + } + // Check #2/#3. + const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1; + if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) { + Dependence = true; + break; + } + } + + if (Dependence) + return false; + GlueToNewValueJump = true; + } + + // There no dependency between a prolog instruction and its successor. + if (!SUJ->isSucc(SUI)) + return true; + + for (unsigned i = 0; i < SUJ->Succs.size(); ++i) { + if (FoundSequentialDependence) + break; + + if (SUJ->Succs[i].getSUnit() != SUI) + continue; + + SDep::Kind DepType = SUJ->Succs[i].getKind(); + // For direct calls: + // Ignore register dependences for call instructions for packetization + // purposes except for those due to r31 and predicate registers. + // + // For indirect calls: + // Same as direct calls + check for true dependences to the register + // used in the indirect call. + // + // We completely ignore Order dependences for call instructions. + // + // For returns: + // Ignore register dependences for return instructions like jumpr, + // dealloc return unless we have dependencies on the explicit uses + // of the registers used by jumpr (like r31) or dealloc return + // (like r29 or r30). + // + // TODO: Currently, jumpr is handling only return of r31. So, the + // following logic (specificaly isCallDependent) is working fine. + // We need to enable jumpr for register other than r31 and then, + // we need to rework the last part, where it handles indirect call + // of that (isCallDependent) function. Bug 6216 is opened for this. + unsigned DepReg = 0; + const TargetRegisterClass *RC = nullptr; + if (DepType == SDep::Data) { + DepReg = SUJ->Succs[i].getReg(); + RC = HRI->getMinimalPhysRegClass(DepReg); + } + + if (I->isCall() || I->isReturn()) { + if (!isRegDependence(DepType)) + continue; + if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg())) + continue; + } + + if (DepType == SDep::Data) { + if (canPromoteToDotCur(J, SUJ, DepReg, II, RC)) + if (promoteToDotCur(J, DepType, II, RC)) + continue; + } + + // Data dpendence ok if we have load.cur. 
+ if (DepType == SDep::Data && HII->isDotCurInst(J)) { + if (HII->isV60VectorInstruction(I)) + continue; + } + + // For instructions that can be promoted to dot-new, try to promote. + if (DepType == SDep::Data) { + if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) { + if (promoteToDotNew(I, DepType, II, RC)) { + PromotedToDotNew = true; + continue; + } + } + if (HII->isNewValueJump(I)) + continue; + } + + // For predicated instructions, if the predicates are complements then + // there can be no dependence. + if (HII->isPredicated(I) && HII->isPredicated(J) && + arePredicatesComplements(I, J)) { + // Not always safe to do this translation. + // DAG Builder attempts to reduce dependence edges using transitive + // nature of dependencies. Here is an example: + // + // r0 = tfr_pt ... (1) + // r0 = tfr_pf ... (2) + // r0 = tfr_pt ... (3) + // + // There will be an output dependence between (1)->(2) and (2)->(3). + // However, there is no dependence edge between (1)->(3). This results + // in all 3 instructions going in the same packet. We ignore dependce + // only once to avoid this situation. + auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J); + if (Itr != IgnoreDepMIs.end()) { + Dependence = true; + return false; + } + IgnoreDepMIs.push_back(I); + continue; + } + + // Ignore Order dependences between unconditional direct branches + // and non-control-flow instructions. + if (isDirectJump(I) && !J->isBranch() && !J->isCall() && + DepType == SDep::Order) + continue; + + // Ignore all dependences for jumps except for true and output + // dependences. + if (I->isConditionalBranch() && DepType != SDep::Data && + DepType != SDep::Output) + continue; + + // Ignore output dependences due to superregs. We can write to two + // different subregisters of R1:0 for instance in the same cycle. + + // If neither I nor J defines DepReg, then this is a superfluous output + // dependence. The dependence must be of the form: + // R0 = ... + // R1 = ... + // and there is an output dependence between the two instructions with + // DepReg = D0. + // We want to ignore these dependences. Ideally, the dependence + // constructor should annotate such dependences. We can then avoid this + // relatively expensive check. + // + if (DepType == SDep::Output) { + // DepReg is the register that's responsible for the dependence. + unsigned DepReg = SUJ->Succs[i].getReg(); + + // Check if I and J really defines DepReg. + if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg)) + continue; + FoundSequentialDependence = true; + break; + } + + // For Order dependences: + // 1. On V4 or later, volatile loads/stores can be packetized together, + // unless other rules prevent is. + // 2. Store followed by a load is not allowed. + // 3. Store followed by a store is only valid on V4 or later. + // 4. Load followed by any memory operation is allowed. + if (DepType == SDep::Order) { + if (!PacketizeVolatiles) { + bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef(); + if (OrdRefs) { + FoundSequentialDependence = true; + break; + } + } + // J is first, I is second. + bool LoadJ = J->mayLoad(), StoreJ = J->mayStore(); + bool LoadI = I->mayLoad(), StoreI = I->mayStore(); + if (StoreJ) { + // Two stores are only allowed on V4+. Load following store is never + // allowed. + if (LoadI) { + FoundSequentialDependence = true; + break; + } + } else if (!LoadJ || (!LoadI && !StoreI)) { + // If J is neither load nor store, assume a dependency. + // If J is a load, but I is neither, also assume a dependency. 
+ FoundSequentialDependence = true; + break; + } + // Store followed by store: not OK on V2. + // Store followed by load: not OK on all. + // Load followed by store: OK on all. + // Load followed by load: OK on all. + continue; + } + + // For V4, special case ALLOCFRAME. Even though there is dependency + // between ALLOCFRAME and subsequent store, allow it to be packetized + // in a same packet. This implies that the store is using the caller's + // SP. Hence, offset needs to be updated accordingly. + if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) { + unsigned Opc = I->getOpcode(); + switch (Opc) { + case Hexagon::S2_storerd_io: + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerb_io: + if (I->getOperand(0).getReg() == HRI->getStackRegister()) { + int64_t Imm = I->getOperand(1).getImm(); + int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE); + if (HII->isValidOffset(Opc, NewOff)) { + GlueAllocframeStore = true; + // Since this store is to be glued with allocframe in the same + // packet, it will use SP of the previous stack frame, i.e. + // caller's SP. Therefore, we need to recalculate offset + // according to this change. + I->getOperand(1).setImm(NewOff); + continue; + } + } + default: + break; + } + } + + // Skip over anti-dependences. Two instructions that are anti-dependent + // can share a packet. + if (DepType != SDep::Anti) { + FoundSequentialDependence = true; + break; + } + } + + if (FoundSequentialDependence) { + Dependence = true; + return false; + } + + return true; +} + +bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { + MachineInstr *I = SUI->getInstr(); + MachineInstr *J = SUJ->getInstr(); + assert(I && J && "Unable to packetize null instruction!"); + + if (cannotCoexist(I, J)) + return false; + + if (!Dependence) + return true; + + // Check if the instruction was promoted to a dot-new. If so, demote it + // back into a dot-old. + if (PromotedToDotNew) + demoteToDotOld(I); + + cleanUpDotCur(); + // Check if the instruction (must be a store) was glued with an allocframe + // instruction. If so, restore its offset to its original value, i.e. use + // current SP instead of caller's SP. + if (GlueAllocframeStore) { + unsigned FrameSize = MF.getFrameInfo()->getStackSize(); + MachineOperand &MOff = I->getOperand(1); + MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE); + } + return false; +} + + +MachineBasicBlock::iterator +HexagonPacketizerList::addToPacket(MachineInstr *MI) { + MachineBasicBlock::iterator MII = MI; + MachineBasicBlock *MBB = MI->getParent(); + if (MI->isImplicitDef()) { + unsigned R = MI->getOperand(0).getReg(); + if (Hexagon::IntRegsRegClass.contains(R)) { + MCSuperRegIterator S(R, HRI, false); + MI->addOperand(MachineOperand::CreateReg(*S, true, true)); + } + return MII; + } + assert(ResourceTracker->canReserveResources(MI)); + + bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI); + bool Good = true; + + if (GlueToNewValueJump) { + MachineInstr *NvjMI = ++MII; + // We need to put both instructions in the same packet: MI and NvjMI. + // Either of them can require a constant extender. Try to add both to + // the current packet, and if that fails, end the packet and start a + // new one. 
+ ResourceTracker->reserveResources(MI); + if (ExtMI) + Good = tryAllocateResourcesForConstExt(true); + + bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI); + if (Good) { + if (ResourceTracker->canReserveResources(NvjMI)) + ResourceTracker->reserveResources(NvjMI); + else + Good = false; + } + if (Good && ExtNvjMI) + Good = tryAllocateResourcesForConstExt(true); + + if (!Good) { + endPacket(MBB, MI); + assert(ResourceTracker->canReserveResources(MI)); + ResourceTracker->reserveResources(MI); + if (ExtMI) { + assert(canReserveResourcesForConstExt()); + tryAllocateResourcesForConstExt(true); + } + assert(ResourceTracker->canReserveResources(NvjMI)); + ResourceTracker->reserveResources(NvjMI); + if (ExtNvjMI) { + assert(canReserveResourcesForConstExt()); + reserveResourcesForConstExt(); + } + } + CurrentPacketMIs.push_back(MI); + CurrentPacketMIs.push_back(NvjMI); + return MII; + } + + ResourceTracker->reserveResources(MI); + if (ExtMI && !tryAllocateResourcesForConstExt(true)) { + endPacket(MBB, MI); + if (PromotedToDotNew) + demoteToDotOld(MI); + ResourceTracker->reserveResources(MI); + reserveResourcesForConstExt(); + } + + CurrentPacketMIs.push_back(MI); + return MII; +} + +void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, + MachineInstr *MI) { + OldPacketMIs = CurrentPacketMIs; + VLIWPacketizerList::endPacket(MBB, MI); +} + +bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr *MI) { + return !producesStall(MI); +} + + +// Return true when ConsMI uses a register defined by ProdMI. +static bool isDependent(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) { + if (!ProdMI->getOperand(0).isReg()) + return false; + unsigned DstReg = ProdMI->getOperand(0).getReg(); + + for (auto &Op : ConsMI->operands()) + if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg) + // The MIs depend on each other. + return true; + + return false; +} + +// V60 forward scheduling. +bool HexagonPacketizerList::producesStall(const MachineInstr *I) { + // Check whether the previous packet is in a different loop. If this is the + // case, there is little point in trying to avoid a stall because that would + // favor the rare case (loop entry) over the common case (loop iteration). + // + // TODO: We should really be able to check all the incoming edges if this is + // the first packet in a basic block, so we can avoid stalls from the loop + // backedge. + if (!OldPacketMIs.empty()) { + auto *OldBB = OldPacketMIs.front()->getParent(); + auto *ThisBB = I->getParent(); + if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB)) + return false; + } + + // Check for stall between two vector instructions. + if (HII->isV60VectorInstruction(I)) { + for (auto J : OldPacketMIs) { + if (!HII->isV60VectorInstruction(J)) + continue; + if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I)) + return true; + } + return false; + } + + // Check for stall between two scalar instructions. First, check that + // there is no definition of a use in the current packet, because it + // may be a candidate for .new. + for (auto J : CurrentPacketMIs) + if (!HII->isV60VectorInstruction(J) && isDependent(J, I)) + return false; + + // Check for stall between I and instructions in the previous packet. 
+ if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) { + for (auto J : OldPacketMIs) { + if (HII->isV60VectorInstruction(J)) + continue; + if (!HII->isLateInstrFeedsEarlyInstr(J, I)) + continue; + if (isDependent(J, I) && !HII->canExecuteInBundle(J, I)) + return true; + } + } + + return false; +} + + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createHexagonPacketizer() { + return new HexagonPacketizer(); +} + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h new file mode 100644 index 0000000..960cf6c --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h @@ -0,0 +1,114 @@ +#ifndef HEXAGONVLIWPACKETIZER_H +#define HEXAGONVLIWPACKETIZER_H + +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" + +namespace llvm { +class HexagonPacketizerList : public VLIWPacketizerList { + // Vector of instructions assigned to the packet that has just been created. + std::vector<MachineInstr*> OldPacketMIs; + + // Has the instruction been promoted to a dot-new instruction. + bool PromotedToDotNew; + + // Has the instruction been glued to allocframe. + bool GlueAllocframeStore; + + // Has the feeder instruction been glued to new value jump. + bool GlueToNewValueJump; + + // Check if there is a dependence between some instruction already in this + // packet and this instruction. + bool Dependence; + + // Only check for dependence if there are resources available to + // schedule this instruction. + bool FoundSequentialDependence; + + // Track MIs with ignored dependence. + std::vector<MachineInstr*> IgnoreDepMIs; + +protected: + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + const MachineLoopInfo *MLI; + +private: + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + +public: + // Ctor. + HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, + AliasAnalysis *AA, + const MachineBranchProbabilityInfo *MBPI); + + // initPacketizerState - initialize some internal flags. + void initPacketizerState() override; + + // ignorePseudoInstruction - Ignore bundling of pseudo instructions. + bool ignorePseudoInstruction(const MachineInstr *MI, + const MachineBasicBlock *MBB) override; + + // isSoloInstruction - return true if instruction MI can not be packetized + // with any other instruction, which means that MI itself is a packet. + bool isSoloInstruction(const MachineInstr *MI) override; + + // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ + // together. + bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override; + + // isLegalToPruneDependencies - Is it legal to prune dependece between SUI + // and SUJ. 
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override; + + MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override; + void endPacket(MachineBasicBlock *MBB, MachineInstr *MI) override; + bool shouldAddToPacket(const MachineInstr *MI) override; + + void unpacketizeSoloInstrs(MachineFunction &MF); + +protected: + bool isCallDependent(const MachineInstr* MI, SDep::Kind DepType, + unsigned DepReg); + bool promoteToDotCur(MachineInstr* MI, SDep::Kind DepType, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool canPromoteToDotCur(const MachineInstr* MI, const SUnit* PacketSU, + unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + void cleanUpDotCur(); + + bool promoteToDotNew(MachineInstr* MI, SDep::Kind DepType, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool canPromoteToDotNew(const MachineInstr* MI, const SUnit* PacketSU, + unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool canPromoteToNewValue(const MachineInstr* MI, const SUnit* PacketSU, + unsigned DepReg, MachineBasicBlock::iterator &MII); + bool canPromoteToNewValueStore(const MachineInstr* MI, + const MachineInstr* PacketMI, unsigned DepReg); + bool demoteToDotOld(MachineInstr* MI); + bool arePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2); + bool restrictingDepExistInPacket(MachineInstr*, unsigned); + bool isNewifiable(const MachineInstr *MI); + bool isCurifiable(MachineInstr* MI); + bool cannotCoexist(const MachineInstr *MI, const MachineInstr *MJ); + inline bool isPromotedToDotNew() const { + return PromotedToDotNew; + } + bool tryAllocateResourcesForConstExt(bool Reserve); + bool canReserveResourcesForConstExt(); + void reserveResourcesForConstExt(); + bool hasDeadDependence(const MachineInstr *I, const MachineInstr *J); + bool hasControlDependence(const MachineInstr *I, const MachineInstr *J); + bool hasV4SpecificDependence(const MachineInstr *I, const MachineInstr *J); + bool producesStall(const MachineInstr *MI); +}; +} // namespace llvm +#endif // HEXAGONVLIWPACKETIZER_H + diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp new file mode 100644 index 0000000..b73af82 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -0,0 +1,360 @@ +//===-- HexagonAsmBackend.cpp - Hexagon Assembler Backend -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonFixupKinds.h" +#include "HexagonMCTargetDesc.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; +using namespace Hexagon; + +#define DEBUG_TYPE "hexagon-asm-backend" + +namespace { + +class HexagonAsmBackend : public MCAsmBackend { + uint8_t OSABI; + StringRef CPU; + mutable uint64_t relaxedCnt; + std::unique_ptr <MCInstrInfo> MCII; + std::unique_ptr <MCInst *> RelaxTarget; + MCInst * Extender; +public: + HexagonAsmBackend(Target const &T, uint8_t OSABI, StringRef CPU) : + OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *), + Extender(nullptr) {} + + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + return createHexagonELFObjectWriter(OS, OSABI, CPU); + } + + void setExtender(MCContext &Context) const { + if (Extender == nullptr) + const_cast<HexagonAsmBackend *>(this)->Extender = new (Context) MCInst; + } + + MCInst *takeExtender() const { + assert(Extender != nullptr); + MCInst * Result = Extender; + const_cast<HexagonAsmBackend *>(this)->Extender = nullptr; + return Result; + } + + unsigned getNumFixupKinds() const override { + return Hexagon::NumTargetFixupKinds; + } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { + const static MCFixupKindInfo Infos[Hexagon::NumTargetFixupKinds] = { + // This table *must* be in same the order of fixup_* kinds in + // HexagonFixupKinds.h. 
+ // + // namei offset bits flags + {"fixup_Hexagon_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B15_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B7_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_LO16", 0, 32, 0}, + {"fixup_Hexagon_HI16", 0, 32, 0}, + {"fixup_Hexagon_32", 0, 32, 0}, + {"fixup_Hexagon_16", 0, 32, 0}, + {"fixup_Hexagon_8", 0, 32, 0}, + {"fixup_Hexagon_GPREL16_0", 0, 32, 0}, + {"fixup_Hexagon_GPREL16_1", 0, 32, 0}, + {"fixup_Hexagon_GPREL16_2", 0, 32, 0}, + {"fixup_Hexagon_GPREL16_3", 0, 32, 0}, + {"fixup_Hexagon_HL16", 0, 32, 0}, + {"fixup_Hexagon_B13_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B9_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B32_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_B22_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B15_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B13_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B9_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B7_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_16_X", 0, 32, 0}, + {"fixup_Hexagon_12_X", 0, 32, 0}, + {"fixup_Hexagon_11_X", 0, 32, 0}, + {"fixup_Hexagon_10_X", 0, 32, 0}, + {"fixup_Hexagon_9_X", 0, 32, 0}, + {"fixup_Hexagon_8_X", 0, 32, 0}, + {"fixup_Hexagon_7_X", 0, 32, 0}, + {"fixup_Hexagon_6_X", 0, 32, 0}, + {"fixup_Hexagon_32_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_COPY", 0, 32, 0}, + {"fixup_Hexagon_GLOB_DAT", 0, 32, 0}, + {"fixup_Hexagon_JMP_SLOT", 0, 32, 0}, + {"fixup_Hexagon_RELATIVE", 0, 32, 0}, + {"fixup_Hexagon_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_GOTREL_LO16", 0, 32, 0}, + {"fixup_Hexagon_GOTREL_HI16", 0, 32, 0}, + {"fixup_Hexagon_GOTREL_32", 0, 32, 0}, + {"fixup_Hexagon_GOT_LO16", 0, 32, 0}, + {"fixup_Hexagon_GOT_HI16", 0, 32, 0}, + {"fixup_Hexagon_GOT_32", 0, 32, 0}, + {"fixup_Hexagon_GOT_16", 0, 32, 0}, + {"fixup_Hexagon_DTPMOD_32", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_LO16", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_HI16", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_32", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_16", 0, 32, 0}, + {"fixup_Hexagon_GD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_LD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_GD_GOT_LO16", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_HI16", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_32", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_16", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_LO16", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_HI16", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_32", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_16", 0, 32, 0}, + {"fixup_Hexagon_IE_LO16", 0, 32, 0}, + {"fixup_Hexagon_IE_HI16", 0, 32, 0}, + {"fixup_Hexagon_IE_32", 0, 32, 0}, + {"fixup_Hexagon_IE_16", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_LO16", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_HI16", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_32", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_16", 0, 32, 0}, + {"fixup_Hexagon_TPREL_LO16", 0, 32, 0}, + {"fixup_Hexagon_TPREL_HI16", 0, 32, 0}, + {"fixup_Hexagon_TPREL_32", 0, 32, 0}, + {"fixup_Hexagon_TPREL_16", 0, 32, 0}, + {"fixup_Hexagon_6_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_GOTREL_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_GOTREL_16_X", 0, 32, 0}, + {"fixup_Hexagon_GOTREL_11_X", 0, 32, 0}, + {"fixup_Hexagon_GOT_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_GOT_16_X", 0, 32, 0}, + {"fixup_Hexagon_GOT_11_X", 0, 
32, 0}, + {"fixup_Hexagon_DTPREL_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_16_X", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_11_X", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_16_X", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_11_X", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_16_X", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_11_X", 0, 32, 0}, + {"fixup_Hexagon_IE_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_IE_16_X", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_16_X", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_11_X", 0, 32, 0}, + {"fixup_Hexagon_TPREL_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_TPREL_16_X", 0, 32, 0}, + {"fixup_Hexagon_TPREL_11_X", 0, 32, 0}}; + + if (Kind < FirstTargetFixupKind) { + return MCAsmBackend::getFixupKindInfo(Kind); + } + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; + } + + void applyFixup(MCFixup const & /*Fixup*/, char * /*Data*/, + unsigned /*DataSize*/, uint64_t /*Value*/, + bool /*IsPCRel*/) const override { + return; + } + + bool isInstRelaxable(MCInst const &HMI) const { + const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(*MCII, HMI); + bool Relaxable = false; + // Branches and loop-setup insns are handled as necessary by relaxation. + if (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeJ || + (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeNV && + MCID.isBranch()) || + (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeCR && + HMI.getOpcode() != Hexagon::C4_addipc)) + if (HexagonMCInstrInfo::isExtendable(*MCII, HMI)) + Relaxable = true; + + return Relaxable; + } + + /// MayNeedRelaxation - Check whether the given instruction may need + /// relaxation. + /// + /// \param Inst - The instruction to test. + bool mayNeedRelaxation(MCInst const &Inst) const override { + assert(HexagonMCInstrInfo::isBundle(Inst)); + bool PreviousIsExtender = false; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(Inst)) { + auto const &Inst = *I.getInst(); + if (!PreviousIsExtender) { + if (isInstRelaxable(Inst)) + return true; + } + PreviousIsExtender = HexagonMCInstrInfo::isImmext(Inst); + } + return false; + } + + /// fixupNeedsRelaxation - Target specific predicate for whether a given + /// fixup requires the associated instruction to be relaxed. + bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved, + uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + MCInst const &MCB = DF->getInst(); + assert(HexagonMCInstrInfo::isBundle(MCB)); + + *RelaxTarget = nullptr; + MCInst &MCI = const_cast<MCInst &>(HexagonMCInstrInfo::instruction( + MCB, Fixup.getOffset() / HEXAGON_INSTR_SIZE)); + // If we cannot resolve the fixup value, it requires relaxation. 
+ if (!Resolved) { + switch ((unsigned)Fixup.getKind()) { + case fixup_Hexagon_B22_PCREL: + // GetFixupCount assumes B22 won't relax + // Fallthrough + default: + return false; + break; + case fixup_Hexagon_B13_PCREL: + case fixup_Hexagon_B15_PCREL: + case fixup_Hexagon_B9_PCREL: + case fixup_Hexagon_B7_PCREL: { + if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) { + ++relaxedCnt; + *RelaxTarget = &MCI; + setExtender(Layout.getAssembler().getContext()); + return true; + } else { + return false; + } + break; + } + } + } + bool Relaxable = isInstRelaxable(MCI); + if (Relaxable == false) + return false; + + MCFixupKind Kind = Fixup.getKind(); + int64_t sValue = Value; + int64_t maxValue; + + switch ((unsigned)Kind) { + case fixup_Hexagon_B7_PCREL: + maxValue = 1 << 8; + break; + case fixup_Hexagon_B9_PCREL: + maxValue = 1 << 10; + break; + case fixup_Hexagon_B15_PCREL: + maxValue = 1 << 16; + break; + case fixup_Hexagon_B22_PCREL: + maxValue = 1 << 23; + break; + default: + maxValue = INT64_MAX; + break; + } + + bool isFarAway = -maxValue > sValue || sValue > maxValue - 1; + + if (isFarAway) { + if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) { + ++relaxedCnt; + *RelaxTarget = &MCI; + setExtender(Layout.getAssembler().getContext()); + return true; + } + } + + return false; + } + + /// Simple predicate for targets where !Resolved implies requiring relaxation + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced"); + } + + void relaxInstruction(MCInst const & Inst, + MCInst & Res) const override { + assert(HexagonMCInstrInfo::isBundle(Inst) && + "Hexagon relaxInstruction only works on bundles"); + + Res = HexagonMCInstrInfo::createBundle(); + // Copy the results into the bundle. + bool Update = false; + for (auto &I : HexagonMCInstrInfo::bundleInstructions(Inst)) { + MCInst &CrntHMI = const_cast<MCInst &>(*I.getInst()); + + // if immediate extender needed, add it in + if (*RelaxTarget == &CrntHMI) { + Update = true; + assert((HexagonMCInstrInfo::bundleSize(Res) < HEXAGON_PACKET_SIZE) && + "No room to insert extender for relaxation"); + + MCInst *HMIx = takeExtender(); + *HMIx = HexagonMCInstrInfo::deriveExtender( + *MCII, CrntHMI, + HexagonMCInstrInfo::getExtendableOperand(*MCII, CrntHMI)); + Res.addOperand(MCOperand::createInst(HMIx)); + *RelaxTarget = nullptr; + } + // now copy over the original instruction(the one we may have extended) + Res.addOperand(MCOperand::createInst(I.getInst())); + } + (void)Update; + assert(Update && "Didn't find relaxation target"); + } + + bool writeNopData(uint64_t Count, + MCObjectWriter * OW) const override { + static const uint32_t Nopcode = 0x7f000000, // Hard-coded NOP. + ParseIn = 0x00004000, // In packet parse-bits. + ParseEnd = 0x0000c000; // End of packet parse-bits. + + while(Count % HEXAGON_INSTR_SIZE) { + DEBUG(dbgs() << "Alignment not a multiple of the instruction size:" << + Count % HEXAGON_INSTR_SIZE << "/" << HEXAGON_INSTR_SIZE << "\n"); + --Count; + OW->write8(0); + } + + while(Count) { + Count -= HEXAGON_INSTR_SIZE; + // Close the packet whenever a multiple of the maximum packet size remains + uint32_t ParseBits = (Count % (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE))? 
+ ParseIn: ParseEnd; + OW->write32(Nopcode | ParseBits); + } + return true; + } +}; +} // end anonymous namespace + +namespace llvm { +MCAsmBackend *createHexagonAsmBackend(Target const &T, + MCRegisterInfo const & /*MRI*/, + const Triple &TT, StringRef CPU) { + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); + return new HexagonAsmBackend(T, OSABI, CPU); +} +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h new file mode 100644 index 0000000..47a6f86 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -0,0 +1,285 @@ +//===-- HexagonBaseInfo.h - Top level definitions for Hexagon --*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the Hexagon target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONBASEINFO_H +#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONBASEINFO_H + +#include "HexagonMCTargetDesc.h" +#include "llvm/Support/ErrorHandling.h" +#include <stdint.h> + +namespace llvm { + +/// HexagonII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace HexagonII { + // *** The code below must match HexagonInstrFormat*.td *** // + + // Insn types. + // *** Must match HexagonInstrFormat*.td *** + enum Type { + TypePSEUDO = 0, + TypeALU32 = 1, + TypeCR = 2, + TypeJR = 3, + TypeJ = 4, + TypeLD = 5, + TypeST = 6, + TypeSYSTEM = 7, + TypeXTYPE = 8, + TypeMEMOP = 9, + TypeNV = 10, + TypeDUPLEX = 11, + TypeCOMPOUND = 12, + TypeCVI_FIRST = 13, + TypeCVI_VA = TypeCVI_FIRST, + TypeCVI_VA_DV = 14, + TypeCVI_VX = 15, + TypeCVI_VX_DV = 16, + TypeCVI_VP = 17, + TypeCVI_VP_VS = 18, + TypeCVI_VS = 19, + TypeCVI_VINLANESAT= 20, + TypeCVI_VM_LD = 21, + TypeCVI_VM_TMP_LD = 22, + TypeCVI_VM_CUR_LD = 23, + TypeCVI_VM_VP_LDU = 24, + TypeCVI_VM_ST = 25, + TypeCVI_VM_NEW_ST = 26, + TypeCVI_VM_STU = 27, + TypeCVI_HIST = 28, + TypeCVI_LAST = TypeCVI_HIST, + TypePREFIX = 30, // Such as extenders. + TypeENDLOOP = 31 // Such as end of a HW loop. + }; + + enum SubTarget { + HasV2SubT = 0xf, + HasV2SubTOnly = 0x1, + NoV2SubT = 0x0, + HasV3SubT = 0xe, + HasV3SubTOnly = 0x2, + NoV3SubT = 0x1, + HasV4SubT = 0xc, + NoV4SubT = 0x3, + HasV5SubT = 0x8, + NoV5SubT = 0x7 + }; + + enum AddrMode { + NoAddrMode = 0, // No addressing mode + Absolute = 1, // Absolute addressing mode + AbsoluteSet = 2, // Absolute set addressing mode + BaseImmOffset = 3, // Indirect with offset + BaseLongOffset = 4, // Indirect with long offset + BaseRegOffset = 5, // Indirect with register offset + PostInc = 6 // Post increment addressing mode + }; + + // MemAccessSize is represented as 1+log2(N) where N is size in bits. + enum class MemAccessSize { + NoMemAccess = 0, // Not a memory acces instruction. + ByteAccess = 1, // Byte access instruction (memb). + HalfWordAccess = 2, // Half word access instruction (memh). + WordAccess = 3, // Word access instruction (memw). 
+ DoubleWordAccess = 4, // Double word access instruction (memd) + // 5, // We do not have a 16 byte vector access. + Vector64Access = 7, // 64 Byte vector access instruction (vmem). + Vector128Access = 8 // 128 Byte vector access instruction (vmem). + }; + + // MCInstrDesc TSFlags + // *** Must match HexagonInstrFormat*.td *** + enum { + // This 5-bit field describes the insn type. + TypePos = 0, + TypeMask = 0x1f, + + // Solo instructions. + SoloPos = 5, + SoloMask = 0x1, + // Packed only with A or X-type instructions. + SoloAXPos = 6, + SoloAXMask = 0x1, + // Only A-type instruction in first slot or nothing. + SoloAin1Pos = 7, + SoloAin1Mask = 0x1, + + // Predicated instructions. + PredicatedPos = 8, + PredicatedMask = 0x1, + PredicatedFalsePos = 9, + PredicatedFalseMask = 0x1, + PredicatedNewPos = 10, + PredicatedNewMask = 0x1, + PredicateLatePos = 11, + PredicateLateMask = 0x1, + + // New-Value consumer instructions. + NewValuePos = 12, + NewValueMask = 0x1, + // New-Value producer instructions. + hasNewValuePos = 13, + hasNewValueMask = 0x1, + // Which operand consumes or produces a new value. + NewValueOpPos = 14, + NewValueOpMask = 0x7, + // Stores that can become new-value stores. + mayNVStorePos = 17, + mayNVStoreMask = 0x1, + // New-value store instructions. + NVStorePos = 18, + NVStoreMask = 0x1, + // Loads that can become current-value loads. + mayCVLoadPos = 19, + mayCVLoadMask = 0x1, + // Current-value load instructions. + CVLoadPos = 20, + CVLoadMask = 0x1, + + // Extendable insns. + ExtendablePos = 21, + ExtendableMask = 0x1, + // Insns must be extended. + ExtendedPos = 22, + ExtendedMask = 0x1, + // Which operand may be extended. + ExtendableOpPos = 23, + ExtendableOpMask = 0x7, + // Signed or unsigned range. + ExtentSignedPos = 26, + ExtentSignedMask = 0x1, + // Number of bits of range before extending operand. + ExtentBitsPos = 27, + ExtentBitsMask = 0x1f, + // Alignment power-of-two before extending operand. + ExtentAlignPos = 32, + ExtentAlignMask = 0x3, + + // Valid subtargets + validSubTargetPos = 34, + validSubTargetMask = 0xf, + + // Addressing mode for load/store instructions. + AddrModePos = 40, + AddrModeMask = 0x7, + // Access size for load/store instructions. + MemAccessSizePos = 43, + MemAccesSizeMask = 0xf, + + // Branch predicted taken. + TakenPos = 47, + TakenMask = 0x1, + + // Floating-point instructions. + FPPos = 48, + FPMask = 0x1, + + // New-Value producer-2 instructions. + hasNewValuePos2 = 50, + hasNewValueMask2 = 0x1, + + // Which operand consumes or produces a new value. + NewValueOpPos2 = 51, + NewValueOpMask2 = 0x7, + + // Accumulator instructions. + AccumulatorPos = 54, + AccumulatorMask = 0x1, + + // Complex XU, prevent xu competition by prefering slot3 + PrefersSlot3Pos = 55, + PrefersSlot3Mask = 0x1, + }; + + // *** The code above must match HexagonInstrFormat*.td *** // + + // Hexagon specific MO operand flag mask. + enum HexagonMOTargetFlagVal { + //===------------------------------------------------------------------===// + // Hexagon Specific MachineOperand flags. + MO_NO_FLAG, + + HMOTF_ConstExtended = 1, + + /// MO_PCREL - On a symbol operand, indicates a PC-relative relocation + /// Used for computing a global address for PIC compilations + MO_PCREL, + + /// MO_GOT - Indicates a GOT-relative relocation + MO_GOT, + + // Low or high part of a symbol. + MO_LO16, MO_HI16, + + // Offset from the base of the SDA. + MO_GPREL + }; + + // Hexagon Sub-instruction classes. 
+ enum SubInstructionGroup { + HSIG_None = 0, + HSIG_L1, + HSIG_L2, + HSIG_S1, + HSIG_S2, + HSIG_A, + HSIG_Compound + }; + + // Hexagon Compound classes. + enum CompoundGroup { + HCG_None = 0, + HCG_A, + HCG_B, + HCG_C + }; + + enum InstParseBits { + INST_PARSE_MASK = 0x0000c000, + INST_PARSE_PACKET_END = 0x0000c000, + INST_PARSE_LOOP_END = 0x00008000, + INST_PARSE_NOT_END = 0x00004000, + INST_PARSE_DUPLEX = 0x00000000, + INST_PARSE_EXTENDER = 0x00000000 + }; + + enum InstIClassBits : unsigned { + INST_ICLASS_MASK = 0xf0000000, + INST_ICLASS_EXTENDER = 0x00000000, + INST_ICLASS_J_1 = 0x10000000, + INST_ICLASS_J_2 = 0x20000000, + INST_ICLASS_LD_ST_1 = 0x30000000, + INST_ICLASS_LD_ST_2 = 0x40000000, + INST_ICLASS_J_3 = 0x50000000, + INST_ICLASS_CR = 0x60000000, + INST_ICLASS_ALU32_1 = 0x70000000, + INST_ICLASS_XTYPE_1 = 0x80000000, + INST_ICLASS_LD = 0x90000000, + INST_ICLASS_ST = 0xa0000000, + INST_ICLASS_ALU32_2 = 0xb0000000, + INST_ICLASS_XTYPE_2 = 0xc0000000, + INST_ICLASS_XTYPE_3 = 0xd0000000, + INST_ICLASS_XTYPE_4 = 0xe0000000, + INST_ICLASS_ALU32_3 = 0xf0000000 + }; + +} // End namespace HexagonII. + +} // End namespace llvm. + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp new file mode 100644 index 0000000..da5d4d1 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -0,0 +1,251 @@ +//===-- HexagonELFObjectWriter.cpp - Hexagon Target Descriptions ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "MCTargetDesc/HexagonFixupKinds.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "hexagon-elf-writer" + +using namespace llvm; +using namespace Hexagon; + +namespace { + +class HexagonELFObjectWriter : public MCELFObjectTargetWriter { +private: + StringRef CPU; + +public: + HexagonELFObjectWriter(uint8_t OSABI, StringRef C); + + unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup, + bool IsPCRel) const override; +}; +} + +HexagonELFObjectWriter::HexagonELFObjectWriter(uint8_t OSABI, StringRef C) + : MCELFObjectTargetWriter(/*Is64bit*/ false, OSABI, ELF::EM_HEXAGON, + /*HasRelocationAddend*/ true), + CPU(C) {} + +unsigned HexagonELFObjectWriter::GetRelocType(MCValue const & /*Target*/, + MCFixup const &Fixup, + bool IsPCRel) const { + switch ((unsigned)Fixup.getKind()) { + default: + DEBUG(dbgs() << "unrecognized relocation " << Fixup.getKind() << "\n"); + llvm_unreachable("Unimplemented Fixup kind!"); + return ELF::R_HEX_NONE; + case FK_Data_4: + return (IsPCRel) ? 
ELF::R_HEX_32_PCREL : ELF::R_HEX_32; + case FK_PCRel_4: + return ELF::R_HEX_32_PCREL; + case FK_Data_2: + return ELF::R_HEX_16; + case FK_Data_1: + return ELF::R_HEX_8; + case fixup_Hexagon_B22_PCREL: + return ELF::R_HEX_B22_PCREL; + case fixup_Hexagon_B15_PCREL: + return ELF::R_HEX_B15_PCREL; + case fixup_Hexagon_B7_PCREL: + return ELF::R_HEX_B7_PCREL; + case fixup_Hexagon_LO16: + return ELF::R_HEX_LO16; + case fixup_Hexagon_HI16: + return ELF::R_HEX_HI16; + case fixup_Hexagon_32: + return ELF::R_HEX_32; + case fixup_Hexagon_16: + return ELF::R_HEX_16; + case fixup_Hexagon_8: + return ELF::R_HEX_8; + case fixup_Hexagon_GPREL16_0: + return ELF::R_HEX_GPREL16_0; + case fixup_Hexagon_GPREL16_1: + return ELF::R_HEX_GPREL16_1; + case fixup_Hexagon_GPREL16_2: + return ELF::R_HEX_GPREL16_2; + case fixup_Hexagon_GPREL16_3: + return ELF::R_HEX_GPREL16_3; + case fixup_Hexagon_HL16: + return ELF::R_HEX_HL16; + case fixup_Hexagon_B13_PCREL: + return ELF::R_HEX_B13_PCREL; + case fixup_Hexagon_B9_PCREL: + return ELF::R_HEX_B9_PCREL; + case fixup_Hexagon_B32_PCREL_X: + return ELF::R_HEX_B32_PCREL_X; + case fixup_Hexagon_32_6_X: + return ELF::R_HEX_32_6_X; + case fixup_Hexagon_B22_PCREL_X: + return ELF::R_HEX_B22_PCREL_X; + case fixup_Hexagon_B15_PCREL_X: + return ELF::R_HEX_B15_PCREL_X; + case fixup_Hexagon_B13_PCREL_X: + return ELF::R_HEX_B13_PCREL_X; + case fixup_Hexagon_B9_PCREL_X: + return ELF::R_HEX_B9_PCREL_X; + case fixup_Hexagon_B7_PCREL_X: + return ELF::R_HEX_B7_PCREL_X; + case fixup_Hexagon_16_X: + return ELF::R_HEX_16_X; + case fixup_Hexagon_12_X: + return ELF::R_HEX_12_X; + case fixup_Hexagon_11_X: + return ELF::R_HEX_11_X; + case fixup_Hexagon_10_X: + return ELF::R_HEX_10_X; + case fixup_Hexagon_9_X: + return ELF::R_HEX_9_X; + case fixup_Hexagon_8_X: + return ELF::R_HEX_8_X; + case fixup_Hexagon_7_X: + return ELF::R_HEX_7_X; + case fixup_Hexagon_6_X: + return ELF::R_HEX_6_X; + case fixup_Hexagon_32_PCREL: + return ELF::R_HEX_32_PCREL; + case fixup_Hexagon_COPY: + return ELF::R_HEX_COPY; + case fixup_Hexagon_GLOB_DAT: + return ELF::R_HEX_GLOB_DAT; + case fixup_Hexagon_JMP_SLOT: + return ELF::R_HEX_JMP_SLOT; + case fixup_Hexagon_RELATIVE: + return ELF::R_HEX_RELATIVE; + case fixup_Hexagon_PLT_B22_PCREL: + return ELF::R_HEX_PLT_B22_PCREL; + case fixup_Hexagon_GOTREL_LO16: + return ELF::R_HEX_GOTREL_LO16; + case fixup_Hexagon_GOTREL_HI16: + return ELF::R_HEX_GOTREL_HI16; + case fixup_Hexagon_GOTREL_32: + return ELF::R_HEX_GOTREL_32; + case fixup_Hexagon_GOT_LO16: + return ELF::R_HEX_GOT_LO16; + case fixup_Hexagon_GOT_HI16: + return ELF::R_HEX_GOT_HI16; + case fixup_Hexagon_GOT_32: + return ELF::R_HEX_GOT_32; + case fixup_Hexagon_GOT_16: + return ELF::R_HEX_GOT_16; + case fixup_Hexagon_DTPMOD_32: + return ELF::R_HEX_DTPMOD_32; + case fixup_Hexagon_DTPREL_LO16: + return ELF::R_HEX_DTPREL_LO16; + case fixup_Hexagon_DTPREL_HI16: + return ELF::R_HEX_DTPREL_HI16; + case fixup_Hexagon_DTPREL_32: + return ELF::R_HEX_DTPREL_32; + case fixup_Hexagon_DTPREL_16: + return ELF::R_HEX_DTPREL_16; + case fixup_Hexagon_GD_PLT_B22_PCREL: + return ELF::R_HEX_GD_PLT_B22_PCREL; + case fixup_Hexagon_LD_PLT_B22_PCREL: + return ELF::R_HEX_LD_PLT_B22_PCREL; + case fixup_Hexagon_GD_GOT_LO16: + return ELF::R_HEX_GD_GOT_LO16; + case fixup_Hexagon_GD_GOT_HI16: + return ELF::R_HEX_GD_GOT_HI16; + case fixup_Hexagon_GD_GOT_32: + return ELF::R_HEX_GD_GOT_32; + case fixup_Hexagon_GD_GOT_16: + return ELF::R_HEX_GD_GOT_16; + case fixup_Hexagon_LD_GOT_LO16: + return ELF::R_HEX_LD_GOT_LO16; + case fixup_Hexagon_LD_GOT_HI16: + return 
ELF::R_HEX_LD_GOT_HI16; + case fixup_Hexagon_LD_GOT_32: + return ELF::R_HEX_LD_GOT_32; + case fixup_Hexagon_LD_GOT_16: + return ELF::R_HEX_LD_GOT_16; + case fixup_Hexagon_IE_LO16: + return ELF::R_HEX_IE_LO16; + case fixup_Hexagon_IE_HI16: + return ELF::R_HEX_IE_HI16; + case fixup_Hexagon_IE_32: + return ELF::R_HEX_IE_32; + case fixup_Hexagon_IE_GOT_LO16: + return ELF::R_HEX_IE_GOT_LO16; + case fixup_Hexagon_IE_GOT_HI16: + return ELF::R_HEX_IE_GOT_HI16; + case fixup_Hexagon_IE_GOT_32: + return ELF::R_HEX_IE_GOT_32; + case fixup_Hexagon_IE_GOT_16: + return ELF::R_HEX_IE_GOT_16; + case fixup_Hexagon_TPREL_LO16: + return ELF::R_HEX_TPREL_LO16; + case fixup_Hexagon_TPREL_HI16: + return ELF::R_HEX_TPREL_HI16; + case fixup_Hexagon_TPREL_32: + return ELF::R_HEX_TPREL_32; + case fixup_Hexagon_TPREL_16: + return ELF::R_HEX_TPREL_16; + case fixup_Hexagon_6_PCREL_X: + return ELF::R_HEX_6_PCREL_X; + case fixup_Hexagon_GOTREL_32_6_X: + return ELF::R_HEX_GOTREL_32_6_X; + case fixup_Hexagon_GOTREL_16_X: + return ELF::R_HEX_GOTREL_16_X; + case fixup_Hexagon_GOTREL_11_X: + return ELF::R_HEX_GOTREL_11_X; + case fixup_Hexagon_GOT_32_6_X: + return ELF::R_HEX_GOT_32_6_X; + case fixup_Hexagon_GOT_16_X: + return ELF::R_HEX_GOT_16_X; + case fixup_Hexagon_GOT_11_X: + return ELF::R_HEX_GOT_11_X; + case fixup_Hexagon_DTPREL_32_6_X: + return ELF::R_HEX_DTPREL_32_6_X; + case fixup_Hexagon_DTPREL_16_X: + return ELF::R_HEX_DTPREL_16_X; + case fixup_Hexagon_DTPREL_11_X: + return ELF::R_HEX_DTPREL_11_X; + case fixup_Hexagon_GD_GOT_32_6_X: + return ELF::R_HEX_GD_GOT_32_6_X; + case fixup_Hexagon_GD_GOT_16_X: + return ELF::R_HEX_GD_GOT_16_X; + case fixup_Hexagon_GD_GOT_11_X: + return ELF::R_HEX_GD_GOT_11_X; + case fixup_Hexagon_LD_GOT_32_6_X: + return ELF::R_HEX_LD_GOT_32_6_X; + case fixup_Hexagon_LD_GOT_16_X: + return ELF::R_HEX_LD_GOT_16_X; + case fixup_Hexagon_LD_GOT_11_X: + return ELF::R_HEX_LD_GOT_11_X; + case fixup_Hexagon_IE_32_6_X: + return ELF::R_HEX_IE_32_6_X; + case fixup_Hexagon_IE_16_X: + return ELF::R_HEX_IE_16_X; + case fixup_Hexagon_IE_GOT_32_6_X: + return ELF::R_HEX_IE_GOT_32_6_X; + case fixup_Hexagon_IE_GOT_16_X: + return ELF::R_HEX_IE_GOT_16_X; + case fixup_Hexagon_IE_GOT_11_X: + return ELF::R_HEX_IE_GOT_11_X; + case fixup_Hexagon_TPREL_32_6_X: + return ELF::R_HEX_TPREL_32_6_X; + case fixup_Hexagon_TPREL_16_X: + return ELF::R_HEX_TPREL_16_X; + case fixup_Hexagon_TPREL_11_X: + return ELF::R_HEX_TPREL_11_X; + } +} + +MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, + StringRef CPU) { + MCELFObjectTargetWriter *MOTW = new HexagonELFObjectWriter(OSABI, CPU); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian*/ true); +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h new file mode 100644 index 0000000..4bbfbec --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h @@ -0,0 +1,137 @@ +//===-- HexagonFixupKinds.h - Hexagon Specific Fixup Entries --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_HEXAGON_HEXAGONFIXUPKINDS_H +#define LLVM_HEXAGON_HEXAGONFIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace Hexagon { +enum Fixups { + // Branch fixups for R_HEX_B{22,15,7}_PCREL. 
+ fixup_Hexagon_B22_PCREL = FirstTargetFixupKind, + fixup_Hexagon_B15_PCREL, + fixup_Hexagon_B7_PCREL, + fixup_Hexagon_LO16, + fixup_Hexagon_HI16, + fixup_Hexagon_32, + fixup_Hexagon_16, + fixup_Hexagon_8, + fixup_Hexagon_GPREL16_0, + fixup_Hexagon_GPREL16_1, + fixup_Hexagon_GPREL16_2, + fixup_Hexagon_GPREL16_3, + fixup_Hexagon_HL16, + fixup_Hexagon_B13_PCREL, + fixup_Hexagon_B9_PCREL, + fixup_Hexagon_B32_PCREL_X, + fixup_Hexagon_32_6_X, + fixup_Hexagon_B22_PCREL_X, + fixup_Hexagon_B15_PCREL_X, + fixup_Hexagon_B13_PCREL_X, + fixup_Hexagon_B9_PCREL_X, + fixup_Hexagon_B7_PCREL_X, + fixup_Hexagon_16_X, + fixup_Hexagon_12_X, + fixup_Hexagon_11_X, + fixup_Hexagon_10_X, + fixup_Hexagon_9_X, + fixup_Hexagon_8_X, + fixup_Hexagon_7_X, + fixup_Hexagon_6_X, + fixup_Hexagon_32_PCREL, + fixup_Hexagon_COPY, + fixup_Hexagon_GLOB_DAT, + fixup_Hexagon_JMP_SLOT, + fixup_Hexagon_RELATIVE, + fixup_Hexagon_PLT_B22_PCREL, + fixup_Hexagon_GOTREL_LO16, + fixup_Hexagon_GOTREL_HI16, + fixup_Hexagon_GOTREL_32, + fixup_Hexagon_GOT_LO16, + fixup_Hexagon_GOT_HI16, + fixup_Hexagon_GOT_32, + fixup_Hexagon_GOT_16, + fixup_Hexagon_DTPMOD_32, + fixup_Hexagon_DTPREL_LO16, + fixup_Hexagon_DTPREL_HI16, + fixup_Hexagon_DTPREL_32, + fixup_Hexagon_DTPREL_16, + fixup_Hexagon_GD_PLT_B22_PCREL, + fixup_Hexagon_LD_PLT_B22_PCREL, + fixup_Hexagon_GD_GOT_LO16, + fixup_Hexagon_GD_GOT_HI16, + fixup_Hexagon_GD_GOT_32, + fixup_Hexagon_GD_GOT_16, + fixup_Hexagon_LD_GOT_LO16, + fixup_Hexagon_LD_GOT_HI16, + fixup_Hexagon_LD_GOT_32, + fixup_Hexagon_LD_GOT_16, + fixup_Hexagon_IE_LO16, + fixup_Hexagon_IE_HI16, + fixup_Hexagon_IE_32, + fixup_Hexagon_IE_16, + fixup_Hexagon_IE_GOT_LO16, + fixup_Hexagon_IE_GOT_HI16, + fixup_Hexagon_IE_GOT_32, + fixup_Hexagon_IE_GOT_16, + fixup_Hexagon_TPREL_LO16, + fixup_Hexagon_TPREL_HI16, + fixup_Hexagon_TPREL_32, + fixup_Hexagon_TPREL_16, + fixup_Hexagon_6_PCREL_X, + fixup_Hexagon_GOTREL_32_6_X, + fixup_Hexagon_GOTREL_16_X, + fixup_Hexagon_GOTREL_11_X, + fixup_Hexagon_GOT_32_6_X, + fixup_Hexagon_GOT_16_X, + fixup_Hexagon_GOT_11_X, + fixup_Hexagon_DTPREL_32_6_X, + fixup_Hexagon_DTPREL_16_X, + fixup_Hexagon_DTPREL_11_X, + fixup_Hexagon_GD_GOT_32_6_X, + fixup_Hexagon_GD_GOT_16_X, + fixup_Hexagon_GD_GOT_11_X, + fixup_Hexagon_LD_GOT_32_6_X, + fixup_Hexagon_LD_GOT_16_X, + fixup_Hexagon_LD_GOT_11_X, + fixup_Hexagon_IE_32_6_X, + fixup_Hexagon_IE_16_X, + fixup_Hexagon_IE_GOT_32_6_X, + fixup_Hexagon_IE_GOT_16_X, + fixup_Hexagon_IE_GOT_11_X, + fixup_Hexagon_TPREL_32_6_X, + fixup_Hexagon_TPREL_16_X, + fixup_Hexagon_TPREL_11_X, + + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; +enum FixupBitmaps : unsigned { + Word8 = 0xff, + Word16 = 0xffff, + Word32 = 0xffffffff, + Word32_LO = 0x00c03fff, + Word32_HL = 0x0, // Not Implemented + Word32_GP = 0x0, // Not Implemented + Word32_B7 = 0x00001f18, + Word32_B9 = 0x003000fe, + Word32_B13 = 0x00202ffe, + Word32_B15 = 0x00df20fe, + Word32_B22 = 0x01ff3ffe, + Word32_R6 = 0x000007e0, + Word32_U6 = 0x0, // Not Implemented + Word32_U16 = 0x0, // Not Implemented + Word32_X26 = 0x0fff3fff +}; +} // namespace Hexagon +} // namespace llvm + +#endif // LLVM_HEXAGON_HEXAGONFIXUPKINDS_H diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp new file mode 100644 index 0000000..06ccec5 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -0,0 +1,233 @@ +//===- HexagonInstPrinter.cpp - Convert Hexagon MCInst to assembly 
syntax -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an Hexagon MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#include "HexagonAsmPrinter.h" +#include "HexagonInstPrinter.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +#define GET_INSTRUCTION_NAME +#include "HexagonGenAsmWriter.inc" + +HexagonInstPrinter::HexagonInstPrinter(MCAsmInfo const &MAI, + MCInstrInfo const &MII, + MCRegisterInfo const &MRI) + : MCInstPrinter(MAI, MII, MRI), MII(MII), HasExtender(false) { +} + +StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const { + return MII.getName(Opcode); +} + +void HexagonInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { + O << getRegName(RegNo); +} + +StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const { + return getRegisterName(RegNo); +} + +void HexagonInstPrinter::setExtender(MCInst const &MCI) { + HasExtender = HexagonMCInstrInfo::isImmext(MCI); +} + +void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, + StringRef Annot, const MCSubtargetInfo &STI) { + assert(HexagonMCInstrInfo::isBundle(*MI)); + assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE); + assert(HexagonMCInstrInfo::bundleSize(*MI) > 0); + HasExtender = false; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MI)) { + MCInst const &MCI = *I.getInst(); + if (HexagonMCInstrInfo::isDuplex(MII, MCI)) { + printInstruction(MCI.getOperand(1).getInst(), OS); + OS << '\v'; + HasExtender = false; + printInstruction(MCI.getOperand(0).getInst(), OS); + } else + printInstruction(&MCI, OS); + setExtender(MCI); + OS << "\n"; + } + + auto Separator = ""; + if (HexagonMCInstrInfo::isInnerLoop(*MI)) { + OS << Separator; + Separator = " "; + MCInst ME; + ME.setOpcode(Hexagon::ENDLOOP0); + printInstruction(&ME, OS); + } + if (HexagonMCInstrInfo::isOuterLoop(*MI)) { + OS << Separator; + Separator = " "; + MCInst ME; + ME.setOpcode(Hexagon::ENDLOOP1); + printInstruction(&ME, OS); + } +} + +void HexagonInstPrinter::printOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + if (HexagonMCInstrInfo::getExtendableOp(MII, *MI) == OpNo && + (HasExtender || HexagonMCInstrInfo::isConstExtended(MII, *MI))) + O << "#"; + MCOperand const &MO = MI->getOperand(OpNo); + if (MO.isReg()) { + O << getRegisterName(MO.getReg()); + } else if (MO.isExpr()) { + int64_t Value; + if (MO.getExpr()->evaluateAsAbsolute(Value)) + O << formatImm(Value); + else + O << *MO.getExpr(); + } else { + llvm_unreachable("Unknown operand"); + } +} + +void HexagonInstPrinter::printExtOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printUnsignedImmOperand(MCInst const *MI, + unsigned OpNo, + raw_ostream &O) const { + O << MI->getOperand(OpNo).getImm(); +} + +void HexagonInstPrinter::printNegImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + O << -MI->getOperand(OpNo).getImm(); +} + +void HexagonInstPrinter::printNOneImmOperand(MCInst 
const *MI, unsigned OpNo, + raw_ostream &O) const { + O << -1; +} + +void HexagonInstPrinter::prints3_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<9>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO."); + O << formatImm(Imm/64); +} + +void HexagonInstPrinter::prints3_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<10>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO."); + O << formatImm(Imm/128); +} + +void HexagonInstPrinter::prints4_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<10>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO."); + O << formatImm(Imm/64); +} + +void HexagonInstPrinter::prints4_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<11>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO."); + O << formatImm(Imm/128); +} + +void HexagonInstPrinter::printGlobalOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printJumpTable(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + assert(MI->getOperand(OpNo).isExpr() && "Expecting expression"); + + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printConstantPool(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + assert(MI->getOperand(OpNo).isExpr() && "Expecting expression"); + + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printBranchOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + // Branches can take an immediate operand. This is used by the branch + // selection pass to print $+8, an eight byte displacement from the PC. + llvm_unreachable("Unknown branch operand."); +} + +void HexagonInstPrinter::printCallOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const {} + +void HexagonInstPrinter::printAbsAddrOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const {} + +void HexagonInstPrinter::printPredicateOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const {} + +void HexagonInstPrinter::printSymbol(MCInst const *MI, unsigned OpNo, + raw_ostream &O, bool hi) const { + MCOperand const &MO = MI->getOperand(OpNo); + + O << '#' << (hi ? 
"HI" : "LO") << '('; + if (MO.isImm()) { + O << '#'; + printOperand(MI, OpNo, O); + } else { + printOperand(MI, OpNo, O); + assert("Unknown symbol operand"); + } + O << ')'; +} + +void HexagonInstPrinter::printBrtarget(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + MCOperand const &MO = MI->getOperand(OpNo); + assert (MO.isExpr()); + MCExpr const &Expr = *MO.getExpr(); + int64_t Value; + if (Expr.evaluateAsAbsolute(Value)) + O << format("0x%" PRIx64, Value); + else { + if (HasExtender || HexagonMCInstrInfo::isConstExtended(MII, *MI)) + if (HexagonMCInstrInfo::getExtendableOp(MII, *MI) == OpNo) + O << "##"; + O << Expr; + } +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h new file mode 100644 index 0000000..5f42118 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h @@ -0,0 +1,92 @@ +//===-- HexagonInstPrinter.h - Convert Hexagon MCInst to assembly syntax --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_INSTPRINTER_HEXAGONINSTPRINTER_H +#define LLVM_LIB_TARGET_HEXAGON_INSTPRINTER_HEXAGONINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { +/// Prints bundles as a newline separated list of individual instructions +/// Duplexes are separated by a vertical tab \v character +/// A trailing line includes bundle properties such as endloop0/1 +/// +/// r0 = add(r1, r2) +/// r0 = #0 \v jump 0x0 +/// :endloop0 :endloop1 +class HexagonInstPrinter : public MCInstPrinter { +public: + explicit HexagonInstPrinter(MCAsmInfo const &MAI, MCInstrInfo const &MII, + MCRegisterInfo const &MRI); + void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + virtual StringRef getOpcodeName(unsigned Opcode) const; + void printInstruction(MCInst const *MI, raw_ostream &O); + + StringRef getRegName(unsigned RegNo) const; + static char const *getRegisterName(unsigned RegNo); + void printRegName(raw_ostream &O, unsigned RegNo) const override; + + void printOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printExtOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printUnsignedImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printNegImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printNOneImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints3_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints3_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints4_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints4_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printBranchOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printCallOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printAbsAddrOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printPredicateOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printGlobalOperand(MCInst const *MI, unsigned OpNo, + 
raw_ostream &O) const; + void printJumpTable(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printBrtarget(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + + void printConstantPool(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + + void printSymbolHi(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { + printSymbol(MI, OpNo, O, true); + } + void printSymbolLo(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { + printSymbol(MI, OpNo, O, false); + } + + MCAsmInfo const &getMAI() const { return MAI; } + MCInstrInfo const &getMII() const { return MII; } + +protected: + void printSymbol(MCInst const *MI, unsigned OpNo, raw_ostream &O, + bool hi) const; + +private: + MCInstrInfo const &MII; + + bool HasExtender; + void setExtender(MCInst const &MCI); +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp new file mode 100644 index 0000000..51d2f1c --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp @@ -0,0 +1,37 @@ +//===-- HexagonMCAsmInfo.cpp - Hexagon asm properties ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the HexagonMCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "HexagonMCAsmInfo.h" + +using namespace llvm; + +// Pin the vtable to this file. +void HexagonMCAsmInfo::anchor() {} + +HexagonMCAsmInfo::HexagonMCAsmInfo(const Triple &TT) { + Data16bitsDirective = "\t.half\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = nullptr; // .xword is only supported by V9. + ZeroDirective = "\t.skip\t"; + CommentString = "//"; + + LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment; + InlineAsmStart = "# InlineAsm Start"; + InlineAsmEnd = "# InlineAsm End"; + ZeroDirective = "\t.space\t"; + AscizDirective = "\t.string\t"; + + SupportsDebugInformation = true; + UsesELFSectionDirectiveForBSS = true; + ExceptionsType = ExceptionHandling::DwarfCFI; +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h new file mode 100644 index 0000000..a8456b4 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h @@ -0,0 +1,32 @@ +//===-- HexagonTargetAsmInfo.h - Hexagon asm properties --------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the HexagonMCAsmInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCASMINFO_H +#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCASMINFO_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCAsmInfoELF.h" + +namespace llvm { +class Triple; + +class HexagonMCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit HexagonMCAsmInfo(const Triple &TT); +}; + +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp new file mode 100644 index 0000000..46b7b41 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -0,0 +1,581 @@ +//===----- HexagonMCChecker.cpp - Instruction bundle checking -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the checking of insns inside a bundle according to the +// packet constraint rules of the Hexagon ISA. +// +//===----------------------------------------------------------------------===// + +#include "HexagonMCChecker.h" + +#include "HexagonBaseInfo.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<bool> RelaxNVChecks("relax-nv-checks", cl::init(false), + cl::ZeroOrMore, cl::Hidden, cl::desc("Relax checks of new-value validity")); + +const HexagonMCChecker::PredSense + HexagonMCChecker::Unconditional(Hexagon::NoRegister, false); + +void HexagonMCChecker::init() { + // Initialize read-only registers set. + ReadOnly.insert(Hexagon::PC); + + // Figure out the loop-registers definitions. + if (HexagonMCInstrInfo::isInnerLoop(MCB)) { + Defs[Hexagon::SA0].insert(Unconditional); // FIXME: define or change SA0? + Defs[Hexagon::LC0].insert(Unconditional); + } + if (HexagonMCInstrInfo::isOuterLoop(MCB)) { + Defs[Hexagon::SA1].insert(Unconditional); // FIXME: define or change SA0? + Defs[Hexagon::LC1].insert(Unconditional); + } + + if (HexagonMCInstrInfo::isBundle(MCB)) + // Unfurl a bundle. + for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + init(*I.getInst()); + } + else + init(MCB); +} + +void HexagonMCChecker::init(MCInst const& MCI) { + const MCInstrDesc& MCID = HexagonMCInstrInfo::getDesc(MCII, MCI); + unsigned PredReg = Hexagon::NoRegister; + bool isTrue = false; + + // Get used registers. + for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i) + if (MCI.getOperand(i).isReg()) { + unsigned R = MCI.getOperand(i).getReg(); + + if (HexagonMCInstrInfo::isPredicated(MCII, MCI) && isPredicateRegister(R)) { + // Note an used predicate register. + PredReg = R; + isTrue = HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI); + + // Note use of new predicate register. + if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) + NewPreds.insert(PredReg); + } + else + // Note register use. Super-registers are not tracked directly, + // but their components. + for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); + ++SRI) + if (!MCSubRegIterator(*SRI, &RI).isValid()) + // Skip super-registers used indirectly. 
+ Uses.insert(*SRI); + } + + // Get implicit register definitions. + if (const MCPhysReg *ImpDef = MCID.getImplicitDefs()) + for (; *ImpDef; ++ImpDef) { + unsigned R = *ImpDef; + + if (Hexagon::R31 != R && MCID.isCall()) + // Any register other than the LR and the PC are actually volatile ones + // as defined by the ABI, not modified implicitly by the call insn. + continue; + if (Hexagon::PC == R) + // Branches are the only insns that can change the PC, + // otherwise a read-only register. + continue; + + if (Hexagon::USR_OVF == R) + // Many insns change the USR implicitly, but only one or another flag. + // The instruction table models the USR.OVF flag, which can be implicitly + // modified more than once, but cannot be modified in the same packet + // with an instruction that modifies is explicitly. Deal with such situ- + // ations individually. + SoftDefs.insert(R); + else if (isPredicateRegister(R) && + HexagonMCInstrInfo::isPredicateLate(MCII, MCI)) + // Include implicit late predicates. + LatePreds.insert(R); + else + Defs[R].insert(PredSense(PredReg, isTrue)); + } + + // Figure out explicit register definitions. + for (unsigned i = 0; i < MCID.getNumDefs(); ++i) { + unsigned R = MCI.getOperand(i).getReg(), + S = Hexagon::NoRegister; + + // Note register definitions, direct ones as well as indirect side-effects. + // Super-registers are not tracked directly, but their components. + for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); + ++SRI) { + if (MCSubRegIterator(*SRI, &RI).isValid()) + // Skip super-registers defined indirectly. + continue; + + if (R == *SRI) { + if (S == R) + // Avoid scoring the defined register multiple times. + continue; + else + // Note that the defined register has already been scored. + S = R; + } + + if (Hexagon::P3_0 != R && Hexagon::P3_0 == *SRI) + // P3:0 is a special case, since multiple predicate register definitions + // in a packet is allowed as the equivalent of their logical "and". + // Only an explicit definition of P3:0 is noted as such; if a + // side-effect, then note as a soft definition. + SoftDefs.insert(*SRI); + else if (HexagonMCInstrInfo::isPredicateLate(MCII, MCI) && isPredicateRegister(*SRI)) + // Some insns produce predicates too late to be used in the same packet. + LatePreds.insert(*SRI); + else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_CUR_LD) + // Current loads should be used in the same packet. + // TODO: relies on the impossibility of a current and a temporary loads + // in the same packet. + CurDefs.insert(*SRI), Defs[*SRI].insert(PredSense(PredReg, isTrue)); + else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_TMP_LD) + // Temporary loads should be used in the same packet, but don't commit + // results, so it should be disregarded if another insn changes the same + // register. + // TODO: relies on the impossibility of a current and a temporary loads + // in the same packet. + TmpDefs.insert(*SRI); + else if (i <= 1 && llvm::HexagonMCInstrInfo::hasNewValue2(MCII, MCI) ) + // vshuff(Vx, Vy, Rx) <- Vx(0) and Vy(1) are both source and + // destination registers with this instruction. same for vdeal(Vx,Vy,Rx) + Uses.insert(*SRI); + else + Defs[*SRI].insert(PredSense(PredReg, isTrue)); + } + } + + // Figure out register definitions that produce new values. 
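+  // A "new-value" producer makes its result visible to another insn in the
+  // same packet through the Rn.new syntax.  Hedged illustration (syntax per
+  // the Hexagon PRM, not taken from this file):
+  //   { r2 = add(r0, r1)                          // producer
+  //     if (cmp.eq(r2.new, #0)) jump:nt done }    // new-value consumer
+  // NewDefs records, per register, whether each producer was predicated (and
+  // in which sense) and whether it produces a floating-point value, so that
+  // checkNewValues() can validate every consumer recorded in NewUses.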
+ if (HexagonMCInstrInfo::hasNewValue(MCII, MCI)) { + unsigned R = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg(); + + if (HexagonMCInstrInfo::isCompound(MCII, MCI)) + compoundRegisterMap(R); // Compound insns have a limited register range. + + for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); + ++SRI) + if (!MCSubRegIterator(*SRI, &RI).isValid()) + // No super-registers defined indirectly. + NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), + HexagonMCInstrInfo::isFloat(MCII, MCI))); + + // For fairly unique 2-dot-new producers, example: + // vdeal(V1, V9, R0) V1.new and V9.new can be used by consumers. + if (HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) { + unsigned R2 = HexagonMCInstrInfo::getNewValueOperand2(MCII, MCI).getReg(); + + for(MCRegAliasIterator SRI(R2, &RI, !MCSubRegIterator(R2, &RI).isValid()); + SRI.isValid(); + ++SRI) + if (!MCSubRegIterator(*SRI, &RI).isValid()) + NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), + HexagonMCInstrInfo::isFloat(MCII, MCI))); + } + } + + // Figure out definitions of new predicate registers. + if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) + for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i) + if (MCI.getOperand(i).isReg()) { + unsigned P = MCI.getOperand(i).getReg(); + + if (isPredicateRegister(P)) + NewPreds.insert(P); + } + + // Figure out uses of new values. + if (HexagonMCInstrInfo::isNewValue(MCII, MCI)) { + unsigned N = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg(); + + if (!MCSubRegIterator(N, &RI).isValid()) { + // Super-registers cannot use new values. + if (MCID.isBranch()) + NewUses[N] = NewSense::Jmp(llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV); + else + NewUses[N] = NewSense::Use(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI)); + } + } +} + +HexagonMCChecker::HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &mcb, MCInst &mcbdx, + MCRegisterInfo const &ri) + : MCB(mcb), MCBDX(mcbdx), RI(ri), MCII(MCII), STI(STI), + bLoadErrInfo(false) { + init(); +} + +bool HexagonMCChecker::check() { + bool chkB = checkBranches(); + bool chkP = checkPredicates(); + bool chkNV = checkNewValues(); + bool chkR = checkRegisters(); + bool chkS = checkSolo(); + bool chkSh = checkShuffle(); + bool chkSl = checkSlots(); + bool chk = chkB && chkP && chkNV && chkR && chkS && chkSh && chkSl; + + return chk; +} + +bool HexagonMCChecker::checkSlots() + +{ + unsigned slotsUsed = 0; + for (auto HMI: HexagonMCInstrInfo::bundleInstructions(MCBDX)) { + MCInst const& MCI = *HMI.getInst(); + if (HexagonMCInstrInfo::isImmext(MCI)) + continue; + if (HexagonMCInstrInfo::isDuplex(MCII, MCI)) + slotsUsed += 2; + else + ++slotsUsed; + } + + if (slotsUsed > HEXAGON_PACKET_SIZE) { + HexagonMCErrInfo errInfo; + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NOSLOTS); + addErrInfo(errInfo); + return false; + } + return true; +} + +// Check legal use of branches. 
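+// Illustrative sketch (not part of the upstream file): the ordering rule that
+// checkBranches() enforces, reduced to plain per-insn flags.  A loop-end
+// packet may not contain a branch at all, and a packet with two branches is
+// rejected when neither of them is conditional or when the conditional one is
+// placed after the unconditional one.
+#if 0
+struct BranchFlags { bool IsBranch, IsConditional; };
+static bool branchOrderIsLegal(const BranchFlags *Packet, unsigned Size,
+                               bool IsLoopEnd) {
+  unsigned Branches = 0;
+  bool HasConditional = false;
+  unsigned Conditional = Size, Unconditional = Size;
+  for (unsigned i = 0; i < Size; ++i) {
+    if (!Packet[i].IsBranch)
+      continue;
+    ++Branches;
+    if (Packet[i].IsConditional) {
+      HasConditional = true;
+      Conditional = i;   // position of the (last) conditional branch
+    } else
+      Unconditional = i; // position of the unconditional branch
+  }
+  if (Branches && IsLoopEnd)
+    return false; // CHECK_ERROR_ENDLOOP
+  if (Branches > 1 && (!HasConditional || Conditional > Unconditional))
+    return false; // CHECK_ERROR_BRANCHES
+  return true;
+}
+#endif
+// checkBranches() below applies the same rule to the actual MCInst bundle.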
+bool HexagonMCChecker::checkBranches() { + HexagonMCErrInfo errInfo; + if (HexagonMCInstrInfo::isBundle(MCB)) { + bool hasConditional = false; + unsigned Branches = 0, Returns = 0, NewIndirectBranches = 0, + NewValueBranches = 0, Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE, + Unconditional = HEXAGON_PRESHUFFLE_PACKET_SIZE; + + for (unsigned i = HexagonMCInstrInfo::bundleInstructionsOffset; + i < MCB.size(); ++i) { + MCInst const &MCI = *MCB.begin()[i].getInst(); + + if (HexagonMCInstrInfo::isImmext(MCI)) + continue; + if (HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch() || + HexagonMCInstrInfo::getDesc(MCII, MCI).isCall()) { + ++Branches; + if (HexagonMCInstrInfo::getDesc(MCII, MCI).isIndirectBranch() && + HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) + ++NewIndirectBranches; + if (HexagonMCInstrInfo::isNewValue(MCII, MCI)) + ++NewValueBranches; + + if (HexagonMCInstrInfo::isPredicated(MCII, MCI) || + HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) { + hasConditional = true; + Conditional = i; // Record the position of the conditional branch. + } else { + Unconditional = i; // Record the position of the unconditional branch. + } + } + if (HexagonMCInstrInfo::getDesc(MCII, MCI).isReturn() && + HexagonMCInstrInfo::getDesc(MCII, MCI).mayLoad()) + ++Returns; + } + + if (Branches) // FIXME: should "Defs.count(Hexagon::PC)" be here too? + if (HexagonMCInstrInfo::isInnerLoop(MCB) || + HexagonMCInstrInfo::isOuterLoop(MCB)) { + // Error out if there's any branch in a loop-end packet. + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_ENDLOOP, Hexagon::PC); + addErrInfo(errInfo); + return false; + } + if (Branches > 1) + if (!hasConditional || Conditional > Unconditional) { + // Error out if more than one unconditional branch or + // the conditional branch appears after the unconditional one. + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_BRANCHES); + addErrInfo(errInfo); + return false; + } + } + + return true; +} + +// Check legal use of predicate registers. +bool HexagonMCChecker::checkPredicates() { + HexagonMCErrInfo errInfo; + // Check for proper use of new predicate registers. + for (const auto& I : NewPreds) { + unsigned P = I; + + if (!Defs.count(P) || LatePreds.count(P)) { + // Error out if the new predicate register is not defined, + // or defined "late" + // (e.g., "{ if (p3.new)... ; p3 = sp1loop0(#r7:2, Rs) }"). + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWP, P); + addErrInfo(errInfo); + return false; + } + } + + // Check for proper use of auto-anded of predicate registers. + for (const auto& I : LatePreds) { + unsigned P = I; + + if (LatePreds.count(P) > 1 || Defs.count(P)) { + // Error out if predicate register defined "late" multiple times or + // defined late and regularly defined + // (e.g., "{ p3 = sp1loop0(...); p3 = cmp.eq(...) }". + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, P); + addErrInfo(errInfo); + return false; + } + } + + return true; +} + +// Check legal use of new values. +bool HexagonMCChecker::checkNewValues() { + HexagonMCErrInfo errInfo; + memset(&errInfo, 0, sizeof(errInfo)); + for (auto& I : NewUses) { + unsigned R = I.first; + NewSense &US = I.second; + + if (!hasValidNewValueDef(US, NewDefs[R])) { + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWV, R); + addErrInfo(errInfo); + return false; + } + } + + return true; +} + +// Check for legal register uses and definitions. +bool HexagonMCChecker::checkRegisters() { + HexagonMCErrInfo errInfo; + // Check for proper register definitions. 
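+  // A general-purpose register may normally be written only once per packet.
+  // The loop below allows two exceptions: predicate registers may be written
+  // by several insns (the results are effectively ANDed together), and a pair
+  // of writes under complementary predicates is legal because only one of
+  // them can execute.  Hedged examples in the spirit of the comments below:
+  //   { r0 = add(r1, r2); r0 = sub(r3, r4) }          // rejected
+  //   { if (p0) r0 = add(r1, r2); if (!p0) r0 = #0 }  // accepted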
+ for (const auto& I : Defs) { + unsigned R = I.first; + + if (ReadOnly.count(R)) { + // Error out for definitions of read-only registers. + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_READONLY, R); + addErrInfo(errInfo); + return false; + } + if (isLoopRegister(R) && Defs.count(R) > 1 && + (HexagonMCInstrInfo::isInnerLoop(MCB) || + HexagonMCInstrInfo::isOuterLoop(MCB))) { + // Error out for definitions of loop registers at the end of a loop. + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_LOOP, R); + addErrInfo(errInfo); + return false; + } + if (SoftDefs.count(R)) { + // Error out for explicit changes to registers also weakly defined + // (e.g., "{ usr = r0; r0 = sfadd(...) }"). + unsigned UsrR = Hexagon::USR; // Silence warning about mixed types in ?:. + unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R; + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR); + addErrInfo(errInfo); + return false; + } + if (!isPredicateRegister(R) && Defs[R].size() > 1) { + // Check for multiple register definitions. + PredSet &PM = Defs[R]; + + // Check for multiple unconditional register definitions. + if (PM.count(Unconditional)) { + // Error out on an unconditional change when there are any other + // changes, conditional or not. + unsigned UsrR = Hexagon::USR; + unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R; + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR); + addErrInfo(errInfo); + return false; + } + // Check for multiple conditional register definitions. + for (const auto& J : PM) { + PredSense P = J; + + // Check for multiple uses of the same condition. + if (PM.count(P) > 1) { + // Error out on conditional changes based on the same predicate + // (e.g., "{ if (!p0) r0 =...; if (!p0) r0 =... }"). + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R); + addErrInfo(errInfo); + return false; + } + // Check for the use of the complementary condition. + P.second = !P.second; + if (PM.count(P) && PM.size() > 2) { + // Error out on conditional changes based on the same predicate + // multiple times + // (e.g., "{ if (p0) r0 =...; if (!p0) r0 =... }; if (!p0) r0 =... }"). + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R); + addErrInfo(errInfo); + return false; + } + } + } + } + + // Check for use of current definitions. + for (const auto& I : CurDefs) { + unsigned R = I; + + if (!Uses.count(R)) { + // Warn on an unused current definition. + errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_CURRENT, R); + addErrInfo(errInfo); + return true; + } + } + + // Check for use of temporary definitions. + for (const auto& I : TmpDefs) { + unsigned R = I; + + if (!Uses.count(R)) { + // special case for vhist + bool vHistFound = false; + for (auto const&HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) { + if(llvm::HexagonMCInstrInfo::getType(MCII, *HMI.getInst()) == HexagonII::TypeCVI_HIST) { + vHistFound = true; // vhist() implicitly uses ALL REGxx.tmp + break; + } + } + // Warn on an unused temporary definition. + if (vHistFound == false) { + errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_TEMPORARY, R); + addErrInfo(errInfo); + return true; + } + } + } + + return true; +} + +// Check for legal use of solo insns. 
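+// A solo insn is one that the instruction tables mark as requiring a packet of
+// its own; bundling it with any other insn is reported as CHECK_ERROR_SOLO.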
+bool HexagonMCChecker::checkSolo() { + HexagonMCErrInfo errInfo; + if (HexagonMCInstrInfo::isBundle(MCB) && + HexagonMCInstrInfo::bundleSize(MCB) > 1) { + for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + if (llvm::HexagonMCInstrInfo::isSolo(MCII, *I.getInst())) { + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SOLO); + addErrInfo(errInfo); + return false; + } + } + } + + return true; +} + +bool HexagonMCChecker::checkShuffle() { + HexagonMCErrInfo errInfo; + // Branch info is lost when duplexing. The unduplexed insns must be + // checked and only branch errors matter for this case. + HexagonMCShuffler MCS(MCII, STI, MCB); + if (!MCS.check()) { + if (MCS.getError() == HexagonShuffler::SHUFFLE_ERROR_BRANCHES) { + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE); + errInfo.setShuffleError(MCS.getError()); + addErrInfo(errInfo); + return false; + } + } + HexagonMCShuffler MCSDX(MCII, STI, MCBDX); + if (!MCSDX.check()) { + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE); + errInfo.setShuffleError(MCSDX.getError()); + addErrInfo(errInfo); + return false; + } + return true; +} + +void HexagonMCChecker::compoundRegisterMap(unsigned& Register) { + switch (Register) { + default: + break; + case Hexagon::R15: + Register = Hexagon::R23; + break; + case Hexagon::R14: + Register = Hexagon::R22; + break; + case Hexagon::R13: + Register = Hexagon::R21; + break; + case Hexagon::R12: + Register = Hexagon::R20; + break; + case Hexagon::R11: + Register = Hexagon::R19; + break; + case Hexagon::R10: + Register = Hexagon::R18; + break; + case Hexagon::R9: + Register = Hexagon::R17; + break; + case Hexagon::R8: + Register = Hexagon::R16; + break; + } +} + +bool HexagonMCChecker::hasValidNewValueDef(const NewSense &Use, + const NewSenseList &Defs) const { + bool Strict = !RelaxNVChecks; + + for (unsigned i = 0, n = Defs.size(); i < n; ++i) { + const NewSense &Def = Defs[i]; + // NVJ cannot use a new FP value [7.6.1] + if (Use.IsNVJ && (Def.IsFloat || Def.PredReg != 0)) + continue; + // If the definition was not predicated, then it does not matter if + // the use is. + if (Def.PredReg == 0) + return true; + // With the strict checks, both the definition and the use must be + // predicated on the same register and condition. + if (Strict) { + if (Def.PredReg == Use.PredReg && Def.Cond == Use.Cond) + return true; + } else { + // With the relaxed checks, if the definition was predicated, the only + // detectable violation is if the use is predicated on the opposing + // condition, otherwise, it's ok. + if (Def.PredReg != Use.PredReg || Def.Cond == Use.Cond) + return true; + } + } + return false; +} + diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h new file mode 100644 index 0000000..5fc0bde --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h @@ -0,0 +1,218 @@ +//===----- HexagonMCChecker.h - Instruction bundle checking ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the checking of insns inside a bundle according to the +// packet constraint rules of the Hexagon ISA. 
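+// A typical driver builds one checker per bundle and, when check() fails,
+// drains the queued diagnostics.  Hedged usage sketch (not taken from the
+// upstream sources; report() and the bundle names are placeholders):
+//
+//   HexagonMCChecker Check(MCII, STI, Bundle, BundleWithDuplexes, MRI);
+//   if (!Check.check())
+//     while (Check.getNextErrInfo())
+//       report(Check.getError(), Check.getErrRegister());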
+// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONMCCHECKER_H +#define HEXAGONMCCHECKER_H + +#include <map> +#include <set> +#include <queue> +#include "MCTargetDesc/HexagonMCShuffler.h" + +using namespace llvm; + +namespace llvm { +class MCOperandInfo; + +typedef struct { + unsigned Error, Warning, ShuffleError; + unsigned Register; +} ErrInfo_T; + +class HexagonMCErrInfo { +public: + enum { + CHECK_SUCCESS = 0, + // Errors. + CHECK_ERROR_BRANCHES = 0x00001, + CHECK_ERROR_NEWP = 0x00002, + CHECK_ERROR_NEWV = 0x00004, + CHECK_ERROR_REGISTERS = 0x00008, + CHECK_ERROR_READONLY = 0x00010, + CHECK_ERROR_LOOP = 0x00020, + CHECK_ERROR_ENDLOOP = 0x00040, + CHECK_ERROR_SOLO = 0x00080, + CHECK_ERROR_SHUFFLE = 0x00100, + CHECK_ERROR_NOSLOTS = 0x00200, + CHECK_ERROR_UNKNOWN = 0x00400, + // Warnings. + CHECK_WARN_CURRENT = 0x10000, + CHECK_WARN_TEMPORARY = 0x20000 + }; + ErrInfo_T s; + + void reset() { + s.Error = CHECK_SUCCESS; + s.Warning = CHECK_SUCCESS; + s.ShuffleError = HexagonShuffler::SHUFFLE_SUCCESS; + s.Register = Hexagon::NoRegister; + }; + HexagonMCErrInfo() { + reset(); + }; + + void setError(unsigned e, unsigned r = Hexagon::NoRegister) + { s.Error = e; s.Register = r; }; + void setWarning(unsigned w, unsigned r = Hexagon::NoRegister) + { s.Warning = w; s.Register = r; }; + void setShuffleError(unsigned e) { s.ShuffleError = e; }; +}; + +/// Check for a valid bundle. +class HexagonMCChecker { + /// Insn bundle. + MCInst& MCB; + MCInst& MCBDX; + const MCRegisterInfo& RI; + MCInstrInfo const &MCII; + MCSubtargetInfo const &STI; + bool bLoadErrInfo; + + /// Set of definitions: register #, if predicated, if predicated true. + typedef std::pair<unsigned, bool> PredSense; + static const PredSense Unconditional; + typedef std::multiset<PredSense> PredSet; + typedef std::multiset<PredSense>::iterator PredSetIterator; + + typedef llvm::DenseMap<unsigned, PredSet>::iterator DefsIterator; + llvm::DenseMap<unsigned, PredSet> Defs; + + /// Information about how a new-value register is defined or used: + /// PredReg = predicate register, 0 if use/def not predicated, + /// Cond = true/false for if(PredReg)/if(!PredReg) respectively, + /// IsFloat = true if definition produces a floating point value + /// (not valid for uses), + /// IsNVJ = true if the use is a new-value branch (not valid for + /// definitions). + struct NewSense { + unsigned PredReg; + bool IsFloat, IsNVJ, Cond; + // The special-case "constructors": + static NewSense Jmp(bool isNVJ) { + NewSense NS = { /*PredReg=*/ 0, /*IsFloat=*/ false, /*IsNVJ=*/ isNVJ, + /*Cond=*/ false }; + return NS; + } + static NewSense Use(unsigned PR, bool True) { + NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ false, /*IsNVJ=*/ false, + /*Cond=*/ True }; + return NS; + } + static NewSense Def(unsigned PR, bool True, bool Float) { + NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ Float, /*IsNVJ=*/ false, + /*Cond=*/ True }; + return NS; + } + }; + /// Set of definitions that produce new register: + typedef llvm::SmallVector<NewSense,2> NewSenseList; + typedef llvm::DenseMap<unsigned, NewSenseList>::iterator NewDefsIterator; + llvm::DenseMap<unsigned, NewSenseList> NewDefs; + + /// Set of weak definitions whose clashes should be enforced selectively. + typedef std::set<unsigned>::iterator SoftDefsIterator; + std::set<unsigned> SoftDefs; + + /// Set of current definitions committed to the register file. 
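+  /// These come from ".cur" loads (TypeCVI_VM_CUR_LD): the result is written
+  /// to the register file, but it is expected to be read by another insn in
+  /// the same packet, so an unused entry is reported as CHECK_WARN_CURRENT.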
+ typedef std::set<unsigned>::iterator CurDefsIterator; + std::set<unsigned> CurDefs; + + /// Set of temporary definitions not committed to the register file. + typedef std::set<unsigned>::iterator TmpDefsIterator; + std::set<unsigned> TmpDefs; + + /// Set of new predicates used. + typedef std::set<unsigned>::iterator NewPredsIterator; + std::set<unsigned> NewPreds; + + /// Set of predicates defined late. + typedef std::multiset<unsigned>::iterator LatePredsIterator; + std::multiset<unsigned> LatePreds; + + /// Set of uses. + typedef std::set<unsigned>::iterator UsesIterator; + std::set<unsigned> Uses; + + /// Set of new values used: new register, if new-value jump. + typedef llvm::DenseMap<unsigned, NewSense>::iterator NewUsesIterator; + llvm::DenseMap<unsigned, NewSense> NewUses; + + /// Pre-defined set of read-only registers. + typedef std::set<unsigned>::iterator ReadOnlyIterator; + std::set<unsigned> ReadOnly; + + std::queue<ErrInfo_T> ErrInfoQ; + HexagonMCErrInfo CrntErrInfo; + + void getErrInfo() { + if (bLoadErrInfo == true) { + if (ErrInfoQ.empty()) { + CrntErrInfo.reset(); + } else { + CrntErrInfo.s = ErrInfoQ.front(); + ErrInfoQ.pop(); + } + } + bLoadErrInfo = false; + } + + void init(); + void init(MCInst const&); + + // Checks performed. + bool checkBranches(); + bool checkPredicates(); + bool checkNewValues(); + bool checkRegisters(); + bool checkSolo(); + bool checkShuffle(); + bool checkSlots(); + + static void compoundRegisterMap(unsigned&); + + bool isPredicateRegister(unsigned R) const { + return (Hexagon::P0 == R || Hexagon::P1 == R || + Hexagon::P2 == R || Hexagon::P3 == R); + }; + bool isLoopRegister(unsigned R) const { + return (Hexagon::SA0 == R || Hexagon::LC0 == R || + Hexagon::SA1 == R || Hexagon::LC1 == R); + }; + + bool hasValidNewValueDef(const NewSense &Use, + const NewSenseList &Defs) const; + + public: + explicit HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst& mcb, MCInst &mcbdx, + const MCRegisterInfo& ri); + + bool check(); + + /// add a new error/warning + void addErrInfo(HexagonMCErrInfo &err) { ErrInfoQ.push(err.s); }; + + /// Return the error code for the last operation in the insn bundle. + unsigned getError() { getErrInfo(); return CrntErrInfo.s.Error; }; + unsigned getWarning() { getErrInfo(); return CrntErrInfo.s.Warning; }; + unsigned getShuffleError() { getErrInfo(); return CrntErrInfo.s.ShuffleError; }; + unsigned getErrRegister() { getErrInfo(); return CrntErrInfo.s.Register; }; + bool getNextErrInfo() { + bLoadErrInfo = true; + return (ErrInfoQ.empty()) ? false : (getErrInfo(), true); + } +}; + +} + +#endif // HEXAGONMCCHECKER_H diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp new file mode 100644 index 0000000..c2c6275 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -0,0 +1,741 @@ +//===-- HexagonMCCodeEmitter.cpp - Hexagon Target Descriptions ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonFixupKinds.h" +#include "MCTargetDesc/HexagonMCCodeEmitter.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "mccodeemitter" + +using namespace llvm; +using namespace Hexagon; + +STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); + +HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII, + MCContext &aMCT) + : MCT(aMCT), MCII(aMII), Addend(new unsigned(0)), + Extended(new bool(false)), CurrentBundle(new MCInst const *) {} + +uint32_t HexagonMCCodeEmitter::parseBits(size_t Instruction, size_t Last, + MCInst const &MCB, + MCInst const &MCI) const { + bool Duplex = HexagonMCInstrInfo::isDuplex(MCII, MCI); + if (Instruction == 0) { + if (HexagonMCInstrInfo::isInnerLoop(MCB)) { + assert(!Duplex); + assert(Instruction != Last); + return HexagonII::INST_PARSE_LOOP_END; + } + } + if (Instruction == 1) { + if (HexagonMCInstrInfo::isOuterLoop(MCB)) { + assert(!Duplex); + assert(Instruction != Last); + return HexagonII::INST_PARSE_LOOP_END; + } + } + if (Duplex) { + assert(Instruction == Last); + return HexagonII::INST_PARSE_DUPLEX; + } + if(Instruction == Last) + return HexagonII::INST_PARSE_PACKET_END; + return HexagonII::INST_PARSE_NOT_END; +} + +void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + MCSubtargetInfo const &STI) const { + MCInst &HMB = const_cast<MCInst &>(MI); + + assert(HexagonMCInstrInfo::isBundle(HMB)); + DEBUG(dbgs() << "Encoding bundle\n";); + *Addend = 0; + *Extended = false; + *CurrentBundle = &MI; + size_t Instruction = 0; + size_t Last = HexagonMCInstrInfo::bundleSize(HMB) - 1; + for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) { + MCInst &HMI = const_cast<MCInst &>(*I.getInst()); + EncodeSingleInstruction(HMI, OS, Fixups, STI, + parseBits(Instruction, Last, HMB, HMI), + Instruction); + *Extended = HexagonMCInstrInfo::isImmext(HMI); + *Addend += HEXAGON_INSTR_SIZE; + ++Instruction; + } + return; +} + +/// EncodeSingleInstruction - Emit a single +void HexagonMCCodeEmitter::EncodeSingleInstruction( + const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI, uint32_t Parse, size_t Index) const { + MCInst HMB = MI; + assert(!HexagonMCInstrInfo::isBundle(HMB)); + uint64_t Binary; + + // Compound instructions are limited to using registers 0-7 and 16-23 + // and here we make a map 16-23 to 8-15 so they can be correctly encoded. + static unsigned RegMap[8] = {Hexagon::R8, Hexagon::R9, Hexagon::R10, + Hexagon::R11, Hexagon::R12, Hexagon::R13, + Hexagon::R14, Hexagon::R15}; + + // Pseudo instructions don't get encoded and shouldn't be here + // in the first place! 
+ assert(!HexagonMCInstrInfo::getDesc(MCII, HMB).isPseudo() && + "pseudo-instruction found"); + DEBUG(dbgs() << "Encoding insn" + " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'" + "\n"); + + if (llvm::HexagonMCInstrInfo::getType(MCII, HMB) == HexagonII::TypeCOMPOUND) { + for (unsigned i = 0; i < HMB.getNumOperands(); ++i) + if (HMB.getOperand(i).isReg()) { + unsigned Reg = + MCT.getRegisterInfo()->getEncodingValue(HMB.getOperand(i).getReg()); + if ((Reg <= 23) && (Reg >= 16)) + HMB.getOperand(i).setReg(RegMap[Reg - 16]); + } + } + + if (HexagonMCInstrInfo::isNewValue(MCII, HMB)) { + // Calculate the new value distance to the associated producer + MCOperand &MCO = + HMB.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, HMB)); + unsigned SOffset = 0; + unsigned Register = MCO.getReg(); + unsigned Register1; + auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); + auto i = Instructions.begin() + Index - 1; + for (;; --i) { + assert(i != Instructions.begin() - 1 && "Couldn't find producer"); + MCInst const &Inst = *i->getInst(); + if (HexagonMCInstrInfo::isImmext(Inst)) + continue; + ++SOffset; + Register1 = + HexagonMCInstrInfo::hasNewValue(MCII, Inst) + ? HexagonMCInstrInfo::getNewValueOperand(MCII, Inst).getReg() + : static_cast<unsigned>(Hexagon::NoRegister); + if (Register != Register1) + // This isn't the register we're looking for + continue; + if (!HexagonMCInstrInfo::isPredicated(MCII, Inst)) + // Producer is unpredicated + break; + assert(HexagonMCInstrInfo::isPredicated(MCII, HMB) && + "Unpredicated consumer depending on predicated producer"); + if (HexagonMCInstrInfo::isPredicatedTrue(MCII, Inst) == + HexagonMCInstrInfo::isPredicatedTrue(MCII, HMB)) + // Producer predicate sense matched ours + break; + } + // Hexagon PRM 10.11 Construct Nt from distance + unsigned Offset = SOffset; + Offset <<= 1; + MCO.setReg(Offset + Hexagon::R0); + } + + Binary = getBinaryCodeForInstr(HMB, Fixups, STI); + // Check for unimplemented instructions. Immediate extenders + // are encoded as zero, so they need to be accounted for. + if ((!Binary) && + ((HMB.getOpcode() != DuplexIClass0) && (HMB.getOpcode() != A4_ext) && + (HMB.getOpcode() != A4_ext_b) && (HMB.getOpcode() != A4_ext_c) && + (HMB.getOpcode() != A4_ext_g))) { + // Use a A2_nop for unimplemented instructions. 
+ DEBUG(dbgs() << "Unimplemented inst: " + " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'" + "\n"); + llvm_unreachable("Unimplemented Instruction"); + } + Binary |= Parse; + + // if we need to emit a duplexed instruction + if (HMB.getOpcode() >= Hexagon::DuplexIClass0 && + HMB.getOpcode() <= Hexagon::DuplexIClassF) { + assert(Parse == HexagonII::INST_PARSE_DUPLEX && + "Emitting duplex without duplex parse bits"); + unsigned dupIClass; + switch (HMB.getOpcode()) { + case Hexagon::DuplexIClass0: + dupIClass = 0; + break; + case Hexagon::DuplexIClass1: + dupIClass = 1; + break; + case Hexagon::DuplexIClass2: + dupIClass = 2; + break; + case Hexagon::DuplexIClass3: + dupIClass = 3; + break; + case Hexagon::DuplexIClass4: + dupIClass = 4; + break; + case Hexagon::DuplexIClass5: + dupIClass = 5; + break; + case Hexagon::DuplexIClass6: + dupIClass = 6; + break; + case Hexagon::DuplexIClass7: + dupIClass = 7; + break; + case Hexagon::DuplexIClass8: + dupIClass = 8; + break; + case Hexagon::DuplexIClass9: + dupIClass = 9; + break; + case Hexagon::DuplexIClassA: + dupIClass = 10; + break; + case Hexagon::DuplexIClassB: + dupIClass = 11; + break; + case Hexagon::DuplexIClassC: + dupIClass = 12; + break; + case Hexagon::DuplexIClassD: + dupIClass = 13; + break; + case Hexagon::DuplexIClassE: + dupIClass = 14; + break; + case Hexagon::DuplexIClassF: + dupIClass = 15; + break; + default: + llvm_unreachable("Unimplemented DuplexIClass"); + break; + } + // 29 is the bit position. + // 0b1110 =0xE bits are masked off and down shifted by 1 bit. + // Last bit is moved to bit position 13 + Binary = ((dupIClass & 0xE) << (29 - 1)) | ((dupIClass & 0x1) << 13); + + const MCInst *subInst0 = HMB.getOperand(0).getInst(); + const MCInst *subInst1 = HMB.getOperand(1).getInst(); + + // get subinstruction slot 0 + unsigned subInstSlot0Bits = getBinaryCodeForInstr(*subInst0, Fixups, STI); + // get subinstruction slot 1 + unsigned subInstSlot1Bits = getBinaryCodeForInstr(*subInst1, Fixups, STI); + + Binary |= subInstSlot0Bits | (subInstSlot1Bits << 16); + } + support::endian::Writer<support::little>(OS).write<uint32_t>(Binary); + ++MCNumEmitted; +} + +static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI, + const MCOperand &MO, + const MCSymbolRefExpr::VariantKind kind) { + const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI); + unsigned insnType = llvm::HexagonMCInstrInfo::getType(MCII, MI); + + if (insnType == HexagonII::TypePREFIX) { + switch (kind) { + case llvm::MCSymbolRefExpr::VK_GOTOFF: + return Hexagon::fixup_Hexagon_GOTREL_32_6_X; + case llvm::MCSymbolRefExpr::VK_GOT: + return Hexagon::fixup_Hexagon_GOT_32_6_X; + case llvm::MCSymbolRefExpr::VK_TPREL: + return Hexagon::fixup_Hexagon_TPREL_32_6_X; + case llvm::MCSymbolRefExpr::VK_DTPREL: + return Hexagon::fixup_Hexagon_DTPREL_32_6_X; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + return Hexagon::fixup_Hexagon_GD_GOT_32_6_X; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + return Hexagon::fixup_Hexagon_LD_GOT_32_6_X; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + return Hexagon::fixup_Hexagon_IE_32_6_X; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + return Hexagon::fixup_Hexagon_IE_GOT_32_6_X; + default: + if (MCID.isBranch()) + return Hexagon::fixup_Hexagon_B32_PCREL_X; + else + return Hexagon::fixup_Hexagon_32_6_X; + } + } else if (MCID.isBranch()) + return (Hexagon::fixup_Hexagon_B13_PCREL); + + switch (MCID.getOpcode()) { + case Hexagon::HI: + case Hexagon::A2_tfrih: + switch (kind) { + case 
llvm::MCSymbolRefExpr::VK_GOT: + return Hexagon::fixup_Hexagon_GOT_HI16; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + return Hexagon::fixup_Hexagon_GOTREL_HI16; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + return Hexagon::fixup_Hexagon_GD_GOT_HI16; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + return Hexagon::fixup_Hexagon_LD_GOT_HI16; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + return Hexagon::fixup_Hexagon_IE_HI16; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + return Hexagon::fixup_Hexagon_IE_GOT_HI16; + case llvm::MCSymbolRefExpr::VK_TPREL: + return Hexagon::fixup_Hexagon_TPREL_HI16; + case llvm::MCSymbolRefExpr::VK_DTPREL: + return Hexagon::fixup_Hexagon_DTPREL_HI16; + default: + return Hexagon::fixup_Hexagon_HI16; + } + + case Hexagon::LO: + case Hexagon::A2_tfril: + switch (kind) { + case llvm::MCSymbolRefExpr::VK_GOT: + return Hexagon::fixup_Hexagon_GOT_LO16; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + return Hexagon::fixup_Hexagon_GOTREL_LO16; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + return Hexagon::fixup_Hexagon_GD_GOT_LO16; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + return Hexagon::fixup_Hexagon_LD_GOT_LO16; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + return Hexagon::fixup_Hexagon_IE_LO16; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + return Hexagon::fixup_Hexagon_IE_GOT_LO16; + case llvm::MCSymbolRefExpr::VK_TPREL: + return Hexagon::fixup_Hexagon_TPREL_LO16; + case llvm::MCSymbolRefExpr::VK_DTPREL: + return Hexagon::fixup_Hexagon_DTPREL_LO16; + default: + return Hexagon::fixup_Hexagon_LO16; + } + + // The only relocs left should be GP relative: + default: + if (MCID.mayStore() || MCID.mayLoad()) { + for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; + ++ImpUses) { + if (*ImpUses == Hexagon::GP) { + switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) { + case HexagonII::MemAccessSize::ByteAccess: + return fixup_Hexagon_GPREL16_0; + case HexagonII::MemAccessSize::HalfWordAccess: + return fixup_Hexagon_GPREL16_1; + case HexagonII::MemAccessSize::WordAccess: + return fixup_Hexagon_GPREL16_2; + case HexagonII::MemAccessSize::DoubleWordAccess: + return fixup_Hexagon_GPREL16_3; + default: + llvm_unreachable("unhandled fixup"); + } + } + } + } else + llvm_unreachable("unhandled fixup"); + } + + return LastTargetFixupKind; +} + +namespace llvm { +extern const MCInstrDesc HexagonInsts[]; +} + +namespace { + bool isPCRel (unsigned Kind) { + switch(Kind){ + case fixup_Hexagon_B22_PCREL: + case fixup_Hexagon_B15_PCREL: + case fixup_Hexagon_B7_PCREL: + case fixup_Hexagon_B13_PCREL: + case fixup_Hexagon_B9_PCREL: + case fixup_Hexagon_B32_PCREL_X: + case fixup_Hexagon_B22_PCREL_X: + case fixup_Hexagon_B15_PCREL_X: + case fixup_Hexagon_B13_PCREL_X: + case fixup_Hexagon_B9_PCREL_X: + case fixup_Hexagon_B7_PCREL_X: + case fixup_Hexagon_32_PCREL: + case fixup_Hexagon_PLT_B22_PCREL: + case fixup_Hexagon_GD_PLT_B22_PCREL: + case fixup_Hexagon_LD_PLT_B22_PCREL: + case fixup_Hexagon_6_PCREL_X: + return true; + default: + return false; + } + } +} + +unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, + const MCOperand &MO, + const MCExpr *ME, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const + +{ + int64_t Res; + + if (ME->evaluateAsAbsolute(Res)) + return Res; + + MCExpr::ExprKind MK = ME->getKind(); + if (MK == MCExpr::Constant) { + return cast<MCConstantExpr>(ME)->getValue(); + } + if (MK == MCExpr::Binary) { + getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI); + getExprOpValue(MI, MO, 
cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI); + return 0; + } + + assert(MK == MCExpr::SymbolRef); + + Hexagon::Fixups FixupKind = + Hexagon::Fixups(Hexagon::fixup_Hexagon_TPREL_LO16); + const MCSymbolRefExpr *MCSRE = static_cast<const MCSymbolRefExpr *>(ME); + const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI); + unsigned bits = HexagonMCInstrInfo::getExtentBits(MCII, MI) - + HexagonMCInstrInfo::getExtentAlignment(MCII, MI); + const MCSymbolRefExpr::VariantKind kind = MCSRE->getKind(); + + DEBUG(dbgs() << "----------------------------------------\n"); + DEBUG(dbgs() << "Opcode Name: " << HexagonMCInstrInfo::getName(MCII, MI) + << "\n"); + DEBUG(dbgs() << "Opcode: " << MCID.getOpcode() << "\n"); + DEBUG(dbgs() << "Relocation bits: " << bits << "\n"); + DEBUG(dbgs() << "Addend: " << *Addend << "\n"); + DEBUG(dbgs() << "----------------------------------------\n"); + + switch (bits) { + default: + DEBUG(dbgs() << "unrecognized bit count of " << bits << '\n'); + break; + + case 32: + switch (kind) { + case llvm::MCSymbolRefExpr::VK_Hexagon_PCREL: + FixupKind = Hexagon::fixup_Hexagon_32_PCREL; + break; + case llvm::MCSymbolRefExpr::VK_GOT: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOT_32_6_X + : Hexagon::fixup_Hexagon_GOT_32; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOTREL_32_6_X + : Hexagon::fixup_Hexagon_GOTREL_32; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_GD_GOT_32_6_X + : Hexagon::fixup_Hexagon_GD_GOT_32; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_LD_GOT_32_6_X + : Hexagon::fixup_Hexagon_LD_GOT_32; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_32_6_X + : Hexagon::fixup_Hexagon_IE_32; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_GOT_32_6_X + : Hexagon::fixup_Hexagon_IE_GOT_32; + break; + case llvm::MCSymbolRefExpr::VK_TPREL: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_TPREL_32_6_X + : Hexagon::fixup_Hexagon_TPREL_32; + break; + case llvm::MCSymbolRefExpr::VK_DTPREL: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_DTPREL_32_6_X + : Hexagon::fixup_Hexagon_DTPREL_32; + break; + default: + FixupKind = + *Extended ? Hexagon::fixup_Hexagon_32_6_X : Hexagon::fixup_Hexagon_32; + break; + } + break; + + case 22: + switch (kind) { + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_PLT: + FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_PLT: + FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL; + break; + default: + if (MCID.isBranch() || MCID.isCall()) { + FixupKind = *Extended ? 
Hexagon::fixup_Hexagon_B22_PCREL_X + : Hexagon::fixup_Hexagon_B22_PCREL; + } else { + errs() << "unrecognized relocation, bits: " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + } + break; + + case 16: + if (*Extended) { + switch (kind) { + default: + FixupKind = Hexagon::fixup_Hexagon_16_X; + break; + case llvm::MCSymbolRefExpr::VK_GOT: + FixupKind = Hexagon::fixup_Hexagon_GOT_16_X; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + FixupKind = Hexagon::fixup_Hexagon_IE_16_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16_X; + break; + case llvm::MCSymbolRefExpr::VK_TPREL: + FixupKind = Hexagon::fixup_Hexagon_TPREL_16_X; + break; + case llvm::MCSymbolRefExpr::VK_DTPREL: + FixupKind = Hexagon::fixup_Hexagon_DTPREL_16_X; + break; + } + } else + switch (kind) { + default: + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + if ((MCID.getOpcode() == Hexagon::HI) || + (MCID.getOpcode() == Hexagon::LO_H)) + FixupKind = Hexagon::fixup_Hexagon_GOTREL_HI16; + else + FixupKind = Hexagon::fixup_Hexagon_GOTREL_LO16; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_GPREL: + FixupKind = Hexagon::fixup_Hexagon_GPREL16_0; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LO16: + FixupKind = Hexagon::fixup_Hexagon_LO16; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_HI16: + FixupKind = Hexagon::fixup_Hexagon_HI16; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16; + break; + case llvm::MCSymbolRefExpr::VK_TPREL: + FixupKind = Hexagon::fixup_Hexagon_TPREL_16; + break; + case llvm::MCSymbolRefExpr::VK_DTPREL: + FixupKind = Hexagon::fixup_Hexagon_DTPREL_16; + break; + } + break; + + case 15: + if (MCID.isBranch() || MCID.isCall()) + FixupKind = *Extended ? 
Hexagon::fixup_Hexagon_B15_PCREL_X + : Hexagon::fixup_Hexagon_B15_PCREL; + break; + + case 13: + if (MCID.isBranch()) + FixupKind = Hexagon::fixup_Hexagon_B13_PCREL; + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 12: + if (*Extended) + switch (kind) { + default: + FixupKind = Hexagon::fixup_Hexagon_12_X; + break; + // There isn't a GOT_12_X, both 11_X and 16_X resolve to 6/26 + case llvm::MCSymbolRefExpr::VK_GOT: + FixupKind = Hexagon::fixup_Hexagon_GOT_16_X; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X; + break; + } + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 11: + if (*Extended) + switch (kind) { + default: + FixupKind = Hexagon::fixup_Hexagon_11_X; + break; + case llvm::MCSymbolRefExpr::VK_GOT: + FixupKind = Hexagon::fixup_Hexagon_GOT_11_X; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + FixupKind = Hexagon::fixup_Hexagon_GD_GOT_11_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + FixupKind = Hexagon::fixup_Hexagon_LD_GOT_11_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + FixupKind = Hexagon::fixup_Hexagon_IE_GOT_11_X; + break; + case llvm::MCSymbolRefExpr::VK_TPREL: + FixupKind = Hexagon::fixup_Hexagon_TPREL_11_X; + break; + case llvm::MCSymbolRefExpr::VK_DTPREL: + FixupKind = Hexagon::fixup_Hexagon_DTPREL_11_X; + break; + } + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 10: + if (*Extended) + FixupKind = Hexagon::fixup_Hexagon_10_X; + break; + + case 9: + if (MCID.isBranch() || + (llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR)) + FixupKind = *Extended ? Hexagon::fixup_Hexagon_B9_PCREL_X + : Hexagon::fixup_Hexagon_B9_PCREL; + else if (*Extended) + FixupKind = Hexagon::fixup_Hexagon_9_X; + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 8: + if (*Extended) + FixupKind = Hexagon::fixup_Hexagon_8_X; + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 7: + if (MCID.isBranch() || + (llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR)) + FixupKind = *Extended ? Hexagon::fixup_Hexagon_B7_PCREL_X + : Hexagon::fixup_Hexagon_B7_PCREL; + else if (*Extended) + FixupKind = Hexagon::fixup_Hexagon_7_X; + else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 6: + if (*Extended) { + switch (kind) { + default: + FixupKind = Hexagon::fixup_Hexagon_6_X; + break; + case llvm::MCSymbolRefExpr::VK_Hexagon_PCREL: + FixupKind = Hexagon::fixup_Hexagon_6_PCREL_X; + break; + // This is part of an extender, GOT_11 is a + // Word32_U6 unsigned/truncated reloc. 
+ case llvm::MCSymbolRefExpr::VK_GOT: + FixupKind = Hexagon::fixup_Hexagon_GOT_11_X; + break; + case llvm::MCSymbolRefExpr::VK_GOTOFF: + FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X; + break; + } + } else { + errs() << "unrecognized relocation, bits " << bits << "\n"; + errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + } + break; + + case 0: + FixupKind = getFixupNoBits(MCII, MI, MO, kind); + break; + } + + MCExpr const *FixupExpression = (*Addend > 0 && isPCRel(FixupKind)) ? + MCBinaryExpr::createAdd(MO.getExpr(), + MCConstantExpr::create(*Addend, MCT), MCT) : + MO.getExpr(); + + MCFixup fixup = MCFixup::create(*Addend, FixupExpression, + MCFixupKind(FixupKind), MI.getLoc()); + Fixups.push_back(fixup); + // All of the information is in the fixup. + return (0); +} + +unsigned +HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO, + SmallVectorImpl<MCFixup> &Fixups, + MCSubtargetInfo const &STI) const { + if (MO.isReg()) + return MCT.getRegisterInfo()->getEncodingValue(MO.getReg()); + if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + + // MO must be an ME. + assert(MO.isExpr()); + return getExprOpValue(MI, MO, MO.getExpr(), Fixups, STI); +} + +MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII, + MCRegisterInfo const &MRI, + MCContext &MCT) { + return new HexagonMCCodeEmitter(MII, MCT); +} + +#include "HexagonGenMCCodeEmitter.inc" diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h new file mode 100644 index 0000000..2a154da --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h @@ -0,0 +1,70 @@ +//===-- HexagonMCCodeEmitter.h - Hexagon Target Descriptions ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Definition for classes that emit Hexagon machine code from MCInsts +/// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONMCCODEEMITTER_H +#define HEXAGONMCCODEEMITTER_H + +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +class HexagonMCCodeEmitter : public MCCodeEmitter { + MCContext &MCT; + MCInstrInfo const &MCII; + std::unique_ptr<unsigned> Addend; + std::unique_ptr<bool> Extended; + std::unique_ptr<MCInst const *> CurrentBundle; + + // helper routine for getMachineOpValue() + unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO, + const MCExpr *ME, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + +public: + HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT); + + // Return parse bits for instruction `MCI' inside bundle `MCB' + uint32_t parseBits(size_t Instruction, size_t Last, MCInst const &MCB, + MCInst const &MCI) const; + + void encodeInstruction(MCInst const &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + MCSubtargetInfo const &STI) const override; + + void EncodeSingleInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI, + uint32_t Parse, size_t Index) const; + + // \brief TableGen'erated function for getting the + // binary encoding for an instruction. + uint64_t getBinaryCodeForInstr(MCInst const &MI, + SmallVectorImpl<MCFixup> &Fixups, + MCSubtargetInfo const &STI) const; + + /// \brief Return binary encoding of operand. + unsigned getMachineOpValue(MCInst const &MI, MCOperand const &MO, + SmallVectorImpl<MCFixup> &Fixups, + MCSubtargetInfo const &STI) const; +}; // class HexagonMCCodeEmitter + +} // namespace llvm + +#endif /* HEXAGONMCCODEEMITTER_H */ diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp new file mode 100644 index 0000000..d194bea --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp @@ -0,0 +1,427 @@ + +//=== HexagonMCCompound.cpp - Hexagon Compound checker -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file is looks at a packet and tries to form compound insns +// +//===----------------------------------------------------------------------===// +#include "Hexagon.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCShuffler.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace Hexagon; + +#define DEBUG_TYPE "hexagon-mccompound" + +enum OpcodeIndex { + fp0_jump_nt = 0, + fp0_jump_t, + fp1_jump_nt, + fp1_jump_t, + tp0_jump_nt, + tp0_jump_t, + tp1_jump_nt, + tp1_jump_t +}; + +static const unsigned tstBitOpcode[8] = { + J4_tstbit0_fp0_jump_nt, J4_tstbit0_fp0_jump_t, J4_tstbit0_fp1_jump_nt, + J4_tstbit0_fp1_jump_t, J4_tstbit0_tp0_jump_nt, J4_tstbit0_tp0_jump_t, + J4_tstbit0_tp1_jump_nt, J4_tstbit0_tp1_jump_t}; +static const unsigned cmpeqBitOpcode[8] = { + J4_cmpeq_fp0_jump_nt, J4_cmpeq_fp0_jump_t, J4_cmpeq_fp1_jump_nt, + J4_cmpeq_fp1_jump_t, J4_cmpeq_tp0_jump_nt, J4_cmpeq_tp0_jump_t, + J4_cmpeq_tp1_jump_nt, J4_cmpeq_tp1_jump_t}; +static const unsigned cmpgtBitOpcode[8] = { + J4_cmpgt_fp0_jump_nt, J4_cmpgt_fp0_jump_t, J4_cmpgt_fp1_jump_nt, + J4_cmpgt_fp1_jump_t, J4_cmpgt_tp0_jump_nt, J4_cmpgt_tp0_jump_t, + J4_cmpgt_tp1_jump_nt, J4_cmpgt_tp1_jump_t}; +static const unsigned cmpgtuBitOpcode[8] = { + J4_cmpgtu_fp0_jump_nt, J4_cmpgtu_fp0_jump_t, J4_cmpgtu_fp1_jump_nt, + J4_cmpgtu_fp1_jump_t, J4_cmpgtu_tp0_jump_nt, J4_cmpgtu_tp0_jump_t, + J4_cmpgtu_tp1_jump_nt, J4_cmpgtu_tp1_jump_t}; +static const unsigned cmpeqiBitOpcode[8] = { + J4_cmpeqi_fp0_jump_nt, J4_cmpeqi_fp0_jump_t, J4_cmpeqi_fp1_jump_nt, + J4_cmpeqi_fp1_jump_t, J4_cmpeqi_tp0_jump_nt, J4_cmpeqi_tp0_jump_t, + J4_cmpeqi_tp1_jump_nt, J4_cmpeqi_tp1_jump_t}; +static const unsigned cmpgtiBitOpcode[8] = { + J4_cmpgti_fp0_jump_nt, J4_cmpgti_fp0_jump_t, J4_cmpgti_fp1_jump_nt, + J4_cmpgti_fp1_jump_t, J4_cmpgti_tp0_jump_nt, J4_cmpgti_tp0_jump_t, + J4_cmpgti_tp1_jump_nt, J4_cmpgti_tp1_jump_t}; +static const unsigned cmpgtuiBitOpcode[8] = { + J4_cmpgtui_fp0_jump_nt, J4_cmpgtui_fp0_jump_t, J4_cmpgtui_fp1_jump_nt, + J4_cmpgtui_fp1_jump_t, J4_cmpgtui_tp0_jump_nt, J4_cmpgtui_tp0_jump_t, + J4_cmpgtui_tp1_jump_nt, J4_cmpgtui_tp1_jump_t}; +static const unsigned cmpeqn1BitOpcode[8] = { + J4_cmpeqn1_fp0_jump_nt, J4_cmpeqn1_fp0_jump_t, J4_cmpeqn1_fp1_jump_nt, + J4_cmpeqn1_fp1_jump_t, J4_cmpeqn1_tp0_jump_nt, J4_cmpeqn1_tp0_jump_t, + J4_cmpeqn1_tp1_jump_nt, J4_cmpeqn1_tp1_jump_t}; +static const unsigned cmpgtn1BitOpcode[8] = { + J4_cmpgtn1_fp0_jump_nt, J4_cmpgtn1_fp0_jump_t, J4_cmpgtn1_fp1_jump_nt, + J4_cmpgtn1_fp1_jump_t, J4_cmpgtn1_tp0_jump_nt, J4_cmpgtn1_tp0_jump_t, + J4_cmpgtn1_tp1_jump_nt, J4_cmpgtn1_tp1_jump_t, +}; + +// enum HexagonII::CompoundGroup +namespace { +unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) { + unsigned DstReg, SrcReg, Src1Reg, Src2Reg; + + switch (MI.getOpcode()) { + default: + return HexagonII::HCG_None; + // + // Compound pairs. 
+ // "p0=cmp.eq(Rs16,Rt16); if (p0.new) jump:nt #r9:2" + // "Rd16=#U6 ; jump #r9:2" + // "Rd16=Rs16 ; jump #r9:2" + // + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtu: + if (IsExtended) + return false; + DstReg = MI.getOperand(0).getReg(); + Src1Reg = MI.getOperand(1).getReg(); + Src2Reg = MI.getOperand(2).getReg(); + if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg)) + return HexagonII::HCG_A; + break; + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtui: + if (IsExtended) + return false; + // P0 = cmp.eq(Rs,#u2) + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + (HexagonMCInstrInfo::inRange<5>(MI, 2) || + HexagonMCInstrInfo::minConstant(MI, 2) == -1)) + return HexagonII::HCG_A; + break; + case Hexagon::A2_tfr: + if (IsExtended) + return false; + // Rd = Rs + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg)) + return HexagonII::HCG_A; + break; + case Hexagon::A2_tfrsi: + if (IsExtended) + return false; + // Rd = #u6 + DstReg = MI.getOperand(0).getReg(); + if (HexagonMCInstrInfo::minConstant(MI, 1) <= 63 && + HexagonMCInstrInfo::minConstant(MI, 1) >= 0 && + HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) + return HexagonII::HCG_A; + break; + case Hexagon::S2_tstbit_i: + if (IsExtended) + return false; + DstReg = MI.getOperand(0).getReg(); + Src1Reg = MI.getOperand(1).getReg(); + if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::minConstant(MI, 2) == 0) + return HexagonII::HCG_A; + break; + // The fact that .new form is used pretty much guarantees + // that predicate register will match. Nevertheless, + // there could be some false positives without additional + // checking. + case Hexagon::J2_jumptnew: + case Hexagon::J2_jumpfnew: + case Hexagon::J2_jumptnewpt: + case Hexagon::J2_jumpfnewpt: + Src1Reg = MI.getOperand(0).getReg(); + if (Hexagon::P0 == Src1Reg || Hexagon::P1 == Src1Reg) + return HexagonII::HCG_B; + break; + // Transfer and jump: + // Rd=#U6 ; jump #r9:2 + // Rd=Rs ; jump #r9:2 + // Do not test for jump range here. + case Hexagon::J2_jump: + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + return HexagonII::HCG_C; + break; + } + + return HexagonII::HCG_None; +} +} + +/// getCompoundOp - Return the index from 0-7 into the above opcode lists. +namespace { +unsigned getCompoundOp(MCInst const &HMCI) { + const MCOperand &Predicate = HMCI.getOperand(0); + unsigned PredReg = Predicate.getReg(); + + assert((PredReg == Hexagon::P0) || (PredReg == Hexagon::P1) || + (PredReg == Hexagon::P2) || (PredReg == Hexagon::P3)); + + switch (HMCI.getOpcode()) { + default: + llvm_unreachable("Expected match not found.\n"); + break; + case Hexagon::J2_jumpfnew: + return (PredReg == Hexagon::P0) ? fp0_jump_nt : fp1_jump_nt; + case Hexagon::J2_jumpfnewpt: + return (PredReg == Hexagon::P0) ? fp0_jump_t : fp1_jump_t; + case Hexagon::J2_jumptnew: + return (PredReg == Hexagon::P0) ? tp0_jump_nt : tp1_jump_nt; + case Hexagon::J2_jumptnewpt: + return (PredReg == Hexagon::P0) ? 
tp0_jump_t : tp1_jump_t; + } +} +} + +namespace { +MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { + MCInst *CompoundInsn = 0; + unsigned compoundOpcode; + MCOperand Rs, Rt; + int64_t Value; + bool Success; + + switch (L.getOpcode()) { + default: + DEBUG(dbgs() << "Possible compound ignored\n"); + return CompoundInsn; + + case Hexagon::A2_tfrsi: + Rt = L.getOperand(0); + compoundOpcode = J4_jumpseti; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(L.getOperand(1)); // Immediate + CompoundInsn->addOperand(R.getOperand(0)); // Jump target + break; + + case Hexagon::A2_tfr: + Rt = L.getOperand(0); + Rs = L.getOperand(1); + + compoundOpcode = J4_jumpsetr; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(R.getOperand(0)); // Jump target. + + break; + + case Hexagon::C2_cmpeq: + DEBUG(dbgs() << "CX: C2_cmpeq\n"); + Rs = L.getOperand(1); + Rt = L.getOperand(2); + + compoundOpcode = cmpeqBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpgt: + DEBUG(dbgs() << "CX: C2_cmpgt\n"); + Rs = L.getOperand(1); + Rt = L.getOperand(2); + + compoundOpcode = cmpgtBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpgtu: + DEBUG(dbgs() << "CX: C2_cmpgtu\n"); + Rs = L.getOperand(1); + Rt = L.getOperand(2); + + compoundOpcode = cmpgtuBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpeqi: + DEBUG(dbgs() << "CX: C2_cmpeqi\n"); + Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + (void)Success; + assert(Success); + if (Value == -1) + compoundOpcode = cmpeqn1BitOpcode[getCompoundOp(R)]; + else + compoundOpcode = cmpeqiBitOpcode[getCompoundOp(R)]; + + Rs = L.getOperand(1); + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + if (Value != -1) + CompoundInsn->addOperand(L.getOperand(2)); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpgti: + DEBUG(dbgs() << "CX: C2_cmpgti\n"); + Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + (void)Success; + assert(Success); + if (Value == -1) + compoundOpcode = cmpgtn1BitOpcode[getCompoundOp(R)]; + else + compoundOpcode = cmpgtiBitOpcode[getCompoundOp(R)]; + + Rs = L.getOperand(1); + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + if (Value != -1) + CompoundInsn->addOperand(L.getOperand(2)); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpgtui: + DEBUG(dbgs() << "CX: C2_cmpgtui\n"); + Rs = L.getOperand(1); + compoundOpcode = cmpgtuiBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(L.getOperand(2)); + 
CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::S2_tstbit_i: + DEBUG(dbgs() << "CX: S2_tstbit_i\n"); + Rs = L.getOperand(1); + compoundOpcode = tstBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(R.getOperand(1)); + break; + } + + return CompoundInsn; +} +} + +/// Non-Symmetrical. See if these two instructions are fit for compound pair. +namespace { +bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA, + MCInst const &MIb, bool IsExtendedB) { + unsigned MIaG = getCompoundCandidateGroup(MIa, IsExtendedA); + unsigned MIbG = getCompoundCandidateGroup(MIb, IsExtendedB); + // We have two candidates - check that this is the same register + // we are talking about. + unsigned Opca = MIa.getOpcode(); + if (MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_C && + (Opca == Hexagon::A2_tfr || Opca == Hexagon::A2_tfrsi)) + return true; + return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) && + (MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg())); +} +} + +namespace { +bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { + assert(HexagonMCInstrInfo::isBundle(MCI)); + bool JExtended = false; + for (MCInst::iterator J = + MCI.begin() + HexagonMCInstrInfo::bundleInstructionsOffset; + J != MCI.end(); ++J) { + MCInst const *JumpInst = J->getInst(); + if (HexagonMCInstrInfo::isImmext(*JumpInst)) { + JExtended = true; + continue; + } + if (llvm::HexagonMCInstrInfo::getType(MCII, *JumpInst) == + HexagonII::TypeJ) { + // Try to pair with another insn (B)undled with jump. + bool BExtended = false; + for (MCInst::iterator B = + MCI.begin() + HexagonMCInstrInfo::bundleInstructionsOffset; + B != MCI.end(); ++B) { + MCInst const *Inst = B->getInst(); + if (JumpInst == Inst) + continue; + if (HexagonMCInstrInfo::isImmext(*Inst)) { + BExtended = true; + continue; + } + DEBUG(dbgs() << "J,B: " << JumpInst->getOpcode() << "," + << Inst->getOpcode() << "\n"); + if (isOrderedCompoundPair(*Inst, BExtended, *JumpInst, JExtended)) { + MCInst *CompoundInsn = getCompoundInsn(Context, *Inst, *JumpInst); + if (CompoundInsn) { + DEBUG(dbgs() << "B: " << Inst->getOpcode() << "," + << JumpInst->getOpcode() << " Compounds to " + << CompoundInsn->getOpcode() << "\n"); + J->setInst(CompoundInsn); + MCI.erase(B); + return true; + } + } + BExtended = false; + } + } + JExtended = false; + } + return false; +} +} + +/// tryCompound - Given a bundle check for compound insns when one +/// is found update the contents fo the bundle with the compound insn. +/// If a compound instruction is found then the bundle will have one +/// additional slot. +void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, + MCContext &Context, MCInst &MCI) { + assert(HexagonMCInstrInfo::isBundle(MCI) && + "Non-Bundle where Bundle expected"); + + // By definition a compound must have 2 insn. + if (MCI.size() < 2) + return; + + // Look for compounds until none are found, only update the bundle when + // a compound is found. 
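The packet is rescanned from the top every time lookForCompound succeeds, because fusing a compare/transfer with its jump erases one bundle operand and invalidates the iteration. A toy, self-contained sketch of that fixed-point shape is below; isPairModel, lookForCompoundModel and the int "packet" are invented placeholders, not the MCInst-based code in this file.

#include <cstddef>
#include <vector>

// Stand-in for "these two packet slots form a compound pair".
static bool isPairModel(int A, int B) { return A + B == 0; }

// One pass: merge the first pair found, shrinking the packet by one slot.
static bool lookForCompoundModel(std::vector<int> &Packet) {
  for (std::size_t I = 0; I < Packet.size(); ++I)
    for (std::size_t J = 0; J < Packet.size(); ++J)
      if (I != J && isPairModel(Packet[I], Packet[J])) {
        Packet[I] = 1;                    // the merged "compound" slot
        Packet.erase(Packet.begin() + J); // drop the partner slot
        return true;
      }
  return false;
}

int main() {
  std::vector<int> Packet{3, 7, -3, 2};
  while (lookForCompoundModel(Packet))
    ; // repeat until a full pass finds no pair, as tryCompound does below
  return Packet.size() == 3 ? 0 : 1;
}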
+ while (lookForCompound(MCII, Context, MCI)) + ; + + return; +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp new file mode 100644 index 0000000..e6194f6 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp @@ -0,0 +1,1087 @@ +//===----- HexagonMCDuplexInfo.cpp - Instruction bundle checking ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements duplexing of instructions to reduce code size +// +//===----------------------------------------------------------------------===// + +#include "HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <map> + +using namespace llvm; +using namespace Hexagon; + +#define DEBUG_TYPE "hexagon-mcduplex-info" + +// pair table of subInstructions with opcodes +static const std::pair<unsigned, unsigned> opcodeData[] = { + std::make_pair((unsigned)V4_SA1_addi, 0), + std::make_pair((unsigned)V4_SA1_addrx, 6144), + std::make_pair((unsigned)V4_SA1_addsp, 3072), + std::make_pair((unsigned)V4_SA1_and1, 4608), + std::make_pair((unsigned)V4_SA1_clrf, 6768), + std::make_pair((unsigned)V4_SA1_clrfnew, 6736), + std::make_pair((unsigned)V4_SA1_clrt, 6752), + std::make_pair((unsigned)V4_SA1_clrtnew, 6720), + std::make_pair((unsigned)V4_SA1_cmpeqi, 6400), + std::make_pair((unsigned)V4_SA1_combine0i, 7168), + std::make_pair((unsigned)V4_SA1_combine1i, 7176), + std::make_pair((unsigned)V4_SA1_combine2i, 7184), + std::make_pair((unsigned)V4_SA1_combine3i, 7192), + std::make_pair((unsigned)V4_SA1_combinerz, 7432), + std::make_pair((unsigned)V4_SA1_combinezr, 7424), + std::make_pair((unsigned)V4_SA1_dec, 4864), + std::make_pair((unsigned)V4_SA1_inc, 4352), + std::make_pair((unsigned)V4_SA1_seti, 2048), + std::make_pair((unsigned)V4_SA1_setin1, 6656), + std::make_pair((unsigned)V4_SA1_sxtb, 5376), + std::make_pair((unsigned)V4_SA1_sxth, 5120), + std::make_pair((unsigned)V4_SA1_tfr, 4096), + std::make_pair((unsigned)V4_SA1_zxtb, 5888), + std::make_pair((unsigned)V4_SA1_zxth, 5632), + std::make_pair((unsigned)V4_SL1_loadri_io, 0), + std::make_pair((unsigned)V4_SL1_loadrub_io, 4096), + std::make_pair((unsigned)V4_SL2_deallocframe, 7936), + std::make_pair((unsigned)V4_SL2_jumpr31, 8128), + std::make_pair((unsigned)V4_SL2_jumpr31_f, 8133), + std::make_pair((unsigned)V4_SL2_jumpr31_fnew, 8135), + std::make_pair((unsigned)V4_SL2_jumpr31_t, 8132), + std::make_pair((unsigned)V4_SL2_jumpr31_tnew, 8134), + std::make_pair((unsigned)V4_SL2_loadrb_io, 4096), + std::make_pair((unsigned)V4_SL2_loadrd_sp, 7680), + std::make_pair((unsigned)V4_SL2_loadrh_io, 0), + std::make_pair((unsigned)V4_SL2_loadri_sp, 7168), + std::make_pair((unsigned)V4_SL2_loadruh_io, 2048), + std::make_pair((unsigned)V4_SL2_return, 8000), + std::make_pair((unsigned)V4_SL2_return_f, 8005), + std::make_pair((unsigned)V4_SL2_return_fnew, 8007), + std::make_pair((unsigned)V4_SL2_return_t, 8004), + std::make_pair((unsigned)V4_SL2_return_tnew, 8006), + std::make_pair((unsigned)V4_SS1_storeb_io, 4096), + std::make_pair((unsigned)V4_SS1_storew_io, 0), + std::make_pair((unsigned)V4_SS2_allocframe, 7168), + 
std::make_pair((unsigned)V4_SS2_storebi0, 4608), + std::make_pair((unsigned)V4_SS2_storebi1, 4864), + std::make_pair((unsigned)V4_SS2_stored_sp, 2560), + std::make_pair((unsigned)V4_SS2_storeh_io, 0), + std::make_pair((unsigned)V4_SS2_storew_sp, 2048), + std::make_pair((unsigned)V4_SS2_storewi0, 4096), + std::make_pair((unsigned)V4_SS2_storewi1, 4352)}; + +static std::map<unsigned, unsigned> + subinstOpcodeMap(std::begin(opcodeData), std::end(opcodeData)); + +bool HexagonMCInstrInfo::isDuplexPairMatch(unsigned Ga, unsigned Gb) { + switch (Ga) { + case HexagonII::HSIG_None: + default: + return false; + case HexagonII::HSIG_L1: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_L2: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_S1: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_S2: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_S2 || + Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_A: + return (Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_Compound: + return (Gb == HexagonII::HSIG_Compound); + } + return false; +} + +unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) { + switch (Ga) { + case HexagonII::HSIG_None: + default: + break; + case HexagonII::HSIG_L1: + switch (Gb) { + default: + break; + case HexagonII::HSIG_L1: + return 0; + case HexagonII::HSIG_A: + return 0x4; + } + case HexagonII::HSIG_L2: + switch (Gb) { + default: + break; + case HexagonII::HSIG_L1: + return 0x1; + case HexagonII::HSIG_L2: + return 0x2; + case HexagonII::HSIG_A: + return 0x5; + } + case HexagonII::HSIG_S1: + switch (Gb) { + default: + break; + case HexagonII::HSIG_L1: + return 0x8; + case HexagonII::HSIG_L2: + return 0x9; + case HexagonII::HSIG_S1: + return 0xA; + case HexagonII::HSIG_A: + return 0x6; + } + case HexagonII::HSIG_S2: + switch (Gb) { + default: + break; + case HexagonII::HSIG_L1: + return 0xC; + case HexagonII::HSIG_L2: + return 0xD; + case HexagonII::HSIG_S1: + return 0xB; + case HexagonII::HSIG_S2: + return 0xE; + case HexagonII::HSIG_A: + return 0x7; + } + case HexagonII::HSIG_A: + switch (Gb) { + default: + break; + case HexagonII::HSIG_A: + return 0x3; + } + case HexagonII::HSIG_Compound: + switch (Gb) { + case HexagonII::HSIG_Compound: + return 0xFFFFFFFF; + } + } + return 0xFFFFFFFF; +} + +unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { + unsigned DstReg, PredReg, SrcReg, Src1Reg, Src2Reg; + + switch (MCI.getOpcode()) { + default: + return HexagonII::HSIG_None; + // + // Group L1: + // + // Rd = memw(Rs+#u4:2) + // Rd = memub(Rs+#u4:0) + case Hexagon::L2_loadri_io: + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + // Special case this one from Group L2. 
+ // Rd = memw(r29+#u5:2) + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + if (HexagonMCInstrInfo::isIntReg(SrcReg) && + Hexagon::R29 == SrcReg && inRange<5, 2>(MCI, 2)) { + return HexagonII::HSIG_L2; + } + // Rd = memw(Rs+#u4:2) + if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + inRange<4, 2>(MCI, 2)) { + return HexagonII::HSIG_L1; + } + } + break; + case Hexagon::L2_loadrub_io: + // Rd = memub(Rs+#u4:0) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + inRange<4>(MCI, 2)) { + return HexagonII::HSIG_L1; + } + break; + // + // Group L2: + // + // Rd = memh/memuh(Rs+#u3:1) + // Rd = memb(Rs+#u3:0) + // Rd = memw(r29+#u5:2) - Handled above. + // Rdd = memd(r29+#u5:3) + // deallocframe + // [if ([!]p0[.new])] dealloc_return + // [if ([!]p0[.new])] jumpr r31 + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + // Rd = memh/memuh(Rs+#u3:1) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + inRange<3, 1>(MCI, 2)) { + return HexagonII::HSIG_L2; + } + break; + case Hexagon::L2_loadrb_io: + // Rd = memb(Rs+#u3:0) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + inRange<3>(MCI, 2)) { + return HexagonII::HSIG_L2; + } + break; + case Hexagon::L2_loadrd_io: + // Rdd = memd(r29+#u5:3) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && + inRange<5, 3>(MCI, 2)) { + return HexagonII::HSIG_L2; + } + break; + + case Hexagon::L4_return: + + case Hexagon::L2_deallocframe: + + return HexagonII::HSIG_L2; + case Hexagon::EH_RETURN_JMPR: + + case Hexagon::J2_jumpr: + case Hexagon::JMPret: + // jumpr r31 + // Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>. + DstReg = MCI.getOperand(0).getReg(); + if (Hexagon::R31 == DstReg) { + return HexagonII::HSIG_L2; + } + break; + + case Hexagon::J2_jumprt: + case Hexagon::J2_jumprf: + case Hexagon::J2_jumprtnew: + case Hexagon::J2_jumprfnew: + case Hexagon::JMPrett: + case Hexagon::JMPretf: + case Hexagon::JMPrettnew: + case Hexagon::JMPretfnew: + case Hexagon::JMPrettnewpt: + case Hexagon::JMPretfnewpt: + DstReg = MCI.getOperand(1).getReg(); + SrcReg = MCI.getOperand(0).getReg(); + // [if ([!]p0[.new])] jumpr r31 + if ((HexagonMCInstrInfo::isPredReg(SrcReg) && (Hexagon::P0 == SrcReg)) && + (Hexagon::R31 == DstReg)) { + return HexagonII::HSIG_L2; + } + break; + case Hexagon::L4_return_t: + + case Hexagon::L4_return_f: + + case Hexagon::L4_return_tnew_pnt: + + case Hexagon::L4_return_fnew_pnt: + + case Hexagon::L4_return_tnew_pt: + + case Hexagon::L4_return_fnew_pt: + // [if ([!]p0[.new])] dealloc_return + SrcReg = MCI.getOperand(0).getReg(); + if (Hexagon::P0 == SrcReg) { + return HexagonII::HSIG_L2; + } + break; + // + // Group S1: + // + // memw(Rs+#u4:2) = Rt + // memb(Rs+#u4:0) = Rt + case Hexagon::S2_storeri_io: + // Special case this one from Group S2. 
+ // memw(r29+#u5:2) = Rt + Src1Reg = MCI.getOperand(0).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isIntReg(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && + Hexagon::R29 == Src1Reg && inRange<5, 2>(MCI, 1)) { + return HexagonII::HSIG_S2; + } + // memw(Rs+#u4:2) = Rt + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && + inRange<4, 2>(MCI, 1)) { + return HexagonII::HSIG_S1; + } + break; + case Hexagon::S2_storerb_io: + // memb(Rs+#u4:0) = Rt + Src1Reg = MCI.getOperand(0).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && + inRange<4>(MCI, 1)) { + return HexagonII::HSIG_S1; + } + break; + // + // Group S2: + // + // memh(Rs+#u3:1) = Rt + // memw(r29+#u5:2) = Rt + // memd(r29+#s6:3) = Rtt + // memw(Rs+#u4:2) = #U1 + // memb(Rs+#u4) = #U1 + // allocframe(#u5:3) + case Hexagon::S2_storerh_io: + // memh(Rs+#u3:1) = Rt + Src1Reg = MCI.getOperand(0).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && + inRange<3, 1>(MCI, 1)) { + return HexagonII::HSIG_S2; + } + break; + case Hexagon::S2_storerd_io: + // memd(r29+#s6:3) = Rtt + Src1Reg = MCI.getOperand(0).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(Src2Reg) && + HexagonMCInstrInfo::isIntReg(Src1Reg) && Hexagon::R29 == Src1Reg && + inSRange<6, 3>(MCI, 1)) { + return HexagonII::HSIG_S2; + } + break; + case Hexagon::S4_storeiri_io: + // memw(Rs+#u4:2) = #U1 + Src1Reg = MCI.getOperand(0).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + inRange<4, 2>(MCI, 1) && inRange<1>(MCI, 2)) { + return HexagonII::HSIG_S2; + } + break; + case Hexagon::S4_storeirb_io: + // memb(Rs+#u4) = #U1 + Src1Reg = MCI.getOperand(0).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + inRange<4>(MCI, 1) && inRange<1>(MCI, 2)) { + return HexagonII::HSIG_S2; + } + break; + case Hexagon::S2_allocframe: + if (inRange<5, 3>(MCI, 0)) + return HexagonII::HSIG_S2; + break; + // + // Group A: + // + // Rx = add(Rx,#s7) + // Rd = Rs + // Rd = #u6 + // Rd = #-1 + // if ([!]P0[.new]) Rd = #0 + // Rd = add(r29,#u6:2) + // Rx = add(Rx,Rs) + // P0 = cmp.eq(Rs,#u2) + // Rdd = combine(#0,Rs) + // Rdd = combine(Rs,#0) + // Rdd = combine(#u2,#U2) + // Rd = add(Rs,#1) + // Rd = add(Rs,#-1) + // Rd = sxth/sxtb/zxtb/zxth(Rs) + // Rd = and(Rs,#1) + case Hexagon::A2_addi: + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + // Rd = add(r29,#u6:2) + if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && + inRange<6, 2>(MCI, 2)) { + return HexagonII::HSIG_A; + } + // Rx = add(Rx,#s7) + if (DstReg == SrcReg) { + return HexagonII::HSIG_A; + } + // Rd = add(Rs,#1) + // Rd = add(Rs,#-1) + if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + (minConstant(MCI, 2) == 1 || minConstant(MCI, 2) == -1)) { + return HexagonII::HSIG_A; + } + } + break; + case Hexagon::A2_add: + // Rx = add(Rx,Rs) + DstReg = MCI.getOperand(0).getReg(); + Src1Reg = MCI.getOperand(1).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && (DstReg == Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_andir: + 
DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + (minConstant(MCI, 2) == 1 || minConstant(MCI, 2) == 255)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_tfr: + // Rd = Rs + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_tfrsi: + DstReg = MCI.getOperand(0).getReg(); + + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmovenewit: + case Hexagon::C2_cmoveif: + case Hexagon::C2_cmovenewif: + // if ([!]P0[.new]) Rd = #0 + // Actual form: + // %R16<def> = C2_cmovenewit %P0<internal>, 0, %R16<imp-use,undef>; + DstReg = MCI.getOperand(0).getReg(); // Rd + PredReg = MCI.getOperand(1).getReg(); // P0 + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + Hexagon::P0 == PredReg && minConstant(MCI, 2) == 0) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::C2_cmpeqi: + // P0 = cmp.eq(Rs,#u2) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (Hexagon::P0 == DstReg && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + inRange<2>(MCI, 2)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + // Rdd = combine(#u2,#U2) + DstReg = MCI.getOperand(0).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && + inRange<2>(MCI, 1) && inRange<2>(MCI, 2)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A4_combineri: + // Rdd = combine(Rs,#0) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + minConstant(MCI, 2) == 0) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A4_combineir: + // Rdd = combine(#0,Rs) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + minConstant(MCI, 1) == 0) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_sxtb: + case Hexagon::A2_sxth: + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + // Rd = sxth/sxtb/zxtb/zxth(Rs) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg)) { + return HexagonII::HSIG_A; + } + break; + } + + return HexagonII::HSIG_None; +} + +bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) { + unsigned DstReg, SrcReg; + switch (potentialDuplex.getOpcode()) { + case Hexagon::A2_addi: + // testing for case of: Rx = add(Rx,#s7) + DstReg = potentialDuplex.getOperand(0).getReg(); + SrcReg = potentialDuplex.getOperand(1).getReg(); + if (DstReg == SrcReg && HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + int64_t Value; + if (!potentialDuplex.getOperand(2).getExpr()->evaluateAsAbsolute(Value)) + return true; + if (!isShiftedInt<7, 0>(Value)) + return true; + } + break; + case Hexagon::A2_tfrsi: + DstReg = potentialDuplex.getOperand(0).getReg(); + + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + int64_t Value; + if 
(!potentialDuplex.getOperand(1).getExpr()->evaluateAsAbsolute(Value)) + return true; + // Check for case of Rd = #-1. + if (Value == -1) + return false; + // Check for case of Rd = #u6. + if (!isShiftedUInt<6, 0>(Value)) + return true; + } + break; + default: + break; + } + return false; +} + +/// non-Symmetrical. See if these two instructions are fit for duplex pair. +bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII, + MCInst const &MIa, bool ExtendedA, + MCInst const &MIb, bool ExtendedB, + bool bisReversable) { + // Slot 1 cannot be extended in duplexes PRM 10.5 + if (ExtendedA) + return false; + // Only A2_addi and A2_tfrsi can be extended in duplex form PRM 10.5 + if (ExtendedB) { + unsigned Opcode = MIb.getOpcode(); + if ((Opcode != Hexagon::A2_addi) && (Opcode != Hexagon::A2_tfrsi)) + return false; + } + unsigned MIaG = HexagonMCInstrInfo::getDuplexCandidateGroup(MIa), + MIbG = HexagonMCInstrInfo::getDuplexCandidateGroup(MIb); + + // If a duplex contains 2 insns in the same group, the insns must be + // ordered such that the numerically smaller opcode is in slot 1. + if ((MIaG != HexagonII::HSIG_None) && (MIaG == MIbG) && bisReversable) { + MCInst SubInst0 = HexagonMCInstrInfo::deriveSubInst(MIa); + MCInst SubInst1 = HexagonMCInstrInfo::deriveSubInst(MIb); + + unsigned zeroedSubInstS0 = + subinstOpcodeMap.find(SubInst0.getOpcode())->second; + unsigned zeroedSubInstS1 = + subinstOpcodeMap.find(SubInst1.getOpcode())->second; + + if (zeroedSubInstS0 < zeroedSubInstS1) + // subinstS0 (maps to slot 0) must be greater than + // subinstS1 (maps to slot 1) + return false; + } + + // allocframe must always be in slot 0 + if (MIb.getOpcode() == Hexagon::S2_allocframe) + return false; + + if ((MIaG != HexagonII::HSIG_None) && (MIbG != HexagonII::HSIG_None)) { + // Prevent 2 instructions with extenders from duplexing + // Note that MIb (slot1) can be extended and MIa (slot0) + // can never be extended + if (subInstWouldBeExtended(MIa)) + return false; + + // If duplexing produces an extender, but the original did not + // have an extender, do not duplex. + if (subInstWouldBeExtended(MIb) && !ExtendedB) + return false; + } + + // If jumpr r31 appears, it must be in slot 0, and never slot 1 (MIb). + if (MIbG == HexagonII::HSIG_L2) { + if ((MIb.getNumOperands() > 1) && MIb.getOperand(1).isReg() && + (MIb.getOperand(1).getReg() == Hexagon::R31)) + return false; + if ((MIb.getNumOperands() > 0) && MIb.getOperand(0).isReg() && + (MIb.getOperand(0).getReg() == Hexagon::R31)) + return false; + } + + // If a store appears, it must be in slot 0 (MIa) 1st, and then slot 1 (MIb); + // therefore, not duplexable if slot 1 is a store, and slot 0 is not. + if ((MIbG == HexagonII::HSIG_S1) || (MIbG == HexagonII::HSIG_S2)) { + if ((MIaG != HexagonII::HSIG_S1) && (MIaG != HexagonII::HSIG_S2)) + return false; + } + + return (isDuplexPairMatch(MIaG, MIbG)); +} + +/// Symmetrical. See if these two instructions are fit for duplex pair. 
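The ordered check above is deliberately asymmetric: the second instruction of the pair carries the extra restrictions on extension, jumpr r31, stores and allocframe. The sketch below shows how an ordered, table-driven check turns into the symmetric query defined next; Group, orderedMatch and symmetricMatch are invented names, and the rows only loosely mirror the L1/L2/A cases of isDuplexPairMatch.

#include <cstdint>

enum Group : std::uint8_t { G_None, G_L1, G_L2, G_A };

// Toy compatibility rows modelled on the L1/L2/A cases of isDuplexPairMatch.
static bool orderedMatch(Group Ga, Group Gb) {
  switch (Ga) {
  case G_L1: return Gb == G_L1 || Gb == G_A;
  case G_L2: return Gb == G_L1 || Gb == G_L2 || Gb == G_A;
  case G_A:  return Gb == G_A;
  default:   return false;
  }
}

// The symmetric query is the ordered query tried both ways, which is how
// isDuplexPair is built from isDuplexPairMatch just below.
static bool symmetricMatch(Group Ga, Group Gb) {
  return orderedMatch(Ga, Gb) || orderedMatch(Gb, Ga);
}

int main() {
  // (A, L2) only works in one order, but the symmetric query accepts it.
  return (symmetricMatch(G_A, G_L2) && !orderedMatch(G_A, G_L2)) ? 0 : 1;
}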
+bool HexagonMCInstrInfo::isDuplexPair(MCInst const &MIa, MCInst const &MIb) { + unsigned MIaG = getDuplexCandidateGroup(MIa), + MIbG = getDuplexCandidateGroup(MIb); + return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG)); +} + +inline static void addOps(MCInst &subInstPtr, MCInst const &Inst, + unsigned opNum) { + if (Inst.getOperand(opNum).isReg()) { + switch (Inst.getOperand(opNum).getReg()) { + default: + llvm_unreachable("Not Duplexable Register"); + break; + case Hexagon::R0: + case Hexagon::R1: + case Hexagon::R2: + case Hexagon::R3: + case Hexagon::R4: + case Hexagon::R5: + case Hexagon::R6: + case Hexagon::R7: + case Hexagon::D0: + case Hexagon::D1: + case Hexagon::D2: + case Hexagon::D3: + case Hexagon::R16: + case Hexagon::R17: + case Hexagon::R18: + case Hexagon::R19: + case Hexagon::R20: + case Hexagon::R21: + case Hexagon::R22: + case Hexagon::R23: + case Hexagon::D8: + case Hexagon::D9: + case Hexagon::D10: + case Hexagon::D11: + subInstPtr.addOperand(Inst.getOperand(opNum)); + break; + } + } else + subInstPtr.addOperand(Inst.getOperand(opNum)); +} + +MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { + MCInst Result; + bool Absolute; + int64_t Value; + switch (Inst.getOpcode()) { + default: + // dbgs() << "opcode: "<< Inst->getOpcode() << "\n"; + llvm_unreachable("Unimplemented subinstruction \n"); + break; + case Hexagon::A2_addi: + Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 1) { + Result.setOpcode(Hexagon::V4_SA1_inc); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; + } // 1,2 SUBInst $Rd = add($Rs, #1) + else if (Value == -1) { + Result.setOpcode(Hexagon::V4_SA1_dec); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; + } // 1,2 SUBInst $Rd = add($Rs,#-1) + else if (Inst.getOperand(1).getReg() == Hexagon::R29) { + Result.setOpcode(Hexagon::V4_SA1_addsp); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; + } // 1,3 SUBInst $Rd = add(r29, #$u6_2) + else { + Result.setOpcode(Hexagon::V4_SA1_addi); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; + } // 1,2,3 SUBInst $Rx = add($Rx, #$s7) + case Hexagon::A2_add: + Result.setOpcode(Hexagon::V4_SA1_addrx); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rx = add($_src_, $Rs) + case Hexagon::S2_allocframe: + Result.setOpcode(Hexagon::V4_SS2_allocframe); + addOps(Result, Inst, 0); + break; // 1 SUBInst allocframe(#$u5_3) + case Hexagon::A2_andir: + if (minConstant(Inst, 2) == 255) { + Result.setOpcode(Hexagon::V4_SA1_zxtb); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 $Rd = and($Rs, #255) + } else { + Result.setOpcode(Hexagon::V4_SA1_and1); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = and($Rs, #1) + } + case Hexagon::C2_cmpeqi: + Result.setOpcode(Hexagon::V4_SA1_cmpeqi); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 2,3 SUBInst p0 = cmp.eq($Rs, #$u2) + case Hexagon::A4_combineii: + case Hexagon::A2_combineii: + Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 1) { + Result.setOpcode(Hexagon::V4_SA1_combine1i); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#1, #$u2) + } + if (Value == 3) { + Result.setOpcode(Hexagon::V4_SA1_combine3i); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + 
break; // 1,3 SUBInst $Rdd = combine(#3, #$u2) + } + if (Value == 0) { + Result.setOpcode(Hexagon::V4_SA1_combine0i); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#0, #$u2) + } + if (Value == 2) { + Result.setOpcode(Hexagon::V4_SA1_combine2i); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#2, #$u2) + } + case Hexagon::A4_combineir: + Result.setOpcode(Hexagon::V4_SA1_combinezr); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#0, $Rs) + + case Hexagon::A4_combineri: + Result.setOpcode(Hexagon::V4_SA1_combinerz); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rdd = combine($Rs, #0) + case Hexagon::L4_return_tnew_pnt: + case Hexagon::L4_return_tnew_pt: + Result.setOpcode(Hexagon::V4_SL2_return_tnew); + break; // none SUBInst if (p0.new) dealloc_return:nt + case Hexagon::L4_return_fnew_pnt: + case Hexagon::L4_return_fnew_pt: + Result.setOpcode(Hexagon::V4_SL2_return_fnew); + break; // none SUBInst if (!p0.new) dealloc_return:nt + case Hexagon::L4_return_f: + Result.setOpcode(Hexagon::V4_SL2_return_f); + break; // none SUBInst if (!p0) dealloc_return + case Hexagon::L4_return_t: + Result.setOpcode(Hexagon::V4_SL2_return_t); + break; // none SUBInst if (p0) dealloc_return + case Hexagon::L4_return: + Result.setOpcode(Hexagon::V4_SL2_return); + break; // none SUBInst dealloc_return + case Hexagon::L2_deallocframe: + Result.setOpcode(Hexagon::V4_SL2_deallocframe); + break; // none SUBInst deallocframe + case Hexagon::EH_RETURN_JMPR: + case Hexagon::J2_jumpr: + case Hexagon::JMPret: + Result.setOpcode(Hexagon::V4_SL2_jumpr31); + break; // none SUBInst jumpr r31 + case Hexagon::J2_jumprf: + case Hexagon::JMPretf: + Result.setOpcode(Hexagon::V4_SL2_jumpr31_f); + break; // none SUBInst if (!p0) jumpr r31 + case Hexagon::J2_jumprfnew: + case Hexagon::JMPretfnewpt: + case Hexagon::JMPretfnew: + Result.setOpcode(Hexagon::V4_SL2_jumpr31_fnew); + break; // none SUBInst if (!p0.new) jumpr:nt r31 + case Hexagon::J2_jumprt: + case Hexagon::JMPrett: + Result.setOpcode(Hexagon::V4_SL2_jumpr31_t); + break; // none SUBInst if (p0) jumpr r31 + case Hexagon::J2_jumprtnew: + case Hexagon::JMPrettnewpt: + case Hexagon::JMPrettnew: + Result.setOpcode(Hexagon::V4_SL2_jumpr31_tnew); + break; // none SUBInst if (p0.new) jumpr:nt r31 + case Hexagon::L2_loadrb_io: + Result.setOpcode(Hexagon::V4_SL2_loadrb_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memb($Rs + #$u3_0) + case Hexagon::L2_loadrd_io: + Result.setOpcode(Hexagon::V4_SL2_loadrd_sp); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = memd(r29 + #$u5_3) + case Hexagon::L2_loadrh_io: + Result.setOpcode(Hexagon::V4_SL2_loadrh_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memh($Rs + #$u3_1) + case Hexagon::L2_loadrub_io: + Result.setOpcode(Hexagon::V4_SL1_loadrub_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memub($Rs + #$u4_0) + case Hexagon::L2_loadruh_io: + Result.setOpcode(Hexagon::V4_SL2_loadruh_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memuh($Rs + #$u3_1) + case Hexagon::L2_loadri_io: + if (Inst.getOperand(1).getReg() == Hexagon::R29) { + 
Result.setOpcode(Hexagon::V4_SL2_loadri_sp); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 2 1,3 SUBInst $Rd = memw(r29 + #$u5_2) + } else { + Result.setOpcode(Hexagon::V4_SL1_loadri_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memw($Rs + #$u4_2) + } + case Hexagon::S4_storeirb_io: + Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 0) { + Result.setOpcode(Hexagon::V4_SS2_storebi0); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst memb($Rs + #$u4_0)=#0 + } else if (Value == 1) { + Result.setOpcode(Hexagon::V4_SS2_storebi1); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 2 1,2 SUBInst memb($Rs + #$u4_0)=#1 + } + case Hexagon::S2_storerb_io: + Result.setOpcode(Hexagon::V4_SS1_storeb_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt + case Hexagon::S2_storerd_io: + Result.setOpcode(Hexagon::V4_SS2_stored_sp); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 2,3 SUBInst memd(r29 + #$s6_3) = $Rtt + case Hexagon::S2_storerh_io: + Result.setOpcode(Hexagon::V4_SS2_storeh_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt + case Hexagon::S4_storeiri_io: + Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 0) { + Result.setOpcode(Hexagon::V4_SS2_storewi0); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#0 + } else if (Value == 1) { + Result.setOpcode(Hexagon::V4_SS2_storewi1); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#1 + } else if (Inst.getOperand(0).getReg() == Hexagon::R29) { + Result.setOpcode(Hexagon::V4_SS2_storew_sp); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1 2,3 SUBInst memw(r29 + #$u5_2) = $Rt + } + case Hexagon::S2_storeri_io: + if (Inst.getOperand(0).getReg() == Hexagon::R29) { + Result.setOpcode(Hexagon::V4_SS2_storew_sp); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); // 1,2,3 SUBInst memw(sp + #$u5_2) = $Rt + } else { + Result.setOpcode(Hexagon::V4_SS1_storew_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); // 1,2,3 SUBInst memw($Rs + #$u4_2) = $Rt + } + break; + case Hexagon::A2_sxtb: + Result.setOpcode(Hexagon::V4_SA1_sxtb); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = sxtb($Rs) + case Hexagon::A2_sxth: + Result.setOpcode(Hexagon::V4_SA1_sxth); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = sxth($Rs) + case Hexagon::A2_tfr: + Result.setOpcode(Hexagon::V4_SA1_tfr); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = $Rs + case Hexagon::C2_cmovenewif: + Result.setOpcode(Hexagon::V4_SA1_clrfnew); + addOps(Result, Inst, 0); + break; // 2 SUBInst if (!p0.new) $Rd = #0 + case Hexagon::C2_cmovenewit: + Result.setOpcode(Hexagon::V4_SA1_clrtnew); + addOps(Result, Inst, 0); + break; // 2 SUBInst if (p0.new) $Rd = #0 + case Hexagon::C2_cmoveif: + Result.setOpcode(Hexagon::V4_SA1_clrf); + addOps(Result, Inst, 0); + break; // 2 SUBInst if (!p0) $Rd = #0 + case Hexagon::C2_cmoveit: + Result.setOpcode(Hexagon::V4_SA1_clrt); + addOps(Result, Inst, 0); + break; 
// 2 SUBInst if (p0) $Rd = #0 + case Hexagon::A2_tfrsi: + Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value); + if (Absolute && Value == -1) { + Result.setOpcode(Hexagon::V4_SA1_setin1); + addOps(Result, Inst, 0); + break; // 2 1 SUBInst $Rd = #-1 + } else { + Result.setOpcode(Hexagon::V4_SA1_seti); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = #$u6 + } + case Hexagon::A2_zxtb: + Result.setOpcode(Hexagon::V4_SA1_zxtb); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 $Rd = and($Rs, #255) + + case Hexagon::A2_zxth: + Result.setOpcode(Hexagon::V4_SA1_zxth); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = zxth($Rs) + } + return Result; +} + +static bool isStoreInst(unsigned opCode) { + switch (opCode) { + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerb_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerd_io: + case Hexagon::S4_storeiri_io: + case Hexagon::S4_storeirb_io: + case Hexagon::S2_allocframe: + return true; + default: + return false; + } +} + +SmallVector<DuplexCandidate, 8> +HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII, + MCInst const &MCB) { + assert(isBundle(MCB)); + SmallVector<DuplexCandidate, 8> duplexToTry; + // Use an "order matters" version of isDuplexPair. + unsigned numInstrInPacket = MCB.getNumOperands(); + + for (unsigned distance = 1; distance < numInstrInPacket; ++distance) { + for (unsigned j = HexagonMCInstrInfo::bundleInstructionsOffset, + k = j + distance; + (j < numInstrInPacket) && (k < numInstrInPacket); ++j, ++k) { + + // Check if reversable. + bool bisReversable = true; + if (isStoreInst(MCB.getOperand(j).getInst()->getOpcode()) && + isStoreInst(MCB.getOperand(k).getInst()->getOpcode())) { + DEBUG(dbgs() << "skip out of order write pair: " << k << "," << j + << "\n"); + bisReversable = false; + } + if (HexagonMCInstrInfo::isMemReorderDisabled(MCB)) // }:mem_noshuf + bisReversable = false; + + // Try in order. + if (isOrderedDuplexPair( + MCII, *MCB.getOperand(k).getInst(), + HexagonMCInstrInfo::hasExtenderForIndex(MCB, k - 1), + *MCB.getOperand(j).getInst(), + HexagonMCInstrInfo::hasExtenderForIndex(MCB, j - 1), + bisReversable)) { + // Get iClass. + unsigned iClass = iClassOfDuplexPair( + getDuplexCandidateGroup(*MCB.getOperand(k).getInst()), + getDuplexCandidateGroup(*MCB.getOperand(j).getInst())); + + // Save off pairs for duplex checking. + duplexToTry.push_back(DuplexCandidate(j, k, iClass)); + DEBUG(dbgs() << "adding pair: " << j << "," << k << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + continue; + } else { + DEBUG(dbgs() << "skipping pair: " << j << "," << k << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + } + + // Try reverse. + if (bisReversable) { + if (isOrderedDuplexPair( + MCII, *MCB.getOperand(j).getInst(), + HexagonMCInstrInfo::hasExtenderForIndex(MCB, j - 1), + *MCB.getOperand(k).getInst(), + HexagonMCInstrInfo::hasExtenderForIndex(MCB, k - 1), + bisReversable)) { + // Get iClass. + unsigned iClass = iClassOfDuplexPair( + getDuplexCandidateGroup(*MCB.getOperand(j).getInst()), + getDuplexCandidateGroup(*MCB.getOperand(k).getInst())); + + // Save off pairs for duplex checking. 
+ duplexToTry.push_back(DuplexCandidate(k, j, iClass)); + DEBUG(dbgs() << "adding pair:" << k << "," << j << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + } else { + DEBUG(dbgs() << "skipping pair: " << k << "," << j << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + } + } + } + } + return duplexToTry; +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp new file mode 100644 index 0000000..eaa3550 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -0,0 +1,150 @@ +//=== HexagonMCELFStreamer.cpp - Hexagon subclass of MCELFStreamer -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a stub that parses a MCInst bundle and passes the +// instructions on to the real streamer. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "hexagonmcelfstreamer" + +#include "Hexagon.h" +#include "HexagonMCELFStreamer.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCShuffler.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<unsigned> + GPSize("gpsize", cl::NotHidden, + cl::desc("Global Pointer Addressing Size. The default size is 8."), + cl::Prefix, cl::init(8)); + +void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK, + const MCSubtargetInfo &STI) { + MCInst HMI = HexagonMCInstrInfo::createBundle(); + MCInst *MCB; + + if (MCK.getOpcode() != Hexagon::BUNDLE) { + HMI.addOperand(MCOperand::createInst(&MCK)); + MCB = &HMI; + } else + MCB = const_cast<MCInst *>(&MCK); + + // Examines packet and pad the packet, if needed, when an + // end-loop is in the bundle. + HexagonMCInstrInfo::padEndloop(getContext(), *MCB); + HexagonMCShuffle(*MCII, STI, *MCB); + + assert(HexagonMCInstrInfo::bundleSize(*MCB) <= HEXAGON_PACKET_SIZE); + bool Extended = false; + for (auto &I : HexagonMCInstrInfo::bundleInstructions(*MCB)) { + MCInst *MCI = const_cast<MCInst *>(I.getInst()); + if (Extended) { + if (HexagonMCInstrInfo::isDuplex(*MCII, *MCI)) { + MCInst *SubInst = const_cast<MCInst *>(MCI->getOperand(1).getInst()); + HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *SubInst); + } else { + HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *MCI); + } + Extended = false; + } else { + Extended = HexagonMCInstrInfo::isImmext(*MCI); + } + } + + // At this point, MCB is a bundle + // Iterate through the bundle and assign addends for the instructions + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MCB)) { + MCInst *MCI = const_cast<MCInst *>(I.getInst()); + EmitSymbol(*MCI); + } + MCObjectStreamer::EmitInstruction(*MCB, STI); +} + +void HexagonMCELFStreamer::EmitSymbol(const MCInst &Inst) { + // Scan for values. 
+ for (unsigned i = Inst.getNumOperands(); i--;) + if (Inst.getOperand(i).isExpr()) + visitUsedExpr(*Inst.getOperand(i).getExpr()); +} + +// EmitCommonSymbol and EmitLocalCommonSymbol are extended versions of the +// functions found in MCELFStreamer.cpp taking AccessSize as an additional +// parameter. +void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol, + uint64_t Size, + unsigned ByteAlignment, + unsigned AccessSize) { + getAssembler().registerSymbol(*Symbol); + StringRef sbss[4] = {".sbss.1", ".sbss.2", ".sbss.4", ".sbss.8"}; + + auto ELFSymbol = cast<MCSymbolELF>(Symbol); + if (!ELFSymbol->isBindingSet()) { + ELFSymbol->setBinding(ELF::STB_GLOBAL); + ELFSymbol->setExternal(true); + } + + ELFSymbol->setType(ELF::STT_OBJECT); + + if (ELFSymbol->getBinding() == ELF::STB_LOCAL) { + StringRef SectionName = + ((AccessSize == 0) || (Size == 0) || (Size > GPSize)) + ? ".bss" + : sbss[(Log2_64(AccessSize))]; + + MCSection *CrntSection = getCurrentSection().first; + MCSection *Section = getAssembler().getContext().getELFSection( + SectionName, ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); + SwitchSection(Section); + AssignFragment(Symbol, getCurrentFragment()); + + MCELFStreamer::EmitCommonSymbol(Symbol, Size, ByteAlignment); + SwitchSection(CrntSection); + } else { + if (ELFSymbol->declareCommon(Size, ByteAlignment)) + report_fatal_error("Symbol: " + Symbol->getName() + + " redeclared as different type"); + if ((AccessSize) && (Size <= GPSize)) { + uint64_t SectionIndex = + (AccessSize <= GPSize) + ? ELF::SHN_HEXAGON_SCOMMON + (Log2_64(AccessSize) + 1) + : (unsigned)ELF::SHN_HEXAGON_SCOMMON; + ELFSymbol->setIndex(SectionIndex); + } + } + + ELFSymbol->setSize(MCConstantExpr::create(Size, getContext())); +} + +void HexagonMCELFStreamer::HexagonMCEmitLocalCommonSymbol( + MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment, + unsigned AccessSize) { + getAssembler().registerSymbol(*Symbol); + auto ELFSymbol = cast<MCSymbolELF>(Symbol); + ELFSymbol->setBinding(ELF::STB_LOCAL); + ELFSymbol->setExternal(false); + HexagonMCEmitCommonSymbol(Symbol, Size, ByteAlignment, AccessSize); +} + +namespace llvm { +MCStreamer *createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB, + raw_pwrite_stream &OS, MCCodeEmitter *CE) { + return new HexagonMCELFStreamer(Context, MAB, OS, CE); +} +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h new file mode 100644 index 0000000..d77c0cd --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h @@ -0,0 +1,45 @@ +//===- HexagonMCELFStreamer.h - Hexagon subclass of MCElfStreamer ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONMCELFSTREAMER_H +#define HEXAGONMCELFSTREAMER_H + +#include "MCTargetDesc/HexagonMCCodeEmitter.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/MC/MCELFStreamer.h" +#include "HexagonTargetStreamer.h" + +namespace llvm { + +class HexagonMCELFStreamer : public MCELFStreamer { + std::unique_ptr<MCInstrInfo> MCII; + +public: + HexagonMCELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter) + : MCELFStreamer(Context, TAB, OS, Emitter), + MCII(createHexagonMCInstrInfo()) {} + + virtual void EmitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) override; + void EmitSymbol(const MCInst &Inst); + void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, + unsigned AccessSize); + void HexagonMCEmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, unsigned AccessSize); +}; + +MCStreamer *createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB, + raw_pwrite_stream &OS, MCCodeEmitter *CE); + +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp new file mode 100644 index 0000000..fc62626 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp @@ -0,0 +1,49 @@ +//===-- HexagonMCExpr.cpp - Hexagon specific MC expression classes +//----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HexagonMCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-mcexpr" + +HexagonNoExtendOperand *HexagonNoExtendOperand::Create(MCExpr const *Expr, + MCContext &Ctx) { + return new (Ctx) HexagonNoExtendOperand(Expr); +} + +bool HexagonNoExtendOperand::evaluateAsRelocatableImpl( + MCValue &Res, MCAsmLayout const *Layout, MCFixup const *Fixup) const { + return Expr->evaluateAsRelocatable(Res, Layout, Fixup); +} + +void HexagonNoExtendOperand::visitUsedExpr(MCStreamer &Streamer) const {} + +MCFragment *llvm::HexagonNoExtendOperand::findAssociatedFragment() const { + return Expr->findAssociatedFragment(); +} + +void HexagonNoExtendOperand::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} + +MCExpr const *HexagonNoExtendOperand::getExpr() const { return Expr; } + +bool HexagonNoExtendOperand::classof(MCExpr const *E) { + return E->getKind() == MCExpr::Target; +} + +HexagonNoExtendOperand::HexagonNoExtendOperand(MCExpr const *Expr) + : Expr(Expr) {} + +void HexagonNoExtendOperand::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { + Expr->print(OS, MAI); +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h new file mode 100644 index 0000000..60f180f --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h @@ -0,0 +1,35 @@ +//==- HexagonMCExpr.h - Hexagon specific MC expression classes --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H + +#include "llvm/MC/MCExpr.h" + +namespace llvm { +class MCInst; +class HexagonNoExtendOperand : public MCTargetExpr { +public: + static HexagonNoExtendOperand *Create(MCExpr const *Expr, MCContext &Ctx); + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, + const MCFixup *Fixup) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; + MCFragment *findAssociatedFragment() const override; + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; + static bool classof(MCExpr const *E); + MCExpr const *getExpr() const; + +private: + HexagonNoExtendOperand(MCExpr const *Expr); + MCExpr const *Expr; +}; +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp new file mode 100644 index 0000000..e684207 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -0,0 +1,648 @@ +//===- HexagonMCInstrInfo.cpp - Hexagon sub-class of MCInst ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class extends MCInstrInfo to allow Hexagon specific MCInstr queries +// +//===----------------------------------------------------------------------===// + +#include "HexagonMCInstrInfo.h" + +#include "Hexagon.h" +#include "HexagonBaseInfo.h" +#include "HexagonMCChecker.h" + +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" + +namespace llvm { +void HexagonMCInstrInfo::addConstant(MCInst &MI, uint64_t Value, + MCContext &Context) { + MI.addOperand(MCOperand::createExpr(MCConstantExpr::create(Value, Context))); +} + +void HexagonMCInstrInfo::addConstExtender(MCContext &Context, + MCInstrInfo const &MCII, MCInst &MCB, + MCInst const &MCI) { + assert(HexagonMCInstrInfo::isBundle(MCB)); + MCOperand const &exOp = + MCI.getOperand(HexagonMCInstrInfo::getExtendableOp(MCII, MCI)); + + // Create the extender. + MCInst *XMCI = + new (Context) MCInst(HexagonMCInstrInfo::deriveExtender(MCII, MCI, exOp)); + + MCB.addOperand(MCOperand::createInst(XMCI)); +} + +iterator_range<MCInst::const_iterator> +HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) { + assert(isBundle(MCI)); + return make_range(MCI.begin() + bundleInstructionsOffset, MCI.end()); +} + +size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) { + if (HexagonMCInstrInfo::isBundle(MCI)) + return (MCI.size() - bundleInstructionsOffset); + else + return (1); +} + +bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, + MCContext &Context, MCInst &MCB, + HexagonMCChecker *Check) { + // Examine the packet and convert pairs of instructions to compound + // instructions when possible. + if (!HexagonDisableCompound) + HexagonMCInstrInfo::tryCompound(MCII, Context, MCB); + // Check the bundle for errors. + bool CheckOk = Check ? 
Check->check() : true; + if (!CheckOk) + return false; + HexagonMCShuffle(MCII, STI, MCB); + // Examine the packet and convert pairs of instructions to duplex + // instructions when possible. + MCInst InstBundlePreDuplex = MCInst(MCB); + if (!HexagonDisableDuplex) { + SmallVector<DuplexCandidate, 8> possibleDuplexes; + possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(MCII, MCB); + HexagonMCShuffle(MCII, STI, Context, MCB, possibleDuplexes); + } + // Examines packet and pad the packet, if needed, when an + // end-loop is in the bundle. + HexagonMCInstrInfo::padEndloop(Context, MCB); + // If compounding and duplexing didn't reduce the size below + // 4 or less we have a packet that is too big. + if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) + return false; + HexagonMCShuffle(MCII, STI, MCB); + return true; +} + +void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, + MCContext &Context, MCInst &MCI) { + assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) || + HexagonMCInstrInfo::isExtended(MCII, MCI)); + MCOperand &exOp = + MCI.getOperand(HexagonMCInstrInfo::getExtendableOp(MCII, MCI)); + // If the extended value is a constant, then use it for the extended and + // for the extender instructions, masking off the lower 6 bits and + // including the assumed bits. + int64_t Value; + if (exOp.getExpr()->evaluateAsAbsolute(Value)) { + unsigned Shift = HexagonMCInstrInfo::getExtentAlignment(MCII, MCI); + exOp.setExpr(MCConstantExpr::create((Value & 0x3f) << Shift, Context)); + } +} + +MCInst HexagonMCInstrInfo::createBundle() { + MCInst Result; + Result.setOpcode(Hexagon::BUNDLE); + Result.addOperand(MCOperand::createImm(0)); + return Result; +} + +MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass, + MCInst const &inst0, + MCInst const &inst1) { + assert((iClass <= 0xf) && "iClass must have range of 0 to 0xf"); + MCInst *duplexInst = new (Context) MCInst; + duplexInst->setOpcode(Hexagon::DuplexIClass0 + iClass); + + MCInst *SubInst0 = new (Context) MCInst(deriveSubInst(inst0)); + MCInst *SubInst1 = new (Context) MCInst(deriveSubInst(inst1)); + duplexInst->addOperand(MCOperand::createInst(SubInst0)); + duplexInst->addOperand(MCOperand::createInst(SubInst1)); + return duplexInst; +} + +MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII, + MCInst const &Inst, + MCOperand const &MO) { + assert(HexagonMCInstrInfo::isExtendable(MCII, Inst) || + HexagonMCInstrInfo::isExtended(MCII, Inst)); + + MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, Inst); + MCInst XMI; + XMI.setOpcode((Desc.isBranch() || Desc.isCall() || + HexagonMCInstrInfo::getType(MCII, Inst) == HexagonII::TypeCR) + ? 
Hexagon::A4_ext_b + : Hexagon::A4_ext); + if (MO.isImm()) + XMI.addOperand(MCOperand::createImm(MO.getImm() & (~0x3f))); + else if (MO.isExpr()) + XMI.addOperand(MCOperand::createExpr(MO.getExpr())); + else + llvm_unreachable("invalid extendable operand"); + return XMI; +} + +MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB, + size_t Index) { + assert(Index <= bundleSize(MCB)); + if (Index == 0) + return nullptr; + MCInst const *Inst = + MCB.getOperand(Index + bundleInstructionsOffset - 1).getInst(); + if (isImmext(*Inst)) + return Inst; + return nullptr; +} + +void HexagonMCInstrInfo::extendIfNeeded(MCContext &Context, + MCInstrInfo const &MCII, MCInst &MCB, + MCInst const &MCI, bool MustExtend) { + if (isConstExtended(MCII, MCI) || MustExtend) + addConstExtender(Context, MCII, MCB, MCI); +} + +HexagonII::MemAccessSize +HexagonMCInstrInfo::getAccessSize(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + + return (HexagonII::MemAccessSize((F >> HexagonII::MemAccessSizePos) & + HexagonII::MemAccesSizeMask)); +} + +unsigned HexagonMCInstrInfo::getBitCount(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask); +} + +// Return constant extended operand number. +unsigned short HexagonMCInstrInfo::getCExtOpNum(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); +} + +MCInstrDesc const &HexagonMCInstrInfo::getDesc(MCInstrInfo const &MCII, + MCInst const &MCI) { + return (MCII.get(MCI.getOpcode())); +} + +unsigned short HexagonMCInstrInfo::getExtendableOp(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); +} + +MCOperand const & +HexagonMCInstrInfo::getExtendableOperand(MCInstrInfo const &MCII, + MCInst const &MCI) { + unsigned O = HexagonMCInstrInfo::getExtendableOp(MCII, MCI); + MCOperand const &MO = MCI.getOperand(O); + + assert((HexagonMCInstrInfo::isExtendable(MCII, MCI) || + HexagonMCInstrInfo::isExtended(MCII, MCI)) && + (MO.isImm() || MO.isExpr())); + return (MO); +} + +unsigned HexagonMCInstrInfo::getExtentAlignment(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtentAlignPos) & HexagonII::ExtentAlignMask); +} + +unsigned HexagonMCInstrInfo::getExtentBits(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask); +} + +// Return the max value that a constant extendable operand can have +// without being extended. +int HexagonMCInstrInfo::getMaxValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + unsigned isSigned = + (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return ~(-1U << (bits - 1)); + else + return ~(-1U << bits); +} + +// Return the min value that a constant extendable operand can have +// without being extended. 
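getMaxValue above and getMinValue below turn the ExtentBits/ExtentSigned fields of TSFlags into the largest and smallest values an operand can hold without a constant extender. The bit tricks reduce to the ordinary two's-complement bounds; the standalone restatement below uses invented names (fieldMin, fieldMax) and example widths (s8, u6) rather than any particular instruction's extent.

#include <cassert>
#include <cstdint>

// Range encodable in an immediate field of 'Bits' bits.
static std::int64_t fieldMin(unsigned Bits, bool IsSigned) {
  return IsSigned ? -(std::int64_t(1) << (Bits - 1)) : 0;
}
static std::int64_t fieldMax(unsigned Bits, bool IsSigned) {
  return IsSigned ? (std::int64_t(1) << (Bits - 1)) - 1
                  : (std::int64_t(1) << Bits) - 1;
}

int main() {
  assert(fieldMin(8, true) == -128 && fieldMax(8, true) == 127); // s8
  assert(fieldMin(6, false) == 0 && fieldMax(6, false) == 63);   // u6
  return 0;
}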
+int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + unsigned isSigned = + (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return -1U << (bits - 1); + else + return 0; +} + +char const *HexagonMCInstrInfo::getName(MCInstrInfo const &MCII, + MCInst const &MCI) { + return MCII.getName(MCI.getOpcode()); +} + +unsigned short HexagonMCInstrInfo::getNewValueOp(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask); +} + +MCOperand const &HexagonMCInstrInfo::getNewValueOperand(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + unsigned const O = + (F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask; + MCOperand const &MCO = MCI.getOperand(O); + + assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) || + HexagonMCInstrInfo::hasNewValue(MCII, MCI)) && + MCO.isReg()); + return (MCO); +} + +/// Return the new value or the newly produced value. +unsigned short HexagonMCInstrInfo::getNewValueOp2(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::NewValueOpPos2) & HexagonII::NewValueOpMask2); +} + +MCOperand const & +HexagonMCInstrInfo::getNewValueOperand2(MCInstrInfo const &MCII, + MCInst const &MCI) { + unsigned O = HexagonMCInstrInfo::getNewValueOp2(MCII, MCI); + MCOperand const &MCO = MCI.getOperand(O); + + assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) || + HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) && + MCO.isReg()); + return (MCO); +} + +int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + + HexagonII::SubTarget Target = static_cast<HexagonII::SubTarget>( + (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask); + + switch (Target) { + default: + return Hexagon::ArchV4; + case HexagonII::HasV5SubT: + return Hexagon::ArchV5; + } +} + +// Return the Hexagon ISA class for the insn. +unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + + return ((F >> HexagonII::TypePos) & HexagonII::TypeMask); +} + +unsigned HexagonMCInstrInfo::getUnits(MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, + MCInst const &MCI) { + + const InstrItinerary *II = STI.getSchedModel().InstrItineraries; + int SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass(); + return ((II[SchedClass].FirstStage + HexagonStages)->getUnits()); +} + +bool HexagonMCInstrInfo::hasImmExt(MCInst const &MCI) { + if (!HexagonMCInstrInfo::isBundle(MCI)) + return false; + + for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) { + auto MI = I.getInst(); + if (isImmext(*MI)) + return true; + } + + return false; +} + +bool HexagonMCInstrInfo::hasExtenderForIndex(MCInst const &MCB, size_t Index) { + return extenderForIndex(MCB, Index) != nullptr; +} + +// Return whether the instruction is a legal new-value producer. 
+bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask); +} + +/// Return whether the insn produces a second value. +bool HexagonMCInstrInfo::hasNewValue2(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::hasNewValuePos2) & HexagonII::hasNewValueMask2); +} + +MCInst const &HexagonMCInstrInfo::instruction(MCInst const &MCB, size_t Index) { + assert(isBundle(MCB)); + assert(Index < HEXAGON_PACKET_SIZE); + return *MCB.getOperand(bundleInstructionsOffset + Index).getInst(); +} + +bool HexagonMCInstrInfo::isBundle(MCInst const &MCI) { + auto Result = Hexagon::BUNDLE == MCI.getOpcode(); + assert(!Result || (MCI.size() > 0 && MCI.getOperand(0).isImm())); + return Result; +} + +// Return whether the insn is an actual insn. +bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) { + return (!HexagonMCInstrInfo::getDesc(MCII, MCI).isPseudo() && + !HexagonMCInstrInfo::isPrefix(MCII, MCI) && + HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP); +} + +bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII, + MCInst const &MCI) { + return (getType(MCII, MCI) == HexagonII::TypeCOMPOUND); +} + +bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) { + return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) || + (Reg >= Hexagon::D8 && Reg <= Hexagon::D11)); +} + +bool HexagonMCInstrInfo::isDuplex(MCInstrInfo const &MCII, MCInst const &MCI) { + return HexagonII::TypeDUPLEX == HexagonMCInstrInfo::getType(MCII, MCI); +} + +// Return whether the instruction needs to be constant extended. +// 1) Always return true if the instruction has 'isExtended' flag set. +// +// isExtendable: +// 2) For immediate extended operands, return true only if the value is +// out-of-range. +// 3) For global address, always return true. + +bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII, + MCInst const &MCI) { + if (HexagonMCInstrInfo::isExtended(MCII, MCI)) + return true; + // Branch insns are handled as necessary by relaxation. + if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeJ) || + (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCOMPOUND && + HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()) || + (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV && + HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch())) + return false; + // Otherwise loop instructions and other CR insts are handled by relaxation + else if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR) && + (MCI.getOpcode() != Hexagon::C4_addipc)) + return false; + else if (!HexagonMCInstrInfo::isExtendable(MCII, MCI)) + return false; + + MCOperand const &MO = HexagonMCInstrInfo::getExtendableOperand(MCII, MCI); + + // We could be using an instruction with an extendable immediate and shoehorn + // a global address into it. If it is a global address it will be constant + // extended. We do this for COMBINE. + // We currently only handle isGlobal() because it is the only kind of + // object we are going to end up with here for now. + // In the future we probably should add isSymbol(), etc. 
+ assert(!MO.isImm()); + int64_t Value; + if (!MO.getExpr()->evaluateAsAbsolute(Value)) + return true; + int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI); + int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI); + return (MinValue > Value || Value > MaxValue); +} + +bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; +} + +bool HexagonMCInstrInfo::isExtended(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; +} + +bool HexagonMCInstrInfo::isFloat(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::FPPos) & HexagonII::FPMask); +} + +bool HexagonMCInstrInfo::isImmext(MCInst const &MCI) { + auto Op = MCI.getOpcode(); + return (Op == Hexagon::A4_ext_b || Op == Hexagon::A4_ext_c || + Op == Hexagon::A4_ext_g || Op == Hexagon::A4_ext); +} + +bool HexagonMCInstrInfo::isInnerLoop(MCInst const &MCI) { + assert(isBundle(MCI)); + int64_t Flags = MCI.getOperand(0).getImm(); + return (Flags & innerLoopMask) != 0; +} + +bool HexagonMCInstrInfo::isIntReg(unsigned Reg) { + return (Reg >= Hexagon::R0 && Reg <= Hexagon::R31); +} + +bool HexagonMCInstrInfo::isIntRegForSubInst(unsigned Reg) { + return ((Reg >= Hexagon::R0 && Reg <= Hexagon::R7) || + (Reg >= Hexagon::R16 && Reg <= Hexagon::R23)); +} + +// Return whether the insn is a new-value consumer. +bool HexagonMCInstrInfo::isNewValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); +} + +// Return whether the operand can be constant extended. +bool HexagonMCInstrInfo::isOperandExtended(MCInstrInfo const &MCII, + MCInst const &MCI, + unsigned short OperandNum) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) == + OperandNum; +} + +bool HexagonMCInstrInfo::isOuterLoop(MCInst const &MCI) { + assert(isBundle(MCI)); + int64_t Flags = MCI.getOperand(0).getImm(); + return (Flags & outerLoopMask) != 0; +} + +bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); +} + +bool HexagonMCInstrInfo::isPredicateLate(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return (F >> HexagonII::PredicateLatePos & HexagonII::PredicateLateMask); +} + +/// Return whether the insn is newly predicated. 
+bool HexagonMCInstrInfo::isPredicatedNew(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask); +} + +bool HexagonMCInstrInfo::isPredicatedTrue(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ( + !((F >> HexagonII::PredicatedFalsePos) & HexagonII::PredicatedFalseMask)); +} + +bool HexagonMCInstrInfo::isPredReg(unsigned Reg) { + return (Reg >= Hexagon::P0 && Reg <= Hexagon::P3_0); +} + +bool HexagonMCInstrInfo::isPrefix(MCInstrInfo const &MCII, MCInst const &MCI) { + return (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypePREFIX); +} + +bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask); +} + +bool HexagonMCInstrInfo::isMemReorderDisabled(MCInst const &MCI) { + assert(isBundle(MCI)); + auto Flags = MCI.getOperand(0).getImm(); + return (Flags & memReorderDisabledMask) != 0; +} + +bool HexagonMCInstrInfo::isMemStoreReorderEnabled(MCInst const &MCI) { + assert(isBundle(MCI)); + auto Flags = MCI.getOperand(0).getImm(); + return (Flags & memStoreReorderEnabledMask) != 0; +} + +bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask); +} + +bool HexagonMCInstrInfo::isSoloAin1(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::SoloAin1Pos) & HexagonII::SoloAin1Mask); +} + +bool HexagonMCInstrInfo::isVector(MCInstrInfo const &MCII, MCInst const &MCI) { + if ((getType(MCII, MCI) <= HexagonII::TypeCVI_LAST) && + (getType(MCII, MCI) >= HexagonII::TypeCVI_FIRST)) + return true; + return false; +} + +int64_t HexagonMCInstrInfo::minConstant(MCInst const &MCI, size_t Index) { + auto Sentinal = static_cast<int64_t>(std::numeric_limits<uint32_t>::max()) + << 8; + if (MCI.size() <= Index) + return Sentinal; + MCOperand const &MCO = MCI.getOperand(Index); + if (!MCO.isExpr()) + return Sentinal; + int64_t Value; + if (!MCO.getExpr()->evaluateAsAbsolute(Value)) + return Sentinal; + return Value; +} + +void HexagonMCInstrInfo::padEndloop(MCContext &Context, MCInst &MCB) { + MCInst Nop; + Nop.setOpcode(Hexagon::A2_nop); + assert(isBundle(MCB)); + while ((HexagonMCInstrInfo::isInnerLoop(MCB) && + (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_INNER_SIZE)) || + ((HexagonMCInstrInfo::isOuterLoop(MCB) && + (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_OUTER_SIZE)))) + MCB.addOperand(MCOperand::createInst(new (Context) MCInst(Nop))); +} + +bool HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII, + MCInst const &MCI) { + if (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR) + return false; + + unsigned SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123: + case Hexagon::Sched::ALU64_tc_2_SLOT23: + case Hexagon::Sched::ALU64_tc_3x_SLOT23: + case Hexagon::Sched::M_tc_2_SLOT23: + case Hexagon::Sched::M_tc_3x_SLOT23: + case Hexagon::Sched::S_2op_tc_2_SLOT23: + case Hexagon::Sched::S_3op_tc_2_SLOT23: + case Hexagon::Sched::S_3op_tc_3x_SLOT23: + return true; + } + return false; +} + +void 
HexagonMCInstrInfo::replaceDuplex(MCContext &Context, MCInst &MCB, + DuplexCandidate Candidate) { + assert(Candidate.packetIndexI < MCB.size()); + assert(Candidate.packetIndexJ < MCB.size()); + assert(isBundle(MCB)); + MCInst *Duplex = + deriveDuplex(Context, Candidate.iClass, + *MCB.getOperand(Candidate.packetIndexJ).getInst(), + *MCB.getOperand(Candidate.packetIndexI).getInst()); + assert(Duplex != nullptr); + MCB.getOperand(Candidate.packetIndexI).setInst(Duplex); + MCB.erase(MCB.begin() + Candidate.packetIndexJ); +} + +void HexagonMCInstrInfo::setInnerLoop(MCInst &MCI) { + assert(isBundle(MCI)); + MCOperand &Operand = MCI.getOperand(0); + Operand.setImm(Operand.getImm() | innerLoopMask); +} + +void HexagonMCInstrInfo::setMemReorderDisabled(MCInst &MCI) { + assert(isBundle(MCI)); + MCOperand &Operand = MCI.getOperand(0); + Operand.setImm(Operand.getImm() | memReorderDisabledMask); + assert(isMemReorderDisabled(MCI)); +} + +void HexagonMCInstrInfo::setMemStoreReorderEnabled(MCInst &MCI) { + assert(isBundle(MCI)); + MCOperand &Operand = MCI.getOperand(0); + Operand.setImm(Operand.getImm() | memStoreReorderEnabledMask); + assert(isMemStoreReorderEnabled(MCI)); +} + +void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) { + assert(isBundle(MCI)); + MCOperand &Operand = MCI.getOperand(0); + Operand.setImm(Operand.getImm() | outerLoopMask); +} +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h new file mode 100644 index 0000000..0237b28 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -0,0 +1,289 @@ +//===- HexagonMCInstrInfo.cpp - Utility functions on Hexagon MCInsts ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Utility functions for Hexagon specific MCInst queries +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H +#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H + +#include "HexagonMCExpr.h" +#include "llvm/MC/MCInst.h" + +namespace llvm { +class HexagonMCChecker; +class MCContext; +class MCInstrDesc; +class MCInstrInfo; +class MCInst; +class MCOperand; +class MCSubtargetInfo; +namespace HexagonII { +enum class MemAccessSize; +} +class DuplexCandidate { +public: + unsigned packetIndexI, packetIndexJ, iClass; + DuplexCandidate(unsigned i, unsigned j, unsigned iClass) + : packetIndexI(i), packetIndexJ(j), iClass(iClass) {} +}; +namespace HexagonMCInstrInfo { +size_t const innerLoopOffset = 0; +int64_t const innerLoopMask = 1 << innerLoopOffset; + +size_t const outerLoopOffset = 1; +int64_t const outerLoopMask = 1 << outerLoopOffset; + +// do not reorder memory load/stores by default load/stores are re-ordered +// and by default loads can be re-ordered +size_t const memReorderDisabledOffset = 2; +int64_t const memReorderDisabledMask = 1 << memReorderDisabledOffset; + +// allow re-ordering of memory stores by default stores cannot be re-ordered +size_t const memStoreReorderEnabledOffset = 3; +int64_t const memStoreReorderEnabledMask = 1 << memStoreReorderEnabledOffset; + +size_t const bundleInstructionsOffset = 1; + +void addConstant(MCInst &MI, uint64_t Value, MCContext &Context); +void addConstExtender(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB, + MCInst const &MCI); + +// Returns a iterator range of instructions in this bundle +iterator_range<MCInst::const_iterator> bundleInstructions(MCInst const &MCI); + +// Returns the number of instructions in the bundle +size_t bundleSize(MCInst const &MCI); + +// Put the packet in to canonical form, compound, duplex, pad, and shuffle +bool canonicalizePacket(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCContext &Context, MCInst &MCB, + HexagonMCChecker *Checker); + +// Clamp off upper 26 bits of extendable operand for emission +void clampExtended(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI); + +MCInst createBundle(); + +// Return the extender for instruction at Index or nullptr if none +MCInst const *extenderForIndex(MCInst const &MCB, size_t Index); +void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB, + MCInst const &MCI, bool MustExtend); + +// Create a duplex instruction given the two subinsts +MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0, + MCInst const &inst1); +MCInst deriveExtender(MCInstrInfo const &MCII, MCInst const &Inst, + MCOperand const &MO); + +// Convert this instruction in to a duplex subinst +MCInst deriveSubInst(MCInst const &Inst); + +// Return the extender for instruction at Index or nullptr if none +MCInst const *extenderForIndex(MCInst const &MCB, size_t Index); + +// Return memory access size +HexagonII::MemAccessSize getAccessSize(MCInstrInfo const &MCII, + MCInst const &MCI); + +// Return number of bits in the constant extended operand. +unsigned getBitCount(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return constant extended operand number. 
+unsigned short getCExtOpNum(MCInstrInfo const &MCII, MCInst const &MCI); + +MCInstrDesc const &getDesc(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return which duplex group this instruction belongs to +unsigned getDuplexCandidateGroup(MCInst const &MI); + +// Return a list of all possible instruction duplex combinations +SmallVector<DuplexCandidate, 8> getDuplexPossibilties(MCInstrInfo const &MCII, + MCInst const &MCB); + +// Return the index of the extendable operand +unsigned short getExtendableOp(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return a reference to the extendable operand +MCOperand const &getExtendableOperand(MCInstrInfo const &MCII, + MCInst const &MCI); + +// Return the implicit alignment of the extendable operand +unsigned getExtentAlignment(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return the number of logical bits of the extendable operand +unsigned getExtentBits(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return the max value that a constant extendable operand can have +// without being extended. +int getMaxValue(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return the min value that a constant extendable operand can have +// without being extended. +int getMinValue(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return instruction name +char const *getName(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return the operand index for the new value. +unsigned short getNewValueOp(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return the operand that consumes or produces a new value. +MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI); +unsigned short getNewValueOp2(MCInstrInfo const &MCII, MCInst const &MCI); +MCOperand const &getNewValueOperand2(MCInstrInfo const &MCII, + MCInst const &MCI); + +int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return the Hexagon ISA class for the insn. +unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI); + +/// Return the slots used by the insn. +unsigned getUnits(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst const &MCI); + +// Does the packet have an extender for the instruction at Index +bool hasExtenderForIndex(MCInst const &MCB, size_t Index); + +bool hasImmExt(MCInst const &MCI); + +// Return whether the instruction is a legal new-value producer. +bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI); +bool hasNewValue2(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return the instruction at Index +MCInst const &instruction(MCInst const &MCB, size_t Index); + +// Returns whether this MCInst is a wellformed bundle +bool isBundle(MCInst const &MCI); + +// Return whether the insn is an actual insn. +bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI); +bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return the duplex iclass given the two duplex classes +unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb); + +int64_t minConstant(MCInst const &MCI, size_t Index); +template <unsigned N, unsigned S> +bool inRange(MCInst const &MCI, size_t Index) { + return isShiftedUInt<N, S>(minConstant(MCI, Index)); +} +template <unsigned N, unsigned S> +bool inSRange(MCInst const &MCI, size_t Index) { + return isShiftedInt<N, S>(minConstant(MCI, Index)); +} +template <unsigned N> bool inRange(MCInst const &MCI, size_t Index) { + return isUInt<N>(minConstant(MCI, Index)); +} + +// Return whether the instruction needs to be constant extended. 
+bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI); + +// Is this double register suitable for use in a duplex subinst +bool isDblRegForSubInst(unsigned Reg); + +// Is this a duplex instruction +bool isDuplex(MCInstrInfo const &MCII, MCInst const &MCI); + +// Can these instructions be duplexed +bool isDuplexPair(MCInst const &MIa, MCInst const &MIb); + +// Can these duplex classes be combine in to a duplex instruction +bool isDuplexPairMatch(unsigned Ga, unsigned Gb); + +// Return true if the insn may be extended based on the operand value. +bool isExtendable(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return whether the instruction must be always extended. +bool isExtended(MCInstrInfo const &MCII, MCInst const &MCI); + +/// Return whether it is a floating-point insn. +bool isFloat(MCInstrInfo const &MCII, MCInst const &MCI); + +// Returns whether this instruction is an immediate extender +bool isImmext(MCInst const &MCI); + +// Returns whether this bundle is an endloop0 +bool isInnerLoop(MCInst const &MCI); + +// Is this an integer register +bool isIntReg(unsigned Reg); + +// Is this register suitable for use in a duplex subinst +bool isIntRegForSubInst(unsigned Reg); +bool isMemReorderDisabled(MCInst const &MCI); +bool isMemStoreReorderEnabled(MCInst const &MCI); + +// Return whether the insn is a new-value consumer. +bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return true if the operand can be constant extended. +bool isOperandExtended(MCInstrInfo const &MCII, MCInst const &MCI, + unsigned short OperandNum); + +// Can these two instructions be duplexed +bool isOrderedDuplexPair(MCInstrInfo const &MCII, MCInst const &MIa, + bool ExtendedA, MCInst const &MIb, bool ExtendedB, + bool bisReversable); + +// Returns whether this bundle is an endloop1 +bool isOuterLoop(MCInst const &MCI); + +// Return whether this instruction is predicated +bool isPredicated(MCInstrInfo const &MCII, MCInst const &MCI); +bool isPredicateLate(MCInstrInfo const &MCII, MCInst const &MCI); +bool isPredicatedNew(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return whether the predicate sense is true +bool isPredicatedTrue(MCInstrInfo const &MCII, MCInst const &MCI); + +// Is this a predicate register +bool isPredReg(unsigned Reg); + +// Return whether the insn is a prefix. +bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return whether the insn is solo, i.e., cannot be in a packet. +bool isSolo(MCInstrInfo const &MCII, MCInst const &MCI); + +/// Return whether the insn can be packaged only with A and X-type insns. +bool isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI); + +/// Return whether the insn can be packaged only with an A-type insn in slot #1. 
+bool isSoloAin1(MCInstrInfo const &MCII, MCInst const &MCI); +bool isVector(MCInstrInfo const &MCII, MCInst const &MCI); + +// Pad the bundle with nops to satisfy endloop requirements +void padEndloop(MCContext &Context, MCInst &MCI); + +bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI); + +// Replace the instructions inside MCB, represented by Candidate +void replaceDuplex(MCContext &Context, MCInst &MCB, DuplexCandidate Candidate); + +// Marks a bundle as endloop0 +void setInnerLoop(MCInst &MCI); +void setMemReorderDisabled(MCInst &MCI); +void setMemStoreReorderEnabled(MCInst &MCI); + +// Marks a bundle as endloop1 +void setOuterLoop(MCInst &MCI); + +// Would duplexing this instruction create a requirement to extend +bool subInstWouldBeExtended(MCInst const &potentialDuplex); + +// Attempt to find and replace compound pairs +void tryCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI); +} +} + +#endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp new file mode 100644 index 0000000..8e70280 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp @@ -0,0 +1,237 @@ +//===----- HexagonMCShuffler.cpp - MC bundle shuffling --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the shuffling of insns inside a bundle according to the +// packet formation rules of the Hexagon ISA. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-shuffle" + +#include "Hexagon.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCShuffler.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<bool> + DisableShuffle("disable-hexagon-shuffle", cl::Hidden, cl::init(false), + cl::desc("Disable Hexagon instruction shuffling")); + +void HexagonMCShuffler::init(MCInst &MCB) { + if (HexagonMCInstrInfo::isBundle(MCB)) { + MCInst const *Extender = nullptr; + // Copy the bundle for the shuffling. + for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + assert(!HexagonMCInstrInfo::getDesc(MCII, *I.getInst()).isPseudo()); + MCInst *MI = const_cast<MCInst *>(I.getInst()); + + if (!HexagonMCInstrInfo::isImmext(*MI)) { + append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, *MI), + false); + Extender = nullptr; + } else + Extender = MI; + } + } + + BundleFlags = MCB.getOperand(0).getImm(); +} + +void HexagonMCShuffler::init(MCInst &MCB, MCInst const *AddMI, + bool bInsertAtFront) { + if (HexagonMCInstrInfo::isBundle(MCB)) { + if (bInsertAtFront && AddMI) + append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, *AddMI), + false); + MCInst const *Extender = nullptr; + // Copy the bundle for the shuffling. 
+ for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + assert(!HexagonMCInstrInfo::getDesc(MCII, *I.getInst()).isPseudo()); + MCInst *MI = const_cast<MCInst *>(I.getInst()); + if (!HexagonMCInstrInfo::isImmext(*MI)) { + append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, *MI), + false); + Extender = nullptr; + } else + Extender = MI; + } + if (!bInsertAtFront && AddMI) + append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, *AddMI), + false); + } + + BundleFlags = MCB.getOperand(0).getImm(); +} + +void HexagonMCShuffler::copyTo(MCInst &MCB) { + MCB.clear(); + MCB.addOperand(MCOperand::createImm(BundleFlags)); + // Copy the results into the bundle. + for (HexagonShuffler::iterator I = begin(); I != end(); ++I) { + + MCInst const *MI = I->getDesc(); + MCInst const *Extender = I->getExtender(); + if (Extender) + MCB.addOperand(MCOperand::createInst(Extender)); + MCB.addOperand(MCOperand::createInst(MI)); + } +} + +bool HexagonMCShuffler::reshuffleTo(MCInst &MCB) { + if (shuffle()) { + // Copy the results into the bundle. + copyTo(MCB); + } else + DEBUG(MCB.dump()); + + return (!getError()); +} + +bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB) { + HexagonMCShuffler MCS(MCII, STI, MCB); + + if (DisableShuffle) + // Ignore if user chose so. + return false; + + if (!HexagonMCInstrInfo::bundleSize(MCB)) { + // There once was a bundle: + // BUNDLE %D2<imp-def>, %R4<imp-def>, %R5<imp-def>, %D7<imp-def>, ... + // * %D2<def> = IMPLICIT_DEF; flags: + // * %D7<def> = IMPLICIT_DEF; flags: + // After the IMPLICIT_DEFs were removed by the asm printer, the bundle + // became empty. + DEBUG(dbgs() << "Skipping empty bundle"); + return false; + } else if (!HexagonMCInstrInfo::isBundle(MCB)) { + DEBUG(dbgs() << "Skipping stand-alone insn"); + return false; + } + + // Reorder the bundle and copy the result. + if (!MCS.reshuffleTo(MCB)) { + // Unless there is any error, which should not happen at this point. + unsigned shuffleError = MCS.getError(); + switch (shuffleError) { + default: + llvm_unreachable("unknown error"); + case HexagonShuffler::SHUFFLE_ERROR_INVALID: + llvm_unreachable("invalid packet"); + case HexagonShuffler::SHUFFLE_ERROR_STORES: + llvm_unreachable("too many stores"); + case HexagonShuffler::SHUFFLE_ERROR_LOADS: + llvm_unreachable("too many loads"); + case HexagonShuffler::SHUFFLE_ERROR_BRANCHES: + llvm_unreachable("too many branches"); + case HexagonShuffler::SHUFFLE_ERROR_NOSLOTS: + llvm_unreachable("no suitable slot"); + case HexagonShuffler::SHUFFLE_ERROR_SLOTS: + llvm_unreachable("over-subscribed slots"); + case HexagonShuffler::SHUFFLE_SUCCESS: // Single instruction case. + return true; + } + } + + return true; +} + +unsigned +llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCContext &Context, MCInst &MCB, + SmallVector<DuplexCandidate, 8> possibleDuplexes) { + + if (DisableShuffle) + return HexagonShuffler::SHUFFLE_SUCCESS; + + if (!HexagonMCInstrInfo::bundleSize(MCB)) { + // There once was a bundle: + // BUNDLE %D2<imp-def>, %R4<imp-def>, %R5<imp-def>, %D7<imp-def>, ... + // * %D2<def> = IMPLICIT_DEF; flags: + // * %D7<def> = IMPLICIT_DEF; flags: + // After the IMPLICIT_DEFs were removed by the asm printer, the bundle + // became empty. 
+ DEBUG(dbgs() << "Skipping empty bundle"); + return HexagonShuffler::SHUFFLE_SUCCESS; + } else if (!HexagonMCInstrInfo::isBundle(MCB)) { + DEBUG(dbgs() << "Skipping stand-alone insn"); + return HexagonShuffler::SHUFFLE_SUCCESS; + } + + bool doneShuffling = false; + unsigned shuffleError; + while (possibleDuplexes.size() > 0 && (!doneShuffling)) { + // case of Duplex Found + DuplexCandidate duplexToTry = possibleDuplexes.pop_back_val(); + MCInst Attempt(MCB); + HexagonMCInstrInfo::replaceDuplex(Context, Attempt, duplexToTry); + HexagonMCShuffler MCS(MCII, STI, Attempt); // copy packet to the shuffler + if (MCS.size() == 1) { // case of one duplex + // copy the created duplex in the shuffler to the bundle + MCS.copyTo(MCB); + doneShuffling = true; + return HexagonShuffler::SHUFFLE_SUCCESS; + } + // try shuffle with this duplex + doneShuffling = MCS.reshuffleTo(MCB); + shuffleError = MCS.getError(); + + if (doneShuffling) + break; + } + + if (doneShuffling == false) { + HexagonMCShuffler MCS(MCII, STI, MCB); + doneShuffling = MCS.reshuffleTo(MCB); // shuffle + shuffleError = MCS.getError(); + } + if (!doneShuffling) + return shuffleError; + + return HexagonShuffler::SHUFFLE_SUCCESS; +} + +bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB, MCInst const *AddMI, int fixupCount) { + if (!HexagonMCInstrInfo::isBundle(MCB) || !AddMI) + return false; + + // if fixups present, make sure we don't insert too many nops that would + // later prevent an extender from being inserted. + unsigned int bundleSize = HexagonMCInstrInfo::bundleSize(MCB); + if (bundleSize >= HEXAGON_PACKET_SIZE) + return false; + if (fixupCount >= 2) { + return false; + } else { + if (bundleSize == HEXAGON_PACKET_SIZE - 1 && fixupCount) + return false; + } + + if (DisableShuffle) + return false; + + HexagonMCShuffler MCS(MCII, STI, MCB, AddMI); + if (!MCS.reshuffleTo(MCB)) { + unsigned shuffleError = MCS.getError(); + switch (shuffleError) { + default: + return false; + case HexagonShuffler::SHUFFLE_SUCCESS: // single instruction case + return true; + } + } + + return true; +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h new file mode 100644 index 0000000..a21cce1 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h @@ -0,0 +1,65 @@ +//=-- HexagonMCShuffler.h ---------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This declares the shuffling of insns inside a bundle according to the +// packet formation rules of the Hexagon ISA. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONMCSHUFFLER_H +#define HEXAGONMCSHUFFLER_H + +#include "MCTargetDesc/HexagonShuffler.h" + +namespace llvm { + +class MCInst; + +// Insn bundle shuffler. 
+class HexagonMCShuffler : public HexagonShuffler { + bool immext_present; + bool duplex_present; + +public: + HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB) + : HexagonShuffler(MCII, STI) { + init(MCB); + }; + HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB, const MCInst *AddMI, + bool bInsertAtFront = false) + : HexagonShuffler(MCII, STI) { + init(MCB, AddMI, bInsertAtFront); + }; + + // Copy reordered bundle to another. + void copyTo(MCInst &MCB); + // Reorder and copy result to another. + bool reshuffleTo(MCInst &MCB); + + bool immextPresent() const { return immext_present; }; + bool duplexPresent() const { return duplex_present; }; + +private: + void init(MCInst &MCB); + void init(MCInst &MCB, const MCInst *AddMI, bool bInsertAtFront = false); +}; + +// Invocation of the shuffler. +bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &); +bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &, const MCInst *, int); +unsigned HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCContext &Context, MCInst &, + SmallVector<DuplexCandidate, 8>); +} + +#endif // HEXAGONMCSHUFFLER_H diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp new file mode 100644 index 0000000..9a29257 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -0,0 +1,244 @@ +//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Hexagon specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonMCTargetDesc.h" +#include "Hexagon.h" +#include "HexagonMCAsmInfo.h" +#include "HexagonMCELFStreamer.h" +#include "MCTargetDesc/HexagonInstPrinter.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define GET_INSTRINFO_MC_DESC +#include "HexagonGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "HexagonGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "HexagonGenRegisterInfo.inc" + +cl::opt<bool> llvm::HexagonDisableCompound + ("mno-compound", + cl::desc("Disable looking for compound instructions for Hexagon")); + +cl::opt<bool> llvm::HexagonDisableDuplex + ("mno-pairing", + cl::desc("Disable looking for duplex instructions for Hexagon")); + +StringRef HEXAGON_MC::selectHexagonCPU(const Triple &TT, StringRef CPU) { + if (CPU.empty()) + CPU = "hexagonv60"; + return CPU; +} + +MCInstrInfo *llvm::createHexagonMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitHexagonMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitHexagonMCRegisterInfo(X, Hexagon::R0); + return X; +} + +static MCSubtargetInfo * +createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { + CPU = HEXAGON_MC::selectHexagonCPU(TT, CPU); + return createHexagonMCSubtargetInfoImpl(TT, CPU, FS); +} + +namespace { +class HexagonTargetAsmStreamer : public HexagonTargetStreamer { +public: + HexagonTargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &, bool, + MCInstPrinter &) + : HexagonTargetStreamer(S) {} + void prettyPrintAsm(MCInstPrinter &InstPrinter, raw_ostream &OS, + const MCInst &Inst, const MCSubtargetInfo &STI) override { + assert(HexagonMCInstrInfo::isBundle(Inst)); + assert(HexagonMCInstrInfo::bundleSize(Inst) <= HEXAGON_PACKET_SIZE); + std::string Buffer; + { + raw_string_ostream TempStream(Buffer); + InstPrinter.printInst(&Inst, TempStream, "", STI); + } + StringRef Contents(Buffer); + auto PacketBundle = Contents.rsplit('\n'); + auto HeadTail = PacketBundle.first.split('\n'); + StringRef Separator = "\n"; + StringRef Indent = "\t\t"; + OS << "\t{\n"; + while (!HeadTail.first.empty()) { + StringRef InstTxt; + auto Duplex = HeadTail.first.split('\v'); + if (!Duplex.second.empty()) { + OS << Indent << Duplex.first << Separator; + InstTxt = Duplex.second; + } else if (!HeadTail.first.trim().startswith("immext")) { + InstTxt = Duplex.first; + } + if (!InstTxt.empty()) + OS << Indent << InstTxt << Separator; + HeadTail = HeadTail.second.split('\n'); + } + OS << "\t}" << PacketBundle.second; + } +}; +} + +namespace { +class HexagonTargetELFStreamer : public HexagonTargetStreamer { +public: + MCELFStreamer &getStreamer() { + return static_cast<MCELFStreamer &>(Streamer); + } + HexagonTargetELFStreamer(MCStreamer &S, MCSubtargetInfo const &STI) + : HexagonTargetStreamer(S) { + auto Bits = STI.getFeatureBits(); + unsigned Flags; + if (Bits.to_ullong() & llvm::Hexagon::ArchV5) + Flags = ELF::EF_HEXAGON_MACH_V5; + else + Flags = ELF::EF_HEXAGON_MACH_V4; + 
getStreamer().getAssembler().setELFHeaderEFlags(Flags); + } + void EmitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, + unsigned AccessSize) override { + HexagonMCELFStreamer &HexagonELFStreamer = + static_cast<HexagonMCELFStreamer &>(getStreamer()); + HexagonELFStreamer.HexagonMCEmitCommonSymbol(Symbol, Size, ByteAlignment, + AccessSize); + } + void EmitLocalCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, + unsigned AccessSize) override { + HexagonMCELFStreamer &HexagonELFStreamer = + static_cast<HexagonMCELFStreamer &>(getStreamer()); + HexagonELFStreamer.HexagonMCEmitLocalCommonSymbol( + Symbol, Size, ByteAlignment, AccessSize); + } +}; +} + +static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TT) { + MCAsmInfo *MAI = new HexagonMCAsmInfo(TT); + + // VirtualFP = (R30 + #0). + MCCFIInstruction Inst = + MCCFIInstruction::createDefCfa(nullptr, Hexagon::R30, 0); + MAI->addInitialFrameState(Inst); + + return MAI; +} + +static MCCodeGenInfo *createHexagonMCCodeGenInfo(const Triple &TT, + Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + if (RM == Reloc::Default) + RM = Reloc::Static; + X->initMCCodeGenInfo(RM, CM, OL); + return X; +} + +static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + if (SyntaxVariant == 0) + return (new HexagonInstPrinter(MAI, MII, MRI)); + else + return nullptr; +} + +static MCTargetStreamer *createMCAsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool IsVerboseAsm) { + return new HexagonTargetAsmStreamer(S, OS, IsVerboseAsm, *InstPrint); +} + +static MCStreamer *createMCStreamer(Triple const &T, MCContext &Context, + MCAsmBackend &MAB, raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll) { + return createHexagonELFStreamer(Context, MAB, OS, Emitter); +} + +static MCTargetStreamer * +createHexagonObjectTargetStreamer(MCStreamer &S, MCSubtargetInfo const &STI) { + return new HexagonTargetELFStreamer(S, STI); +} + +// Force static initialization. +extern "C" void LLVMInitializeHexagonTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn X(TheHexagonTarget, createHexagonMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheHexagonTarget, + createHexagonMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget, + createHexagonMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheHexagonTarget, + createHexagonMCRegisterInfo); + + // Register the MC subtarget info. 
+ TargetRegistry::RegisterMCSubtargetInfo(TheHexagonTarget, + createHexagonMCSubtargetInfo); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(TheHexagonTarget, + createHexagonMCCodeEmitter); + + // Register the asm backend + TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget, + createHexagonAsmBackend); + + // Register the obj streamer + TargetRegistry::RegisterELFStreamer(TheHexagonTarget, createMCStreamer); + + // Register the asm streamer + TargetRegistry::RegisterAsmTargetStreamer(TheHexagonTarget, + createMCAsmTargetStreamer); + + // Register the MC Inst Printer + TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget, + createHexagonMCInstPrinter); + + TargetRegistry::RegisterObjectTargetStreamer( + TheHexagonTarget, createHexagonObjectTargetStreamer); +} diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h new file mode 100644 index 0000000..a005a01 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -0,0 +1,75 @@ +//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Hexagon specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H +#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H + +#include <cstdint> + +#include "llvm/Support/CommandLine.h" + +namespace llvm { +struct InstrItinerary; +struct InstrStage; +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class Target; +class Triple; +class StringRef; +class raw_ostream; +class raw_pwrite_stream; + +extern Target TheHexagonTarget; +extern cl::opt<bool> HexagonDisableCompound; +extern cl::opt<bool> HexagonDisableDuplex; +extern const InstrStage HexagonStages[]; + +MCInstrInfo *createHexagonMCInstrInfo(); + +MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &MCT); + +MCAsmBackend *createHexagonAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + const Triple &TT, StringRef CPU); + +MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, StringRef CPU); + +namespace HEXAGON_MC { + StringRef selectHexagonCPU(const Triple &TT, StringRef CPU); +} + +} // End llvm namespace + +// Define symbolic names for Hexagon registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "HexagonGenRegisterInfo.inc" + +// Defines symbolic names for the Hexagon instructions. 
+// +#define GET_INSTRINFO_ENUM +#include "HexagonGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "HexagonGenSubtargetInfo.inc" + +#endif diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp new file mode 100644 index 0000000..6ceb848 --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -0,0 +1,470 @@ +//===----- HexagonShuffler.cpp - Instruction bundle shuffling -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the shuffling of insns inside a bundle according to the +// packet formation rules of the Hexagon ISA. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-shuffle" + +#include <algorithm> +#include <utility> +#include "Hexagon.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "HexagonShuffler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +// Insn shuffling priority. +class HexagonBid { + // The priority is directly proportional to how restricted the insn is based + // on its flexibility to run on the available slots. So, the fewer slots it + // may run on, the higher its priority. + enum { MAX = 360360 }; // LCD of 1/2, 1/3, 1/4,... 1/15. + unsigned Bid; + +public: + HexagonBid() : Bid(0){}; + HexagonBid(unsigned B) { Bid = B ? MAX / countPopulation(B) : 0; }; + + // Check if the insn priority is overflowed. + bool isSold() const { return (Bid >= MAX); }; + + HexagonBid &operator+=(const HexagonBid &B) { + Bid += B.Bid; + return *this; + }; +}; + +// Slot shuffling allocation. +class HexagonUnitAuction { + HexagonBid Scores[HEXAGON_PACKET_SIZE]; + // Mask indicating which slot is unavailable. + unsigned isSold : HEXAGON_PACKET_SIZE; + +public: + HexagonUnitAuction() : isSold(0){}; + + // Allocate slots. + bool bid(unsigned B) { + // Exclude already auctioned slots from the bid. + unsigned b = B & ~isSold; + if (b) { + for (unsigned i = 0; i < HEXAGON_PACKET_SIZE; ++i) + if (b & (1 << i)) { + // Request candidate slots. + Scores[i] += HexagonBid(b); + isSold |= Scores[i].isSold() << i; + } + return true; + ; + } else + // Error if the desired slots are already full. + return false; + }; +}; +} // end anonymous namespace + +unsigned HexagonResource::setWeight(unsigned s) { + const unsigned SlotWeight = 8; + const unsigned MaskWeight = SlotWeight - 1; + bool Key = (1 << s) & getUnits(); + + // TODO: Improve this API so that we can prevent misuse statically. + assert(SlotWeight * s < 32 && "Argument to setWeight too large."); + + // Calculate relative weight of the insn for the given slot, weighing it the + // heavier the more restrictive the insn is and the lowest the slots that the + // insn may be executed in. 
+ Weight = + (Key << (SlotWeight * s)) * ((MaskWeight - countPopulation(getUnits())) + << countTrailingZeros(getUnits())); + return (Weight); +} + +HexagonCVIResource::TypeUnitsAndLanes *HexagonCVIResource::TUL; + +bool HexagonCVIResource::SetUp = HexagonCVIResource::setup(); + +bool HexagonCVIResource::setup() { + assert(!TUL); + TUL = new (TypeUnitsAndLanes); + + (*TUL)[HexagonII::TypeCVI_VA] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2); + (*TUL)[HexagonII::TypeCVI_VX] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VX_DV] = UnitsAndLanes(CVI_MPY0, 2); + (*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2); + (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1); + (*TUL)[HexagonII::TypeCVI_VINLANESAT] = UnitsAndLanes(CVI_SHIFT, 1); + (*TUL)[HexagonII::TypeCVI_VM_LD] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0); + (*TUL)[HexagonII::TypeCVI_VM_CUR_LD] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_VP_LDU] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_VM_ST] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0); + (*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4); + + return true; +} + +HexagonCVIResource::HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, + MCInst const *id) + : HexagonResource(s) { + unsigned T = HexagonMCInstrInfo::getType(MCII, *id); + + if (TUL->count(T)) { + // For an HVX insn. + Valid = true; + setUnits((*TUL)[T].first); + setLanes((*TUL)[T].second); + setLoad(HexagonMCInstrInfo::getDesc(MCII, *id).mayLoad()); + setStore(HexagonMCInstrInfo::getDesc(MCII, *id).mayStore()); + } else { + // For core insns. + Valid = false; + setUnits(0); + setLanes(0); + setLoad(false); + setStore(false); + } +} + +HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII, + MCSubtargetInfo const &STI) + : MCII(MCII), STI(STI) { + reset(); +} + +void HexagonShuffler::reset() { + Packet.clear(); + BundleFlags = 0; + Error = SHUFFLE_SUCCESS; +} + +void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender, + unsigned S, bool X) { + HexagonInstr PI(MCII, ID, Extender, S, X); + + Packet.push_back(PI); +} + +/// Check that the packet is legal and enforce relative insn order. +bool HexagonShuffler::check() { + // Descriptive slot masks. + const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotOne = 0x2, + slotThree = 0x8, slotFirstJump = 0x8, slotLastJump = 0x4, + slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1; + // Highest slots for branches and stores used to keep their original order. + unsigned slotJump = slotFirstJump; + unsigned slotLoadStore = slotFirstLoadStore; + // Number of branches, solo branches, indirect branches. + unsigned jumps = 0, jump1 = 0, jumpr = 0; + // Number of memory operations, loads, solo loads, stores, solo stores, single + // stores. + unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0; + // Number of HVX loads, HVX stores. + unsigned CVIloads = 0, CVIstores = 0; + // Number of duplex insns, solo insns. 
+ unsigned duplex = 0, solo = 0; + // Number of insns restricting other insns in the packet to A and X types, + // which is neither A or X types. + unsigned onlyAX = 0, neitherAnorX = 0; + // Number of insns restricting other insns in slot #1 to A type. + unsigned onlyAin1 = 0; + // Number of insns restricting any insn in slot #1, except A2_nop. + unsigned onlyNo1 = 0; + unsigned xtypeFloat = 0; + unsigned pSlot3Cnt = 0; + iterator slot3ISJ = end(); + + // Collect information from the insns in the packet. + for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { + MCInst const *ID = ISJ->getDesc(); + + if (HexagonMCInstrInfo::isSolo(MCII, *ID)) + solo += !ISJ->isSoloException(); + else if (HexagonMCInstrInfo::isSoloAX(MCII, *ID)) + onlyAX += !ISJ->isSoloException(); + else if (HexagonMCInstrInfo::isSoloAin1(MCII, *ID)) + onlyAin1 += !ISJ->isSoloException(); + if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32 && + HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeXTYPE) + ++neitherAnorX; + if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID)) { + ++pSlot3Cnt; + slot3ISJ = ISJ; + } + + switch (HexagonMCInstrInfo::getType(MCII, *ID)) { + case HexagonII::TypeXTYPE: + if (HexagonMCInstrInfo::isFloat(MCII, *ID)) + ++xtypeFloat; + break; + case HexagonII::TypeJR: + ++jumpr; + // Fall-through. + case HexagonII::TypeJ: + ++jumps; + break; + case HexagonII::TypeCVI_VM_VP_LDU: + ++onlyNo1; + case HexagonII::TypeCVI_VM_LD: + case HexagonII::TypeCVI_VM_TMP_LD: + case HexagonII::TypeCVI_VM_CUR_LD: + ++CVIloads; + case HexagonII::TypeLD: + ++loads; + ++memory; + if (ISJ->Core.getUnits() == slotSingleLoad) + ++load0; + if (HexagonMCInstrInfo::getDesc(MCII, *ID).isReturn()) + ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. + break; + case HexagonII::TypeCVI_VM_STU: + ++onlyNo1; + case HexagonII::TypeCVI_VM_ST: + case HexagonII::TypeCVI_VM_NEW_ST: + ++CVIstores; + case HexagonII::TypeST: + ++stores; + ++memory; + if (ISJ->Core.getUnits() == slotSingleStore) + ++store0; + break; + case HexagonII::TypeMEMOP: + ++loads; + ++stores; + ++store1; + ++memory; + break; + case HexagonII::TypeNV: + ++memory; // NV insns are memory-like. + if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch()) + ++jumps, ++jump1; + break; + case HexagonII::TypeCR: + // Legacy conditional branch predicated on a register. + case HexagonII::TypeSYSTEM: + if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad()) + ++loads; + break; + } + } + + // Check if the packet is legal. + if ((load0 > 1 || store0 > 1 || CVIloads > 1 || CVIstores > 1) || + (duplex > 1 || (duplex && memory)) || (solo && size() > 1) || + (onlyAX && neitherAnorX > 1) || (onlyAX && xtypeFloat)) { + Error = SHUFFLE_ERROR_INVALID; + return false; + } + + if (jump1 && jumps > 1) { + // Error if single branch with another branch. + Error = SHUFFLE_ERROR_BRANCHES; + return false; + } + + // Modify packet accordingly. + // TODO: need to reserve slots #0 and #1 for duplex insns. + bool bOnlySlot3 = false; + for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { + MCInst const *ID = ISJ->getDesc(); + + if (!ISJ->Core.getUnits()) { + // Error if insn may not be executed in any slot. + Error = SHUFFLE_ERROR_UNKNOWN; + return false; + } + + // Exclude from slot #1 any insn but A2_nop. + if (HexagonMCInstrInfo::getDesc(MCII, *ID).getOpcode() != Hexagon::A2_nop) + if (onlyNo1) + ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne); + + // Exclude from slot #1 any insn but A-type. 
+ if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32) + if (onlyAin1) + ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne); + + // Branches must keep the original order. + if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch() || + HexagonMCInstrInfo::getDesc(MCII, *ID).isCall()) + if (jumps > 1) { + if (jumpr || slotJump < slotLastJump) { + // Error if indirect branch with another branch or + // no more slots available for branches. + Error = SHUFFLE_ERROR_BRANCHES; + return false; + } + // Pin the branch to the highest slot available to it. + ISJ->Core.setUnits(ISJ->Core.getUnits() & slotJump); + // Update next highest slot available to branches. + slotJump >>= 1; + } + + // A single load must use slot #0. + if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad()) { + if (loads == 1 && loads == memory) + // Pin the load to slot #0. + ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleLoad); + } + + // A single store must use slot #0. + if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayStore()) { + if (!store0) { + if (stores == 1) + ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleStore); + else if (stores > 1) { + if (slotLoadStore < slotLastLoadStore) { + // Error if no more slots available for stores. + Error = SHUFFLE_ERROR_STORES; + return false; + } + // Pin the store to the highest slot available to it. + ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore); + // Update the next highest slot available to stores. + slotLoadStore >>= 1; + } + } + if (store1 && stores > 1) { + // Error if a single store with another store. + Error = SHUFFLE_ERROR_STORES; + return false; + } + } + + // flag if an instruction can only be executed in slot 3 + if (ISJ->Core.getUnits() == slotThree) + bOnlySlot3 = true; + + if (!ISJ->Core.getUnits()) { + // Error if insn may not be executed in any slot. + Error = SHUFFLE_ERROR_NOSLOTS; + return false; + } + } + + bool validateSlots = true; + if (bOnlySlot3 == false && pSlot3Cnt == 1 && slot3ISJ != end()) { + // save off slot mask of instruction marked with A_PREFER_SLOT3 + // and then pin it to slot #3 + unsigned saveUnits = slot3ISJ->Core.getUnits(); + slot3ISJ->Core.setUnits(saveUnits & slotThree); + + HexagonUnitAuction AuctionCore; + std::sort(begin(), end(), HexagonInstr::lessCore); + + // see if things ok with that instruction being pinned to slot #3 + bool bFail = false; + for (iterator I = begin(); I != end() && bFail != true; ++I) + if (!AuctionCore.bid(I->Core.getUnits())) + bFail = true; + + // if yes, great, if not then restore original slot mask + if (!bFail) + validateSlots = false; // all good, no need to re-do auction + else + for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { + MCInst const *ID = ISJ->getDesc(); + if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID)) + ISJ->Core.setUnits(saveUnits); + } + } + + // Check if any slot, core, is over-subscribed. + // Verify the core slot subscriptions. + if (validateSlots) { + HexagonUnitAuction AuctionCore; + + std::sort(begin(), end(), HexagonInstr::lessCore); + + for (iterator I = begin(); I != end(); ++I) + if (!AuctionCore.bid(I->Core.getUnits())) { + Error = SHUFFLE_ERROR_SLOTS; + return false; + } + } + // Verify the CVI slot subscriptions. + { + HexagonUnitAuction AuctionCVI; + + std::sort(begin(), end(), HexagonInstr::lessCVI); + + for (iterator I = begin(); I != end(); ++I) + for (unsigned i = 0; i < I->CVI.getLanes(); ++i) // TODO: I->CVI.isValid? 
+ if (!AuctionCVI.bid(I->CVI.getUnits() << i)) {
+ Error = SHUFFLE_ERROR_SLOTS;
+ return false;
+ }
+ }
+
+ Error = SHUFFLE_SUCCESS;
+ return true;
+}
+
+bool HexagonShuffler::shuffle() {
+ if (size() > HEXAGON_PACKET_SIZE) {
+ // Ignore a packet with more insns than a packet can hold
+ // or with compound or duplex insns for now.
+ Error = SHUFFLE_ERROR_INVALID;
+ return false;
+ }
+
+ // Check and prepare packet.
+ if (size() > 1 && check())
+ // Reorder the handles for each slot.
+ for (unsigned nSlot = 0, emptySlots = 0; nSlot < HEXAGON_PACKET_SIZE;
+ ++nSlot) {
+ iterator ISJ, ISK;
+ unsigned slotSkip, slotWeight;
+
+ // Prioritize the handles considering their restrictions.
+ for (ISJ = ISK = Packet.begin(), slotSkip = slotWeight = 0;
+ ISK != Packet.end(); ++ISK, ++slotSkip)
+ if (slotSkip < nSlot - emptySlots)
+ // Note which handle to begin at.
+ ++ISJ;
+ else
+ // Calculate the weight of the slot.
+ slotWeight += ISK->Core.setWeight(HEXAGON_PACKET_SIZE - nSlot - 1);
+
+ if (slotWeight)
+ // Sort the packet, favoring source order,
+ // beginning after the previous slot.
+ std::sort(ISJ, Packet.end());
+ else
+ // Skip unused slot.
+ ++emptySlots;
+ }
+
+ for (iterator ISJ = begin(); ISJ != end(); ++ISJ)
+ DEBUG(dbgs().write_hex(ISJ->Core.getUnits());
+ dbgs() << ':'
+ << HexagonMCInstrInfo::getDesc(MCII, *ISJ->getDesc())
+ .getOpcode();
+ dbgs() << '\n');
+ DEBUG(dbgs() << '\n');
+
+ return (!getError());
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
new file mode 100644
index 0000000..174f10f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -0,0 +1,184 @@
+//===----- HexagonShuffler.h - Instruction bundle shuffling ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the shuffling of insns inside a bundle according to the
+// packet formation rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONSHUFFLER_H
+#define HEXAGONSHUFFLER_H
+
+#include "Hexagon.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstrInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+// Insn resources.
+class HexagonResource {
+ // Mask of the slots or units that may execute the insn and
+ // the weight or priority that the insn requires to be assigned a slot.
+ unsigned Slots, Weight;
+
+public:
+ HexagonResource(unsigned s) { setUnits(s); };
+
+ void setUnits(unsigned s) {
+ Slots = s & ~(~0U << HEXAGON_PACKET_SIZE);
+ };
+ unsigned setWeight(unsigned s);
+
+ unsigned getUnits() const { return (Slots); };
+ unsigned getWeight() const { return (Weight); };
+
+ // Check if the resources are in ascending slot order.
+ static bool lessUnits(const HexagonResource &A, const HexagonResource &B) {
+ return (countPopulation(A.getUnits()) < countPopulation(B.getUnits()));
+ };
+ // Check if the resources are in ascending weight order.
+ static bool lessWeight(const HexagonResource &A, const HexagonResource &B) {
+ return (A.getWeight() < B.getWeight());
+ };
+};
+
+// HVX insn resources.
+class HexagonCVIResource : public HexagonResource { + typedef std::pair<unsigned, unsigned> UnitsAndLanes; + typedef llvm::DenseMap<unsigned, UnitsAndLanes> TypeUnitsAndLanes; + + // Available HVX slots. + enum { + CVI_NONE = 0, + CVI_XLANE = 1 << 0, + CVI_SHIFT = 1 << 1, + CVI_MPY0 = 1 << 2, + CVI_MPY1 = 1 << 3 + }; + + static bool SetUp; + static bool setup(); + static TypeUnitsAndLanes *TUL; + + // Count of adjacent slots that the insn requires to be executed. + unsigned Lanes; + // Flag whether the insn is a load or a store. + bool Load, Store; + // Flag whether the HVX resources are valid. + bool Valid; + + void setLanes(unsigned l) { Lanes = l; }; + void setLoad(bool f = true) { Load = f; }; + void setStore(bool f = true) { Store = f; }; + +public: + HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, MCInst const *id); + + bool isValid() const { return (Valid); }; + unsigned getLanes() const { return (Lanes); }; + bool mayLoad() const { return (Load); }; + bool mayStore() const { return (Store); }; +}; + +// Handle to an insn used by the shuffling algorithm. +class HexagonInstr { + friend class HexagonShuffler; + + MCInst const *ID; + MCInst const *Extender; + HexagonResource Core; + HexagonCVIResource CVI; + bool SoloException; + +public: + HexagonInstr(MCInstrInfo const &MCII, MCInst const *id, + MCInst const *Extender, unsigned s, bool x = false) + : ID(id), Extender(Extender), Core(s), CVI(MCII, s, id), + SoloException(x){}; + + MCInst const *getDesc() const { return (ID); }; + + MCInst const *getExtender() const { return Extender; } + + unsigned isSoloException() const { return (SoloException); }; + + // Check if the handles are in ascending order for shuffling purposes. + bool operator<(const HexagonInstr &B) const { + return (HexagonResource::lessWeight(B.Core, Core)); + }; + // Check if the handles are in ascending order by core slots. + static bool lessCore(const HexagonInstr &A, const HexagonInstr &B) { + return (HexagonResource::lessUnits(A.Core, B.Core)); + }; + // Check if the handles are in ascending order by HVX slots. + static bool lessCVI(const HexagonInstr &A, const HexagonInstr &B) { + return (HexagonResource::lessUnits(A.CVI, B.CVI)); + }; +}; + +// Bundle shuffler. +class HexagonShuffler { + typedef SmallVector<HexagonInstr, HEXAGON_PRESHUFFLE_PACKET_SIZE> + HexagonPacket; + + // Insn handles in a bundle. + HexagonPacket Packet; + + // Shuffling error code. + unsigned Error; + +protected: + int64_t BundleFlags; + MCInstrInfo const &MCII; + MCSubtargetInfo const &STI; + +public: + typedef HexagonPacket::iterator iterator; + + enum { + SHUFFLE_SUCCESS = 0, ///< Successful operation. + SHUFFLE_ERROR_INVALID, ///< Invalid bundle. + SHUFFLE_ERROR_STORES, ///< No free slots for store insns. + SHUFFLE_ERROR_LOADS, ///< No free slots for load insns. + SHUFFLE_ERROR_BRANCHES, ///< No free slots for branch insns. + SHUFFLE_ERROR_NOSLOTS, ///< No free slots for other insns. + SHUFFLE_ERROR_SLOTS, ///< Over-subscribed slots. + SHUFFLE_ERROR_ERRATA2, ///< Errata violation (v60). + SHUFFLE_ERROR_STORE_LOAD_CONFLICT, ///< store/load conflict + SHUFFLE_ERROR_UNKNOWN ///< Unknown error. + }; + + explicit HexagonShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI); + + // Reset to initial state. + void reset(); + // Check if the bundle may be validly shuffled. + bool check(); + // Reorder the insn handles in the bundle. 
+ bool shuffle();
+
+ unsigned size() const { return (Packet.size()); };
+
+ iterator begin() { return (Packet.begin()); };
+ iterator end() { return (Packet.end()); };
+
+ // Add insn handle to the bundle.
+ void append(MCInst const *ID, MCInst const *Extender, unsigned S,
+ bool X = false);
+
+ // Set or return the error code for the last check or shuffling of the bundle.
+ void setError(unsigned Err) { Error = Err; };
+ unsigned getError() const { return (Error); };
+};
+}
+
+#endif // HEXAGONSHUFFLER_H
diff --git a/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
new file mode 100644
index 0000000..40f6c8d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- HexagonTargetInfo.cpp - Hexagon Target Implementation ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheHexagonTarget;
+
+extern "C" void LLVMInitializeHexagonTargetInfo() {
+ RegisterTarget<Triple::hexagon, /*HasJIT=*/false> X(TheHexagonTarget, "hexagon", "Hexagon");
+}
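The core-slot check in HexagonShuffler::check() above boils down to a small greedy auction: handles are ordered so that the ones with the fewest candidate slots bid first (HexagonInstr::lessCore compares slot-mask population counts), and a bid fails when none of the slots in a handle's mask is still free, which is reported as SHUFFLE_ERROR_SLOTS. The standalone C++ sketch below models that idea with no LLVM dependencies; the four-slot width matches HEXAGON_PACKET_SIZE, but the example slot masks, the helper name auctionPacket, and the lowest-free-slot placement policy are illustrative assumptions, since HexagonUnitAuction::bid() itself is defined elsewhere in HexagonShuffler.cpp and is not reproduced here.

#include <algorithm>
#include <bitset>
#include <cstdio>
#include <vector>

// Greedy slot auction over a 4-slot packet. Each mask lists the slots an
// insn may use; the most constrained insns bid first, and each claims the
// lowest free slot allowed by its mask.
static bool auctionPacket(std::vector<unsigned> SlotMasks) {
  std::sort(SlotMasks.begin(), SlotMasks.end(), [](unsigned A, unsigned B) {
    return std::bitset<32>(A).count() < std::bitset<32>(B).count();
  });
  unsigned Taken = 0; // bit i set => slot i already assigned
  for (unsigned Mask : SlotMasks) {
    unsigned Free = Mask & ~Taken & 0xFu;
    if (!Free)
      return false;        // over-subscribed: no slot left for this insn
    Taken |= Free & -Free; // claim the lowest free slot in the mask
  }
  return true;
}

int main() {
  // A store pinned to slot #0, a branch limited to slots #2-#3, and two
  // insns that may use any slot: the packet fits.
  std::printf("%d\n", auctionPacket({0x1, 0xC, 0xF, 0xF})); // prints 1
  // Two insns both pinned to slot #0: the packet cannot be placed.
  std::printf("%d\n", auctionPacket({0x1, 0x1, 0xF}));      // prints 0
  return 0;
}

Because the toy version is greedy it may reject a packet that a cleverer assignment would accept; it is meant only to show the shape of the check, not to reproduce the exact behaviour of the LLVM classes.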
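HexagonShuffler::shuffle() then reorders the surviving handles slot by slot: for each slot it weights the remaining handles via HexagonResource::setWeight() and sorts them so that the handle that most needs the current slot comes first while source order is otherwise preserved ("favoring source order" in the comments above). The sketch below is a simplified stand-in for that step; the Weight values and the use of std::stable_sort are assumptions for illustration, since the real setWeight() formula lives earlier in HexagonShuffler.cpp and the real code sorts with HexagonInstr::operator<.

#include <algorithm>
#include <cstdio>
#include <vector>

struct Handle {
  unsigned SrcIdx; // original position in the packet
  unsigned Weight; // priority for the slot currently being filled
};

int main() {
  // Three handles; the middle one is assumed to need the current slot most.
  std::vector<Handle> Packet = {{0, 1}, {1, 4}, {2, 1}};

  // Heavier handles first; stable, so equally weighted handles keep their
  // original source order.
  std::stable_sort(Packet.begin(), Packet.end(),
                   [](const Handle &A, const Handle &B) {
                     return A.Weight > B.Weight;
                   });

  for (const Handle &H : Packet)
    std::printf("insn %u (weight %u)\n", H.SrcIdx, H.Weight);
  // Output: insn 1 first, then insns 0 and 2 in their original order.
  return 0;
}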