diff options
Diffstat (limited to 'contrib/llvm/lib/Target/WebAssembly')
58 files changed, 3483 insertions, 988 deletions
diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp index c0355ae..b4763ca 100644 --- a/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -54,9 +54,9 @@ static MCDisassembler *createWebAssemblyDisassembler(const Target &T, extern "C" void LLVMInitializeWebAssemblyDisassembler() { // Register the disassembler for each target. - TargetRegistry::RegisterMCDisassembler(TheWebAssemblyTarget32, + TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(), createWebAssemblyDisassembler); - TargetRegistry::RegisterMCDisassembler(TheWebAssemblyTarget64, + TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(), createWebAssemblyDisassembler); } @@ -93,6 +93,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( const MCOperandInfo &Info = Desc.OpInfo[i]; switch (Info.OperandType) { case MCOI::OPERAND_IMMEDIATE: + case WebAssembly::OPERAND_LOCAL: case WebAssembly::OPERAND_P2ALIGN: case WebAssembly::OPERAND_BASIC_BLOCK: { if (Pos + sizeof(uint64_t) > Bytes.size()) @@ -110,8 +111,8 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( MI.addOperand(MCOperand::createReg(Reg)); break; } - case WebAssembly::OPERAND_FP32IMM: - case WebAssembly::OPERAND_FP64IMM: { + case WebAssembly::OPERAND_F32IMM: + case WebAssembly::OPERAND_F64IMM: { // TODO: MC converts all floating point immediate operands to double. // This is fine for numeric values, but may cause NaNs to change bits. 
if (Pos + sizeof(uint64_t) > Bytes.size()) diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp index 267d716..0af13cf 100644 --- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp @@ -54,7 +54,12 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, const MCInstrDesc &Desc = MII.get(MI->getOpcode()); if (Desc.isVariadic()) for (auto i = Desc.getNumOperands(), e = MI->getNumOperands(); i < e; ++i) { - if (i != 0) + // FIXME: For CALL_INDIRECT_VOID, don't print a leading comma, because + // we have an extra flags operand which is not currently printed, for + // compatiblity reasons. + if (i != 0 && + (MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID || + i != Desc.getNumOperands())) OS << ", "; printOperand(MI, i, OS); } @@ -69,11 +74,8 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, default: break; case WebAssembly::LOOP: { - // Grab the TopLabel value first so that labels print in numeric order. 
- uint64_t TopLabel = ControlFlowCounter++; - ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false)); - printAnnotation(OS, "label" + utostr(TopLabel) + ':'); - ControlFlowStack.push_back(std::make_pair(TopLabel, true)); + printAnnotation(OS, "label" + utostr(ControlFlowCounter) + ':'); + ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, true)); break; } case WebAssembly::BLOCK: @@ -81,8 +83,6 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, break; case WebAssembly::END_LOOP: ControlFlowStack.pop_back(); - printAnnotation( - OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':'); break; case WebAssembly::END_BLOCK: printAnnotation( @@ -94,9 +94,9 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, unsigned NumFixedOperands = Desc.NumOperands; SmallSet<uint64_t, 8> Printed; for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { - const MCOperandInfo &Info = Desc.OpInfo[i]; if (!(i < NumFixedOperands - ? (Info.OperandType == WebAssembly::OPERAND_BASIC_BLOCK) + ? (Desc.OpInfo[i].OperandType == + WebAssembly::OPERAND_BASIC_BLOCK) : (Desc.TSFlags & WebAssemblyII::VariableOpImmediateIsLabel))) continue; uint64_t Depth = MI->getOperand(i).getImm(); @@ -113,7 +113,8 @@ static std::string toString(const APFloat &FP) { // Print NaNs with custom payloads specially. if (FP.isNaN() && !FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) && - !FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) { + !FP.bitwiseIsEqual( + APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) { APInt AI = FP.bitcastToAPInt(); return std::string(AI.isNegative() ? 
"-" : "") + "nan:0x" + @@ -154,11 +155,12 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (OpNo < MII.get(MI->getOpcode()).getNumDefs()) O << '='; } else if (Op.isImm()) { - assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() || - (MII.get(MI->getOpcode()).TSFlags & - WebAssemblyII::VariableOpIsImmediate)) && + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + assert((OpNo < Desc.getNumOperands() || + (Desc.TSFlags & WebAssemblyII::VariableOpIsImmediate)) && "WebAssemblyII::VariableOpIsImmediate should be set for " "variable_ops immediate ops"); + (void)Desc; // TODO: (MII.get(MI->getOpcode()).TSFlags & // WebAssemblyII::VariableOpImmediateIsLabel) // can tell us whether this is an immediate referencing a label in the @@ -171,12 +173,12 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, assert(Desc.TSFlags == 0 && "WebAssembly variable_ops floating point ops don't use TSFlags"); const MCOperandInfo &Info = Desc.OpInfo[OpNo]; - if (Info.OperandType == WebAssembly::OPERAND_FP32IMM) { + if (Info.OperandType == WebAssembly::OPERAND_F32IMM) { // TODO: MC converts all floating point immediate operands to double. // This is fine for numeric values, but may cause NaNs to change bits. 
O << toString(APFloat(float(Op.getFPImm()))); } else { - assert(Info.OperandType == WebAssembly::OPERAND_FP64IMM); + assert(Info.OperandType == WebAssembly::OPERAND_F64IMM); O << toString(APFloat(Op.getFPImm())); } } else { @@ -200,6 +202,27 @@ WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI, O << ":p2align=" << Imm; } +void +WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + int64_t Imm = MI->getOperand(OpNo).getImm(); + switch (WebAssembly::ExprType(Imm)) { + case WebAssembly::ExprType::Void: break; + case WebAssembly::ExprType::I32: O << "i32"; break; + case WebAssembly::ExprType::I64: O << "i64"; break; + case WebAssembly::ExprType::F32: O << "f32"; break; + case WebAssembly::ExprType::F64: O << "f64"; break; + case WebAssembly::ExprType::I8x16: O << "i8x16"; break; + case WebAssembly::ExprType::I16x8: O << "i16x8"; break; + case WebAssembly::ExprType::I32x4: O << "i32x4"; break; + case WebAssembly::ExprType::F32x4: O << "f32x4"; break; + case WebAssembly::ExprType::B8x16: O << "b8x16"; break; + case WebAssembly::ExprType::B16x8: O << "b16x8"; break; + case WebAssembly::ExprType::B32x4: O << "b32x4"; break; + } +} + const char *llvm::WebAssembly::TypeToString(MVT Ty) { switch (Ty.SimpleTy) { case MVT::i32: @@ -210,6 +233,11 @@ const char *llvm::WebAssembly::TypeToString(MVT Ty) { return "f32"; case MVT::f64: return "f64"; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v4f32: + return "v128"; default: llvm_unreachable("unsupported type"); } diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h index 07b0f91..d11f99c 100644 --- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h +++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h @@ -39,6 +39,8 @@ public: void printOperand(const MCInst *MI, unsigned 
OpNo, raw_ostream &O); void printWebAssemblyP2AlignOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printWebAssemblySignatureOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp index df6fb89..97454a8 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -66,8 +66,10 @@ bool WebAssemblyAsmBackend::writeNopData(uint64_t Count, if (Count == 0) return true; - // FIXME: Do something. - return false; + for (uint64_t i = 0; i < Count; ++i) + OW->write8(WebAssembly::Nop); + + return true; } void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp index 23f8b3d..d0e0eec 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/EndianStream.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -45,7 +46,7 @@ class WebAssemblyMCCodeEmitter final : public MCCodeEmitter { const MCSubtargetInfo &STI) const override; public: - WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {} + explicit WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {} }; } // end anonymous namespace @@ -56,30 +57,59 @@ MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII) { void WebAssemblyMCCodeEmitter::encodeInstruction( 
const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { - // FIXME: This is not the real binary encoding. This is an extremely - // over-simplified encoding where we just use uint64_t for everything. This - // is a temporary measure. - support::endian::Writer<support::little>(OS).write<uint64_t>(MI.getOpcode()); + uint64_t Start = OS.tell(); + + uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); + assert(Binary < UINT8_MAX && "Multi-byte opcodes not supported yet"); + OS << uint8_t(Binary); + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); - if (Desc.isVariadic()) - support::endian::Writer<support::little>(OS).write<uint64_t>( - MI.getNumOperands() - Desc.NumOperands); for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg()) { - support::endian::Writer<support::little>(OS).write<uint64_t>(MO.getReg()); + /* nothing to encode */ } else if (MO.isImm()) { - support::endian::Writer<support::little>(OS).write<uint64_t>(MO.getImm()); + if (i < Desc.getNumOperands()) { + assert(Desc.TSFlags == 0 && + "WebAssembly non-variable_ops don't use TSFlags"); + const MCOperandInfo &Info = Desc.OpInfo[i]; + if (Info.OperandType == WebAssembly::OPERAND_I32IMM) { + encodeSLEB128(int32_t(MO.getImm()), OS); + } else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) { + encodeSLEB128(int64_t(MO.getImm()), OS); + } else { + encodeULEB128(uint64_t(MO.getImm()), OS); + } + } else { + assert(Desc.TSFlags == (WebAssemblyII::VariableOpIsImmediate | + WebAssemblyII::VariableOpImmediateIsLabel)); + encodeULEB128(uint64_t(MO.getImm()), OS); + } } else if (MO.isFPImm()) { - support::endian::Writer<support::little>(OS).write<double>(MO.getFPImm()); + assert(i < Desc.getNumOperands() && + "Unexpected floating-point immediate as a non-fixed operand"); + assert(Desc.TSFlags == 0 && + "WebAssembly variable_ops floating point ops don't use TSFlags"); + const MCOperandInfo &Info = 
Desc.OpInfo[i]; + if (Info.OperandType == WebAssembly::OPERAND_F32IMM) { + // TODO: MC converts all floating point immediate operands to double. + // This is fine for numeric values, but may cause NaNs to change bits. + float f = float(MO.getFPImm()); + support::endian::Writer<support::little>(OS).write<float>(f); + } else { + assert(Info.OperandType == WebAssembly::OPERAND_F64IMM); + double d = MO.getFPImm(); + support::endian::Writer<support::little>(OS).write<double>(d); + } } else if (MO.isExpr()) { - support::endian::Writer<support::little>(OS).write<uint64_t>(0); Fixups.push_back(MCFixup::create( - (1 + MCII.get(MI.getOpcode()).isVariadic() + i) * sizeof(uint64_t), - MO.getExpr(), + OS.tell() - Start, MO.getExpr(), STI.getTargetTriple().isArch64Bit() ? FK_Data_8 : FK_Data_4, MI.getLoc())); ++MCNumFixups; + encodeULEB128(STI.getTargetTriple().isArch64Bit() ? UINT64_MAX + : uint64_t(UINT32_MAX), + OS); } else { llvm_unreachable("unexpected operand kind"); } diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index ac11a64..3dc1ded 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -77,7 +77,8 @@ static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII, static MCAsmBackend *createAsmBackend(const Target & /*T*/, const MCRegisterInfo & /*MRI*/, - const Triple &TT, StringRef /*CPU*/) { + const Triple &TT, StringRef /*CPU*/, + const MCTargetOptions & /*Options*/) { return createWebAssemblyAsmBackend(TT); } @@ -100,7 +101,8 @@ static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S, // Force static initialization. 
extern "C" void LLVMInitializeWebAssemblyTargetMC() { - for (Target *T : {&TheWebAssemblyTarget32, &TheWebAssemblyTarget64}) { + for (Target *T : + {&getTheWebAssemblyTarget32(), &getTheWebAssemblyTarget64()}) { // Register the MC asm info. RegisterMCAsmInfoFn X(*T, createMCAsmInfo); @@ -132,3 +134,13 @@ extern "C" void LLVMInitializeWebAssemblyTargetMC() { TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer); } } + +WebAssembly::ValType WebAssembly::toValType(const MVT &Ty) { + switch (Ty.SimpleTy) { + case MVT::i32: return WebAssembly::ValType::I32; + case MVT::i64: return WebAssembly::ValType::I64; + case MVT::f32: return WebAssembly::ValType::F32; + case MVT::f64: return WebAssembly::ValType::F64; + default: llvm_unreachable("unexpected type"); + } +} diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 001bd7f..8583b77 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -26,12 +26,13 @@ class MCContext; class MCInstrInfo; class MCObjectWriter; class MCSubtargetInfo; +class MVT; class Target; class Triple; class raw_pwrite_stream; -extern Target TheWebAssemblyTarget32; -extern Target TheWebAssemblyTarget64; +Target &getTheWebAssemblyTarget32(); +Target &getTheWebAssemblyTarget64(); MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII); @@ -44,23 +45,25 @@ namespace WebAssembly { enum OperandType { /// Basic block label in a branch construct. OPERAND_BASIC_BLOCK = MCOI::OPERAND_FIRST_TARGET, + /// Local index. + OPERAND_LOCAL, + /// 32-bit integer immediates. + OPERAND_I32IMM, + /// 64-bit integer immediates. + OPERAND_I64IMM, /// 32-bit floating-point immediates. - OPERAND_FP32IMM, + OPERAND_F32IMM, /// 64-bit floating-point immediates. 
- OPERAND_FP64IMM, + OPERAND_F64IMM, + /// 32-bit unsigned function indices. + OPERAND_FUNCTION32, + /// 32-bit unsigned memory offsets. + OPERAND_OFFSET32, /// p2align immediate for load and store address alignment. - OPERAND_P2ALIGN + OPERAND_P2ALIGN, + /// signature immediate for block/loop. + OPERAND_SIGNATURE }; - -/// WebAssembly-specific directive identifiers. -enum Directive { - // FIXME: This is not the real binary encoding. - DotParam = UINT64_MAX - 0, ///< .param - DotResult = UINT64_MAX - 1, ///< .result - DotLocal = UINT64_MAX - 2, ///< .local - DotEndFunc = UINT64_MAX - 3, ///< .endfunc -}; - } // end namespace WebAssembly namespace WebAssemblyII { @@ -70,7 +73,7 @@ enum { VariableOpIsImmediate = (1 << 0), // For immediate values in the variable_ops range, this flag indicates // whether the value represents a control-flow label. - VariableOpImmediateIsLabel = (1 << 1), + VariableOpImmediateIsLabel = (1 << 1) }; } // end namespace WebAssemblyII @@ -123,14 +126,55 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) { case WebAssembly::STORE_I64: case WebAssembly::STORE_F64: return 3; - default: llvm_unreachable("Only loads and stores have p2align values"); + default: + llvm_unreachable("Only loads and stores have p2align values"); } } /// The operand number of the load or store address in load/store instructions. -static const unsigned MemOpAddressOperandNo = 2; -/// The operand number of the stored value in a store instruction. -static const unsigned StoreValueOperandNo = 4; +static const unsigned LoadAddressOperandNo = 3; +static const unsigned StoreAddressOperandNo = 2; + +/// The operand number of the load or store p2align in load/store instructions. +static const unsigned LoadP2AlignOperandNo = 1; +static const unsigned StoreP2AlignOperandNo = 0; + +/// This is used to indicate block signatures. 
+enum class ExprType { + Void = 0x40, + I32 = 0x7f, + I64 = 0x7e, + F32 = 0x7d, + F64 = 0x7c, + I8x16 = 0x7b, + I16x8 = 0x7a, + I32x4 = 0x79, + F32x4 = 0x78, + B8x16 = 0x77, + B16x8 = 0x76, + B32x4 = 0x75 +}; + +/// This is used to indicate local types. +enum class ValType { + I32 = 0x7f, + I64 = 0x7e, + F32 = 0x7d, + F64 = 0x7c, + I8x16 = 0x7b, + I16x8 = 0x7a, + I32x4 = 0x79, + F32x4 = 0x78, + B8x16 = 0x77, + B16x8 = 0x76, + B32x4 = 0x75 +}; + +/// Instruction opcodes emitted via means other than CodeGen. +static const unsigned Nop = 0x01; +static const unsigned End = 0x0b; + +ValType toValType(const MVT &Ty); } // end namespace WebAssembly } // end namespace llvm diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp index 3d61c15..3cee8b2 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -58,45 +58,63 @@ void WebAssemblyTargetAsmStreamer::emitResult(ArrayRef<MVT> Types) { } void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<MVT> Types) { - OS << "\t.local \t"; - PrintTypes(OS, Types); + if (!Types.empty()) { + OS << "\t.local \t"; + PrintTypes(OS, Types); + } } void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; } void WebAssemblyTargetAsmStreamer::emitIndirectFunctionType( - StringRef name, SmallVectorImpl<MVT> &SignatureVTs, size_t NumResults) { + StringRef name, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) { OS << "\t.functype\t" << name; - if (NumResults == 0) OS << ", void"; - for (auto Ty : SignatureVTs) { - OS << ", " << WebAssembly::TypeToString(Ty); + if (Results.empty()) + OS << ", void"; + else { + assert(Results.size() == 1); + OS << ", " << WebAssembly::TypeToString(Results.front()); } - OS << "\n"; + for (auto Ty : Params) + OS << ", " << 
WebAssembly::TypeToString(Ty); + OS << '\n'; } -// FIXME: What follows is not the real binary encoding. +void WebAssemblyTargetAsmStreamer::emitGlobalImport(StringRef name) { + OS << "\t.import_global\t" << name << '\n'; +} -static void EncodeTypes(MCStreamer &Streamer, ArrayRef<MVT> Types) { - Streamer.EmitIntValue(Types.size(), sizeof(uint64_t)); - for (MVT Type : Types) - Streamer.EmitIntValue(Type.SimpleTy, sizeof(uint64_t)); +void WebAssemblyTargetAsmStreamer::emitIndIdx(const MCExpr *Value) { + OS << "\t.indidx \t" << *Value << '\n'; } void WebAssemblyTargetELFStreamer::emitParam(ArrayRef<MVT> Types) { - Streamer.EmitIntValue(WebAssembly::DotParam, sizeof(uint64_t)); - EncodeTypes(Streamer, Types); + // Nothing to emit; params are declared as part of the function signature. } void WebAssemblyTargetELFStreamer::emitResult(ArrayRef<MVT> Types) { - Streamer.EmitIntValue(WebAssembly::DotResult, sizeof(uint64_t)); - EncodeTypes(Streamer, Types); + // Nothing to emit; results are declared as part of the function signature. } void WebAssemblyTargetELFStreamer::emitLocal(ArrayRef<MVT> Types) { - Streamer.EmitIntValue(WebAssembly::DotLocal, sizeof(uint64_t)); - EncodeTypes(Streamer, Types); + Streamer.EmitULEB128IntValue(Types.size()); + for (MVT Type : Types) + Streamer.EmitIntValue(int64_t(WebAssembly::toValType(Type)), 1); } void WebAssemblyTargetELFStreamer::emitEndFunc() { - Streamer.EmitIntValue(WebAssembly::DotEndFunc, sizeof(uint64_t)); + Streamer.EmitIntValue(WebAssembly::End, 1); +} + +void WebAssemblyTargetELFStreamer::emitIndIdx(const MCExpr *Value) { + llvm_unreachable(".indidx encoding not yet implemented"); } + +void WebAssemblyTargetELFStreamer::emitIndirectFunctionType( + StringRef name, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) { + // Nothing to emit here. TODO: Re-design how linking works and re-evaluate + // whether it's necessary for .o files to declare indirect function types. 
+} + +void WebAssemblyTargetELFStreamer::emitGlobalImport(StringRef name) { +}
\ No newline at end of file diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h index 51354ef..23ac319 100644 --- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h +++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -39,10 +39,14 @@ public: virtual void emitEndFunc() = 0; /// .functype virtual void emitIndirectFunctionType(StringRef name, - SmallVectorImpl<MVT> &SignatureVTs, - size_t NumResults) { + SmallVectorImpl<MVT> &Params, + SmallVectorImpl<MVT> &Results) { llvm_unreachable("emitIndirectFunctionType not implemented"); } + /// .indidx + virtual void emitIndIdx(const MCExpr *Value) = 0; + /// .import_global + virtual void emitGlobalImport(StringRef name) = 0; }; /// This part is for ascii assembly output @@ -57,8 +61,10 @@ public: void emitLocal(ArrayRef<MVT> Types) override; void emitEndFunc() override; void emitIndirectFunctionType(StringRef name, - SmallVectorImpl<MVT> &SignatureVTs, - size_t NumResults) override; + SmallVectorImpl<MVT> &Params, + SmallVectorImpl<MVT> &Results) override; + void emitIndIdx(const MCExpr *Value) override; + void emitGlobalImport(StringRef name) override; }; /// This part is for ELF object output @@ -70,6 +76,11 @@ public: void emitResult(ArrayRef<MVT> Types) override; void emitLocal(ArrayRef<MVT> Types) override; void emitEndFunc() override; + void emitIndirectFunctionType(StringRef name, + SmallVectorImpl<MVT> &Params, + SmallVectorImpl<MVT> &Results) override; + void emitIndIdx(const MCExpr *Value) override; + void emitGlobalImport(StringRef name) override; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/WebAssembly/README.txt b/contrib/llvm/lib/Target/WebAssembly/README.txt index a6c2eef..64991ad 100644 --- a/contrib/llvm/lib/Target/WebAssembly/README.txt +++ b/contrib/llvm/lib/Target/WebAssembly/README.txt @@ -1,22 +1,30 @@ 
//===-- README.txt - Notes for WebAssembly code gen -----------------------===// -This WebAssembly backend is presently in a very early stage of development. -The code should build and not break anything else, but don't expect a lot more -at this point. +This WebAssembly backend is presently under development. -For more information on WebAssembly itself, see the design documents: - * https://github.com/WebAssembly/design/blob/master/README.md +Currently the easiest way to use it is through Emscripten, which provides a +compilation environment that includes standard libraries, tools, and packaging +for producing WebAssembly applications that can run in browsers and other +environments. For more information, see the Emscripten documentation in +general, and this page in particular: + * https://github.com/kripken/emscripten/wiki/New-WebAssembly-Backend -The following documents contain some information on the planned semantics and -binary encoding of WebAssembly itself: - * https://github.com/WebAssembly/design/blob/master/AstSemantics.md +Other ways of using this backend, such as via a standalone "clang", are also +under development, though they are not generally usable yet. + +For more information on WebAssembly itself, see the home page: + * https://webassembly.github.io/ + +The following documents contain some information on the semantics and binary +encoding of WebAssembly itself: + * https://github.com/WebAssembly/design/blob/master/Semantics.md * https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md The backend is built, tested and archived on the following waterfall: https://wasm-stat.us -The backend's bringup is done using the GCC torture test suite first since it -doesn't require C library support. Current known failures are in +The backend's bringup is done in part by using the GCC torture test suite, since +it doesn't require C library support. Current known failures are in known_gcc_test_failures.txt, all other tests should pass. 
The waterfall will turn red if not. Once most of these pass, further testing will use LLVM's own test suite. The tests can be run locally using: @@ -24,13 +32,13 @@ test suite. The tests can be run locally using: //===---------------------------------------------------------------------===// -Br, br_if, and br_table instructions can support having a value on the -expression stack across the jump (sometimes). We should (a) model this, and -(b) extend the stackifier to utilize it. +Br, br_if, and br_table instructions can support having a value on the value +stack across the jump (sometimes). We should (a) model this, and (b) extend +the stackifier to utilize it. //===---------------------------------------------------------------------===// -The min/max operators aren't exactly a<b?a:b because of NaN and negative zero +The min/max instructions aren't exactly a<b?a:b because of NaN and negative zero behavior. The ARM target has the same kind of min/max instructions and has implemented optimizations for them; we should do similar optimizations for WebAssembly. @@ -44,7 +52,7 @@ us too? //===---------------------------------------------------------------------===// -Register stackification uses the EXPR_STACK physical register to impose +Register stackification uses the VALUE_STACK physical register to impose ordering dependencies on instructions with stack operands. This is pessimistic; we should consider alternate ways to model stack dependencies. @@ -99,12 +107,6 @@ according to their usage frequency to maximize the usage of smaller encodings. //===---------------------------------------------------------------------===// -When the last statement in a function body computes the return value, it can -just let that value be the exit value of the outermost block, rather than -needing an explicit return operation. 
- -//===---------------------------------------------------------------------===// - Many cases of irreducible control flow could be transformed more optimally than via the transform in WebAssemblyFixIrreducibleControlFlow.cpp. @@ -135,3 +137,11 @@ enableMachineScheduler) and determine if it can be configured to schedule instructions advantageously for this purpose. //===---------------------------------------------------------------------===// + +WebAssembly is now officially a stack machine, rather than an AST, and this +comes with additional opportunities for WebAssemblyRegStackify. Specifically, +the stack doesn't need to be empty after an instruction with no return values. +WebAssemblyRegStackify could be extended, or possibly rewritten, to take +advantage of the new opportunities. + +//===---------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp index ddb1eb1..f310f0a 100644 --- a/contrib/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp @@ -19,12 +19,18 @@ using namespace llvm; #define DEBUG_TYPE "wasm-target-info" -Target llvm::TheWebAssemblyTarget32; -Target llvm::TheWebAssemblyTarget64; +Target &llvm::getTheWebAssemblyTarget32() { + static Target TheWebAssemblyTarget32; + return TheWebAssemblyTarget32; +} +Target &llvm::getTheWebAssemblyTarget64() { + static Target TheWebAssemblyTarget64; + return TheWebAssemblyTarget64; +} extern "C" void LLVMInitializeWebAssemblyTargetInfo() { - RegisterTarget<Triple::wasm32> X(TheWebAssemblyTarget32, "wasm32", + RegisterTarget<Triple::wasm32> X(getTheWebAssemblyTarget32(), "wasm32", "WebAssembly 32-bit"); - RegisterTarget<Triple::wasm64> Y(TheWebAssemblyTarget64, "wasm64", + RegisterTarget<Triple::wasm64> Y(getTheWebAssemblyTarget64(), "wasm64", 
"WebAssembly 64-bit"); } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h index 957f31c..8738263 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h @@ -16,14 +16,19 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLY_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLY_H +#include "llvm/PassRegistry.h" #include "llvm/Support/CodeGen.h" namespace llvm { class WebAssemblyTargetMachine; +class ModulePass; class FunctionPass; // LLVM IR passes. +ModulePass *createWebAssemblyLowerEmscriptenEHSjLj(bool DoEH, bool DoSjLj); +void initializeWebAssemblyLowerEmscriptenEHSjLjPass(PassRegistry &); +ModulePass *createWebAssemblyFixFunctionBitcasts(); FunctionPass *createWebAssemblyOptimizeReturned(); // ISel and immediate followup passes. @@ -39,11 +44,13 @@ FunctionPass *createWebAssemblyOptimizeLiveIntervals(); FunctionPass *createWebAssemblyStoreResults(); FunctionPass *createWebAssemblyRegStackify(); FunctionPass *createWebAssemblyRegColoring(); +FunctionPass *createWebAssemblyExplicitLocals(); FunctionPass *createWebAssemblyFixIrreducibleControlFlow(); FunctionPass *createWebAssemblyCFGStackify(); FunctionPass *createWebAssemblyLowerBrUnless(); FunctionPass *createWebAssemblyRegNumbering(); FunctionPass *createWebAssemblyPeephole(); +FunctionPass *createWebAssemblyCallIndirectFixup(); } // end namespace llvm diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td index 551ad93..f647349 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td @@ -23,7 +23,7 @@ include "llvm/Target/Target.td" // WebAssembly Subtarget features. 
//===----------------------------------------------------------------------===// -def FeatureSIMD128 : SubtargetFeature<"simd128", "HasSIMD128", "false", +def FeatureSIMD128 : SubtargetFeature<"simd128", "HasSIMD128", "true", "Enable 128-bit SIMD">; //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp index 5887f45..5fadca3 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp @@ -26,9 +26,11 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "WebAssemblyUtilities.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -44,9 +46,7 @@ public: static char ID; // Pass identification, replacement for typeid WebAssemblyArgumentMove() : MachineFunctionPass(ID) {} - const char *getPassName() const override { - return "WebAssembly Argument Move"; - } + StringRef getPassName() const override { return "WebAssembly Argument Move"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -64,19 +64,6 @@ FunctionPass *llvm::createWebAssemblyArgumentMove() { return new WebAssemblyArgumentMove(); } -/// Test whether the given instruction is an ARGUMENT. 
-static bool IsArgument(const MachineInstr &MI) { - switch (MI.getOpcode()) { - case WebAssembly::ARGUMENT_I32: - case WebAssembly::ARGUMENT_I64: - case WebAssembly::ARGUMENT_F32: - case WebAssembly::ARGUMENT_F64: - return true; - default: - return false; - } -} - bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) { DEBUG({ dbgs() << "********** Argument Move **********\n" @@ -89,7 +76,7 @@ bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) { // Look for the first NonArg instruction. for (MachineInstr &MI : EntryMBB) { - if (!IsArgument(MI)) { + if (!WebAssembly::isArgument(MI)) { InsertPt = MI; break; } @@ -98,7 +85,7 @@ bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) { // Now move any argument instructions later in the block // to before our first NonArg instruction. for (MachineInstr &MI : llvm::make_range(InsertPt, EntryMBB.end())) { - if (IsArgument(MI)) { + if (WebAssembly::isArgument(MI)) { EntryMBB.insert(InsertPt, MI.removeFromParent()); Changed = true; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 54e9f7f..5b4b82e 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -14,10 +14,10 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "InstPrinter/WebAssemblyInstPrinter.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "MCTargetDesc/WebAssemblyTargetStreamer.h" +#include "WebAssembly.h" #include "WebAssemblyMCInstLower.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblyRegisterInfo.h" @@ -42,14 +42,14 @@ namespace { class WebAssemblyAsmPrinter final : public AsmPrinter { const MachineRegisterInfo *MRI; - const WebAssemblyFunctionInfo *MFI; + WebAssemblyFunctionInfo *MFI; public: 
WebAssemblyAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) : AsmPrinter(TM, std::move(Streamer)), MRI(nullptr), MFI(nullptr) {} private: - const char *getPassName() const override { + StringRef getPassName() const override { return "WebAssembly Assembly Printer"; } @@ -82,7 +82,6 @@ private: raw_ostream &OS) override; MVT getRegType(unsigned RegNo) const; - const char *toString(MVT VT) const; std::string regToString(const MachineOperand &MO); WebAssemblyTargetStreamer *getTargetStreamer(); }; @@ -95,7 +94,8 @@ private: MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { const TargetRegisterClass *TRC = MRI->getRegClass(RegNo); - for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) + for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64, MVT::v16i8, MVT::v8i16, + MVT::v4i32, MVT::v4f32}) if (TRC->hasType(T)) return T; DEBUG(errs() << "Unknown type for register number: " << RegNo); @@ -103,10 +103,6 @@ MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { return MVT::Other; } -const char *WebAssemblyAsmPrinter::toString(MVT VT) const { - return WebAssembly::TypeToString(VT); -} - std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) { unsigned RegNo = MO.getReg(); assert(TargetRegisterInfo::isVirtualRegister(RegNo) && @@ -125,45 +121,21 @@ WebAssemblyTargetStreamer *WebAssemblyAsmPrinter::getTargetStreamer() { //===----------------------------------------------------------------------===// // WebAssemblyAsmPrinter Implementation. 
//===----------------------------------------------------------------------===// -static void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM, - Type *Ty, SmallVectorImpl<MVT> &ValueVTs) { - const DataLayout &DL(F.getParent()->getDataLayout()); - const WebAssemblyTargetLowering &TLI = - *TM.getSubtarget<WebAssemblySubtarget>(F).getTargetLowering(); - SmallVector<EVT, 4> VTs; - ComputeValueVTs(TLI, DL, Ty, VTs); - - for (EVT VT : VTs) { - unsigned NumRegs = TLI.getNumRegisters(F.getContext(), VT); - MVT RegisterVT = TLI.getRegisterType(F.getContext(), VT); - for (unsigned i = 0; i != NumRegs; ++i) - ValueVTs.push_back(RegisterVT); - } -} void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) { for (const auto &F : M) { // Emit function type info for all undefined functions if (F.isDeclarationForLinker() && !F.isIntrinsic()) { - SmallVector<MVT, 4> SignatureVTs; - ComputeLegalValueVTs(F, TM, F.getReturnType(), SignatureVTs); - size_t NumResults = SignatureVTs.size(); - if (SignatureVTs.size() > 1) { - // WebAssembly currently can't lower returns of multiple values without - // demoting to sret (see WebAssemblyTargetLowering::CanLowerReturn). So - // replace multiple return values with a pointer parameter. 
- SignatureVTs.clear(); - SignatureVTs.push_back( - MVT::getIntegerVT(M.getDataLayout().getPointerSizeInBits())); - NumResults = 0; - } - - for (auto &Arg : F.args()) { - ComputeLegalValueVTs(F, TM, Arg.getType(), SignatureVTs); - } - - getTargetStreamer()->emitIndirectFunctionType(F.getName(), SignatureVTs, - NumResults); + SmallVector<MVT, 4> Results; + SmallVector<MVT, 4> Params; + ComputeSignatureVTs(F, TM, Params, Results); + getTargetStreamer()->emitIndirectFunctionType(F.getName(), Params, + Results); + } + } + for (const auto &G : M.globals()) { + if (!G.hasInitializer() && G.hasExternalLinkage()) { + getTargetStreamer()->emitGlobalImport(G.getGlobalIdentifier()); } } } @@ -183,6 +155,15 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() { SmallVector<MVT, 4> ResultVTs; const Function &F(*MF->getFunction()); + + // Emit the function index. + if (MDNode *Idx = F.getMetadata("wasm.index")) { + assert(Idx->getNumOperands() == 1); + + getTargetStreamer()->emitIndIdx(AsmPrinter::lowerConstant( + cast<ConstantAsMetadata>(Idx->getOperand(0))->getValue())); + } + ComputeLegalValueVTs(F, TM, F.getReturnType(), ResultVTs); // If the return type needs to be legalized it will get converted into @@ -190,8 +171,8 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() { if (ResultVTs.size() == 1) getTargetStreamer()->emitResult(ResultVTs); - bool AnyWARegs = false; - SmallVector<MVT, 16> LocalTypes; + // FIXME: When ExplicitLocals is enabled by default, we won't need + // to define the locals here (and MFI can go back to being pointer-to-const). for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) { unsigned VReg = TargetRegisterInfo::index2VirtReg(Idx); unsigned WAReg = MFI->getWAReg(VReg); @@ -204,11 +185,10 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() { // Don't declare stackified registers. 
if (int(WAReg) < 0) continue; - LocalTypes.push_back(getRegType(VReg)); - AnyWARegs = true; + MFI->addLocal(getRegType(VReg)); } - if (AnyWARegs) - getTargetStreamer()->emitLocal(LocalTypes); + + getTargetStreamer()->emitLocal(MFI->getLocals()); AsmPrinter::EmitFunctionBodyStart(); } @@ -225,13 +205,21 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { case WebAssembly::ARGUMENT_I64: case WebAssembly::ARGUMENT_F32: case WebAssembly::ARGUMENT_F64: + case WebAssembly::ARGUMENT_v16i8: + case WebAssembly::ARGUMENT_v8i16: + case WebAssembly::ARGUMENT_v4i32: + case WebAssembly::ARGUMENT_v4f32: // These represent values which are live into the function entry, so there's // no instruction to emit. break; case WebAssembly::FALLTHROUGH_RETURN_I32: case WebAssembly::FALLTHROUGH_RETURN_I64: case WebAssembly::FALLTHROUGH_RETURN_F32: - case WebAssembly::FALLTHROUGH_RETURN_F64: { + case WebAssembly::FALLTHROUGH_RETURN_F64: + case WebAssembly::FALLTHROUGH_RETURN_v16i8: + case WebAssembly::FALLTHROUGH_RETURN_v8i16: + case WebAssembly::FALLTHROUGH_RETURN_v4i32: + case WebAssembly::FALLTHROUGH_RETURN_v4f32: { // These instructions represent the implicit return at the end of a // function body. The operand is always a pop. assert(MFI->isVRegStackified(MI->getOperand(0).getReg())); @@ -329,6 +317,6 @@ bool WebAssemblyAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, // Force static initialization. 
extern "C" void LLVMInitializeWebAssemblyAsmPrinter() { - RegisterAsmPrinter<WebAssemblyAsmPrinter> X(TheWebAssemblyTarget32); - RegisterAsmPrinter<WebAssemblyAsmPrinter> Y(TheWebAssemblyTarget64); + RegisterAsmPrinter<WebAssemblyAsmPrinter> X(getTheWebAssemblyTarget32()); + RegisterAsmPrinter<WebAssemblyAsmPrinter> Y(getTheWebAssemblyTarget64()); } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index 33166f5..49b9754 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -27,6 +27,7 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" +#include "WebAssemblyUtilities.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineDominators.h" @@ -43,9 +44,7 @@ using namespace llvm; namespace { class WebAssemblyCFGStackify final : public MachineFunctionPass { - const char *getPassName() const override { - return "WebAssembly CFG Stackify"; - } + StringRef getPassName() const override { return "WebAssembly CFG Stackify"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -294,26 +293,16 @@ static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred, return false; } -/// Test whether MI is a child of some other node in an expression tree. -static bool IsChild(const MachineInstr &MI, - const WebAssemblyFunctionInfo &MFI) { - if (MI.getNumOperands() == 0) - return false; - const MachineOperand &MO = MI.getOperand(0); - if (!MO.isReg() || MO.isImplicit() || !MO.isDef()) - return false; - unsigned Reg = MO.getReg(); - return TargetRegisterInfo::isVirtualRegister(Reg) && - MFI.isVRegStackified(Reg); -} - /// Insert a BLOCK marker for branches to MBB (if needed). 
-static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, - SmallVectorImpl<MachineBasicBlock *> &ScopeTops, - const WebAssemblyInstrInfo &TII, - const MachineLoopInfo &MLI, - MachineDominatorTree &MDT, - WebAssemblyFunctionInfo &MFI) { +static void PlaceBlockMarker( + MachineBasicBlock &MBB, MachineFunction &MF, + SmallVectorImpl<MachineBasicBlock *> &ScopeTops, + DenseMap<const MachineInstr *, MachineInstr *> &BlockTops, + DenseMap<const MachineInstr *, MachineInstr *> &LoopTops, + const WebAssemblyInstrInfo &TII, + const MachineLoopInfo &MLI, + MachineDominatorTree &MDT, + WebAssemblyFunctionInfo &MFI) { // First compute the nearest common dominator of all forward non-fallthrough // predecessors so that we minimize the time that the BLOCK is on the stack, // which reduces overall stack height. @@ -332,7 +321,7 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, return; assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors"); - MachineBasicBlock *LayoutPred = &*prev(MachineFunction::iterator(&MBB)); + MachineBasicBlock *LayoutPred = &*std::prev(MachineFunction::iterator(&MBB)); // If the nearest common dominator is inside a more deeply nested context, // walk out to the nearest scope which isn't more deeply nested. @@ -340,7 +329,7 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) { if (ScopeTop->getNumber() > Header->getNumber()) { // Skip over an intervening scope. - I = next(MachineFunction::iterator(ScopeTop)); + I = std::next(MachineFunction::iterator(ScopeTop)); } else { // We found a scope level at an appropriate depth. Header = ScopeTop; @@ -349,13 +338,6 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, } } - // If there's a loop which ends just before MBB which contains Header, we can - // reuse its label instead of inserting a new BLOCK. 
- for (MachineLoop *Loop = MLI.getLoopFor(LayoutPred); - Loop && Loop->contains(LayoutPred); Loop = Loop->getParentLoop()) - if (Loop && LoopBottom(Loop) == LayoutPred && Loop->contains(Header)) - return; - // Decide where in Header to put the BLOCK. MachineBasicBlock::iterator InsertPos; MachineLoop *HeaderLoop = MLI.getLoopFor(Header); @@ -363,28 +345,35 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, // Header is the header of a loop that does not lexically contain MBB, so // the BLOCK needs to be above the LOOP, after any END constructs. InsertPos = Header->begin(); - while (InsertPos->getOpcode() != WebAssembly::LOOP) + while (InsertPos->getOpcode() == WebAssembly::END_BLOCK || + InsertPos->getOpcode() == WebAssembly::END_LOOP) ++InsertPos; } else { // Otherwise, insert the BLOCK as late in Header as we can, but before the // beginning of the local expression tree and any nested BLOCKs. InsertPos = Header->getFirstTerminator(); - while (InsertPos != Header->begin() && IsChild(*prev(InsertPos), MFI) && - prev(InsertPos)->getOpcode() != WebAssembly::LOOP && - prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK && - prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP) + while (InsertPos != Header->begin() && + WebAssembly::isChild(*std::prev(InsertPos), MFI) && + std::prev(InsertPos)->getOpcode() != WebAssembly::LOOP && + std::prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK && + std::prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP) --InsertPos; } // Add the BLOCK. - BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK)); + MachineInstr *Begin = BuildMI(*Header, InsertPos, DebugLoc(), + TII.get(WebAssembly::BLOCK)) + .addImm(int64_t(WebAssembly::ExprType::Void)); // Mark the end of the block. 
InsertPos = MBB.begin(); while (InsertPos != MBB.end() && - InsertPos->getOpcode() == WebAssembly::END_LOOP) + InsertPos->getOpcode() == WebAssembly::END_LOOP && + LoopTops[&*InsertPos]->getParent()->getNumber() >= Header->getNumber()) ++InsertPos; - BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::END_BLOCK)); + MachineInstr *End = BuildMI(MBB, InsertPos, DebugLoc(), + TII.get(WebAssembly::END_BLOCK)); + BlockTops[End] = Begin; // Track the farthest-spanning scope that ends at this point. int Number = MBB.getNumber(); @@ -397,7 +386,7 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, static void PlaceLoopMarker( MachineBasicBlock &MBB, MachineFunction &MF, SmallVectorImpl<MachineBasicBlock *> &ScopeTops, - DenseMap<const MachineInstr *, const MachineBasicBlock *> &LoopTops, + DenseMap<const MachineInstr *, MachineInstr *> &LoopTops, const WebAssemblyInstrInfo &TII, const MachineLoopInfo &MLI) { MachineLoop *Loop = MLI.getLoopFor(&MBB); if (!Loop || Loop->getHeader() != &MBB) @@ -406,13 +395,13 @@ static void PlaceLoopMarker( // The operand of a LOOP is the first block after the loop. If the loop is the // bottom of the function, insert a dummy block at the end. MachineBasicBlock *Bottom = LoopBottom(Loop); - auto Iter = next(MachineFunction::iterator(Bottom)); + auto Iter = std::next(MachineFunction::iterator(Bottom)); if (Iter == MF.end()) { MachineBasicBlock *Label = MF.CreateMachineBasicBlock(); // Give it a fake predecessor so that AsmPrinter prints its label. 
Label->addSuccessor(Label); MF.push_back(Label); - Iter = next(MachineFunction::iterator(Bottom)); + Iter = std::next(MachineFunction::iterator(Bottom)); } MachineBasicBlock *AfterLoop = &*Iter; @@ -422,12 +411,14 @@ static void PlaceLoopMarker( while (InsertPos != MBB.end() && InsertPos->getOpcode() == WebAssembly::END_LOOP) ++InsertPos; - BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::LOOP)); + MachineInstr *Begin = BuildMI(MBB, InsertPos, DebugLoc(), + TII.get(WebAssembly::LOOP)) + .addImm(int64_t(WebAssembly::ExprType::Void)); // Mark the end of the loop. MachineInstr *End = BuildMI(*AfterLoop, AfterLoop->begin(), DebugLoc(), TII.get(WebAssembly::END_LOOP)); - LoopTops[End] = &MBB; + LoopTops[End] = Begin; assert((!ScopeTops[AfterLoop->getNumber()] || ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) && @@ -449,6 +440,54 @@ GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack, return Depth; } +/// In normal assembly languages, when the end of a function is unreachable, +/// because the function ends in an infinite loop or a noreturn call or similar, +/// it isn't necessary to worry about the function return type at the end of +/// the function, because it's never reached. However, in WebAssembly, blocks +/// that end at the function end need to have a return type signature that +/// matches the function signature, even though it's unreachable. This function +/// checks for such cases and fixes up the signatures. 
+static void FixEndsAtEndOfFunction( + MachineFunction &MF, + const WebAssemblyFunctionInfo &MFI, + DenseMap<const MachineInstr *, MachineInstr *> &BlockTops, + DenseMap<const MachineInstr *, MachineInstr *> &LoopTops) { + assert(MFI.getResults().size() <= 1); + + if (MFI.getResults().empty()) + return; + + WebAssembly::ExprType retType; + switch (MFI.getResults().front().SimpleTy) { + case MVT::i32: retType = WebAssembly::ExprType::I32; break; + case MVT::i64: retType = WebAssembly::ExprType::I64; break; + case MVT::f32: retType = WebAssembly::ExprType::F32; break; + case MVT::f64: retType = WebAssembly::ExprType::F64; break; + case MVT::v16i8: retType = WebAssembly::ExprType::I8x16; break; + case MVT::v8i16: retType = WebAssembly::ExprType::I16x8; break; + case MVT::v4i32: retType = WebAssembly::ExprType::I32x4; break; + case MVT::v4f32: retType = WebAssembly::ExprType::F32x4; break; + default: llvm_unreachable("unexpected return type"); + } + + for (MachineBasicBlock &MBB : reverse(MF)) { + for (MachineInstr &MI : reverse(MBB)) { + if (MI.isPosition() || MI.isDebugValue()) + continue; + if (MI.getOpcode() == WebAssembly::END_BLOCK) { + BlockTops[&MI]->getOperand(0).setImm(int32_t(retType)); + continue; + } + if (MI.getOpcode() == WebAssembly::END_LOOP) { + LoopTops[&MI]->getOperand(0).setImm(int32_t(retType)); + continue; + } + // Something other than an `end`. We're done. + return; + } + } +} + /// Insert LOOP and BLOCK markers at appropriate places. static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI, const WebAssemblyInstrInfo &TII, @@ -461,15 +500,18 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI, // we may insert at the end. SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1); - // For eacn LOOP_END, the corresponding LOOP. - DenseMap<const MachineInstr *, const MachineBasicBlock *> LoopTops; + // For each LOOP_END, the corresponding LOOP. 
+ DenseMap<const MachineInstr *, MachineInstr *> LoopTops; + + // For each END_BLOCK, the corresponding BLOCK. + DenseMap<const MachineInstr *, MachineInstr *> BlockTops; for (auto &MBB : MF) { // Place the LOOP for MBB if MBB is the header of a loop. PlaceLoopMarker(MBB, MF, ScopeTops, LoopTops, TII, MLI); // Place the BLOCK for MBB if MBB is branched to from above. - PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT, MFI); + PlaceBlockMarker(MBB, MF, ScopeTops, BlockTops, LoopTops, TII, MLI, MDT, MFI); } // Now rewrite references to basic blocks to be depth immediates. @@ -478,21 +520,19 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI, for (auto &MI : reverse(MBB)) { switch (MI.getOpcode()) { case WebAssembly::BLOCK: - assert(ScopeTops[Stack.back()->getNumber()] == &MBB && + assert(ScopeTops[Stack.back()->getNumber()]->getNumber() <= MBB.getNumber() && "Block should be balanced"); Stack.pop_back(); break; case WebAssembly::LOOP: assert(Stack.back() == &MBB && "Loop top should be balanced"); Stack.pop_back(); - Stack.pop_back(); break; case WebAssembly::END_BLOCK: Stack.push_back(&MBB); break; case WebAssembly::END_LOOP: - Stack.push_back(&MBB); - Stack.push_back(LoopTops[&MI]); + Stack.push_back(LoopTops[&MI]->getParent()); break; default: if (MI.isTerminator()) { @@ -511,6 +551,10 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI, } } assert(Stack.empty() && "Control flow should be balanced"); + + // Fix up block/loop signatures at the end of the function to conform to + // WebAssembly's rules. + FixEndsAtEndOfFunction(MF, MFI, BlockTops, LoopTops); } bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { @@ -520,7 +564,7 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { const auto &MLI = getAnalysis<MachineLoopInfo>(); auto &MDT = getAnalysis<MachineDominatorTree>(); - // Liveness is not tracked for EXPR_STACK physreg. 
+ // Liveness is not tracked for VALUE_STACK physreg. const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); MF.getRegInfo().invalidateLiveness(); diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp new file mode 100644 index 0000000..fc0a01c --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp @@ -0,0 +1,120 @@ +//===-- WebAssemblyCallIndirectFixup.cpp - Fix call_indirects -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file converts pseudo call_indirect instructions into real +/// call_indirects. +/// +/// The order of arguments for a call_indirect is the arguments to the function +/// call, followed by the function pointer. There's no natural way to express +/// a machineinstr with varargs followed by one more arg, so we express it as +/// the function pointer followed by varargs, then rewrite it here. +/// +/// We need to rewrite the order of the arguments on the machineinstrs +/// themselves so that register stackification knows the order they'll be +/// executed in. 
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_* +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-call-indirect-fixup" + +namespace { +class WebAssemblyCallIndirectFixup final : public MachineFunctionPass { + StringRef getPassName() const override { + return "WebAssembly CallIndirect Fixup"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyCallIndirectFixup() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyCallIndirectFixup::ID = 0; +FunctionPass *llvm::createWebAssemblyCallIndirectFixup() { + return new WebAssemblyCallIndirectFixup(); +} + +static unsigned GetNonPseudoCallIndirectOpcode(const MachineInstr &MI) { + switch (MI.getOpcode()) { + using namespace WebAssembly; + case PCALL_INDIRECT_VOID: return CALL_INDIRECT_VOID; + case PCALL_INDIRECT_I32: return CALL_INDIRECT_I32; + case PCALL_INDIRECT_I64: return CALL_INDIRECT_I64; + case PCALL_INDIRECT_F32: return CALL_INDIRECT_F32; + case PCALL_INDIRECT_F64: return CALL_INDIRECT_F64; + case PCALL_INDIRECT_v16i8: return CALL_INDIRECT_v16i8; + case PCALL_INDIRECT_v8i16: return CALL_INDIRECT_v8i16; + case PCALL_INDIRECT_v4i32: return CALL_INDIRECT_v4i32; + case PCALL_INDIRECT_v4f32: return CALL_INDIRECT_v4f32; + default: return INSTRUCTION_LIST_END; + } +} + +static bool 
IsPseudoCallIndirect(const MachineInstr &MI) { + return GetNonPseudoCallIndirectOpcode(MI) != + WebAssembly::INSTRUCTION_LIST_END; +} + +bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** Fixing up CALL_INDIRECTs **********\n" + << MF.getName() << '\n'); + + bool Changed = false; + const WebAssemblyInstrInfo *TII = + MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (IsPseudoCallIndirect(MI)) { + DEBUG(dbgs() << "Found call_indirect: " << MI << '\n'); + + // Rewrite pseudo to non-pseudo + const MCInstrDesc &Desc = TII->get(GetNonPseudoCallIndirectOpcode(MI)); + MI.setDesc(Desc); + + // Rewrite argument order + auto Uses = MI.explicit_uses(); + MachineInstr::mop_iterator it = Uses.begin(); + const MachineOperand MO = *it; + + // Set up the flags immediate, which currently has no defined flags + // so it's always zero. + it->ChangeToImmediate(0); + + MI.addOperand(MF, MO); + + DEBUG(dbgs() << " After transform: " << MI); + Changed = true; + } + } + } + + DEBUG(dbgs() << "\nDone fixing up CALL_INDIRECTs\n\n"); + + return Changed; +} + diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp new file mode 100644 index 0000000..04ede7f --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp @@ -0,0 +1,308 @@ +//===-- WebAssemblyExplicitLocals.cpp - Make Locals Explicit --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file converts any remaining registers into WebAssembly locals. 
+/// +/// After register stackification and register coloring, convert non-stackified +/// registers into locals, inserting explicit get_local and set_local +/// instructions. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "WebAssemblyUtilities.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-explicit-locals" + +namespace { +class WebAssemblyExplicitLocals final : public MachineFunctionPass { + StringRef getPassName() const override { + return "WebAssembly Explicit Locals"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyExplicitLocals() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyExplicitLocals::ID = 0; +FunctionPass *llvm::createWebAssemblyExplicitLocals() { + return new WebAssemblyExplicitLocals(); +} + +/// Return a local id number for the given register, assigning it a new one +/// if it doesn't yet have one. +static unsigned getLocalId(DenseMap<unsigned, unsigned> &Reg2Local, + unsigned &CurLocal, unsigned Reg) { + return Reg2Local.insert(std::make_pair(Reg, CurLocal++)).first->second; +} + +/// Get the appropriate get_local opcode for the given register class. 
+static unsigned getGetLocalOpcode(const TargetRegisterClass *RC) { + if (RC == &WebAssembly::I32RegClass) + return WebAssembly::GET_LOCAL_I32; + if (RC == &WebAssembly::I64RegClass) + return WebAssembly::GET_LOCAL_I64; + if (RC == &WebAssembly::F32RegClass) + return WebAssembly::GET_LOCAL_F32; + if (RC == &WebAssembly::F64RegClass) + return WebAssembly::GET_LOCAL_F64; + if (RC == &WebAssembly::V128RegClass) + return WebAssembly::GET_LOCAL_V128; + llvm_unreachable("Unexpected register class"); +} + +/// Get the appropriate set_local opcode for the given register class. +static unsigned getSetLocalOpcode(const TargetRegisterClass *RC) { + if (RC == &WebAssembly::I32RegClass) + return WebAssembly::SET_LOCAL_I32; + if (RC == &WebAssembly::I64RegClass) + return WebAssembly::SET_LOCAL_I64; + if (RC == &WebAssembly::F32RegClass) + return WebAssembly::SET_LOCAL_F32; + if (RC == &WebAssembly::F64RegClass) + return WebAssembly::SET_LOCAL_F64; + if (RC == &WebAssembly::V128RegClass) + return WebAssembly::SET_LOCAL_V128; + llvm_unreachable("Unexpected register class"); +} + +/// Get the appropriate tee_local opcode for the given register class. +static unsigned getTeeLocalOpcode(const TargetRegisterClass *RC) { + if (RC == &WebAssembly::I32RegClass) + return WebAssembly::TEE_LOCAL_I32; + if (RC == &WebAssembly::I64RegClass) + return WebAssembly::TEE_LOCAL_I64; + if (RC == &WebAssembly::F32RegClass) + return WebAssembly::TEE_LOCAL_F32; + if (RC == &WebAssembly::F64RegClass) + return WebAssembly::TEE_LOCAL_F64; + if (RC == &WebAssembly::V128RegClass) + return WebAssembly::TEE_LOCAL_V128; + llvm_unreachable("Unexpected register class"); +} + +/// Get the type associated with the given register class. 
+static MVT typeForRegClass(const TargetRegisterClass *RC) { + if (RC == &WebAssembly::I32RegClass) + return MVT::i32; + if (RC == &WebAssembly::I64RegClass) + return MVT::i64; + if (RC == &WebAssembly::F32RegClass) + return MVT::f32; + if (RC == &WebAssembly::F64RegClass) + return MVT::f64; + llvm_unreachable("unrecognized register class"); +} + +/// Given a MachineOperand of a stackified vreg, return the instruction at the +/// start of the expression tree. +static MachineInstr *FindStartOfTree(MachineOperand &MO, + MachineRegisterInfo &MRI, + WebAssemblyFunctionInfo &MFI) { + unsigned Reg = MO.getReg(); + assert(MFI.isVRegStackified(Reg)); + MachineInstr *Def = MRI.getVRegDef(Reg); + + // Find the first stackified use and proceed from there. + for (MachineOperand &DefMO : Def->explicit_uses()) { + if (!DefMO.isReg()) + continue; + return FindStartOfTree(DefMO, MRI, MFI); + } + + // If there were no stackified uses, we've reached the start. + return Def; +} + +bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** Make Locals Explicit **********\n" + "********** Function: " + << MF.getName() << '\n'); + + // Disable this pass if we aren't doing direct wasm object emission. + if (MF.getSubtarget<WebAssemblySubtarget>() + .getTargetTriple().isOSBinFormatELF()) + return false; + + bool Changed = false; + MachineRegisterInfo &MRI = MF.getRegInfo(); + WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + + // Map non-stackified virtual registers to their local ids. + DenseMap<unsigned, unsigned> Reg2Local; + + // Handle ARGUMENTS first to ensure that they get the designated numbers. 
+ for (MachineBasicBlock::iterator I = MF.begin()->begin(), + E = MF.begin()->end(); + I != E;) { + MachineInstr &MI = *I++; + if (!WebAssembly::isArgument(MI)) + break; + unsigned Reg = MI.getOperand(0).getReg(); + assert(!MFI.isVRegStackified(Reg)); + Reg2Local[Reg] = MI.getOperand(1).getImm(); + MI.eraseFromParent(); + Changed = true; + } + + // Start assigning local numbers after the last parameter. + unsigned CurLocal = MFI.getParams().size(); + + // Visit each instruction in the function. + for (MachineBasicBlock &MBB : MF) { + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) { + MachineInstr &MI = *I++; + assert(!WebAssembly::isArgument(MI)); + + if (MI.isDebugValue() || MI.isLabel()) + continue; + + // Replace tee instructions with tee_local. The difference is that tee + // instructions have two defs, while tee_local instructions have one def + // and an index of a local to write to. + if (WebAssembly::isTee(MI)) { + assert(MFI.isVRegStackified(MI.getOperand(0).getReg())); + assert(!MFI.isVRegStackified(MI.getOperand(1).getReg())); + unsigned OldReg = MI.getOperand(2).getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(OldReg); + + // Stackify the input if it isn't stackified yet. + if (!MFI.isVRegStackified(OldReg)) { + unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + unsigned NewReg = MRI.createVirtualRegister(RC); + unsigned Opc = getGetLocalOpcode(RC); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc), NewReg) + .addImm(LocalId); + MI.getOperand(2).setReg(NewReg); + MFI.stackifyVReg(NewReg); + } + + // Replace the TEE with a TEE_LOCAL. 
+ unsigned LocalId = + getLocalId(Reg2Local, CurLocal, MI.getOperand(1).getReg()); + unsigned Opc = getTeeLocalOpcode(RC); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc), + MI.getOperand(0).getReg()) + .addImm(LocalId) + .addReg(MI.getOperand(2).getReg()); + + MI.eraseFromParent(); + Changed = true; + continue; + } + + // Insert set_locals for any defs that aren't stackified yet. Currently + // we handle at most one def. + assert(MI.getDesc().getNumDefs() <= 1); + if (MI.getDesc().getNumDefs() == 1) { + unsigned OldReg = MI.getOperand(0).getReg(); + if (!MFI.isVRegStackified(OldReg) && !MRI.use_empty(OldReg)) { + unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + const TargetRegisterClass *RC = MRI.getRegClass(OldReg); + unsigned NewReg = MRI.createVirtualRegister(RC); + auto InsertPt = std::next(MachineBasicBlock::iterator(&MI)); + unsigned Opc = getSetLocalOpcode(RC); + BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc)) + .addImm(LocalId) + .addReg(NewReg); + MI.getOperand(0).setReg(NewReg); + MFI.stackifyVReg(NewReg); + Changed = true; + } + } + + // Insert get_locals for any uses that aren't stackified yet. + MachineInstr *InsertPt = &MI; + for (MachineOperand &MO : reverse(MI.explicit_uses())) { + if (!MO.isReg()) + continue; + + unsigned OldReg = MO.getReg(); + + // If we see a stackified register, prepare to insert subsequent + // get_locals before the start of its tree. + if (MFI.isVRegStackified(OldReg)) { + InsertPt = FindStartOfTree(MO, MRI, MFI); + continue; + } + + // Insert a get_local. + unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); + const TargetRegisterClass *RC = MRI.getRegClass(OldReg); + unsigned NewReg = MRI.createVirtualRegister(RC); + unsigned Opc = getGetLocalOpcode(RC); + InsertPt = + BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc), NewReg) + .addImm(LocalId); + MO.setReg(NewReg); + MFI.stackifyVReg(NewReg); + Changed = true; + } + + // Coalesce and eliminate COPY instructions. 
+ if (WebAssembly::isCopy(MI)) { + MRI.replaceRegWith(MI.getOperand(1).getReg(), + MI.getOperand(0).getReg()); + MI.eraseFromParent(); + Changed = true; + } + } + } + + // Define the locals. + for (size_t i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + auto I = Reg2Local.find(Reg); + if (I == Reg2Local.end() || I->second < MFI.getParams().size()) + continue; + + MFI.addLocal(typeForRegClass(MRI.getRegClass(Reg))); + Changed = true; + } + +#ifndef NDEBUG + // Assert that all registers have been stackified at this point. + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + if (MI.isDebugValue() || MI.isLabel()) + continue; + for (const MachineOperand &MO : MI.explicit_operands()) { + assert( + (!MO.isReg() || MRI.use_empty(MO.getReg()) || + MFI.isVRegStackified(MO.getReg())) && + "WebAssemblyExplicitLocals failed to stackify a register operand"); + } + } + } +#endif + + return Changed; +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 7bfa407..bc7020f 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -84,7 +84,10 @@ class WebAssemblyFastISel final : public FastISel { return Base.FI; } - void setOffset(int64_t Offset_) { Offset = Offset_; } + void setOffset(int64_t Offset_) { + assert(Offset_ >= 0 && "Offsets must be non-negative"); + Offset = Offset_; + } int64_t getOffset() const { return Offset; } void setGlobalValue(const GlobalValue *G) { GV = G; } const GlobalValue *getGlobalValue() const { return GV; } @@ -113,6 +116,13 @@ private: case MVT::f32: case MVT::f64: return VT; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v4f32: + if (Subtarget->hasSIMD128()) + return VT; + break; default: break; } @@ -229,12 +239,15 @@ bool WebAssemblyFastISel::computeAddress(const Value 
*Obj, Address &Addr) { case Instruction::GetElementPtr: { Address SavedAddr = Addr; uint64_t TmpOffset = Addr.getOffset(); + // Non-inbounds geps can wrap; wasm's offsets can't. + if (!cast<GEPOperator>(U)->isInBounds()) + goto unsupported_gep; // Iterate through the GEP folding the constants into offsets where // we can. for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U); GTI != E; ++GTI) { const Value *Op = GTI.getOperand(); - if (StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = GTI.getStructTypeOrNull()) { const StructLayout *SL = DL.getStructLayout(STy); unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); TmpOffset += SL->getElementOffset(Idx); @@ -265,10 +278,13 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) { } } } - // Try to grab the base operand now. - Addr.setOffset(TmpOffset); - if (computeAddress(U->getOperand(0), Addr)) - return true; + // Don't fold in negative offsets. + if (int64_t(TmpOffset) >= 0) { + // Try to grab the base operand now. + Addr.setOffset(TmpOffset); + if (computeAddress(U->getOperand(0), Addr)) + return true; + } // We failed, restore everything and try the other options. 
Addr = SavedAddr; unsupported_gep: @@ -294,8 +310,11 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) { std::swap(LHS, RHS); if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { - Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); - return computeAddress(LHS, Addr); + uint64_t TmpOffset = Addr.getOffset() + CI->getSExtValue(); + if (int64_t(TmpOffset) >= 0) { + Addr.setOffset(TmpOffset); + return computeAddress(LHS, Addr); + } } Address Backup = Addr; @@ -311,8 +330,11 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) { const Value *RHS = U->getOperand(1); if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { - Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); - return computeAddress(LHS, Addr); + int64_t TmpOffset = Addr.getOffset() - CI->getSExtValue(); + if (TmpOffset >= 0) { + Addr.setOffset(TmpOffset); + return computeAddress(LHS, Addr); + } } break; } @@ -341,6 +363,10 @@ void WebAssemblyFastISel::materializeLoadStoreOperands(Address &Addr) { void WebAssemblyFastISel::addLoadStoreOperands(const Address &Addr, const MachineInstrBuilder &MIB, MachineMemOperand *MMO) { + // Set the alignment operand (this is rewritten in SetP2AlignOperands). + // TODO: Disable SetP2AlignOperands for FastISel and just do it here. + MIB.addImm(0); + if (const GlobalValue *GV = Addr.getGlobalValue()) MIB.addGlobalAddress(GV, Addr.getOffset()); else @@ -351,10 +377,6 @@ void WebAssemblyFastISel::addLoadStoreOperands(const Address &Addr, else MIB.addFrameIndex(Addr.getFI()); - // Set the alignment operand (this is rewritten in SetP2AlignOperands). - // TODO: Disable SetP2AlignOperands for FastISel and just do it here. 
- MIB.addImm(0); - MIB.addMemOperand(MMO); } @@ -381,6 +403,9 @@ unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, bool &Not) { unsigned WebAssemblyFastISel::zeroExtendToI32(unsigned Reg, const Value *V, MVT::SimpleValueType From) { + if (Reg == 0) + return 0; + switch (From) { case MVT::i1: // If the value is naturally an i1, we don't need to mask it. @@ -415,6 +440,9 @@ unsigned WebAssemblyFastISel::zeroExtendToI32(unsigned Reg, const Value *V, unsigned WebAssemblyFastISel::signExtendToI32(unsigned Reg, const Value *V, MVT::SimpleValueType From) { + if (Reg == 0) + return 0; + switch (From) { case MVT::i1: case MVT::i8: @@ -529,8 +557,8 @@ unsigned WebAssemblyFastISel::fastMaterializeAlloca(const AllocaInst *AI) { &WebAssembly::I64RegClass : &WebAssembly::I32RegClass); unsigned Opc = Subtarget->hasAddr64() ? - WebAssembly::COPY_LOCAL_I64 : - WebAssembly::COPY_LOCAL_I32; + WebAssembly::COPY_I64 : + WebAssembly::COPY_I32; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addFrameIndex(SI->second); return ResultReg; @@ -575,7 +603,9 @@ bool WebAssemblyFastISel::fastLowerArguments() { return false; Type *ArgTy = Arg.getType(); - if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) + if (ArgTy->isStructTy() || ArgTy->isArrayTy()) + return false; + if (!Subtarget->hasSIMD128() && ArgTy->isVectorTy()) return false; unsigned Opc; @@ -600,6 +630,22 @@ bool WebAssemblyFastISel::fastLowerArguments() { Opc = WebAssembly::ARGUMENT_F64; RC = &WebAssembly::F64RegClass; break; + case MVT::v16i8: + Opc = WebAssembly::ARGUMENT_v16i8; + RC = &WebAssembly::V128RegClass; + break; + case MVT::v8i16: + Opc = WebAssembly::ARGUMENT_v8i16; + RC = &WebAssembly::V128RegClass; + break; + case MVT::v4i32: + Opc = WebAssembly::ARGUMENT_v4i32; + RC = &WebAssembly::V128RegClass; + break; + case MVT::v4f32: + Opc = WebAssembly::ARGUMENT_v4f32; + RC = &WebAssembly::V128RegClass; + break; default: return false; } @@ -617,6 +663,9 @@ bool 
WebAssemblyFastISel::fastLowerArguments() { for (auto const &Arg : F->args()) MFI->addParam(getLegalType(getSimpleType(Arg.getType()))); + if (!F->getReturnType()->isVoidTy()) + MFI->addResult(getLegalType(getSimpleType(F->getReturnType()))); + return true; } @@ -637,29 +686,52 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { bool IsVoid = FuncTy->getReturnType()->isVoidTy(); unsigned ResultReg; if (IsVoid) { - Opc = IsDirect ? WebAssembly::CALL_VOID : WebAssembly::CALL_INDIRECT_VOID; + Opc = IsDirect ? WebAssembly::CALL_VOID : WebAssembly::PCALL_INDIRECT_VOID; } else { + if (!Subtarget->hasSIMD128() && Call->getType()->isVectorTy()) + return false; + MVT::SimpleValueType RetTy = getSimpleType(Call->getType()); switch (RetTy) { case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: - Opc = IsDirect ? WebAssembly::CALL_I32 : WebAssembly::CALL_INDIRECT_I32; + Opc = IsDirect ? WebAssembly::CALL_I32 : WebAssembly::PCALL_INDIRECT_I32; ResultReg = createResultReg(&WebAssembly::I32RegClass); break; case MVT::i64: - Opc = IsDirect ? WebAssembly::CALL_I64 : WebAssembly::CALL_INDIRECT_I64; + Opc = IsDirect ? WebAssembly::CALL_I64 : WebAssembly::PCALL_INDIRECT_I64; ResultReg = createResultReg(&WebAssembly::I64RegClass); break; case MVT::f32: - Opc = IsDirect ? WebAssembly::CALL_F32 : WebAssembly::CALL_INDIRECT_F32; + Opc = IsDirect ? WebAssembly::CALL_F32 : WebAssembly::PCALL_INDIRECT_F32; ResultReg = createResultReg(&WebAssembly::F32RegClass); break; case MVT::f64: - Opc = IsDirect ? WebAssembly::CALL_F64 : WebAssembly::CALL_INDIRECT_F64; + Opc = IsDirect ? WebAssembly::CALL_F64 : WebAssembly::PCALL_INDIRECT_F64; ResultReg = createResultReg(&WebAssembly::F64RegClass); break; + case MVT::v16i8: + Opc = + IsDirect ? WebAssembly::CALL_v16i8 : WebAssembly::PCALL_INDIRECT_v16i8; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; + case MVT::v8i16: + Opc = + IsDirect ? 
WebAssembly::CALL_v8i16 : WebAssembly::PCALL_INDIRECT_v8i16; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; + case MVT::v4i32: + Opc = + IsDirect ? WebAssembly::CALL_v4i32 : WebAssembly::PCALL_INDIRECT_v4i32; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; + case MVT::v4f32: + Opc = + IsDirect ? WebAssembly::CALL_v4f32 : WebAssembly::PCALL_INDIRECT_v4f32; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; default: return false; } @@ -972,6 +1044,8 @@ bool WebAssemblyFastISel::selectLoad(const Instruction *I) { const LoadInst *Load = cast<LoadInst>(I); if (Load->isAtomic()) return false; + if (!Subtarget->hasSIMD128() && Load->getType()->isVectorTy()) + return false; Address Addr; if (!computeAddress(Load->getPointerOperand(), Addr)) @@ -1027,40 +1101,36 @@ bool WebAssemblyFastISel::selectStore(const Instruction *I) { const StoreInst *Store = cast<StoreInst>(I); if (Store->isAtomic()) return false; + if (!Subtarget->hasSIMD128() && + Store->getValueOperand()->getType()->isVectorTy()) + return false; Address Addr; if (!computeAddress(Store->getPointerOperand(), Addr)) return false; unsigned Opc; - const TargetRegisterClass *RC; bool VTIsi1 = false; switch (getSimpleType(Store->getValueOperand()->getType())) { case MVT::i1: VTIsi1 = true; case MVT::i8: Opc = WebAssembly::STORE8_I32; - RC = &WebAssembly::I32RegClass; break; case MVT::i16: Opc = WebAssembly::STORE16_I32; - RC = &WebAssembly::I32RegClass; break; case MVT::i32: Opc = WebAssembly::STORE_I32; - RC = &WebAssembly::I32RegClass; break; case MVT::i64: Opc = WebAssembly::STORE_I64; - RC = &WebAssembly::I64RegClass; break; case MVT::f32: Opc = WebAssembly::STORE_F32; - RC = &WebAssembly::F32RegClass; break; case MVT::f64: Opc = WebAssembly::STORE_F64; - RC = &WebAssembly::F64RegClass; break; default: return false; } @@ -1068,12 +1138,12 @@ bool WebAssemblyFastISel::selectStore(const Instruction *I) { materializeLoadStoreOperands(Addr); unsigned ValueReg = 
getRegForValue(Store->getValueOperand()); + if (ValueReg == 0) + return false; if (VTIsi1) ValueReg = maskI1Value(ValueReg, Store->getValueOperand()); - unsigned ResultReg = createResultReg(RC); - auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), - ResultReg); + auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); addLoadStoreOperands(Addr, MIB, createMachineMemOperandFor(Store)); @@ -1094,6 +1164,8 @@ bool WebAssemblyFastISel::selectBr(const Instruction *I) { bool Not; unsigned CondReg = getRegForI1Value(Br->getCondition(), Not); + if (CondReg == 0) + return false; unsigned Opc = WebAssembly::BR_IF; if (Not) @@ -1102,7 +1174,7 @@ bool WebAssemblyFastISel::selectBr(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addMBB(TBB) .addReg(CondReg); - + finishCondBranch(Br->getParent(), TBB, FBB); return true; } @@ -1120,6 +1192,9 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) { } Value *RV = Ret->getOperand(0); + if (!Subtarget->hasSIMD128() && RV->getType()->isVectorTy()) + return false; + unsigned Opc; switch (getSimpleType(RV->getType())) { case MVT::i1: case MVT::i8: @@ -1129,8 +1204,24 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) { case MVT::i64: Opc = WebAssembly::RETURN_I64; break; - case MVT::f32: Opc = WebAssembly::RETURN_F32; break; - case MVT::f64: Opc = WebAssembly::RETURN_F64; break; + case MVT::f32: + Opc = WebAssembly::RETURN_F32; + break; + case MVT::f64: + Opc = WebAssembly::RETURN_F64; + break; + case MVT::v16i8: + Opc = WebAssembly::RETURN_v16i8; + break; + case MVT::v8i16: + Opc = WebAssembly::RETURN_v8i16; + break; + case MVT::v4i32: + Opc = WebAssembly::RETURN_v4i32; + break; + case MVT::v4f32: + Opc = WebAssembly::RETURN_v4f32; + break; default: return false; } @@ -1142,6 +1233,9 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) { else Reg = getRegForValue(RV); + if (Reg == 0) + return false; + BuildMI(*FuncInfo.MBB, 
FuncInfo.InsertPt, DbgLoc, TII.get(Opc)).addReg(Reg); return true; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp new file mode 100644 index 0000000..adf904e --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp @@ -0,0 +1,166 @@ +//===-- WebAssemblyFixFunctionBitcasts.cpp - Fix function bitcasts --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Fix bitcasted functions. +/// +/// WebAssembly requires caller and callee signatures to match, however in LLVM, +/// some amount of slop is vaguely permitted. Detect mismatch by looking for +/// bitcasts of functions and rewrite them to use wrapper functions instead. +/// +/// This doesn't catch all cases, such as when a function's address is taken in +/// one place and casted in another, but it works for many common cases. +/// +/// Note that LLVM already optimizes away function bitcasts in common cases by +/// dropping arguments as needed, so this pass only ends up getting used in less +/// common cases. 
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-fix-function-bitcasts" + +namespace { +class FixFunctionBitcasts final : public ModulePass { + StringRef getPassName() const override { + return "WebAssembly Fix Function Bitcasts"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + ModulePass::getAnalysisUsage(AU); + } + + bool runOnModule(Module &M) override; + +public: + static char ID; + FixFunctionBitcasts() : ModulePass(ID) {} +}; +} // End anonymous namespace + +char FixFunctionBitcasts::ID = 0; +ModulePass *llvm::createWebAssemblyFixFunctionBitcasts() { + return new FixFunctionBitcasts(); +} + +// Recursively descend the def-use lists from V to find non-bitcast users of +// bitcasts of V. +static void FindUses(Value *V, Function &F, + SmallVectorImpl<std::pair<Use *, Function *>> &Uses, + SmallPtrSetImpl<Constant *> &ConstantBCs) { + for (Use &U : V->uses()) { + if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U.getUser())) + FindUses(BC, F, Uses, ConstantBCs); + else if (U.get()->getType() != F.getType()) { + if (isa<Constant>(U.get())) { + // Only add constant bitcasts to the list once; they get RAUW'd + auto c = ConstantBCs.insert(cast<Constant>(U.get())); + if (!c.second) continue; + } + Uses.push_back(std::make_pair(&U, &F)); + } + } +} + +// Create a wrapper function with type Ty that calls F (which may have a +// different type). 
Attempt to support common bitcasted function idioms: +// - Call with more arguments than needed: arguments are dropped +// - Call with fewer arguments than needed: arguments are filled in with undef +// - Return value is not needed: drop it +// - Return value needed but not present: supply an undef +// +// For now, return nullptr without creating a wrapper if the wrapper cannot +// be generated due to incompatible types. +static Function *CreateWrapper(Function *F, FunctionType *Ty) { + Module *M = F->getParent(); + + Function *Wrapper = + Function::Create(Ty, Function::PrivateLinkage, "bitcast", M); + BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper); + + // Determine what arguments to pass. + SmallVector<Value *, 4> Args; + Function::arg_iterator AI = Wrapper->arg_begin(); + FunctionType::param_iterator PI = F->getFunctionType()->param_begin(); + FunctionType::param_iterator PE = F->getFunctionType()->param_end(); + for (; AI != Wrapper->arg_end() && PI != PE; ++AI, ++PI) { + if (AI->getType() != *PI) { + Wrapper->eraseFromParent(); + return nullptr; + } + Args.push_back(&*AI); + } + for (; PI != PE; ++PI) + Args.push_back(UndefValue::get(*PI)); + + CallInst *Call = CallInst::Create(F, Args, "", BB); + + // Determine what value to return. + if (Ty->getReturnType()->isVoidTy()) + ReturnInst::Create(M->getContext(), BB); + else if (F->getFunctionType()->getReturnType()->isVoidTy()) + ReturnInst::Create(M->getContext(), UndefValue::get(Ty->getReturnType()), + BB); + else if (F->getFunctionType()->getReturnType() == Ty->getReturnType()) + ReturnInst::Create(M->getContext(), Call, BB); + else { + Wrapper->eraseFromParent(); + return nullptr; + } + + return Wrapper; +} + +bool FixFunctionBitcasts::runOnModule(Module &M) { + SmallVector<std::pair<Use *, Function *>, 0> Uses; + SmallPtrSet<Constant *, 2> ConstantBCs; + + // Collect all the places that need wrappers. 
+ for (Function &F : M) FindUses(&F, F, Uses, ConstantBCs); + + DenseMap<std::pair<Function *, FunctionType *>, Function *> Wrappers; + + for (auto &UseFunc : Uses) { + Use *U = UseFunc.first; + Function *F = UseFunc.second; + PointerType *PTy = cast<PointerType>(U->get()->getType()); + FunctionType *Ty = dyn_cast<FunctionType>(PTy->getElementType()); + + // If the function is casted to something like i8* as a "generic pointer" + // to be later casted to something else, we can't generate a wrapper for it. + // Just ignore such casts for now. + if (!Ty) + continue; + + auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr)); + if (Pair.second) + Pair.first->second = CreateWrapper(F, Ty); + + Function *Wrapper = Pair.first->second; + if (!Wrapper) + continue; + + if (isa<Constant>(U->get())) + U->get()->replaceAllUsesWith(Wrapper); + else + U->set(Wrapper); + } + + return true; +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp index 5dc9092..2bbf7a2 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -47,7 +47,7 @@ using namespace llvm; namespace { class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass { - const char *getPassName() const override { + StringRef getPassName() const override { return "WebAssembly Fix Irreducible Control Flow"; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index 0a5782e..a6a2c0b 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -37,15 +37,34 @@ using namespace llvm; // TODO: wasm64 // TODO: Emit TargetOpcode::CFI_INSTRUCTION instructions +/// We need a 
base pointer in the case of having items on the stack that +/// require stricter alignment than the stack pointer itself. Because we need +/// to shift the stack pointer by some unknown amount to force the alignment, +/// we need to record the value of the stack pointer on entry to the function. +bool WebAssemblyFrameLowering::hasBP( + const MachineFunction &MF) const { + const auto *RegInfo = + MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo(); + return RegInfo->needsStackRealignment(MF); +} + /// Return true if the specified function should have a dedicated frame pointer /// register. bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const auto *RegInfo = - MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo(); - return MFI->isFrameAddressTaken() || MFI->hasVarSizedObjects() || - MFI->hasStackMap() || MFI->hasPatchPoint() || - RegInfo->needsStackRealignment(MF); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + + // When we have var-sized objects, we move the stack pointer by an unknown + // amount, and need to emit a frame pointer to restore the stack to where we + // were on function entry. + // If we already need a base pointer, we use that to fix up the stack pointer. + // If there are no fixed-size objects, we would have no use of a frame + // pointer, and thus should not emit one. + bool HasFixedSizedObjects = MFI.getStackSize() > 0; + bool NeedsFixedReference = !hasBP(MF) || HasFixedSizedObjects; + + return MFI.isFrameAddressTaken() || + (MFI.hasVarSizedObjects() && NeedsFixedReference) || + MFI.hasStackMap() || MFI.hasPatchPoint(); } /// Under normal circumstances, when a frame pointer is not required, we reserve @@ -55,7 +74,7 @@ bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const { /// frame. 
bool WebAssemblyFrameLowering::hasReservedCallFrame( const MachineFunction &MF) const { - return !MF.getFrameInfo()->hasVarSizedObjects(); + return !MF.getFrameInfo().hasVarSizedObjects(); } @@ -88,18 +107,17 @@ static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF, const TargetRegisterClass *PtrRC = MRI.getTargetRegisterInfo()->getPointerRegClass(MF); unsigned Zero = MRI.createVirtualRegister(PtrRC); - unsigned Drop = MRI.createVirtualRegister(PtrRC); const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); BuildMI(MBB, InsertAddr, DL, TII->get(WebAssembly::CONST_I32), Zero) .addImm(0); - auto *MMO = new MachineMemOperand(MachinePointerInfo(MF.getPSVManager() - .getExternalSymbolCallEntry(ES)), - MachineMemOperand::MOStore, 4, 4); - BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32), Drop) + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)), + MachineMemOperand::MOStore, 4, 4); + BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32)) + .addImm(2) // p2align .addExternalSymbol(SPSymbol) .addReg(Zero) - .addImm(2) // p2align .addReg(SrcReg) .addMemOperand(MMO); } @@ -108,11 +126,11 @@ MachineBasicBlock::iterator WebAssemblyFrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - assert(!I->getOperand(0).getImm() && hasFP(MF) && + assert(!I->getOperand(0).getImm() && (hasFP(MF) || hasBP(MF)) && "Call frame pseudos should only be used for dynamic stack adjustment"); const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); if (I->getOpcode() == TII->getCallFrameDestroyOpcode() && - needsSPWriteback(MF, *MF.getFrameInfo())) { + needsSPWriteback(MF, MF.getFrameInfo())) { DebugLoc DL = I->getDebugLoc(); writeSPToMemory(WebAssembly::SP32, MF, MBB, I, I, DL); } @@ -122,12 +140,12 @@ WebAssemblyFrameLowering::eliminateCallFramePseudoInstr( void 
WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { // TODO: Do ".setMIFlag(MachineInstr::FrameSetup)" on emitted instructions - auto *MFI = MF.getFrameInfo(); - assert(MFI->getCalleeSavedInfo().empty() && + auto &MFI = MF.getFrameInfo(); + assert(MFI.getCalleeSavedInfo().empty() && "WebAssembly should not have callee-saved registers"); - if (!needsSP(MF, *MFI)) return; - uint64_t StackSize = MFI->getStackSize(); + if (!needsSP(MF, MFI)) return; + uint64_t StackSize = MFI.getStackSize(); const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); auto &MRI = MF.getRegInfo(); @@ -138,22 +156,31 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, const TargetRegisterClass *PtrRC = MRI.getTargetRegisterInfo()->getPointerRegClass(MF); unsigned Zero = MRI.createVirtualRegister(PtrRC); - unsigned SPReg = MRI.createVirtualRegister(PtrRC); + unsigned SPReg = WebAssembly::SP32; + if (StackSize) + SPReg = MRI.createVirtualRegister(PtrRC); const char *ES = "__stack_pointer"; auto *SPSymbol = MF.createExternalSymbolName(ES); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), Zero) .addImm(0); - auto *LoadMMO = new MachineMemOperand(MachinePointerInfo(MF.getPSVManager() - .getExternalSymbolCallEntry(ES)), - MachineMemOperand::MOLoad, 4, 4); + MachineMemOperand *LoadMMO = MF.getMachineMemOperand( + MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)), + MachineMemOperand::MOLoad, 4, 4); // Load the SP value. - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), - StackSize ? 
SPReg : (unsigned)WebAssembly::SP32) + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg) + .addImm(2) // p2align .addExternalSymbol(SPSymbol) .addReg(Zero) // addr - .addImm(2) // p2align .addMemOperand(LoadMMO); + bool HasBP = hasBP(MF); + if (HasBP) { + auto FI = MF.getInfo<WebAssemblyFunctionInfo>(); + unsigned BasePtr = MRI.createVirtualRegister(PtrRC); + FI->setBasePointerVreg(BasePtr); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), BasePtr) + .addReg(SPReg); + } if (StackSize) { // Subtract the frame size unsigned OffsetReg = MRI.createVirtualRegister(PtrRC); @@ -164,6 +191,18 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, .addReg(SPReg) .addReg(OffsetReg); } + if (HasBP) { + unsigned BitmaskReg = MRI.createVirtualRegister(PtrRC); + unsigned Alignment = MFI.getMaxAlignment(); + assert((1u << countTrailingZeros(Alignment)) == Alignment && + "Alignment must be a power of 2"); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), BitmaskReg) + .addImm((int)~(Alignment - 1)); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::AND_I32), + WebAssembly::SP32) + .addReg(WebAssembly::SP32) + .addReg(BitmaskReg); + } if (hasFP(MF)) { // Unlike most conventional targets (where FP points to the saved FP), // FP points to the bottom of the fixed-size locals, so we can use positive @@ -172,16 +211,16 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, WebAssembly::FP32) .addReg(WebAssembly::SP32); } - if (StackSize && needsSPWriteback(MF, *MFI)) { + if (StackSize && needsSPWriteback(MF, MFI)) { writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, InsertPt, DL); } } void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - auto *MFI = MF.getFrameInfo(); - uint64_t StackSize = MFI->getStackSize(); - if (!needsSP(MF, *MFI) || !needsSPWriteback(MF, *MFI)) return; + auto &MFI = MF.getFrameInfo(); + uint64_t StackSize = MFI.getStackSize(); + if (!needsSP(MF, MFI) 
|| !needsSPWriteback(MF, MFI)) return; const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); auto &MRI = MF.getRegInfo(); auto InsertPt = MBB.getFirstTerminator(); @@ -194,7 +233,10 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, // subtracted in the prolog. unsigned SPReg = 0; MachineBasicBlock::iterator InsertAddr = InsertPt; - if (StackSize) { + if (hasBP(MF)) { + auto FI = MF.getInfo<WebAssemblyFunctionInfo>(); + SPReg = FI->getBasePointerVreg(); + } else if (StackSize) { const TargetRegisterClass *PtrRC = MRI.getTargetRegisterInfo()->getPointerRegClass(MF); unsigned OffsetReg = MRI.createVirtualRegister(PtrRC); diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h index e20fc5d..bf326fc 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h @@ -46,6 +46,7 @@ class WebAssemblyFrameLowering final : public TargetFrameLowering { bool hasReservedCallFrame(const MachineFunction &MF) const override; private: + bool hasBP(const MachineFunction &MF) const; bool needsSP(const MachineFunction &MF, const MachineFrameInfo &MFI) const; bool needsSPWriteback(const MachineFunction &MF, const MachineFrameInfo &MFI) const; diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index 88c38b3..a67137f 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -42,7 +42,7 @@ public: : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), ForCodeSize(false) { } - const char *getPassName() const override { + StringRef getPassName() const override { return "WebAssembly Instruction Selection"; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp 
b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 9e77319..6a7f75a 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -54,6 +54,12 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( addRegisterClass(MVT::i64, &WebAssembly::I64RegClass); addRegisterClass(MVT::f32, &WebAssembly::F32RegClass); addRegisterClass(MVT::f64, &WebAssembly::F64RegClass); + if (Subtarget->hasSIMD128()) { + addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass); + addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass); + addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass); + addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass); + } // Compute derived properties from the register classes. computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -190,6 +196,10 @@ WebAssemblyTargetLowering::getRegForInlineAsmConstraint( switch (Constraint[0]) { case 'r': assert(VT != MVT::iPTR && "Pointer MVT not expected here"); + if (Subtarget->hasSIMD128() && VT.isVector()) { + if (VT.getSizeInBits() == 128) + return std::make_pair(0U, &WebAssembly::V128RegClass); + } if (VT.isInteger() && !VT.isVector()) { if (VT.getSizeInBits() <= 32) return std::make_pair(0U, &WebAssembly::I32RegClass); @@ -319,10 +329,10 @@ SDValue WebAssemblyTargetLowering::LowerCall( if (Out.Flags.isInConsecutiveRegsLast()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) { - auto *MFI = MF.getFrameInfo(); - int FI = MFI->CreateStackObject(Out.Flags.getByValSize(), - Out.Flags.getByValAlign(), - /*isSS=*/false); + auto &MFI = MF.getFrameInfo(); + int FI = MFI.CreateStackObject(Out.Flags.getByValSize(), + Out.Flags.getByValAlign(), + /*isSS=*/false); SDValue SizeNode = DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32); SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout)); @@ -365,9 +375,9 @@ SDValue 
WebAssemblyTargetLowering::LowerCall( if (IsVarArg && NumBytes) { // For non-fixed arguments, next emit stores to store the argument values // to the stack buffer at the offsets computed above. - int FI = MF.getFrameInfo()->CreateStackObject(NumBytes, - Layout.getStackAlignment(), - /*isSS=*/false); + int FI = MF.getFrameInfo().CreateStackObject(NumBytes, + Layout.getStackAlignment(), + /*isSS=*/false); unsigned ValNo = 0; SmallVector<SDValue, 8> Chains; for (SDValue Arg : @@ -471,12 +481,12 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - MachineFunction &MF = DAG.getMachineFunction(); - auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>(); - if (!CallingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); + MachineFunction &MF = DAG.getMachineFunction(); + auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>(); + // Set up the incoming ARGUMENTS value, which serves to represent the liveness // of the incoming values before they're represented by virtual registers. MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS); @@ -516,6 +526,13 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( MFI->addParam(PtrVT); } + // Record the number and types of results. + SmallVector<MVT, 4> Params; + SmallVector<MVT, 4> Results; + ComputeSignatureVTs(*MF.getFunction(), DAG.getTarget(), Params, Results); + for (MVT VT : Results) + MFI->addResult(VT); + return Chain; } @@ -570,8 +587,8 @@ SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op, unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg(); EVT VT = Src.getValueType(); SDValue Copy( - DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_LOCAL_I32 - : WebAssembly::COPY_LOCAL_I64, + DAG.getMachineNode(VT == MVT::i32 ? 
WebAssembly::COPY_I32 + : WebAssembly::COPY_I64, DL, VT, Src), 0); return Op.getNode()->getNumValues() == 1 @@ -597,7 +614,7 @@ SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op, if (Op.getConstantOperandVal(0) > 0) return SDValue(); - DAG.getMachineFunction().getFrameInfo()->setFrameAddressIsTaken(true); + DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); unsigned FP = Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction()); diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td index cfa1519..047f4be 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -26,25 +26,65 @@ def ADJCALLSTACKUP : I<(outs), (ins i32imm:$amt, i32imm:$amt2), } // isCodeGenOnly = 1 multiclass CALL<WebAssemblyRegClass vt, string prefix> { - def CALL_#vt : I<(outs vt:$dst), (ins i32imm:$callee, variable_ops), + def CALL_#vt : I<(outs vt:$dst), (ins function32_op:$callee, variable_ops), [(set vt:$dst, (WebAssemblycall1 (i32 imm:$callee)))], - !strconcat(prefix, "call\t$dst, $callee")>; - def CALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops), - [(set vt:$dst, (WebAssemblycall1 I32:$callee))], - !strconcat(prefix, "call_indirect\t$dst, $callee")>; + !strconcat(prefix, "call\t$dst, $callee"), + 0x10>; + let isCodeGenOnly = 1 in { + def PCALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops), + [(set vt:$dst, (WebAssemblycall1 I32:$callee))], + "PSEUDO CALL INDIRECT\t$callee">; + } // isCodeGenOnly = 1 + + def CALL_INDIRECT_#vt : I<(outs vt:$dst), (ins i32imm:$flags, variable_ops), + [], + !strconcat(prefix, "call_indirect\t$dst"), + 0x11>; +} + +multiclass SIMD_CALL<ValueType vt, string prefix> { + def CALL_#vt : SIMD_I<(outs V128:$dst), (ins function32_op:$callee, variable_ops), + [(set (vt V128:$dst), + 
(WebAssemblycall1 (i32 imm:$callee)))], + !strconcat(prefix, "call\t$dst, $callee"), + 0x10>; + let isCodeGenOnly = 1 in { + def PCALL_INDIRECT_#vt : SIMD_I<(outs V128:$dst), + (ins I32:$callee, variable_ops), + [(set (vt V128:$dst), + (WebAssemblycall1 I32:$callee))], + "PSEUDO CALL INDIRECT\t$callee">; + } // isCodeGenOnly = 1 + + def CALL_INDIRECT_#vt : SIMD_I<(outs V128:$dst), + (ins i32imm:$flags, variable_ops), + [], + !strconcat(prefix, "call_indirect\t$dst"), + 0x11>; } + let Uses = [SP32, SP64], isCall = 1 in { defm : CALL<I32, "i32.">; defm : CALL<I64, "i64.">; defm : CALL<F32, "f32.">; defm : CALL<F64, "f64.">; + defm : SIMD_CALL<v16i8, "i8x16.">; + defm : SIMD_CALL<v8i16, "i16x8.">; + defm : SIMD_CALL<v4i32, "i32x4.">; + defm : SIMD_CALL<v4f32, "f32x4.">; - def CALL_VOID : I<(outs), (ins i32imm:$callee, variable_ops), + def CALL_VOID : I<(outs), (ins function32_op:$callee, variable_ops), [(WebAssemblycall0 (i32 imm:$callee))], - "call \t$callee">; - def CALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops), - [(WebAssemblycall0 I32:$callee)], - "call_indirect\t$callee">; + "call \t$callee", 0x10>; + let isCodeGenOnly = 1 in { + def PCALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops), + [(WebAssemblycall0 I32:$callee)], + "PSEUDO CALL INDIRECT\t$callee">; + } // isCodeGenOnly = 1 + + def CALL_INDIRECT_VOID : I<(outs), (ins i32imm:$flags, variable_ops), + [], + "call_indirect\t", 0x11>; } // Uses = [SP32,SP64], isCall = 1 } // Defs = [ARGUMENTS] @@ -58,6 +98,14 @@ def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), (CALL_F32 tglobaladdr:$callee)>; def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), (CALL_F64 tglobaladdr:$callee)>; +def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v16i8 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v8i16 tglobaladdr:$callee)>, 
Requires<[HasSIMD128]>; +def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v4i32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v4f32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)), (CALL_VOID tglobaladdr:$callee)>; @@ -70,5 +118,13 @@ def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), (CALL_F32 texternalsym:$callee)>; def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), (CALL_F64 texternalsym:$callee)>; +def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v16i8 texternalsym:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v8i16 texternalsym:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v4i32 texternalsym:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v4f32 texternalsym:$callee)>, Requires<[HasSIMD128]>; def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)), (CALL_VOID texternalsym:$callee)>; diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 444e275..1146431 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -18,14 +18,13 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { // The condition operand is a boolean value which WebAssembly represents as i32. 
def BR_IF : I<(outs), (ins bb_op:$dst, I32:$cond), [(brcond I32:$cond, bb:$dst)], - "br_if \t$dst, $cond">; + "br_if \t$dst, $cond", 0x0d>; let isCodeGenOnly = 1 in -def BR_UNLESS : I<(outs), (ins bb_op:$dst, I32:$cond), [], - "br_unless\t$dst, $cond">; +def BR_UNLESS : I<(outs), (ins bb_op:$dst, I32:$cond), []>; let isBarrier = 1 in { def BR : I<(outs), (ins bb_op:$dst), [(br bb:$dst)], - "br \t$dst">; + "br \t$dst", 0x0c>; } // isBarrier = 1 } // isBranch = 1, isTerminator = 1, hasCtrlDep = 1 @@ -46,7 +45,7 @@ let Defs = [ARGUMENTS] in { let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { def BR_TABLE_I32 : I<(outs), (ins I32:$index, variable_ops), [(WebAssemblybr_table I32:$index)], - "br_table \t$index"> { + "br_table \t$index", 0x0e> { let TSFlags{0} = 1; let TSFlags{1} = 1; } @@ -59,37 +58,57 @@ def BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops), } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 // Placemarkers to indicate the start or end of a block or loop scope. These -// use/clobber EXPR_STACK to prevent them from being moved into the middle of +// use/clobber VALUE_STACK to prevent them from being moved into the middle of // an expression tree. -let Uses = [EXPR_STACK], Defs = [EXPR_STACK] in { -def BLOCK : I<(outs), (ins), [], "block">; -def LOOP : I<(outs), (ins), [], "loop">; -def END_BLOCK : I<(outs), (ins), [], "end_block">; -def END_LOOP : I<(outs), (ins), [], "end_loop">; -} // Uses = [EXPR_STACK], Defs = [EXPR_STACK] +let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in { +def BLOCK : I<(outs), (ins Signature:$sig), [], "block \t$sig", 0x02>; +def LOOP : I<(outs), (ins Signature:$sig), [], "loop \t$sig", 0x03>; + +// END_BLOCK and END_LOOP are represented with the same opcode in wasm. 
+def END_BLOCK : I<(outs), (ins), [], "end_block", 0x0b>; +def END_LOOP : I<(outs), (ins), [], "end_loop", 0x0b>; +} // Uses = [VALUE_STACK], Defs = [VALUE_STACK] multiclass RETURN<WebAssemblyRegClass vt> { def RETURN_#vt : I<(outs), (ins vt:$val), [(WebAssemblyreturn vt:$val)], - "return \t$val">; + "return \t$val", 0x0f>; // Equivalent to RETURN_#vt, for use at the end of a function when wasm // semantics return by falling off the end of the block. let isCodeGenOnly = 1 in def FALLTHROUGH_RETURN_#vt : I<(outs), (ins vt:$val), []>; } +multiclass SIMD_RETURN<ValueType vt> { + def RETURN_#vt : SIMD_I<(outs), (ins V128:$val), + [(WebAssemblyreturn (vt V128:$val))], + "return \t$val", 0x0f>; + // Equivalent to RETURN_#vt, for use at the end of a function when wasm + // semantics return by falling off the end of the block. + let isCodeGenOnly = 1 in + def FALLTHROUGH_RETURN_#vt : SIMD_I<(outs), (ins V128:$val), []>; +} + let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { + let isReturn = 1 in { defm : RETURN<I32>; defm : RETURN<I64>; defm : RETURN<F32>; defm : RETURN<F64>; - def RETURN_VOID : I<(outs), (ins), [(WebAssemblyreturn)], "return">; + defm : SIMD_RETURN<v16i8>; + defm : SIMD_RETURN<v8i16>; + defm : SIMD_RETURN<v4i32>; + defm : SIMD_RETURN<v4f32>; + + def RETURN_VOID : I<(outs), (ins), [(WebAssemblyreturn)], "return", 0x0f>; // This is to RETURN_VOID what FALLTHROUGH_RETURN_#vt is to RETURN_#vt. 
let isCodeGenOnly = 1 in def FALLTHROUGH_RETURN_VOID : I<(outs), (ins), []>; } // isReturn = 1 - def UNREACHABLE : I<(outs), (ins), [(trap)], "unreachable">; + +def UNREACHABLE : I<(outs), (ins), [(trap)], "unreachable", 0x00>; + } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 } // Defs = [ARGUMENTS] diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td index 931f4a9..29483ba 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td @@ -17,14 +17,14 @@ let Defs = [ARGUMENTS] in { def I32_WRAP_I64 : I<(outs I32:$dst), (ins I64:$src), [(set I32:$dst, (trunc I64:$src))], - "i32.wrap/i64\t$dst, $src">; + "i32.wrap/i64\t$dst, $src", 0xa7>; def I64_EXTEND_S_I32 : I<(outs I64:$dst), (ins I32:$src), [(set I64:$dst, (sext I32:$src))], - "i64.extend_s/i32\t$dst, $src">; + "i64.extend_s/i32\t$dst, $src", 0xac>; def I64_EXTEND_U_I32 : I<(outs I64:$dst), (ins I32:$src), [(set I64:$dst, (zext I32:$src))], - "i64.extend_u/i32\t$dst, $src">; + "i64.extend_u/i32\t$dst, $src", 0xad>; } // defs = [ARGUMENTS] @@ -39,73 +39,73 @@ let Defs = [ARGUMENTS] in { let hasSideEffects = 1 in { def I32_TRUNC_S_F32 : I<(outs I32:$dst), (ins F32:$src), [(set I32:$dst, (fp_to_sint F32:$src))], - "i32.trunc_s/f32\t$dst, $src">; + "i32.trunc_s/f32\t$dst, $src", 0xa8>; def I32_TRUNC_U_F32 : I<(outs I32:$dst), (ins F32:$src), [(set I32:$dst, (fp_to_uint F32:$src))], - "i32.trunc_u/f32\t$dst, $src">; + "i32.trunc_u/f32\t$dst, $src", 0xa9>; def I64_TRUNC_S_F32 : I<(outs I64:$dst), (ins F32:$src), [(set I64:$dst, (fp_to_sint F32:$src))], - "i64.trunc_s/f32\t$dst, $src">; + "i64.trunc_s/f32\t$dst, $src", 0xae>; def I64_TRUNC_U_F32 : I<(outs I64:$dst), (ins F32:$src), [(set I64:$dst, (fp_to_uint F32:$src))], - "i64.trunc_u/f32\t$dst, $src">; + "i64.trunc_u/f32\t$dst, $src", 0xaf>; def I32_TRUNC_S_F64 : I<(outs I32:$dst), (ins F64:$src), [(set 
I32:$dst, (fp_to_sint F64:$src))], - "i32.trunc_s/f64\t$dst, $src">; + "i32.trunc_s/f64\t$dst, $src", 0xaa>; def I32_TRUNC_U_F64 : I<(outs I32:$dst), (ins F64:$src), [(set I32:$dst, (fp_to_uint F64:$src))], - "i32.trunc_u/f64\t$dst, $src">; + "i32.trunc_u/f64\t$dst, $src", 0xab>; def I64_TRUNC_S_F64 : I<(outs I64:$dst), (ins F64:$src), [(set I64:$dst, (fp_to_sint F64:$src))], - "i64.trunc_s/f64\t$dst, $src">; + "i64.trunc_s/f64\t$dst, $src", 0xb0>; def I64_TRUNC_U_F64 : I<(outs I64:$dst), (ins F64:$src), [(set I64:$dst, (fp_to_uint F64:$src))], - "i64.trunc_u/f64\t$dst, $src">; + "i64.trunc_u/f64\t$dst, $src", 0xb1>; } // hasSideEffects = 1 def F32_CONVERT_S_I32 : I<(outs F32:$dst), (ins I32:$src), [(set F32:$dst, (sint_to_fp I32:$src))], - "f32.convert_s/i32\t$dst, $src">; + "f32.convert_s/i32\t$dst, $src", 0xb2>; def F32_CONVERT_U_I32 : I<(outs F32:$dst), (ins I32:$src), [(set F32:$dst, (uint_to_fp I32:$src))], - "f32.convert_u/i32\t$dst, $src">; + "f32.convert_u/i32\t$dst, $src", 0xb3>; def F64_CONVERT_S_I32 : I<(outs F64:$dst), (ins I32:$src), [(set F64:$dst, (sint_to_fp I32:$src))], - "f64.convert_s/i32\t$dst, $src">; + "f64.convert_s/i32\t$dst, $src", 0xb7>; def F64_CONVERT_U_I32 : I<(outs F64:$dst), (ins I32:$src), [(set F64:$dst, (uint_to_fp I32:$src))], - "f64.convert_u/i32\t$dst, $src">; + "f64.convert_u/i32\t$dst, $src", 0xb8>; def F32_CONVERT_S_I64 : I<(outs F32:$dst), (ins I64:$src), [(set F32:$dst, (sint_to_fp I64:$src))], - "f32.convert_s/i64\t$dst, $src">; + "f32.convert_s/i64\t$dst, $src", 0xb4>; def F32_CONVERT_U_I64 : I<(outs F32:$dst), (ins I64:$src), [(set F32:$dst, (uint_to_fp I64:$src))], - "f32.convert_u/i64\t$dst, $src">; + "f32.convert_u/i64\t$dst, $src", 0xb5>; def F64_CONVERT_S_I64 : I<(outs F64:$dst), (ins I64:$src), [(set F64:$dst, (sint_to_fp I64:$src))], - "f64.convert_s/i64\t$dst, $src">; + "f64.convert_s/i64\t$dst, $src", 0xb9>; def F64_CONVERT_U_I64 : I<(outs F64:$dst), (ins I64:$src), [(set F64:$dst, (uint_to_fp I64:$src))], - 
"f64.convert_u/i64\t$dst, $src">; + "f64.convert_u/i64\t$dst, $src", 0xba>; def F64_PROMOTE_F32 : I<(outs F64:$dst), (ins F32:$src), - [(set F64:$dst, (fextend F32:$src))], - "f64.promote/f32\t$dst, $src">; + [(set F64:$dst, (fpextend F32:$src))], + "f64.promote/f32\t$dst, $src", 0xbb>; def F32_DEMOTE_F64 : I<(outs F32:$dst), (ins F64:$src), - [(set F32:$dst, (fround F64:$src))], - "f32.demote/f64\t$dst, $src">; + [(set F32:$dst, (fpround F64:$src))], + "f32.demote/f64\t$dst, $src", 0xb6>; def I32_REINTERPRET_F32 : I<(outs I32:$dst), (ins F32:$src), [(set I32:$dst, (bitconvert F32:$src))], - "i32.reinterpret/f32\t$dst, $src">; + "i32.reinterpret/f32\t$dst, $src", 0xbc>; def F32_REINTERPRET_I32 : I<(outs F32:$dst), (ins I32:$src), [(set F32:$dst, (bitconvert I32:$src))], - "f32.reinterpret/i32\t$dst, $src">; + "f32.reinterpret/i32\t$dst, $src", 0xbe>; def I64_REINTERPRET_F64 : I<(outs I64:$dst), (ins F64:$src), [(set I64:$dst, (bitconvert F64:$src))], - "i64.reinterpret/f64\t$dst, $src">; + "i64.reinterpret/f64\t$dst, $src", 0xbd>; def F64_REINTERPRET_I64 : I<(outs F64:$dst), (ins I64:$src), [(set F64:$dst, (bitconvert I64:$src))], - "f64.reinterpret/i64\t$dst, $src">; + "f64.reinterpret/i64\t$dst, $src", 0xbf>; } // Defs = [ARGUMENTS] diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td index 6456972..030be08 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td @@ -15,26 +15,26 @@ let Defs = [ARGUMENTS] in { let isCommutable = 1 in -defm ADD : BinaryFP<fadd, "add ">; -defm SUB : BinaryFP<fsub, "sub ">; +defm ADD : BinaryFP<fadd, "add ", 0x92, 0xa0>; +defm SUB : BinaryFP<fsub, "sub ", 0x93, 0xa1>; let isCommutable = 1 in -defm MUL : BinaryFP<fmul, "mul ">; -defm DIV : BinaryFP<fdiv, "div ">; -defm SQRT : UnaryFP<fsqrt, "sqrt">; +defm MUL : BinaryFP<fmul, "mul ", 0x94, 0xa2>; +defm DIV : 
BinaryFP<fdiv, "div ", 0x95, 0xa3>; +defm SQRT : UnaryFP<fsqrt, "sqrt", 0x91, 0x9f>; -defm ABS : UnaryFP<fabs, "abs ">; -defm NEG : UnaryFP<fneg, "neg ">; -defm COPYSIGN : BinaryFP<fcopysign, "copysign">; +defm ABS : UnaryFP<fabs, "abs ", 0x8b, 0x99>; +defm NEG : UnaryFP<fneg, "neg ", 0x8c, 0x9a>; +defm COPYSIGN : BinaryFP<fcopysign, "copysign", 0x98, 0xa6>; let isCommutable = 1 in { -defm MIN : BinaryFP<fminnan, "min ">; -defm MAX : BinaryFP<fmaxnan, "max ">; +defm MIN : BinaryFP<fminnan, "min ", 0x96, 0xa4>; +defm MAX : BinaryFP<fmaxnan, "max ", 0x97, 0xa5>; } // isCommutable = 1 -defm CEIL : UnaryFP<fceil, "ceil">; -defm FLOOR : UnaryFP<ffloor, "floor">; -defm TRUNC : UnaryFP<ftrunc, "trunc">; -defm NEAREST : UnaryFP<fnearbyint, "nearest">; +defm CEIL : UnaryFP<fceil, "ceil", 0x8d, 0x9b>; +defm FLOOR : UnaryFP<ffloor, "floor", 0x8e, 0x9c>; +defm TRUNC : UnaryFP<ftrunc, "trunc", 0x8f, 0x9d>; +defm NEAREST : UnaryFP<fnearbyint, "nearest", 0x90, 0x9e>; } // Defs = [ARGUMENTS] @@ -51,13 +51,13 @@ def : Pat<(frint f64:$src), (NEAREST_F64 f64:$src)>; let Defs = [ARGUMENTS] in { let isCommutable = 1 in { -defm EQ : ComparisonFP<SETOEQ, "eq ">; -defm NE : ComparisonFP<SETUNE, "ne ">; +defm EQ : ComparisonFP<SETOEQ, "eq ", 0x5b, 0x61>; +defm NE : ComparisonFP<SETUNE, "ne ", 0x5c, 0x62>; } // isCommutable = 1 -defm LT : ComparisonFP<SETOLT, "lt ">; -defm LE : ComparisonFP<SETOLE, "le ">; -defm GT : ComparisonFP<SETOGT, "gt ">; -defm GE : ComparisonFP<SETOGE, "ge ">; +defm LT : ComparisonFP<SETOLT, "lt ", 0x5d, 0x63>; +defm LE : ComparisonFP<SETOLE, "le ", 0x5e, 0x64>; +defm GT : ComparisonFP<SETOGT, "gt ", 0x5f, 0x65>; +defm GE : ComparisonFP<SETOGE, "ge ", 0x60, 0x66>; } // Defs = [ARGUMENTS] @@ -79,10 +79,10 @@ let Defs = [ARGUMENTS] in { def SELECT_F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs, I32:$cond), [(set F32:$dst, (select I32:$cond, F32:$lhs, F32:$rhs))], - "f32.select\t$dst, $lhs, $rhs, $cond">; + "f32.select\t$dst, $lhs, $rhs, $cond", 0x1b>; def SELECT_F64 
: I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs, I32:$cond), [(set F64:$dst, (select I32:$cond, F64:$lhs, F64:$rhs))], - "f64.select\t$dst, $lhs, $rhs, $cond">; + "f64.select\t$dst, $lhs, $rhs, $cond", 0x1b>; } // Defs = [ARGUMENTS] diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td index 8008dd3..5b24984 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td @@ -13,67 +13,90 @@ //===----------------------------------------------------------------------===// // WebAssembly Instruction Format. -class WebAssemblyInst<string asmstr> : Instruction { - field bits<0> Inst; // Instruction encoding. +class WebAssemblyInst<bits<32> inst, string asmstr> : Instruction { + field bits<32> Inst = inst; // Instruction encoding. let Namespace = "WebAssembly"; let Pattern = []; let AsmString = asmstr; } // Normal instructions. -class I<dag oops, dag iops, list<dag> pattern, string asmstr = ""> - : WebAssemblyInst<asmstr> { +class I<dag oops, dag iops, list<dag> pattern, string asmstr = "", bits<32> inst = -1> + : WebAssemblyInst<inst, asmstr> { dag OutOperandList = oops; dag InOperandList = iops; let Pattern = pattern; } +class SIMD_I<dag oops, dag iops, list<dag> pattern, + string asmstr = "", bits<32> inst = -1> + : I<oops, iops, pattern, asmstr, inst>, Requires<[HasSIMD128]>; + // Unary and binary instructions, for the local types that WebAssembly supports. 
-multiclass UnaryInt<SDNode node, string name> { +multiclass UnaryInt<SDNode node, string name, bits<32> i32Inst, bits<32> i64Inst> { def _I32 : I<(outs I32:$dst), (ins I32:$src), [(set I32:$dst, (node I32:$src))], - !strconcat("i32.", !strconcat(name, "\t$dst, $src"))>; + !strconcat("i32.", !strconcat(name, "\t$dst, $src")), i32Inst>; def _I64 : I<(outs I64:$dst), (ins I64:$src), [(set I64:$dst, (node I64:$src))], - !strconcat("i64.", !strconcat(name, "\t$dst, $src"))>; + !strconcat("i64.", !strconcat(name, "\t$dst, $src")), i64Inst>; } -multiclass BinaryInt<SDNode node, string name> { +multiclass BinaryInt<SDNode node, string name, bits<32> i32Inst, bits<32> i64Inst> { def _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), [(set I32:$dst, (node I32:$lhs, I32:$rhs))], - !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")), i32Inst>; def _I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs), [(set I64:$dst, (node I64:$lhs, I64:$rhs))], - !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")), i64Inst>; } -multiclass UnaryFP<SDNode node, string name> { +multiclass UnaryFP<SDNode node, string name, bits<32> f32Inst, bits<32> f64Inst> { def _F32 : I<(outs F32:$dst), (ins F32:$src), [(set F32:$dst, (node F32:$src))], - !strconcat("f32.", !strconcat(name, "\t$dst, $src"))>; + !strconcat("f32.", !strconcat(name, "\t$dst, $src")), f32Inst>; def _F64 : I<(outs F64:$dst), (ins F64:$src), [(set F64:$dst, (node F64:$src))], - !strconcat("f64.", !strconcat(name, "\t$dst, $src"))>; + !strconcat("f64.", !strconcat(name, "\t$dst, $src")), f64Inst>; } -multiclass BinaryFP<SDNode node, string name> { +multiclass BinaryFP<SDNode node, string name, bits<32> f32Inst, bits<32> f64Inst> { def _F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs), [(set F32:$dst, (node F32:$lhs, F32:$rhs))], - !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + 
!strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")), f32Inst>; def _F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs), [(set F64:$dst, (node F64:$lhs, F64:$rhs))], - !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")), f64Inst>; +} +multiclass SIMDBinary<SDNode node, SDNode fnode, string name> { + def _I8x16 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + [(set (v16i8 V128:$dst), (node V128:$lhs, V128:$rhs))], + !strconcat("i8x16.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + def _I16x8 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + [(set (v8i16 V128:$dst), (node V128:$lhs, V128:$rhs))], + !strconcat("i16x8.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + def _I32x4 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + [(set (v4i32 V128:$dst), (node V128:$lhs, V128:$rhs))], + !strconcat("i32x4.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + def _F32x4 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + [(set (v4f32 V128:$dst), (fnode V128:$lhs, V128:$rhs))], + !strconcat("f32x4.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + } -multiclass ComparisonInt<CondCode cond, string name> { +multiclass ComparisonInt<CondCode cond, string name, bits<32> i32Inst, bits<32> i64Inst> { def _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), [(set I32:$dst, (setcc I32:$lhs, I32:$rhs, cond))], - !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")), + i32Inst>; def _I64 : I<(outs I32:$dst), (ins I64:$lhs, I64:$rhs), [(set I32:$dst, (setcc I64:$lhs, I64:$rhs, cond))], - !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")), + i64Inst>; } -multiclass ComparisonFP<CondCode cond, string name> { +multiclass ComparisonFP<CondCode cond, string name, bits<32> f32Inst, bits<32> f64Inst> { def _F32 : I<(outs I32:$dst), (ins F32:$lhs, F32:$rhs), [(set 
I32:$dst, (setcc F32:$lhs, F32:$rhs, cond))], - !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")), + f32Inst>; def _F64 : I<(outs I32:$dst), (ins F64:$lhs, F64:$rhs), [(set I32:$dst, (setcc F64:$lhs, F64:$rhs, cond))], - !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")), + f64Inst>; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 2fd3eab..0e2d8bb 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -60,19 +60,19 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB, ? MRI.getRegClass(DestReg) : MRI.getTargetRegisterInfo()->getMinimalPhysRegClass(DestReg); - unsigned CopyLocalOpcode; + unsigned CopyOpcode; if (RC == &WebAssembly::I32RegClass) - CopyLocalOpcode = WebAssembly::COPY_LOCAL_I32; + CopyOpcode = WebAssembly::COPY_I32; else if (RC == &WebAssembly::I64RegClass) - CopyLocalOpcode = WebAssembly::COPY_LOCAL_I64; + CopyOpcode = WebAssembly::COPY_I64; else if (RC == &WebAssembly::F32RegClass) - CopyLocalOpcode = WebAssembly::COPY_LOCAL_F32; + CopyOpcode = WebAssembly::COPY_F32; else if (RC == &WebAssembly::F64RegClass) - CopyLocalOpcode = WebAssembly::COPY_LOCAL_F64; + CopyOpcode = WebAssembly::COPY_F64; else llvm_unreachable("Unexpected register class"); - BuildMI(MBB, I, DL, get(CopyLocalOpcode), DestReg) + BuildMI(MBB, I, DL, get(CopyOpcode), DestReg) .addReg(SrcReg, KillSrc ? 
RegState::Kill : 0); } @@ -142,7 +142,10 @@ bool WebAssemblyInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return false; } -unsigned WebAssemblyInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { +unsigned WebAssemblyInstrInfo::removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved) const { + assert(!BytesRemoved && "code size not handled"); + MachineBasicBlock::instr_iterator I = MBB.instr_end(); unsigned Count = 0; @@ -161,11 +164,14 @@ unsigned WebAssemblyInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return Count; } -unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB, +unsigned WebAssemblyInstrInfo::insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, - const DebugLoc &DL) const { + const DebugLoc &DL, + int *BytesAdded) const { + assert(!BytesAdded && "code size not handled"); + if (Cond.empty()) { if (!TBB) return 0; @@ -190,7 +196,7 @@ unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB, return 2; } -bool WebAssemblyInstrInfo::ReverseBranchCondition( +bool WebAssemblyInstrInfo::reverseBranchCondition( SmallVectorImpl<MachineOperand> &Cond) const { assert(Cond.size() == 2 && "Expected a flag and a successor block"); Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm()); diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index d93f958..df6c937 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -48,12 +48,14 @@ public: MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify = false) const override; - unsigned RemoveBranch(MachineBasicBlock &MBB) const override; - unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + unsigned removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved = nullptr) const override; + unsigned 
insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, - const DebugLoc &DL) const override; + const DebugLoc &DL, + int *BytesAdded = nullptr) const override; bool - ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; + reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 4b31987..dcfd1a4 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -71,18 +71,39 @@ let OperandNamespace = "WebAssembly" in { let OperandType = "OPERAND_BASIC_BLOCK" in def bb_op : Operand<OtherVT>; -let OperandType = "OPERAND_FP32IMM" in +let OperandType = "OPERAND_LOCAL" in +def local_op : Operand<i32>; + +let OperandType = "OPERAND_I32IMM" in +def i32imm_op : Operand<i32>; + +let OperandType = "OPERAND_I64IMM" in +def i64imm_op : Operand<i64>; + +let OperandType = "OPERAND_F32IMM" in def f32imm_op : Operand<f32>; -let OperandType = "OPERAND_FP64IMM" in +let OperandType = "OPERAND_F64IMM" in def f64imm_op : Operand<f64>; +let OperandType = "OPERAND_FUNCTION32" in +def function32_op : Operand<i32>; + +let OperandType = "OPERAND_OFFSET32" in +def offset32_op : Operand<i32>; + let OperandType = "OPERAND_P2ALIGN" in { def P2Align : Operand<i32> { let PrintMethod = "printWebAssemblyP2AlignOperand"; } } // OperandType = "OPERAND_P2ALIGN" +let OperandType = "OPERAND_SIGNATURE" in { +def Signature : Operand<i32> { + let PrintMethod = "printWebAssemblySignatureOperand"; +} +} // OperandType = "OPERAND_SIGNATURE" + } // OperandNamespace = "WebAssembly" //===----------------------------------------------------------------------===// @@ -100,10 +121,20 @@ multiclass ARGUMENT<WebAssemblyRegClass vt> { def ARGUMENT_#vt : I<(outs vt:$res), (ins 
i32imm:$argno), [(set vt:$res, (WebAssemblyargument timm:$argno))]>; } +multiclass SIMD_ARGUMENT<ValueType vt> { + let hasSideEffects = 1, Uses = [ARGUMENTS], isCodeGenOnly = 1 in + def ARGUMENT_#vt : SIMD_I<(outs V128:$res), (ins i32imm:$argno), + [(set (vt V128:$res), + (WebAssemblyargument timm:$argno))]>; +} defm : ARGUMENT<I32>; defm : ARGUMENT<I64>; defm : ARGUMENT<F32>; defm : ARGUMENT<F64>; +defm : SIMD_ARGUMENT<v16i8>; +defm : SIMD_ARGUMENT<v8i16>; +defm : SIMD_ARGUMENT<v4i32>; +defm : SIMD_ARGUMENT<v4f32>; let Defs = [ARGUMENTS] in { @@ -111,40 +142,63 @@ let Defs = [ARGUMENTS] in { // are implied by virtual register uses and defs. multiclass LOCAL<WebAssemblyRegClass vt> { let hasSideEffects = 0 in { - // COPY_LOCAL is not an actual instruction in wasm, but since we allow - // get_local and set_local to be implicit, we can have a COPY_LOCAL which - // is actually a no-op because all the work is done in the implied - // get_local and set_local. - let isAsCheapAsAMove = 1 in - def COPY_LOCAL_#vt : I<(outs vt:$res), (ins vt:$src), [], - "copy_local\t$res, $src">; - - // TEE_LOCAL is similar to COPY_LOCAL, but writes two copies of its result. - // Typically this would be used to stackify one result and write the other - // result to a local. - let isAsCheapAsAMove = 1 in - def TEE_LOCAL_#vt : I<(outs vt:$res, vt:$also), (ins vt:$src), [], - "tee_local\t$res, $also, $src">; + // COPY is not an actual instruction in wasm, but since we allow get_local and + // set_local to be implicit during most of codegen, we can have a COPY which + // is actually a no-op because all the work is done in the implied get_local + // and set_local. COPYs are eliminated (and replaced with + // get_local/set_local) in the ExplicitLocals pass. + let isAsCheapAsAMove = 1, isCodeGenOnly = 1 in + def COPY_#vt : I<(outs vt:$res), (ins vt:$src), [], "copy_local\t$res, $src">; + + // TEE is similar to COPY, but writes two copies of its result. 
Typically + // this would be used to stackify one result and write the other result to a + // local. + let isAsCheapAsAMove = 1, isCodeGenOnly = 1 in + def TEE_#vt : I<(outs vt:$res, vt:$also), (ins vt:$src), [], + "tee_local\t$res, $also, $src">; + + // This is the actual get_local instruction in wasm. These are made explicit + // by the ExplicitLocals pass. It has mayLoad because it reads from a wasm + // local, which is a side effect not otherwise modeled in LLVM. + let mayLoad = 1, isAsCheapAsAMove = 1 in + def GET_LOCAL_#vt : I<(outs vt:$res), (ins local_op:$local), [], + "get_local\t$res, $local", 0x20>; + + // This is the actual set_local instruction in wasm. These are made explicit + // by the ExplicitLocals pass. It has mayStore because it writes to a wasm + // local, which is a side effect not otherwise modeled in LLVM. + let mayStore = 1, isAsCheapAsAMove = 1 in + def SET_LOCAL_#vt : I<(outs), (ins local_op:$local, vt:$src), [], + "set_local\t$local, $src", 0x21>; + + // This is the actual tee_local instruction in wasm. TEEs are turned into + // TEE_LOCALs by the ExplicitLocals pass. It has mayStore for the same reason + // as SET_LOCAL. 
+ let mayStore = 1, isAsCheapAsAMove = 1 in + def TEE_LOCAL_#vt : I<(outs vt:$res), (ins local_op:$local, vt:$src), [], + "tee_local\t$res, $local, $src", 0x22>; + } // hasSideEffects = 0 } defm : LOCAL<I32>; defm : LOCAL<I64>; defm : LOCAL<F32>; defm : LOCAL<F64>; +defm : LOCAL<V128>, Requires<[HasSIMD128]>; let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in { -def CONST_I32 : I<(outs I32:$res), (ins i32imm:$imm), +def CONST_I32 : I<(outs I32:$res), (ins i32imm_op:$imm), [(set I32:$res, imm:$imm)], - "i32.const\t$res, $imm">; -def CONST_I64 : I<(outs I64:$res), (ins i64imm:$imm), + "i32.const\t$res, $imm", 0x41>; +def CONST_I64 : I<(outs I64:$res), (ins i64imm_op:$imm), [(set I64:$res, imm:$imm)], - "i64.const\t$res, $imm">; + "i64.const\t$res, $imm", 0x42>; def CONST_F32 : I<(outs F32:$res), (ins f32imm_op:$imm), [(set F32:$res, fpimm:$imm)], - "f32.const\t$res, $imm">; + "f32.const\t$res, $imm", 0x43>; def CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm), [(set F64:$res, fpimm:$imm)], - "f64.const\t$res, $imm">; + "f64.const\t$res, $imm", 0x44>; } // isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 } // Defs = [ARGUMENTS] diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td index 7eaa57b..e872dc2 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td @@ -17,51 +17,51 @@ let Defs = [ARGUMENTS] in { // The spaces after the names are for aesthetic purposes only, to make // operands line up vertically after tab expansion. 
let isCommutable = 1 in -defm ADD : BinaryInt<add, "add ">; -defm SUB : BinaryInt<sub, "sub ">; +defm ADD : BinaryInt<add, "add ", 0x6a, 0x7c>; +defm SUB : BinaryInt<sub, "sub ", 0x6b, 0x7d>; let isCommutable = 1 in -defm MUL : BinaryInt<mul, "mul ">; +defm MUL : BinaryInt<mul, "mul ", 0x6c, 0x7e>; // Divide and remainder trap on a zero denominator. let hasSideEffects = 1 in { -defm DIV_S : BinaryInt<sdiv, "div_s">; -defm DIV_U : BinaryInt<udiv, "div_u">; -defm REM_S : BinaryInt<srem, "rem_s">; -defm REM_U : BinaryInt<urem, "rem_u">; +defm DIV_S : BinaryInt<sdiv, "div_s", 0x6d, 0x7f>; +defm DIV_U : BinaryInt<udiv, "div_u", 0x6e, 0x80>; +defm REM_S : BinaryInt<srem, "rem_s", 0x6f, 0x81>; +defm REM_U : BinaryInt<urem, "rem_u", 0x70, 0x82>; } // hasSideEffects = 1 let isCommutable = 1 in { -defm AND : BinaryInt<and, "and ">; -defm OR : BinaryInt<or, "or ">; -defm XOR : BinaryInt<xor, "xor ">; +defm AND : BinaryInt<and, "and ", 0x71, 0x83>; +defm OR : BinaryInt<or, "or ", 0x72, 0x84>; +defm XOR : BinaryInt<xor, "xor ", 0x73, 0x85>; } // isCommutable = 1 -defm SHL : BinaryInt<shl, "shl ">; -defm SHR_U : BinaryInt<srl, "shr_u">; -defm SHR_S : BinaryInt<sra, "shr_s">; -defm ROTL : BinaryInt<rotl, "rotl">; -defm ROTR : BinaryInt<rotr, "rotr">; +defm SHL : BinaryInt<shl, "shl ", 0x74, 0x86>; +defm SHR_S : BinaryInt<sra, "shr_s", 0x75, 0x87>; +defm SHR_U : BinaryInt<srl, "shr_u", 0x76, 0x88>; +defm ROTL : BinaryInt<rotl, "rotl", 0x77, 0x89>; +defm ROTR : BinaryInt<rotr, "rotr", 0x78, 0x8a>; let isCommutable = 1 in { -defm EQ : ComparisonInt<SETEQ, "eq ">; -defm NE : ComparisonInt<SETNE, "ne ">; +defm EQ : ComparisonInt<SETEQ, "eq ", 0x46, 0x51>; +defm NE : ComparisonInt<SETNE, "ne ", 0x47, 0x52>; } // isCommutable = 1 -defm LT_S : ComparisonInt<SETLT, "lt_s">; -defm LE_S : ComparisonInt<SETLE, "le_s">; -defm LT_U : ComparisonInt<SETULT, "lt_u">; -defm LE_U : ComparisonInt<SETULE, "le_u">; -defm GT_S : ComparisonInt<SETGT, "gt_s">; -defm GE_S : ComparisonInt<SETGE, "ge_s">; 
-defm GT_U : ComparisonInt<SETUGT, "gt_u">; -defm GE_U : ComparisonInt<SETUGE, "ge_u">; +defm LT_S : ComparisonInt<SETLT, "lt_s", 0x48, 0x53>; +defm LT_U : ComparisonInt<SETULT, "lt_u", 0x49, 0x54>; +defm GT_S : ComparisonInt<SETGT, "gt_s", 0x4a, 0x55>; +defm GT_U : ComparisonInt<SETUGT, "gt_u", 0x4b, 0x56>; +defm LE_S : ComparisonInt<SETLE, "le_s", 0x4c, 0x57>; +defm LE_U : ComparisonInt<SETULE, "le_u", 0x4d, 0x58>; +defm GE_S : ComparisonInt<SETGE, "ge_s", 0x4e, 0x59>; +defm GE_U : ComparisonInt<SETUGE, "ge_u", 0x4f, 0x5a>; -defm CLZ : UnaryInt<ctlz, "clz ">; -defm CTZ : UnaryInt<cttz, "ctz ">; -defm POPCNT : UnaryInt<ctpop, "popcnt">; +defm CLZ : UnaryInt<ctlz, "clz ", 0x67, 0x79>; +defm CTZ : UnaryInt<cttz, "ctz ", 0x68, 0x7a>; +defm POPCNT : UnaryInt<ctpop, "popcnt", 0x69, 0x7b>; def EQZ_I32 : I<(outs I32:$dst), (ins I32:$src), [(set I32:$dst, (setcc I32:$src, 0, SETEQ))], - "i32.eqz \t$dst, $src">; + "i32.eqz \t$dst, $src", 0x45>; def EQZ_I64 : I<(outs I32:$dst), (ins I64:$src), [(set I32:$dst, (setcc I64:$src, 0, SETEQ))], - "i64.eqz \t$dst, $src">; + "i64.eqz \t$dst, $src", 0x50>; } // Defs = [ARGUMENTS] @@ -75,10 +75,10 @@ let Defs = [ARGUMENTS] in { def SELECT_I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs, I32:$cond), [(set I32:$dst, (select I32:$cond, I32:$lhs, I32:$rhs))], - "i32.select\t$dst, $lhs, $rhs, $cond">; + "i32.select\t$dst, $lhs, $rhs, $cond", 0x1b>; def SELECT_I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs, I32:$cond), [(set I64:$dst, (select I32:$cond, I64:$lhs, I64:$rhs))], - "i64.select\t$dst, $lhs, $rhs, $cond">; + "i64.select\t$dst, $lhs, $rhs, $cond", 0x1b>; } // Defs = [ARGUMENTS] diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index 521c664..b606ebb 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -41,15 +41,13 @@ def or_is_add : PatFrag<(ops 
node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ }]>; // GlobalAddresses are conceptually unsigned values, so we can also fold them -// into immediate values as long as their offsets are non-negative. +// into immediate values as long as the add is 'nuw'. +// TODO: We'd like to also match GA offsets but there are cases where the +// register can have a negative value. Find out what more we can do. def regPlusGA : PatFrag<(ops node:$addr, node:$off), (add node:$addr, node:$off), [{ - return N->getFlags()->hasNoUnsignedWrap() || - (N->getOperand(1)->getOpcode() == WebAssemblyISD::Wrapper && - isa<GlobalAddressSDNode>(N->getOperand(1)->getOperand(0)) && - cast<GlobalAddressSDNode>(N->getOperand(1)->getOperand(0)) - ->getOffset() >= 0); + return N->getFlags()->hasNoUnsignedWrap(); }]>; // We don't need a regPlusES because external symbols never have constant @@ -58,636 +56,631 @@ def regPlusGA : PatFrag<(ops node:$addr, node:$off), let Defs = [ARGUMENTS] in { // Basic load. -def LOAD_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align), [], - "i32.load\t$dst, ${off}(${addr})${p2align}">; -def LOAD_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align), [], - "i64.load\t$dst, ${off}(${addr})${p2align}">; -def LOAD_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align), [], - "f32.load\t$dst, ${off}(${addr})${p2align}">; -def LOAD_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align), [], - "f64.load\t$dst, ${off}(${addr})${p2align}">; +// FIXME: When we can break syntax compatibility, reorder the fields in the +// asmstrings to match the binary encoding. 
+def LOAD_I32 : I<(outs I32:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + [], "i32.load\t$dst, ${off}(${addr})${p2align}", 0x28>; +def LOAD_I64 : I<(outs I64:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + [], "i64.load\t$dst, ${off}(${addr})${p2align}", 0x29>; +def LOAD_F32 : I<(outs F32:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + [], "f32.load\t$dst, ${off}(${addr})${p2align}", 0x2a>; +def LOAD_F64 : I<(outs F64:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + [], "f64.load\t$dst, ${off}(${addr})${p2align}", 0x2b>; } // Defs = [ARGUMENTS] // Select loads with no constant offset. -def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr, 0)>; -def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr, 0)>; -def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr, 0)>; -def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr, 0)>; +def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, 0, $addr)>; +def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, 0, $addr)>; +def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, 0, $addr)>; +def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, 0, $addr)>; // Select loads with a constant offset. 
def : Pat<(i32 (load (regPlusImm I32:$addr, imm:$off))), - (LOAD_I32 imm:$off, $addr, 0)>; + (LOAD_I32 0, imm:$off, $addr)>; def : Pat<(i64 (load (regPlusImm I32:$addr, imm:$off))), - (LOAD_I64 imm:$off, $addr, 0)>; + (LOAD_I64 0, imm:$off, $addr)>; def : Pat<(f32 (load (regPlusImm I32:$addr, imm:$off))), - (LOAD_F32 imm:$off, $addr, 0)>; + (LOAD_F32 0, imm:$off, $addr)>; def : Pat<(f64 (load (regPlusImm I32:$addr, imm:$off))), - (LOAD_F64 imm:$off, $addr, 0)>; + (LOAD_F64 0, imm:$off, $addr)>; def : Pat<(i32 (load (or_is_add I32:$addr, imm:$off))), - (LOAD_I32 imm:$off, $addr, 0)>; + (LOAD_I32 0, imm:$off, $addr)>; def : Pat<(i64 (load (or_is_add I32:$addr, imm:$off))), - (LOAD_I64 imm:$off, $addr, 0)>; + (LOAD_I64 0, imm:$off, $addr)>; def : Pat<(f32 (load (or_is_add I32:$addr, imm:$off))), - (LOAD_F32 imm:$off, $addr, 0)>; + (LOAD_F32 0, imm:$off, $addr)>; def : Pat<(f64 (load (or_is_add I32:$addr, imm:$off))), - (LOAD_F64 imm:$off, $addr, 0)>; + (LOAD_F64 0, imm:$off, $addr)>; def : Pat<(i32 (load (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD_I32 tglobaladdr:$off, $addr, 0)>; + (LOAD_I32 0, tglobaladdr:$off, $addr)>; def : Pat<(i64 (load (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD_I64 tglobaladdr:$off, $addr, 0)>; + (LOAD_I64 0, tglobaladdr:$off, $addr)>; def : Pat<(f32 (load (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD_F32 tglobaladdr:$off, $addr, 0)>; + (LOAD_F32 0, tglobaladdr:$off, $addr)>; def : Pat<(f64 (load (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD_F64 tglobaladdr:$off, $addr, 0)>; + (LOAD_F64 0, tglobaladdr:$off, $addr)>; def : Pat<(i32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD_I32 texternalsym:$off, $addr, 0)>; + (LOAD_I32 0, texternalsym:$off, $addr)>; def : Pat<(i64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD_I64 texternalsym:$off, $addr, 0)>; + (LOAD_I64 0, texternalsym:$off, 
$addr)>; def : Pat<(f32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD_F32 texternalsym:$off, $addr, 0)>; + (LOAD_F32 0, texternalsym:$off, $addr)>; def : Pat<(f64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD_F64 texternalsym:$off, $addr, 0)>; + (LOAD_F64 0, texternalsym:$off, $addr)>; // Select loads with just a constant offset. -def : Pat<(i32 (load imm:$off)), (LOAD_I32 imm:$off, (CONST_I32 0), 0)>; -def : Pat<(i64 (load imm:$off)), (LOAD_I64 imm:$off, (CONST_I32 0), 0)>; -def : Pat<(f32 (load imm:$off)), (LOAD_F32 imm:$off, (CONST_I32 0), 0)>; -def : Pat<(f64 (load imm:$off)), (LOAD_F64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i32 (load imm:$off)), (LOAD_I32 0, imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (load imm:$off)), (LOAD_I64 0, imm:$off, (CONST_I32 0))>; +def : Pat<(f32 (load imm:$off)), (LOAD_F32 0, imm:$off, (CONST_I32 0))>; +def : Pat<(f64 (load imm:$off)), (LOAD_F64 0, imm:$off, (CONST_I32 0))>; def : Pat<(i32 (load (WebAssemblywrapper tglobaladdr:$off))), - (LOAD_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD_I32 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i64 (load (WebAssemblywrapper tglobaladdr:$off))), - (LOAD_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD_I64 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(f32 (load (WebAssemblywrapper tglobaladdr:$off))), - (LOAD_F32 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD_F32 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(f64 (load (WebAssemblywrapper tglobaladdr:$off))), - (LOAD_F64 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD_F64 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i32 (load (WebAssemblywrapper texternalsym:$off))), - (LOAD_I32 texternalsym:$off, (CONST_I32 0), 0)>; + (LOAD_I32 0, texternalsym:$off, (CONST_I32 0))>; def : Pat<(i64 (load (WebAssemblywrapper texternalsym:$off))), - (LOAD_I64 texternalsym:$off, (CONST_I32 0), 0)>; + (LOAD_I64 0, texternalsym:$off, (CONST_I32 0))>; def : Pat<(f32 (load (WebAssemblywrapper 
texternalsym:$off))), - (LOAD_F32 texternalsym:$off, (CONST_I32 0), 0)>; + (LOAD_F32 0, texternalsym:$off, (CONST_I32 0))>; def : Pat<(f64 (load (WebAssemblywrapper texternalsym:$off))), - (LOAD_F64 texternalsym:$off, (CONST_I32 0), 0)>; + (LOAD_F64 0, texternalsym:$off, (CONST_I32 0))>; let Defs = [ARGUMENTS] in { // Extending load. -def LOAD8_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align), [], - "i32.load8_s\t$dst, ${off}(${addr})${p2align}">; -def LOAD8_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align), [], - "i32.load8_u\t$dst, ${off}(${addr})${p2align}">; -def LOAD16_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align), [], - "i32.load16_s\t$dst, ${off}(${addr})${p2align}">; -def LOAD16_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align), [], - "i32.load16_u\t$dst, ${off}(${addr})${p2align}">; -def LOAD8_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align), [], - "i64.load8_s\t$dst, ${off}(${addr})${p2align}">; -def LOAD8_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align), [], - "i64.load8_u\t$dst, ${off}(${addr})${p2align}">; -def LOAD16_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align), [], - "i64.load16_s\t$dst, ${off}(${addr})${p2align}">; -def LOAD16_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align), [], - "i64.load16_u\t$dst, ${off}(${addr})${p2align}">; -def LOAD32_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align), [], - "i64.load32_s\t$dst, ${off}(${addr})${p2align}">; -def LOAD32_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align), [], - "i64.load32_u\t$dst, ${off}(${addr})${p2align}">; +def LOAD8_S_I32 : I<(outs I32:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + [], "i32.load8_s\t$dst, ${off}(${addr})${p2align}", 0x2c>; +def LOAD8_U_I32 : I<(outs I32:$dst), + (ins P2Align:$p2align, 
offset32_op:$off, I32:$addr), + [], "i32.load8_u\t$dst, ${off}(${addr})${p2align}", 0x2d>; +def LOAD16_S_I32 : I<(outs I32:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + [], "i32.load16_s\t$dst, ${off}(${addr})${p2align}", 0x2e>; +def LOAD16_U_I32 : I<(outs I32:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + [], "i32.load16_u\t$dst, ${off}(${addr})${p2align}", 0x2f>; +def LOAD8_S_I64 : I<(outs I64:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + [], "i64.load8_s\t$dst, ${off}(${addr})${p2align}", 0x30>; +def LOAD8_U_I64 : I<(outs I64:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + [], "i64.load8_u\t$dst, ${off}(${addr})${p2align}", 0x31>; +def LOAD16_S_I64 : I<(outs I64:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + [], "i64.load16_s\t$dst, ${off}(${addr})${p2align}", 0x32>; +def LOAD16_U_I64 : I<(outs I64:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + [], "i64.load16_u\t$dst, ${off}(${addr})${p2align}", 0x33>; +def LOAD32_S_I64 : I<(outs I64:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + [], "i64.load32_s\t$dst, ${off}(${addr})${p2align}", 0x34>; +def LOAD32_U_I64 : I<(outs I64:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + [], "i64.load32_u\t$dst, ${off}(${addr})${p2align}", 0x35>; } // Defs = [ARGUMENTS] // Select extending loads with no constant offset. 
-def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr, 0)>; -def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr, 0)>; -def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, $addr, 0)>; -def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr, 0)>; -def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, $addr, 0)>; -def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr, 0)>; -def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, $addr, 0)>; -def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr, 0)>; -def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr, 0)>; -def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr, 0)>; +def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, 0, $addr)>; +def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, 0, $addr)>; +def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, 0, $addr)>; +def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, 0, $addr)>; +def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, 0, $addr)>; +def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, 0, $addr)>; +def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, 0, $addr)>; +def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, 0, $addr)>; +def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, 0, $addr)>; +def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, 0, $addr)>; // Select extending loads with a constant offset. 
def : Pat<(i32 (sextloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_S_I32 imm:$off, $addr, 0)>; + (LOAD8_S_I32 0, imm:$off, $addr)>; def : Pat<(i32 (zextloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_U_I32 imm:$off, $addr, 0)>; + (LOAD8_U_I32 0, imm:$off, $addr)>; def : Pat<(i32 (sextloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_S_I32 imm:$off, $addr, 0)>; + (LOAD16_S_I32 0, imm:$off, $addr)>; def : Pat<(i32 (zextloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_U_I32 imm:$off, $addr, 0)>; + (LOAD16_U_I32 0, imm:$off, $addr)>; def : Pat<(i64 (sextloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_S_I64 imm:$off, $addr, 0)>; + (LOAD8_S_I64 0, imm:$off, $addr)>; def : Pat<(i64 (zextloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_U_I64 imm:$off, $addr, 0)>; + (LOAD8_U_I64 0, imm:$off, $addr)>; def : Pat<(i64 (sextloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_S_I64 imm:$off, $addr, 0)>; + (LOAD16_S_I64 0, imm:$off, $addr)>; def : Pat<(i64 (zextloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_U_I64 imm:$off, $addr, 0)>; + (LOAD16_U_I64 0, imm:$off, $addr)>; def : Pat<(i64 (sextloadi32 (regPlusImm I32:$addr, imm:$off))), - (LOAD32_S_I64 imm:$off, $addr, 0)>; + (LOAD32_S_I64 0, imm:$off, $addr)>; def : Pat<(i64 (zextloadi32 (regPlusImm I32:$addr, imm:$off))), - (LOAD32_U_I64 imm:$off, $addr, 0)>; + (LOAD32_U_I64 0, imm:$off, $addr)>; def : Pat<(i32 (sextloadi8 (or_is_add I32:$addr, imm:$off))), - (LOAD8_S_I32 imm:$off, $addr, 0)>; + (LOAD8_S_I32 0, imm:$off, $addr)>; def : Pat<(i32 (zextloadi8 (or_is_add I32:$addr, imm:$off))), - (LOAD8_U_I32 imm:$off, $addr, 0)>; + (LOAD8_U_I32 0, imm:$off, $addr)>; def : Pat<(i32 (sextloadi16 (or_is_add I32:$addr, imm:$off))), - (LOAD16_S_I32 imm:$off, $addr, 0)>; + (LOAD16_S_I32 0, imm:$off, $addr)>; def : Pat<(i32 (zextloadi16 (or_is_add I32:$addr, imm:$off))), - (LOAD16_U_I32 imm:$off, $addr, 0)>; + (LOAD16_U_I32 0, imm:$off, $addr)>; def : Pat<(i64 (sextloadi8 (or_is_add I32:$addr, imm:$off))), 
- (LOAD8_S_I64 imm:$off, $addr, 0)>; + (LOAD8_S_I64 0, imm:$off, $addr)>; def : Pat<(i64 (zextloadi8 (or_is_add I32:$addr, imm:$off))), - (LOAD8_U_I64 imm:$off, $addr, 0)>; + (LOAD8_U_I64 0, imm:$off, $addr)>; def : Pat<(i64 (sextloadi16 (or_is_add I32:$addr, imm:$off))), - (LOAD16_S_I64 imm:$off, $addr, 0)>; + (LOAD16_S_I64 0, imm:$off, $addr)>; def : Pat<(i64 (zextloadi16 (or_is_add I32:$addr, imm:$off))), - (LOAD16_U_I64 imm:$off, $addr, 0)>; + (LOAD16_U_I64 0, imm:$off, $addr)>; def : Pat<(i64 (sextloadi32 (or_is_add I32:$addr, imm:$off))), - (LOAD32_S_I64 imm:$off, $addr, 0)>; + (LOAD32_S_I64 0, imm:$off, $addr)>; def : Pat<(i64 (zextloadi32 (or_is_add I32:$addr, imm:$off))), - (LOAD32_U_I64 imm:$off, $addr, 0)>; + (LOAD32_U_I64 0, imm:$off, $addr)>; def : Pat<(i32 (sextloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_S_I32 tglobaladdr:$off, $addr, 0)>; + (LOAD8_S_I32 0, tglobaladdr:$off, $addr)>; def : Pat<(i32 (zextloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_U_I32 tglobaladdr:$off, $addr, 0)>; + (LOAD8_U_I32 0, tglobaladdr:$off, $addr)>; def : Pat<(i32 (sextloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_S_I32 tglobaladdr:$off, $addr, 0)>; + (LOAD16_S_I32 0, tglobaladdr:$off, $addr)>; def : Pat<(i32 (zextloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_U_I32 tglobaladdr:$off, $addr, 0)>; + (LOAD16_U_I32 0, tglobaladdr:$off, $addr)>; def : Pat<(i64 (sextloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_S_I64 tglobaladdr:$off, $addr, 0)>; + (LOAD8_S_I64 0, tglobaladdr:$off, $addr)>; def : Pat<(i64 (zextloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_U_I64 tglobaladdr:$off, $addr, 0)>; + (LOAD8_U_I64 0, tglobaladdr:$off, $addr)>; def : Pat<(i64 (sextloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_S_I64 tglobaladdr:$off, $addr, 0)>; + 
(LOAD16_S_I64 0, tglobaladdr:$off, $addr)>; def : Pat<(i64 (zextloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_U_I64 tglobaladdr:$off, $addr, 0)>; + (LOAD16_U_I64 0, tglobaladdr:$off, $addr)>; def : Pat<(i64 (sextloadi32 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD32_S_I64 tglobaladdr:$off, $addr, 0)>; + (LOAD32_S_I64 0, tglobaladdr:$off, $addr)>; def : Pat<(i64 (zextloadi32 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD32_U_I64 tglobaladdr:$off, $addr, 0)>; + (LOAD32_U_I64 0, tglobaladdr:$off, $addr)>; def : Pat<(i32 (sextloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_S_I32 texternalsym:$off, $addr, 0)>; + (LOAD8_S_I32 0, texternalsym:$off, $addr)>; def : Pat<(i32 (zextloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_U_I32 texternalsym:$off, $addr, 0)>; + (LOAD8_U_I32 0, texternalsym:$off, $addr)>; def : Pat<(i32 (sextloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_S_I32 texternalsym:$off, $addr, 0)>; + (LOAD16_S_I32 0, texternalsym:$off, $addr)>; def : Pat<(i32 (zextloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_U_I32 texternalsym:$off, $addr, 0)>; + (LOAD16_U_I32 0, texternalsym:$off, $addr)>; def : Pat<(i64 (sextloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_S_I64 texternalsym:$off, $addr, 0)>; + (LOAD8_S_I64 0, texternalsym:$off, $addr)>; def : Pat<(i64 (zextloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_U_I64 texternalsym:$off, $addr, 0)>; + (LOAD8_U_I64 0, texternalsym:$off, $addr)>; def : Pat<(i64 (sextloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_S_I64 texternalsym:$off, $addr, 0)>; + (LOAD16_S_I64 0, texternalsym:$off, $addr)>; def : Pat<(i64 (zextloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_U_I64 texternalsym:$off, $addr, 0)>; + 
(LOAD16_U_I64 0, texternalsym:$off, $addr)>; def : Pat<(i64 (sextloadi32 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD32_S_I64 texternalsym:$off, $addr, 0)>; + (LOAD32_S_I64 0, texternalsym:$off, $addr)>; def : Pat<(i64 (zextloadi32 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD32_U_I64 texternalsym:$off, $addr, 0)>; + (LOAD32_U_I64 0, texternalsym:$off, $addr)>; // Select extending loads with just a constant offset. def : Pat<(i32 (sextloadi8 imm:$off)), - (LOAD8_S_I32 imm:$off, (CONST_I32 0), 0)>; + (LOAD8_S_I32 0, imm:$off, (CONST_I32 0))>; def : Pat<(i32 (zextloadi8 imm:$off)), - (LOAD8_U_I32 imm:$off, (CONST_I32 0), 0)>; + (LOAD8_U_I32 0, imm:$off, (CONST_I32 0))>; def : Pat<(i32 (sextloadi16 imm:$off)), - (LOAD16_S_I32 imm:$off, (CONST_I32 0), 0)>; + (LOAD16_S_I32 0, imm:$off, (CONST_I32 0))>; def : Pat<(i32 (zextloadi16 imm:$off)), - (LOAD16_U_I32 imm:$off, (CONST_I32 0), 0)>; + (LOAD16_U_I32 0, imm:$off, (CONST_I32 0))>; def : Pat<(i64 (sextloadi8 imm:$off)), - (LOAD8_S_I64 imm:$off, (CONST_I32 0), 0)>; + (LOAD8_S_I64 0, imm:$off, (CONST_I32 0))>; def : Pat<(i64 (zextloadi8 imm:$off)), - (LOAD8_U_I64 imm:$off, (CONST_I32 0), 0)>; + (LOAD8_U_I64 0, imm:$off, (CONST_I32 0))>; def : Pat<(i64 (sextloadi16 imm:$off)), - (LOAD16_S_I64 imm:$off, (CONST_I32 0), 0)>; + (LOAD16_S_I64 0, imm:$off, (CONST_I32 0))>; def : Pat<(i64 (zextloadi16 imm:$off)), - (LOAD16_U_I64 imm:$off, (CONST_I32 0), 0)>; + (LOAD16_U_I64 0, imm:$off, (CONST_I32 0))>; def : Pat<(i64 (sextloadi32 imm:$off)), - (LOAD32_S_I64 imm:$off, (CONST_I32 0), 0)>; + (LOAD32_S_I64 0, imm:$off, (CONST_I32 0))>; def : Pat<(i64 (zextloadi32 imm:$off)), - (LOAD32_U_I64 imm:$off, (CONST_I32 0), 0)>; + (LOAD32_U_I64 0, imm:$off, (CONST_I32 0))>; def : Pat<(i32 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_S_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD8_S_I32 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i32 (zextloadi8 (WebAssemblywrapper 
tglobaladdr:$off))), - (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD8_U_I32 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i32 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_S_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD16_S_I32 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i32 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD16_U_I32 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i64 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD8_S_I64 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i64 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD8_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i64 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD16_S_I64 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i64 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD16_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i64 (sextloadi32 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD32_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD32_S_I64 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i64 (zextloadi32 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD32_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i32 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_S_I32 texternalsym:$off, (CONST_I32 0), 0)>; + (LOAD8_S_I32 0, texternalsym:$off, (CONST_I32 0))>; def : Pat<(i32 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0), 0)>; + (LOAD8_U_I32 0, texternalsym:$off, (CONST_I32 0))>; def : Pat<(i32 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_S_I32 texternalsym:$off, 
(CONST_I32 0), 0)>; + (LOAD16_S_I32 0, texternalsym:$off, (CONST_I32 0))>; def : Pat<(i32 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0), 0)>; + (LOAD16_U_I32 0, texternalsym:$off, (CONST_I32 0))>; def : Pat<(i64 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_S_I64 texternalsym:$off, (CONST_I32 0), 0)>; + (LOAD8_S_I64 0, texternalsym:$off, (CONST_I32 0))>; def : Pat<(i64 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0), 0)>; + (LOAD8_U_I64 0, texternalsym:$off, (CONST_I32 0))>; def : Pat<(i64 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_S_I64 texternalsym:$off, (CONST_I32 0), 0)>; + (LOAD16_S_I64 0, texternalsym:$off, (CONST_I32 0))>; def : Pat<(i64 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0), 0)>; + (LOAD16_U_I64 0, texternalsym:$off, (CONST_I32 0))>; def : Pat<(i64 (sextloadi32 (WebAssemblywrapper texternalsym:$off))), - (LOAD32_S_I64 texternalsym:$off, (CONST_I32 0), 0)>; + (LOAD32_S_I64 0, texternalsym:$off, (CONST_I32 0))>; def : Pat<(i64 (zextloadi32 (WebAssemblywrapper texternalsym:$off))), - (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0), 0)>; + (LOAD32_U_I64 0, texternalsym:$off, (CONST_I32 0))>; // Resolve "don't care" extending loads to zero-extending loads. This is // somewhat arbitrary, but zero-extending is conceptually simpler. // Select "don't care" extending loads with no constant offset. 
-def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr, 0)>; -def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr, 0)>; -def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr, 0)>; -def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr, 0)>; -def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr, 0)>; +def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, 0, $addr)>; +def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, 0, $addr)>; +def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, 0, $addr)>; +def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, 0, $addr)>; +def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, 0, $addr)>; // Select "don't care" extending loads with a constant offset. def : Pat<(i32 (extloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_U_I32 imm:$off, $addr, 0)>; + (LOAD8_U_I32 0, imm:$off, $addr)>; def : Pat<(i32 (extloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_U_I32 imm:$off, $addr, 0)>; + (LOAD16_U_I32 0, imm:$off, $addr)>; def : Pat<(i64 (extloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_U_I64 imm:$off, $addr, 0)>; + (LOAD8_U_I64 0, imm:$off, $addr)>; def : Pat<(i64 (extloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_U_I64 imm:$off, $addr, 0)>; + (LOAD16_U_I64 0, imm:$off, $addr)>; def : Pat<(i64 (extloadi32 (regPlusImm I32:$addr, imm:$off))), - (LOAD32_U_I64 imm:$off, $addr, 0)>; + (LOAD32_U_I64 0, imm:$off, $addr)>; def : Pat<(i32 (extloadi8 (or_is_add I32:$addr, imm:$off))), - (LOAD8_U_I32 imm:$off, $addr, 0)>; + (LOAD8_U_I32 0, imm:$off, $addr)>; def : Pat<(i32 (extloadi16 (or_is_add I32:$addr, imm:$off))), - (LOAD16_U_I32 imm:$off, $addr, 0)>; + (LOAD16_U_I32 0, imm:$off, $addr)>; def : Pat<(i64 (extloadi8 (or_is_add I32:$addr, imm:$off))), - (LOAD8_U_I64 imm:$off, $addr, 0)>; + (LOAD8_U_I64 0, imm:$off, $addr)>; def : Pat<(i64 (extloadi16 (or_is_add I32:$addr, imm:$off))), - (LOAD16_U_I64 imm:$off, $addr, 0)>; + (LOAD16_U_I64 0, imm:$off, $addr)>; def : 
Pat<(i64 (extloadi32 (or_is_add I32:$addr, imm:$off))), - (LOAD32_U_I64 imm:$off, $addr, 0)>; + (LOAD32_U_I64 0, imm:$off, $addr)>; def : Pat<(i32 (extloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_U_I32 tglobaladdr:$off, $addr, 0)>; + (LOAD8_U_I32 0, tglobaladdr:$off, $addr)>; def : Pat<(i32 (extloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_U_I32 tglobaladdr:$off, $addr, 0)>; + (LOAD16_U_I32 0, tglobaladdr:$off, $addr)>; def : Pat<(i64 (extloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_U_I64 tglobaladdr:$off, $addr, 0)>; + (LOAD8_U_I64 0, tglobaladdr:$off, $addr)>; def : Pat<(i64 (extloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_U_I64 tglobaladdr:$off, $addr, 0)>; + (LOAD16_U_I64 0, tglobaladdr:$off, $addr)>; def : Pat<(i64 (extloadi32 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD32_U_I64 tglobaladdr:$off, $addr, 0)>; + (LOAD32_U_I64 0, tglobaladdr:$off, $addr)>; def : Pat<(i32 (extloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_U_I32 texternalsym:$off, $addr, 0)>; + (LOAD8_U_I32 0, texternalsym:$off, $addr)>; def : Pat<(i32 (extloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_U_I32 texternalsym:$off, $addr, 0)>; + (LOAD16_U_I32 0, texternalsym:$off, $addr)>; def : Pat<(i64 (extloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_U_I64 texternalsym:$off, $addr, 0)>; + (LOAD8_U_I64 0, texternalsym:$off, $addr)>; def : Pat<(i64 (extloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_U_I64 texternalsym:$off, $addr, 0)>; + (LOAD16_U_I64 0, texternalsym:$off, $addr)>; def : Pat<(i64 (extloadi32 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD32_U_I64 texternalsym:$off, $addr, 0)>; + (LOAD32_U_I64 0, texternalsym:$off, $addr)>; // Select "don't care" extending loads with just a constant 
offset. def : Pat<(i32 (extloadi8 imm:$off)), - (LOAD8_U_I32 imm:$off, (CONST_I32 0), 0)>; + (LOAD8_U_I32 0, imm:$off, (CONST_I32 0))>; def : Pat<(i32 (extloadi16 imm:$off)), - (LOAD16_U_I32 imm:$off, (CONST_I32 0), 0)>; + (LOAD16_U_I32 0, imm:$off, (CONST_I32 0))>; def : Pat<(i64 (extloadi8 imm:$off)), - (LOAD8_U_I64 imm:$off, (CONST_I32 0), 0)>; + (LOAD8_U_I64 0, imm:$off, (CONST_I32 0))>; def : Pat<(i64 (extloadi16 imm:$off)), - (LOAD16_U_I64 imm:$off, (CONST_I32 0), 0)>; + (LOAD16_U_I64 0, imm:$off, (CONST_I32 0))>; def : Pat<(i64 (extloadi32 imm:$off)), - (LOAD32_U_I64 imm:$off, (CONST_I32 0), 0)>; + (LOAD32_U_I64 0, imm:$off, (CONST_I32 0))>; def : Pat<(i32 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD8_U_I32 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i32 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD16_U_I32 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i64 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD8_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i64 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD16_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i64 (extloadi32 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD32_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>; def : Pat<(i32 (extloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0), 0)>; + (LOAD8_U_I32 0, texternalsym:$off, (CONST_I32 0))>; def : Pat<(i32 (extloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0), 0)>; + (LOAD16_U_I32 0, texternalsym:$off, (CONST_I32 0))>; def : Pat<(i64 (extloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_U_I64 texternalsym:$off, (CONST_I32 
0), 0)>; + (LOAD8_U_I64 0, texternalsym:$off, (CONST_I32 0))>; def : Pat<(i64 (extloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0), 0)>; + (LOAD16_U_I64 0, texternalsym:$off, (CONST_I32 0))>; def : Pat<(i64 (extloadi32 (WebAssemblywrapper texternalsym:$off))), - (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; + (LOAD32_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>; let Defs = [ARGUMENTS] in { // Basic store. -// Note that we split the patterns out of the instruction definitions because -// WebAssembly's stores return their operand value, and tablegen doesn't like -// instruction definition patterns that don't reference all of the output -// operands. // Note: WebAssembly inverts SelectionDAG's usual operand order. -def STORE_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align, I32:$val), [], - "i32.store\t$dst, ${off}(${addr})${p2align}, $val">; -def STORE_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align, I64:$val), [], - "i64.store\t$dst, ${off}(${addr})${p2align}, $val">; -def STORE_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align, F32:$val), [], - "f32.store\t$dst, ${off}(${addr})${p2align}, $val">; -def STORE_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align, F64:$val), [], - "f64.store\t$dst, ${off}(${addr})${p2align}, $val">; +def STORE_I32 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, + I32:$val), [], + "i32.store\t${off}(${addr})${p2align}, $val", 0x36>; +def STORE_I64 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, + I64:$val), [], + "i64.store\t${off}(${addr})${p2align}, $val", 0x37>; +def STORE_F32 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, + F32:$val), [], + "f32.store\t${off}(${addr})${p2align}, $val", 0x38>; +def STORE_F64 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, + F64:$val), [], + "f64.store\t${off}(${addr})${p2align}, $val", 0x39>; 
} // Defs = [ARGUMENTS] // Select stores with no constant offset. -def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, 0, I32:$val)>; -def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, 0, I64:$val)>; -def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, 0, F32:$val)>; -def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, 0, F64:$val)>; +def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, 0, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, 0, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, 0, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, 0, I32:$addr, F64:$val)>; // Select stores with a constant offset. def : Pat<(store I32:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE_I32 imm:$off, I32:$addr, 0, I32:$val)>; + (STORE_I32 0, imm:$off, I32:$addr, I32:$val)>; def : Pat<(store I64:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE_I64 imm:$off, I32:$addr, 0, I64:$val)>; + (STORE_I64 0, imm:$off, I32:$addr, I64:$val)>; def : Pat<(store F32:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE_F32 imm:$off, I32:$addr, 0, F32:$val)>; + (STORE_F32 0, imm:$off, I32:$addr, F32:$val)>; def : Pat<(store F64:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE_F64 imm:$off, I32:$addr, 0, F64:$val)>; + (STORE_F64 0, imm:$off, I32:$addr, F64:$val)>; def : Pat<(store I32:$val, (or_is_add I32:$addr, imm:$off)), - (STORE_I32 imm:$off, I32:$addr, 0, I32:$val)>; + (STORE_I32 0, imm:$off, I32:$addr, I32:$val)>; def : Pat<(store I64:$val, (or_is_add I32:$addr, imm:$off)), - (STORE_I64 imm:$off, I32:$addr, 0, I64:$val)>; + (STORE_I64 0, imm:$off, I32:$addr, I64:$val)>; def : Pat<(store F32:$val, (or_is_add I32:$addr, imm:$off)), - (STORE_F32 imm:$off, I32:$addr, 0, F32:$val)>; + (STORE_F32 0, imm:$off, I32:$addr, F32:$val)>; def : Pat<(store F64:$val, (or_is_add I32:$addr, imm:$off)), - (STORE_F64 imm:$off, I32:$addr, 0, F64:$val)>; + (STORE_F64 0, imm:$off, 
I32:$addr, F64:$val)>; def : Pat<(store I32:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>; + (STORE_I32 0, tglobaladdr:$off, I32:$addr, I32:$val)>; def : Pat<(store I64:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>; + (STORE_I64 0, tglobaladdr:$off, I32:$addr, I64:$val)>; def : Pat<(store F32:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE_F32 tglobaladdr:$off, I32:$addr, 0, F32:$val)>; + (STORE_F32 0, tglobaladdr:$off, I32:$addr, F32:$val)>; def : Pat<(store F64:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE_F64 tglobaladdr:$off, I32:$addr, 0, F64:$val)>; + (STORE_F64 0, tglobaladdr:$off, I32:$addr, F64:$val)>; def : Pat<(store I32:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>; + (STORE_I32 0, texternalsym:$off, I32:$addr, I32:$val)>; def : Pat<(store I64:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>; + (STORE_I64 0, texternalsym:$off, I32:$addr, I64:$val)>; def : Pat<(store F32:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE_F32 texternalsym:$off, I32:$addr, 0, F32:$val)>; + (STORE_F32 0, texternalsym:$off, I32:$addr, F32:$val)>; def : Pat<(store F64:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE_F64 texternalsym:$off, I32:$addr, 0, F64:$val)>; + (STORE_F64 0, texternalsym:$off, I32:$addr, F64:$val)>; // Select stores with just a constant offset. 
def : Pat<(store I32:$val, imm:$off), - (STORE_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>; + (STORE_I32 0, imm:$off, (CONST_I32 0), I32:$val)>; def : Pat<(store I64:$val, imm:$off), - (STORE_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>; + (STORE_I64 0, imm:$off, (CONST_I32 0), I64:$val)>; def : Pat<(store F32:$val, imm:$off), - (STORE_F32 imm:$off, (CONST_I32 0), 0, F32:$val)>; + (STORE_F32 0, imm:$off, (CONST_I32 0), F32:$val)>; def : Pat<(store F64:$val, imm:$off), - (STORE_F64 imm:$off, (CONST_I32 0), 0, F64:$val)>; + (STORE_F64 0, imm:$off, (CONST_I32 0), F64:$val)>; def : Pat<(store I32:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>; + (STORE_I32 0, tglobaladdr:$off, (CONST_I32 0), I32:$val)>; def : Pat<(store I64:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>; + (STORE_I64 0, tglobaladdr:$off, (CONST_I32 0), I64:$val)>; def : Pat<(store F32:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE_F32 tglobaladdr:$off, (CONST_I32 0), 0, F32:$val)>; + (STORE_F32 0, tglobaladdr:$off, (CONST_I32 0), F32:$val)>; def : Pat<(store F64:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE_F64 tglobaladdr:$off, (CONST_I32 0), 0, F64:$val)>; + (STORE_F64 0, tglobaladdr:$off, (CONST_I32 0), F64:$val)>; def : Pat<(store I32:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>; + (STORE_I32 0, texternalsym:$off, (CONST_I32 0), I32:$val)>; def : Pat<(store I64:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>; + (STORE_I64 0, texternalsym:$off, (CONST_I32 0), I64:$val)>; def : Pat<(store F32:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE_F32 texternalsym:$off, (CONST_I32 0), 0, F32:$val)>; + (STORE_F32 0, texternalsym:$off, (CONST_I32 0), F32:$val)>; def : Pat<(store F64:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE_F64 
texternalsym:$off, (CONST_I32 0), 0, F64:$val)>; + (STORE_F64 0, texternalsym:$off, (CONST_I32 0), F64:$val)>; let Defs = [ARGUMENTS] in { // Truncating store. -def STORE8_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align, I32:$val), [], - "i32.store8\t$dst, ${off}(${addr})${p2align}, $val">; -def STORE16_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align, I32:$val), [], - "i32.store16\t$dst, ${off}(${addr})${p2align}, $val">; -def STORE8_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align, I64:$val), [], - "i64.store8\t$dst, ${off}(${addr})${p2align}, $val">; -def STORE16_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align, I64:$val), [], - "i64.store16\t$dst, ${off}(${addr})${p2align}, $val">; -def STORE32_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, - P2Align:$p2align, I64:$val), [], - "i64.store32\t$dst, ${off}(${addr})${p2align}, $val">; +def STORE8_I32 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, + I32:$val), [], + "i32.store8\t${off}(${addr})${p2align}, $val", 0x3a>; +def STORE16_I32 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, + I32:$val), [], + "i32.store16\t${off}(${addr})${p2align}, $val", 0x3b>; +def STORE8_I64 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, + I64:$val), [], + "i64.store8\t${off}(${addr})${p2align}, $val", 0x3c>; +def STORE16_I64 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, + I64:$val), [], + "i64.store16\t${off}(${addr})${p2align}, $val", 0x3d>; +def STORE32_I64 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, + I64:$val), [], + "i64.store32\t${off}(${addr})${p2align}, $val", 0x3e>; } // Defs = [ARGUMENTS] // Select truncating stores with no constant offset. 
def : Pat<(truncstorei8 I32:$val, I32:$addr), - (STORE8_I32 0, I32:$addr, 0, I32:$val)>; + (STORE8_I32 0, 0, I32:$addr, I32:$val)>; def : Pat<(truncstorei16 I32:$val, I32:$addr), - (STORE16_I32 0, I32:$addr, 0, I32:$val)>; + (STORE16_I32 0, 0, I32:$addr, I32:$val)>; def : Pat<(truncstorei8 I64:$val, I32:$addr), - (STORE8_I64 0, I32:$addr, 0, I64:$val)>; + (STORE8_I64 0, 0, I32:$addr, I64:$val)>; def : Pat<(truncstorei16 I64:$val, I32:$addr), - (STORE16_I64 0, I32:$addr, 0, I64:$val)>; + (STORE16_I64 0, 0, I32:$addr, I64:$val)>; def : Pat<(truncstorei32 I64:$val, I32:$addr), - (STORE32_I64 0, I32:$addr, 0, I64:$val)>; + (STORE32_I64 0, 0, I32:$addr, I64:$val)>; // Select truncating stores with a constant offset. def : Pat<(truncstorei8 I32:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE8_I32 imm:$off, I32:$addr, 0, I32:$val)>; + (STORE8_I32 0, imm:$off, I32:$addr, I32:$val)>; def : Pat<(truncstorei16 I32:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE16_I32 imm:$off, I32:$addr, 0, I32:$val)>; + (STORE16_I32 0, imm:$off, I32:$addr, I32:$val)>; def : Pat<(truncstorei8 I64:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE8_I64 imm:$off, I32:$addr, 0, I64:$val)>; + (STORE8_I64 0, imm:$off, I32:$addr, I64:$val)>; def : Pat<(truncstorei16 I64:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE16_I64 imm:$off, I32:$addr, 0, I64:$val)>; + (STORE16_I64 0, imm:$off, I32:$addr, I64:$val)>; def : Pat<(truncstorei32 I64:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE32_I64 imm:$off, I32:$addr, 0, I64:$val)>; + (STORE32_I64 0, imm:$off, I32:$addr, I64:$val)>; def : Pat<(truncstorei8 I32:$val, (or_is_add I32:$addr, imm:$off)), - (STORE8_I32 imm:$off, I32:$addr, 0, I32:$val)>; + (STORE8_I32 0, imm:$off, I32:$addr, I32:$val)>; def : Pat<(truncstorei16 I32:$val, (or_is_add I32:$addr, imm:$off)), - (STORE16_I32 imm:$off, I32:$addr, 0, I32:$val)>; + (STORE16_I32 0, imm:$off, I32:$addr, I32:$val)>; def : Pat<(truncstorei8 I64:$val, (or_is_add I32:$addr, imm:$off)), - (STORE8_I64 
imm:$off, I32:$addr, 0, I64:$val)>; + (STORE8_I64 0, imm:$off, I32:$addr, I64:$val)>; def : Pat<(truncstorei16 I64:$val, (or_is_add I32:$addr, imm:$off)), - (STORE16_I64 imm:$off, I32:$addr, 0, I64:$val)>; + (STORE16_I64 0, imm:$off, I32:$addr, I64:$val)>; def : Pat<(truncstorei32 I64:$val, (or_is_add I32:$addr, imm:$off)), - (STORE32_I64 imm:$off, I32:$addr, 0, I64:$val)>; + (STORE32_I64 0, imm:$off, I32:$addr, I64:$val)>; def : Pat<(truncstorei8 I32:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE8_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>; + (STORE8_I32 0, tglobaladdr:$off, I32:$addr, I32:$val)>; def : Pat<(truncstorei16 I32:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE16_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>; + (STORE16_I32 0, tglobaladdr:$off, I32:$addr, I32:$val)>; def : Pat<(truncstorei8 I64:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE8_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>; + (STORE8_I64 0, tglobaladdr:$off, I32:$addr, I64:$val)>; def : Pat<(truncstorei16 I64:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE16_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>; + (STORE16_I64 0, tglobaladdr:$off, I32:$addr, I64:$val)>; def : Pat<(truncstorei32 I64:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE32_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>; + (STORE32_I64 0, tglobaladdr:$off, I32:$addr, I64:$val)>; def : Pat<(truncstorei8 I32:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE8_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>; + (STORE8_I32 0, texternalsym:$off, I32:$addr, I32:$val)>; def : Pat<(truncstorei16 I32:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE16_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>; + (STORE16_I32 0, texternalsym:$off, I32:$addr, I32:$val)>; def : Pat<(truncstorei8 I64:$val, (add I32:$addr, (WebAssemblywrapper 
texternalsym:$off))), - (STORE8_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>; + (STORE8_I64 0, texternalsym:$off, I32:$addr, I64:$val)>; def : Pat<(truncstorei16 I64:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE16_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>; + (STORE16_I64 0, texternalsym:$off, I32:$addr, I64:$val)>; def : Pat<(truncstorei32 I64:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE32_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>; + (STORE32_I64 0, texternalsym:$off, I32:$addr, I64:$val)>; // Select truncating stores with just a constant offset. def : Pat<(truncstorei8 I32:$val, imm:$off), - (STORE8_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>; + (STORE8_I32 0, imm:$off, (CONST_I32 0), I32:$val)>; def : Pat<(truncstorei16 I32:$val, imm:$off), - (STORE16_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>; + (STORE16_I32 0, imm:$off, (CONST_I32 0), I32:$val)>; def : Pat<(truncstorei8 I64:$val, imm:$off), - (STORE8_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>; + (STORE8_I64 0, imm:$off, (CONST_I32 0), I64:$val)>; def : Pat<(truncstorei16 I64:$val, imm:$off), - (STORE16_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>; + (STORE16_I64 0, imm:$off, (CONST_I32 0), I64:$val)>; def : Pat<(truncstorei32 I64:$val, imm:$off), - (STORE32_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>; + (STORE32_I64 0, imm:$off, (CONST_I32 0), I64:$val)>; def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE8_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>; + (STORE8_I32 0, tglobaladdr:$off, (CONST_I32 0), I32:$val)>; def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE16_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>; + (STORE16_I32 0, tglobaladdr:$off, (CONST_I32 0), I32:$val)>; def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE8_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>; + (STORE8_I64 0, tglobaladdr:$off, (CONST_I32 0), I64:$val)>; def : 
Pat<(truncstorei16 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE16_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>; + (STORE16_I64 0, tglobaladdr:$off, (CONST_I32 0), I64:$val)>; def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE32_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>; + (STORE32_I64 0, tglobaladdr:$off, (CONST_I32 0), I64:$val)>; def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE8_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>; + (STORE8_I32 0, texternalsym:$off, (CONST_I32 0), I32:$val)>; def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE16_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>; + (STORE16_I32 0, texternalsym:$off, (CONST_I32 0), I32:$val)>; def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE8_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>; + (STORE8_I64 0, texternalsym:$off, (CONST_I32 0), I64:$val)>; def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE16_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>; + (STORE16_I64 0, texternalsym:$off, (CONST_I32 0), I64:$val)>; def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE32_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>; + (STORE32_I64 0, texternalsym:$off, (CONST_I32 0), I64:$val)>; let Defs = [ARGUMENTS] in { // Current memory size. -def CURRENT_MEMORY_I32 : I<(outs I32:$dst), (ins), - [(set I32:$dst, (int_wasm_current_memory))], - "current_memory\t$dst">, +def CURRENT_MEMORY_I32 : I<(outs I32:$dst), (ins i32imm:$flags), + [], + "current_memory\t$dst", 0x3f>, Requires<[HasAddr32]>; -def CURRENT_MEMORY_I64 : I<(outs I64:$dst), (ins), - [(set I64:$dst, (int_wasm_current_memory))], - "current_memory\t$dst">, - Requires<[HasAddr64]>; // Grow memory. 
-def GROW_MEMORY_I32 : I<(outs), (ins I32:$delta), - [(int_wasm_grow_memory I32:$delta)], - "grow_memory\t$delta">, +def GROW_MEMORY_I32 : I<(outs), (ins i32imm:$flags, I32:$delta), + [], + "grow_memory\t$delta", 0x40>, Requires<[HasAddr32]>; -def GROW_MEMORY_I64 : I<(outs), (ins I64:$delta), - [(int_wasm_grow_memory I64:$delta)], - "grow_memory\t$delta">, - Requires<[HasAddr64]>; } // Defs = [ARGUMENTS] + +def : Pat<(int_wasm_current_memory), + (CURRENT_MEMORY_I32 0)>; +def : Pat<(int_wasm_grow_memory I32:$delta), + (GROW_MEMORY_I32 0, $delta)>; diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 3e29906..e403534 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -12,5 +12,8 @@ /// //===----------------------------------------------------------------------===// -// TODO: Implement SIMD instructions. -// Note: use Requires<[HasSIMD128]>. +let isCommutable = 1 in { +defm ADD : SIMDBinary<add, fadd, "add ">; +defm MUL: SIMDBinary<mul, fmul, "mul ">; +} // isCommutable = 1 +defm SUB: SIMDBinary<sub, fsub, "sub ">; diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp index af53f3d..7ea5d05 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -29,7 +29,7 @@ using namespace llvm; namespace { class WebAssemblyLowerBrUnless final : public MachineFunctionPass { - const char *getPassName() const override { + StringRef getPassName() const override { return "WebAssembly Lower br_unless"; } @@ -104,12 +104,12 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { } // If we weren't able to invert the condition in place. Insert an - // expression to invert it. + // instruction to invert it. 
if (!Inverted) { unsigned Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - MFI.stackifyVReg(Tmp); BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQZ_I32), Tmp) .addReg(Cond); + MFI.stackifyVReg(Tmp); Cond = Tmp; Inverted = true; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp new file mode 100644 index 0000000..72cb1cc --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -0,0 +1,1184 @@ +//=== WebAssemblyLowerEmscriptenEHSjLj.cpp - Lower exceptions for Emscripten =// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file lowers exception-related instructions and setjmp/longjmp +/// function calls in order to use Emscripten's JavaScript try and catch +/// mechanism. +/// +/// To handle exceptions and setjmp/longjmps, this scheme relies on JavaScript's +/// try and catch syntax and relevant exception-related libraries implemented +/// in JavaScript glue code that will be produced by Emscripten. This is similar +/// to the current Emscripten asm.js exception handling in fastcomp. For +/// fastcomp's EH / SjLj scheme, see these files in fastcomp LLVM branch: +/// (Location: https://github.com/kripken/emscripten-fastcomp) +/// lib/Target/JSBackend/NaCl/LowerEmExceptionsPass.cpp +/// lib/Target/JSBackend/NaCl/LowerEmSetjmp.cpp +/// lib/Target/JSBackend/JSBackend.cpp +/// lib/Target/JSBackend/CallHandlers.h +/// +/// * Exception handling +/// This pass lowers invokes and landingpads into library functions in JS glue +/// code. 
Invokes are lowered into function wrappers called invoke wrappers that +/// exist in JS side, which wraps the original function call with JS try-catch. +/// If an exception occurred, cxa_throw() function in JS side sets some +/// variables (see below) so we can check whether an exception occurred from +/// wasm code and handle it appropriately. +/// +/// * Setjmp-longjmp handling +/// This pass lowers setjmp to a reasonably-performant approach for emscripten. +/// The idea is that each block with a setjmp is broken up into two parts: the +/// part containing setjmp and the part right after the setjmp. The latter part +/// is either reached from the setjmp, or later from a longjmp. To handle the +/// longjmp, all calls that might longjmp are also called using invoke wrappers +/// and thus JS / try-catch. JS longjmp() function also sets some variables so +/// we can check / whether a longjmp occurred from wasm code. Each block with a +/// function call that might longjmp is also split up after the longjmp call. +/// After the longjmp call, we check whether a longjmp occurred, and if it did, +/// which setjmp it corresponds to, and jump to the right post-setjmp block. +/// We assume setjmp-longjmp handling always run after EH handling, which means +/// we don't expect any exception-related instructions when SjLj runs. +/// FIXME Currently this scheme does not support indirect call of setjmp, +/// because of the limitation of the scheme itself. fastcomp does not support it +/// either. +/// +/// In detail, this pass does following things: +/// +/// 1) Create three global variables: __THREW__, __threwValue, and __tempRet0. +/// __tempRet0 will be set within __cxa_find_matching_catch() function in +/// JS library, and __THREW__ and __threwValue will be set in invoke wrappers +/// in JS glue code. For what invoke wrappers are, refer to 3). These +/// variables are used for both exceptions and setjmp/longjmps. 
+/// __THREW__ indicates whether an exception or a longjmp occurred or not. 0 +/// means nothing occurred, 1 means an exception occurred, and other numbers +/// mean a longjmp occurred. In the case of longjmp, __threwValue variable +/// indicates the corresponding setjmp buffer the longjmp corresponds to. +/// In exception handling, __tempRet0 indicates the type of an exception +/// caught, and in setjmp/longjmp, it means the second argument to longjmp +/// function. +/// +/// * Exception handling +/// +/// 2) Create setThrew and setTempRet0 functions. +/// The global variables created in 1) will exist in wasm address space, +/// but their values should be set in JS code, so we provide these functions +/// as interfaces to JS glue code. These functions are equivalent to the +/// following JS functions, which actually exist in asm.js version of JS +/// library. +/// +/// function setThrew(threw, value) { +/// if (__THREW__ == 0) { +/// __THREW__ = threw; +/// __threwValue = value; +/// } +/// } +/// +/// function setTempRet0(value) { +/// __tempRet0 = value; +/// } +/// +/// 3) Lower +/// invoke @func(arg1, arg2) to label %invoke.cont unwind label %lpad +/// into +/// __THREW__ = 0; +/// call @__invoke_SIG(func, arg1, arg2) +/// %__THREW__.val = __THREW__; +/// __THREW__ = 0; +/// if (%__THREW__.val == 1) +/// goto %lpad +/// else +/// goto %invoke.cont +/// SIG is a mangled string generated based on the LLVM IR-level function +/// signature. After LLVM IR types are lowered to the target wasm types, +/// the names for these wrappers will change based on wasm types as well, +/// as in invoke_vi (function takes an int and returns void). The bodies of +/// these wrappers will be generated in JS glue code, and inside those +/// wrappers we use JS try-catch to generate actual exception effects. It +/// also calls the original callee function. 
An example wrapper in JS code +/// would look like this: +/// function invoke_vi(index,a1) { +/// try { +/// Module["dynCall_vi"](index,a1); // This calls original callee +/// } catch(e) { +/// if (typeof e !== 'number' && e !== 'longjmp') throw e; +/// asm["setThrew"](1, 0); // setThrew is called here +/// } +/// } +/// If an exception is thrown, __THREW__ will be set to true in a wrapper, +/// so we can jump to the right BB based on this value. +/// +/// 4) Lower +/// %val = landingpad catch c1 catch c2 catch c3 ... +/// ... use %val ... +/// into +/// %fmc = call @__cxa_find_matching_catch_N(c1, c2, c3, ...) +/// %val = {%fmc, __tempRet0} +/// ... use %val ... +/// Here N is a number calculated based on the number of clauses. +/// Global variable __tempRet0 is set within __cxa_find_matching_catch() in +/// JS glue code. +/// +/// 5) Lower +/// resume {%a, %b} +/// into +/// call @__resumeException(%a) +/// where __resumeException() is a function in JS glue code. +/// +/// 6) Lower +/// call @llvm.eh.typeid.for(type) (intrinsic) +/// into +/// call @llvm_eh_typeid_for(type) +/// llvm_eh_typeid_for function will be generated in JS glue code. +/// +/// * Setjmp / Longjmp handling +/// +/// 7) In the function entry that calls setjmp, initialize setjmpTable and +/// sejmpTableSize as follows: +/// setjmpTableSize = 4; +/// setjmpTable = (int *) malloc(40); +/// setjmpTable[0] = 0; +/// setjmpTable and setjmpTableSize are used in saveSetjmp() function in JS +/// code. +/// +/// 8) Lower +/// setjmp(buf) +/// into +/// setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize); +/// setjmpTableSize = __tempRet0; +/// For each dynamic setjmp call, setjmpTable stores its ID (a number which +/// is incrementally assigned from 0) and its label (a unique number that +/// represents each callsite of setjmp). 
When we need more entries in +/// setjmpTable, it is reallocated in saveSetjmp() in JS code and it will +/// return the new table address, and assign the new table size in +/// __tempRet0. saveSetjmp also stores the setjmp's ID into the buffer buf. +/// A BB with setjmp is split into two after setjmp call in order to make the +/// post-setjmp BB the possible destination of longjmp BB. +/// +/// 9) Lower +/// longjmp(buf, value) +/// into +/// emscripten_longjmp_jmpbuf(buf, value) +/// emscripten_longjmp_jmpbuf will be lowered to emscripten_longjmp later. +/// +/// 10) Lower every call that might longjmp into +/// __THREW__ = 0; +/// call @__invoke_SIG(func, arg1, arg2) +/// %__THREW__.val = __THREW__; +/// __THREW__ = 0; +/// if (%__THREW__.val != 0 & __threwValue != 0) { +/// %label = testSetjmp(mem[%__THREW__.val], setjmpTable, +/// setjmpTableSize); +/// if (%label == 0) +/// emscripten_longjmp(%__THREW__.val, __threwValue); +/// __tempRet0 = __threwValue; +/// } else { +/// %label = -1; +/// } +/// longjmp_result = __tempRet0; +/// switch label { +/// label 1: goto post-setjmp BB 1 +/// label 2: goto post-setjmp BB 2 +/// ... +/// default: goto splitted next BB +/// } +/// testSetjmp examines setjmpTable to see if there is a matching setjmp +/// call. After calling an invoke wrapper, if a longjmp occurred, __THREW__ +/// will be the address of matching jmp_buf buffer and __threwValue be the +/// second argument to longjmp. mem[__THREW__.val] is a setjmp ID that is +/// stored in saveSetjmp. testSetjmp returns a setjmp label, a unique ID to +/// each setjmp callsite. Label 0 means this longjmp buffer does not +/// correspond to one of the setjmp callsites in this function, so in this +/// case we just chain the longjmp to the caller. (Here we call +/// emscripten_longjmp, which is different from emscripten_longjmp_jmpbuf. +/// emscripten_longjmp_jmpbuf takes jmp_buf as its first argument, while +/// emscripten_longjmp takes an int. 
Both of them will eventually be lowered +/// to emscripten_longjmp in s2wasm, but here we need two signatures - we +/// can't translate an int value to a jmp_buf.) +/// Label -1 means no longjmp occurred. Otherwise we jump to the right +/// post-setjmp BB based on the label. +/// +///===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" + +using namespace llvm; + +#define DEBUG_TYPE "wasm-lower-em-ehsjlj" + +static cl::list<std::string> + EHWhitelist("emscripten-cxx-exceptions-whitelist", + cl::desc("The list of function names in which Emscripten-style " + "exception handling is enabled (see emscripten " + "EMSCRIPTEN_CATCHING_WHITELIST options)"), + cl::CommaSeparated); + +namespace { +class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass { + static const char *ThrewGVName; + static const char *ThrewValueGVName; + static const char *TempRet0GVName; + static const char *ResumeFName; + static const char *EHTypeIDFName; + static const char *SetThrewFName; + static const char *SetTempRet0FName; + static const char *EmLongjmpFName; + static const char *EmLongjmpJmpbufFName; + static const char *SaveSetjmpFName; + static const char *TestSetjmpFName; + static const char *FindMatchingCatchPrefix; + static const char *InvokePrefix; + + bool EnableEH; // Enable exception handling + bool EnableSjLj; // Enable setjmp/longjmp handling + + GlobalVariable *ThrewGV; + GlobalVariable *ThrewValueGV; + GlobalVariable *TempRet0GV; + Function *ResumeF; + Function *EHTypeIDF; + Function *EmLongjmpF; + Function *EmLongjmpJmpbufF; + Function *SaveSetjmpF; + Function *TestSetjmpF; + + // __cxa_find_matching_catch_N functions. + // Indexed by the number of clauses in an original landingpad instruction. 
+ DenseMap<int, Function *> FindMatchingCatches; + // Map of <function signature string, invoke_ wrappers> + StringMap<Function *> InvokeWrappers; + // Set of whitelisted function names for exception handling + std::set<std::string> EHWhitelistSet; + + StringRef getPassName() const override { + return "WebAssembly Lower Emscripten Exceptions"; + } + + bool runEHOnFunction(Function &F); + bool runSjLjOnFunction(Function &F); + Function *getFindMatchingCatch(Module &M, unsigned NumClauses); + + template <typename CallOrInvoke> Value *wrapInvoke(CallOrInvoke *CI); + void wrapTestSetjmp(BasicBlock *BB, Instruction *InsertPt, Value *Threw, + Value *SetjmpTable, Value *SetjmpTableSize, Value *&Label, + Value *&LongjmpResult, BasicBlock *&EndBB); + template <typename CallOrInvoke> Function *getInvokeWrapper(CallOrInvoke *CI); + + bool areAllExceptionsAllowed() const { return EHWhitelistSet.empty(); } + bool canLongjmp(Module &M, const Value *Callee) const; + + void createSetThrewFunction(Module &M); + void createSetTempRet0Function(Module &M); + + void rebuildSSA(Function &F); + +public: + static char ID; + + WebAssemblyLowerEmscriptenEHSjLj(bool EnableEH = true, bool EnableSjLj = true) + : ModulePass(ID), EnableEH(EnableEH), EnableSjLj(EnableSjLj), + ThrewGV(nullptr), ThrewValueGV(nullptr), TempRet0GV(nullptr), + ResumeF(nullptr), EHTypeIDF(nullptr), EmLongjmpF(nullptr), + EmLongjmpJmpbufF(nullptr), SaveSetjmpF(nullptr), TestSetjmpF(nullptr) { + EHWhitelistSet.insert(EHWhitelist.begin(), EHWhitelist.end()); + } + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); + } +}; +} // End anonymous namespace + +const char *WebAssemblyLowerEmscriptenEHSjLj::ThrewGVName = "__THREW__"; +const char *WebAssemblyLowerEmscriptenEHSjLj::ThrewValueGVName = "__threwValue"; +const char *WebAssemblyLowerEmscriptenEHSjLj::TempRet0GVName = "__tempRet0"; +const char 
*WebAssemblyLowerEmscriptenEHSjLj::ResumeFName = "__resumeException"; +const char *WebAssemblyLowerEmscriptenEHSjLj::EHTypeIDFName = + "llvm_eh_typeid_for"; +const char *WebAssemblyLowerEmscriptenEHSjLj::SetThrewFName = "setThrew"; +const char *WebAssemblyLowerEmscriptenEHSjLj::SetTempRet0FName = "setTempRet0"; +const char *WebAssemblyLowerEmscriptenEHSjLj::EmLongjmpFName = + "emscripten_longjmp"; +const char *WebAssemblyLowerEmscriptenEHSjLj::EmLongjmpJmpbufFName = + "emscripten_longjmp_jmpbuf"; +const char *WebAssemblyLowerEmscriptenEHSjLj::SaveSetjmpFName = "saveSetjmp"; +const char *WebAssemblyLowerEmscriptenEHSjLj::TestSetjmpFName = "testSetjmp"; +const char *WebAssemblyLowerEmscriptenEHSjLj::FindMatchingCatchPrefix = + "__cxa_find_matching_catch_"; +const char *WebAssemblyLowerEmscriptenEHSjLj::InvokePrefix = "__invoke_"; + +char WebAssemblyLowerEmscriptenEHSjLj::ID = 0; +INITIALIZE_PASS(WebAssemblyLowerEmscriptenEHSjLj, DEBUG_TYPE, + "WebAssembly Lower Emscripten Exceptions / Setjmp / Longjmp", + false, false) + +ModulePass *llvm::createWebAssemblyLowerEmscriptenEHSjLj(bool EnableEH, + bool EnableSjLj) { + return new WebAssemblyLowerEmscriptenEHSjLj(EnableEH, EnableSjLj); +} + +static bool canThrow(const Value *V) { + if (const auto *F = dyn_cast<const Function>(V)) { + // Intrinsics cannot throw + if (F->isIntrinsic()) + return false; + StringRef Name = F->getName(); + // leave setjmp and longjmp (mostly) alone, we process them properly later + if (Name == "setjmp" || Name == "longjmp") + return false; + return !F->doesNotThrow(); + } + // not a function, so an indirect call - can throw, we can't tell + return true; +} + +// Returns an available name for a global value. +// If the proposed name already exists in the module, adds '_' at the end of +// the name until the name is available. 
+static inline std::string createGlobalValueName(const Module &M, + const std::string &Propose) { + std::string Name = Propose; + while (M.getNamedGlobal(Name)) + Name += "_"; + return Name; +} + +// Simple function name mangler. +// This function simply takes LLVM's string representation of parameter types +// and concatenate them with '_'. There are non-alphanumeric characters but llc +// is ok with it, and we need to postprocess these names after the lowering +// phase anyway. +static std::string getSignature(FunctionType *FTy) { + std::string Sig; + raw_string_ostream OS(Sig); + OS << *FTy->getReturnType(); + for (Type *ParamTy : FTy->params()) + OS << "_" << *ParamTy; + if (FTy->isVarArg()) + OS << "_..."; + Sig = OS.str(); + Sig.erase(remove_if(Sig, isspace), Sig.end()); + // When s2wasm parses .s file, a comma means the end of an argument. So a + // mangled function name can contain any character but a comma. + std::replace(Sig.begin(), Sig.end(), ',', '.'); + return Sig; +} + +// Returns __cxa_find_matching_catch_N function, where N = NumClauses + 2. +// This is because a landingpad instruction contains two more arguments, a +// personality function and a cleanup bit, and __cxa_find_matching_catch_N +// functions are named after the number of arguments in the original landingpad +// instruction. 
+Function * +WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M, + unsigned NumClauses) { + if (FindMatchingCatches.count(NumClauses)) + return FindMatchingCatches[NumClauses]; + PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext()); + SmallVector<Type *, 16> Args(NumClauses, Int8PtrTy); + FunctionType *FTy = FunctionType::get(Int8PtrTy, Args, false); + Function *F = + Function::Create(FTy, GlobalValue::ExternalLinkage, + FindMatchingCatchPrefix + Twine(NumClauses + 2), &M); + FindMatchingCatches[NumClauses] = F; + return F; +} + +// Generate invoke wrapper sequence with preamble and postamble +// Preamble: +// __THREW__ = 0; +// Postamble: +// %__THREW__.val = __THREW__; __THREW__ = 0; +// Returns %__THREW__.val, which indicates whether an exception is thrown (or +// whether longjmp occurred), for future use. +template <typename CallOrInvoke> +Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) { + LLVMContext &C = CI->getModule()->getContext(); + + // If we are calling a function that is noreturn, we must remove that + // attribute. The code we insert here does expect it to return, after we + // catch the exception.
+ if (CI->doesNotReturn()) { + if (auto *F = dyn_cast<Function>(CI->getCalledValue())) + F->removeFnAttr(Attribute::NoReturn); + CI->removeAttribute(AttributeSet::FunctionIndex, Attribute::NoReturn); + } + + IRBuilder<> IRB(C); + IRB.SetInsertPoint(CI); + + // Pre-invoke + // __THREW__ = 0; + IRB.CreateStore(IRB.getInt32(0), ThrewGV); + + // Invoke function wrapper in JavaScript + SmallVector<Value *, 16> Args; + // Put the pointer to the callee as first argument, so it can be called + // within the invoke wrapper later + Args.push_back(CI->getCalledValue()); + Args.append(CI->arg_begin(), CI->arg_end()); + CallInst *NewCall = IRB.CreateCall(getInvokeWrapper(CI), Args); + NewCall->takeName(CI); + NewCall->setCallingConv(CI->getCallingConv()); + NewCall->setDebugLoc(CI->getDebugLoc()); + + // Because we added the pointer to the callee as first argument, all + // argument attribute indices have to be incremented by one. + SmallVector<AttributeSet, 8> AttributesVec; + const AttributeSet &InvokePAL = CI->getAttributes(); + CallSite::arg_iterator AI = CI->arg_begin(); + unsigned i = 1; // Argument attribute index starts from 1 + for (unsigned e = CI->getNumArgOperands(); i <= e; ++AI, ++i) { + if (InvokePAL.hasAttributes(i)) { + AttrBuilder B(InvokePAL, i); + AttributesVec.push_back(AttributeSet::get(C, i + 1, B)); + } + } + // Add any return attributes. + if (InvokePAL.hasAttributes(AttributeSet::ReturnIndex)) + AttributesVec.push_back(AttributeSet::get(C, InvokePAL.getRetAttributes())); + // Add any function attributes. + if (InvokePAL.hasAttributes(AttributeSet::FunctionIndex)) + AttributesVec.push_back(AttributeSet::get(C, InvokePAL.getFnAttributes())); + // Reconstruct the AttributesList based on the vector we constructed. 
+ AttributeSet NewCallPAL = AttributeSet::get(C, AttributesVec); + NewCall->setAttributes(NewCallPAL); + + CI->replaceAllUsesWith(NewCall); + + // Post-invoke + // %__THREW__.val = __THREW__; __THREW__ = 0; + Value *Threw = IRB.CreateLoad(ThrewGV, ThrewGV->getName() + ".val"); + IRB.CreateStore(IRB.getInt32(0), ThrewGV); + return Threw; +} + +// Get matching invoke wrapper based on callee signature +template <typename CallOrInvoke> +Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallOrInvoke *CI) { + Module *M = CI->getModule(); + SmallVector<Type *, 16> ArgTys; + Value *Callee = CI->getCalledValue(); + FunctionType *CalleeFTy; + if (auto *F = dyn_cast<Function>(Callee)) + CalleeFTy = F->getFunctionType(); + else { + auto *CalleeTy = cast<PointerType>(Callee->getType())->getElementType(); + CalleeFTy = dyn_cast<FunctionType>(CalleeTy); + } + + std::string Sig = getSignature(CalleeFTy); + if (InvokeWrappers.find(Sig) != InvokeWrappers.end()) + return InvokeWrappers[Sig]; + + // Put the pointer to the callee as first argument + ArgTys.push_back(PointerType::getUnqual(CalleeFTy)); + // Add argument types + ArgTys.append(CalleeFTy->param_begin(), CalleeFTy->param_end()); + + FunctionType *FTy = FunctionType::get(CalleeFTy->getReturnType(), ArgTys, + CalleeFTy->isVarArg()); + Function *F = Function::Create(FTy, GlobalValue::ExternalLinkage, + InvokePrefix + Sig, M); + InvokeWrappers[Sig] = F; + return F; +} + +bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M, + const Value *Callee) const { + if (auto *CalleeF = dyn_cast<Function>(Callee)) + if (CalleeF->isIntrinsic()) + return false; + + // The reason we include malloc/free here is to exclude the malloc/free + // calls generated in setjmp prep / cleanup routines. 
+ Function *SetjmpF = M.getFunction("setjmp"); + Function *MallocF = M.getFunction("malloc"); + Function *FreeF = M.getFunction("free"); + if (Callee == SetjmpF || Callee == MallocF || Callee == FreeF) + return false; + + // There are functions in JS glue code + if (Callee == ResumeF || Callee == EHTypeIDF || Callee == SaveSetjmpF || + Callee == TestSetjmpF) + return false; + + // __cxa_find_matching_catch_N functions cannot longjmp + if (Callee->getName().startswith(FindMatchingCatchPrefix)) + return false; + + // Exception-catching related functions + Function *BeginCatchF = M.getFunction("__cxa_begin_catch"); + Function *EndCatchF = M.getFunction("__cxa_end_catch"); + Function *AllocExceptionF = M.getFunction("__cxa_allocate_exception"); + Function *ThrowF = M.getFunction("__cxa_throw"); + Function *TerminateF = M.getFunction("__clang_call_terminate"); + if (Callee == BeginCatchF || Callee == EndCatchF || + Callee == AllocExceptionF || Callee == ThrowF || Callee == TerminateF) + return false; + + // Otherwise we don't know + return true; +} + +// Generate testSetjmp function call sequence with preamble and postamble. +// The code this generates is equivalent to the following JavaScript code: +// if (%__THREW__.val != 0 & threwValue != 0) { +// %label = _testSetjmp(mem[%__THREW__.val], setjmpTable, setjmpTableSize); +// if (%label == 0) +// emscripten_longjmp(%__THREW__.val, threwValue); +// __tempRet0 = threwValue; +// } else { +// %label = -1; +// } +// %longjmp_result = __tempRet0; +// +// As output parameters, returns %label, %longjmp_result, and the BB the last +// instruction (%longjmp_result = ...) is in.
+void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp( + BasicBlock *BB, Instruction *InsertPt, Value *Threw, Value *SetjmpTable, + Value *SetjmpTableSize, Value *&Label, Value *&LongjmpResult, + BasicBlock *&EndBB) { + Function *F = BB->getParent(); + LLVMContext &C = BB->getModule()->getContext(); + IRBuilder<> IRB(C); + IRB.SetInsertPoint(InsertPt); + + // if (%__THREW__.val != 0 & threwValue != 0) + IRB.SetInsertPoint(BB); + BasicBlock *ThenBB1 = BasicBlock::Create(C, "if.then1", F); + BasicBlock *ElseBB1 = BasicBlock::Create(C, "if.else1", F); + BasicBlock *EndBB1 = BasicBlock::Create(C, "if.end", F); + Value *ThrewCmp = IRB.CreateICmpNE(Threw, IRB.getInt32(0)); + Value *ThrewValue = + IRB.CreateLoad(ThrewValueGV, ThrewValueGV->getName() + ".val"); + Value *ThrewValueCmp = IRB.CreateICmpNE(ThrewValue, IRB.getInt32(0)); + Value *Cmp1 = IRB.CreateAnd(ThrewCmp, ThrewValueCmp, "cmp1"); + IRB.CreateCondBr(Cmp1, ThenBB1, ElseBB1); + + // %label = _testSetjmp(mem[%__THREW__.val], _setjmpTable, _setjmpTableSize); + // if (%label == 0) + IRB.SetInsertPoint(ThenBB1); + BasicBlock *ThenBB2 = BasicBlock::Create(C, "if.then2", F); + BasicBlock *EndBB2 = BasicBlock::Create(C, "if.end2", F); + Value *ThrewInt = IRB.CreateIntToPtr(Threw, Type::getInt32PtrTy(C), + Threw->getName() + ".i32p"); + Value *LoadedThrew = + IRB.CreateLoad(ThrewInt, ThrewInt->getName() + ".loaded"); + Value *ThenLabel = IRB.CreateCall( + TestSetjmpF, {LoadedThrew, SetjmpTable, SetjmpTableSize}, "label"); + Value *Cmp2 = IRB.CreateICmpEQ(ThenLabel, IRB.getInt32(0)); + IRB.CreateCondBr(Cmp2, ThenBB2, EndBB2); + + // emscripten_longjmp(%__THREW__.val, threwValue); + IRB.SetInsertPoint(ThenBB2); + IRB.CreateCall(EmLongjmpF, {Threw, ThrewValue}); + IRB.CreateUnreachable(); + + // __tempRet0 = threwValue; + IRB.SetInsertPoint(EndBB2); + IRB.CreateStore(ThrewValue, TempRet0GV); + IRB.CreateBr(EndBB1); + + IRB.SetInsertPoint(ElseBB1); + IRB.CreateBr(EndBB1); + + // longjmp_result = __tempRet0; + 
IRB.SetInsertPoint(EndBB1); + PHINode *LabelPHI = IRB.CreatePHI(IRB.getInt32Ty(), 2, "label"); + LabelPHI->addIncoming(ThenLabel, EndBB2); + + LabelPHI->addIncoming(IRB.getInt32(-1), ElseBB1); + + // Output parameter assignment + Label = LabelPHI; + EndBB = EndBB1; + LongjmpResult = IRB.CreateLoad(TempRet0GV, "longjmp_result"); +} + +// Create setThrew function +// function setThrew(threw, value) { +// if (__THREW__ == 0) { +// __THREW__ = threw; +// __threwValue = value; +// } +// } +void WebAssemblyLowerEmscriptenEHSjLj::createSetThrewFunction(Module &M) { + LLVMContext &C = M.getContext(); + IRBuilder<> IRB(C); + + assert(!M.getNamedGlobal(SetThrewFName) && "setThrew already exists"); + Type *Params[] = {IRB.getInt32Ty(), IRB.getInt32Ty()}; + FunctionType *FTy = FunctionType::get(IRB.getVoidTy(), Params, false); + Function *F = + Function::Create(FTy, GlobalValue::ExternalLinkage, SetThrewFName, &M); + Argument *Arg1 = &*(F->arg_begin()); + Argument *Arg2 = &*(++F->arg_begin()); + Arg1->setName("threw"); + Arg2->setName("value"); + BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); + BasicBlock *ThenBB = BasicBlock::Create(C, "if.then", F); + BasicBlock *EndBB = BasicBlock::Create(C, "if.end", F); + + IRB.SetInsertPoint(EntryBB); + Value *Threw = IRB.CreateLoad(ThrewGV, ThrewGV->getName() + ".val"); + Value *Cmp = IRB.CreateICmpEQ(Threw, IRB.getInt32(0), "cmp"); + IRB.CreateCondBr(Cmp, ThenBB, EndBB); + + IRB.SetInsertPoint(ThenBB); + IRB.CreateStore(Arg1, ThrewGV); + IRB.CreateStore(Arg2, ThrewValueGV); + IRB.CreateBr(EndBB); + + IRB.SetInsertPoint(EndBB); + IRB.CreateRetVoid(); +} + +// Create setTempRet0 function +// function setTempRet0(value) { +// __tempRet0 = value; +// } +void WebAssemblyLowerEmscriptenEHSjLj::createSetTempRet0Function(Module &M) { + LLVMContext &C = M.getContext(); + IRBuilder<> IRB(C); + + assert(!M.getNamedGlobal(SetTempRet0FName) && "setTempRet0 already exists"); + Type *Params[] = {IRB.getInt32Ty()}; + FunctionType *FTy = 
FunctionType::get(IRB.getVoidTy(), Params, false); + Function *F = + Function::Create(FTy, GlobalValue::ExternalLinkage, SetTempRet0FName, &M); + F->arg_begin()->setName("value"); + BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); + IRB.SetInsertPoint(EntryBB); + IRB.CreateStore(&*F->arg_begin(), TempRet0GV); + IRB.CreateRetVoid(); +} + +void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) { + DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); + DT.recalculate(F); // CFG has been changed + SSAUpdater SSA; + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + for (auto UI = I.use_begin(), UE = I.use_end(); UI != UE;) { + Use &U = *UI; + ++UI; + SSA.Initialize(I.getType(), I.getName()); + SSA.AddAvailableValue(&BB, &I); + Instruction *User = cast<Instruction>(U.getUser()); + if (User->getParent() == &BB) + continue; + + if (PHINode *UserPN = dyn_cast<PHINode>(User)) + if (UserPN->getIncomingBlock(U) == &BB) + continue; + + if (DT.dominates(&I, User)) + continue; + SSA.RewriteUseAfterInsertions(U); + } + } + } +} + +bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { + LLVMContext &C = M.getContext(); + IRBuilder<> IRB(C); + + Function *SetjmpF = M.getFunction("setjmp"); + Function *LongjmpF = M.getFunction("longjmp"); + bool SetjmpUsed = SetjmpF && !SetjmpF->use_empty(); + bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty(); + bool DoSjLj = EnableSjLj && (SetjmpUsed || LongjmpUsed); + + // Create global variables __THREW__, threwValue, and __tempRet0, which are + // used in common for both exception handling and setjmp/longjmp handling + ThrewGV = new GlobalVariable(M, IRB.getInt32Ty(), false, + GlobalValue::ExternalLinkage, IRB.getInt32(0), + createGlobalValueName(M, ThrewGVName)); + ThrewValueGV = new GlobalVariable( + M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage, IRB.getInt32(0), + createGlobalValueName(M, ThrewValueGVName)); + TempRet0GV = new GlobalVariable(M, IRB.getInt32Ty(), false, + 
GlobalValue::ExternalLinkage, IRB.getInt32(0), + createGlobalValueName(M, TempRet0GVName)); + + bool Changed = false; + + // Exception handling + if (EnableEH) { + // Register __resumeException function + FunctionType *ResumeFTy = + FunctionType::get(IRB.getVoidTy(), IRB.getInt8PtrTy(), false); + ResumeF = Function::Create(ResumeFTy, GlobalValue::ExternalLinkage, + ResumeFName, &M); + + // Register llvm_eh_typeid_for function + FunctionType *EHTypeIDTy = + FunctionType::get(IRB.getInt32Ty(), IRB.getInt8PtrTy(), false); + EHTypeIDF = Function::Create(EHTypeIDTy, GlobalValue::ExternalLinkage, + EHTypeIDFName, &M); + + for (Function &F : M) { + if (F.isDeclaration()) + continue; + Changed |= runEHOnFunction(F); + } + } + + // Setjmp/longjmp handling + if (DoSjLj) { + Changed = true; // We have setjmp or longjmp somewhere + + Function *MallocF = M.getFunction("malloc"); + Function *FreeF = M.getFunction("free"); + if (!MallocF || !FreeF) + report_fatal_error( + "malloc and free must be linked into the module if setjmp is used"); + + // Register saveSetjmp function + FunctionType *SetjmpFTy = SetjmpF->getFunctionType(); + SmallVector<Type *, 4> Params = {SetjmpFTy->getParamType(0), + IRB.getInt32Ty(), Type::getInt32PtrTy(C), + IRB.getInt32Ty()}; + FunctionType *FTy = + FunctionType::get(Type::getInt32PtrTy(C), Params, false); + SaveSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage, + SaveSetjmpFName, &M); + + // Register testSetjmp function + Params = {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()}; + FTy = FunctionType::get(IRB.getInt32Ty(), Params, false); + TestSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage, + TestSetjmpFName, &M); + + if (LongjmpF) { + // Replace all uses of longjmp with emscripten_longjmp_jmpbuf, which is + // defined in JS code + EmLongjmpJmpbufF = Function::Create(LongjmpF->getFunctionType(), + GlobalValue::ExternalLinkage, + EmLongjmpJmpbufFName, &M); + + LongjmpF->replaceAllUsesWith(EmLongjmpJmpbufF); + } + 
FTy = FunctionType::get(IRB.getVoidTy(), + {IRB.getInt32Ty(), IRB.getInt32Ty()}, false); + EmLongjmpF = + Function::Create(FTy, GlobalValue::ExternalLinkage, EmLongjmpFName, &M); + + // Only traverse functions that uses setjmp in order not to insert + // unnecessary prep / cleanup code in every function + SmallPtrSet<Function *, 8> SetjmpUsers; + for (User *U : SetjmpF->users()) { + auto *UI = cast<Instruction>(U); + SetjmpUsers.insert(UI->getFunction()); + } + for (Function *F : SetjmpUsers) + runSjLjOnFunction(*F); + } + + if (!Changed) { + // Delete unused global variables and functions + ThrewGV->eraseFromParent(); + ThrewValueGV->eraseFromParent(); + TempRet0GV->eraseFromParent(); + if (ResumeF) + ResumeF->eraseFromParent(); + if (EHTypeIDF) + EHTypeIDF->eraseFromParent(); + if (EmLongjmpF) + EmLongjmpF->eraseFromParent(); + if (SaveSetjmpF) + SaveSetjmpF->eraseFromParent(); + if (TestSetjmpF) + TestSetjmpF->eraseFromParent(); + return false; + } + + // If we have made any changes while doing exception handling or + // setjmp/longjmp handling, we have to create these functions for JavaScript + // to call. 
+ createSetThrewFunction(M); + createSetTempRet0Function(M); + + return true; +} + +bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { + Module &M = *F.getParent(); + LLVMContext &C = F.getContext(); + IRBuilder<> IRB(C); + bool Changed = false; + SmallVector<Instruction *, 64> ToErase; + SmallPtrSet<LandingPadInst *, 32> LandingPads; + bool AllowExceptions = + areAllExceptionsAllowed() || EHWhitelistSet.count(F.getName()); + + for (BasicBlock &BB : F) { + auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); + if (!II) + continue; + Changed = true; + LandingPads.insert(II->getLandingPadInst()); + IRB.SetInsertPoint(II); + + bool NeedInvoke = AllowExceptions && canThrow(II->getCalledValue()); + if (NeedInvoke) { + // Wrap invoke with invoke wrapper and generate preamble/postamble + Value *Threw = wrapInvoke(II); + ToErase.push_back(II); + + // Insert a branch based on __THREW__ variable + Value *Cmp = IRB.CreateICmpEQ(Threw, IRB.getInt32(1), "cmp"); + IRB.CreateCondBr(Cmp, II->getUnwindDest(), II->getNormalDest()); + + } else { + // This can't throw, and we don't need this invoke, just replace it with a + // call+branch + SmallVector<Value *, 16> Args(II->arg_begin(), II->arg_end()); + CallInst *NewCall = IRB.CreateCall(II->getCalledValue(), Args); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setDebugLoc(II->getDebugLoc()); + NewCall->setAttributes(II->getAttributes()); + II->replaceAllUsesWith(NewCall); + ToErase.push_back(II); + + IRB.CreateBr(II->getNormalDest()); + + // Remove any PHI node entries from the exception destination + II->getUnwindDest()->removePredecessor(&BB); + } + } + + // Process resume instructions + for (BasicBlock &BB : F) { + // Scan the body of the basic block for resumes + for (Instruction &I : BB) { + auto *RI = dyn_cast<ResumeInst>(&I); + if (!RI) + continue; + + // Split the input into legal values + Value *Input = RI->getValue(); + IRB.SetInsertPoint(RI); + Value *Low = 
IRB.CreateExtractValue(Input, 0, "low"); + // Create a call to __resumeException function + IRB.CreateCall(ResumeF, {Low}); + // Add a terminator to the block + IRB.CreateUnreachable(); + ToErase.push_back(RI); + } + } + + // Process llvm.eh.typeid.for intrinsics + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + auto *CI = dyn_cast<CallInst>(&I); + if (!CI) + continue; + const Function *Callee = CI->getCalledFunction(); + if (!Callee) + continue; + if (Callee->getIntrinsicID() != Intrinsic::eh_typeid_for) + continue; + + IRB.SetInsertPoint(CI); + CallInst *NewCI = + IRB.CreateCall(EHTypeIDF, CI->getArgOperand(0), "typeid"); + CI->replaceAllUsesWith(NewCI); + ToErase.push_back(CI); + } + } + + // Look for orphan landingpads, can occur in blocks with no predecessors + for (BasicBlock &BB : F) { + Instruction *I = BB.getFirstNonPHI(); + if (auto *LPI = dyn_cast<LandingPadInst>(I)) + LandingPads.insert(LPI); + } + + // Handle all the landingpad for this function together, as multiple invokes + // may share a single lp + for (LandingPadInst *LPI : LandingPads) { + IRB.SetInsertPoint(LPI); + SmallVector<Value *, 16> FMCArgs; + for (unsigned i = 0, e = LPI->getNumClauses(); i < e; ++i) { + Constant *Clause = LPI->getClause(i); + // As a temporary workaround for the lack of aggregate varargs support + // in the interface between JS and wasm, break out filter operands into + // their component elements.
+ if (LPI->isFilter(i)) { + auto *ATy = cast<ArrayType>(Clause->getType()); + for (unsigned j = 0, e = ATy->getNumElements(); j < e; ++j) { + Value *EV = IRB.CreateExtractValue(Clause, makeArrayRef(j), "filter"); + FMCArgs.push_back(EV); + } + } else + FMCArgs.push_back(Clause); + } + + // Create a call to __cxa_find_matching_catch_N function + Function *FMCF = getFindMatchingCatch(M, FMCArgs.size()); + CallInst *FMCI = IRB.CreateCall(FMCF, FMCArgs, "fmc"); + Value *Undef = UndefValue::get(LPI->getType()); + Value *Pair0 = IRB.CreateInsertValue(Undef, FMCI, 0, "pair0"); + Value *TempRet0 = + IRB.CreateLoad(TempRet0GV, TempRet0GV->getName() + ".val"); + Value *Pair1 = IRB.CreateInsertValue(Pair0, TempRet0, 1, "pair1"); + + LPI->replaceAllUsesWith(Pair1); + ToErase.push_back(LPI); + } + + // Erase everything we no longer need in this function + for (Instruction *I : ToErase) + I->eraseFromParent(); + + return Changed; +} + +bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { + Module &M = *F.getParent(); + LLVMContext &C = F.getContext(); + IRBuilder<> IRB(C); + SmallVector<Instruction *, 64> ToErase; + // Vector of %setjmpTable values + std::vector<Instruction *> SetjmpTableInsts; + // Vector of %setjmpTableSize values + std::vector<Instruction *> SetjmpTableSizeInsts; + + // Setjmp preparation + + // This instruction effectively means %setjmpTableSize = 4. + // We create this as an instruction intentionally, and we don't want to fold + // this instruction to a constant 4, because this value will be used in + // SSAUpdater.AddAvailableValue(...) later. 
+ BasicBlock &EntryBB = F.getEntryBlock(); + BinaryOperator *SetjmpTableSize = BinaryOperator::Create( + Instruction::Add, IRB.getInt32(4), IRB.getInt32(0), "setjmpTableSize", + &*EntryBB.getFirstInsertionPt()); + // setjmpTable = (int *) malloc(40); + Instruction *SetjmpTable = CallInst::CreateMalloc( + SetjmpTableSize, IRB.getInt32Ty(), IRB.getInt32Ty(), IRB.getInt32(40), + nullptr, nullptr, "setjmpTable"); + // setjmpTable[0] = 0; + IRB.SetInsertPoint(SetjmpTableSize); + IRB.CreateStore(IRB.getInt32(0), SetjmpTable); + SetjmpTableInsts.push_back(SetjmpTable); + SetjmpTableSizeInsts.push_back(SetjmpTableSize); + + // Setjmp transformation + std::vector<PHINode *> SetjmpRetPHIs; + Function *SetjmpF = M.getFunction("setjmp"); + for (User *U : SetjmpF->users()) { + auto *CI = dyn_cast<CallInst>(U); + if (!CI) + report_fatal_error("Does not support indirect calls to setjmp"); + + BasicBlock *BB = CI->getParent(); + if (BB->getParent() != &F) // in other function + continue; + + // The tail is everything right after the call, and will be reached once + // when setjmp is called, and later when longjmp returns to the setjmp + BasicBlock *Tail = SplitBlock(BB, CI->getNextNode()); + // Add a phi to the tail, which will be the output of setjmp, which + // indicates if this is the first call or a longjmp back. The phi directly + // uses the right value based on where we arrive from + IRB.SetInsertPoint(Tail->getFirstNonPHI()); + PHINode *SetjmpRet = IRB.CreatePHI(IRB.getInt32Ty(), 2, "setjmp.ret"); + + // setjmp initial call returns 0 + SetjmpRet->addIncoming(IRB.getInt32(0), BB); + // The proper output is now this, not the setjmp call itself + CI->replaceAllUsesWith(SetjmpRet); + // longjmp returns to the setjmp will add themselves to this phi + SetjmpRetPHIs.push_back(SetjmpRet); + + // Fix call target + // Our index in the function is our place in the array + 1 to avoid index + // 0, because index 0 means the longjmp is not ours to handle. 
+ IRB.SetInsertPoint(CI); + Value *Args[] = {CI->getArgOperand(0), IRB.getInt32(SetjmpRetPHIs.size()), + SetjmpTable, SetjmpTableSize}; + Instruction *NewSetjmpTable = + IRB.CreateCall(SaveSetjmpF, Args, "setjmpTable"); + Instruction *NewSetjmpTableSize = + IRB.CreateLoad(TempRet0GV, "setjmpTableSize"); + SetjmpTableInsts.push_back(NewSetjmpTable); + SetjmpTableSizeInsts.push_back(NewSetjmpTableSize); + ToErase.push_back(CI); + } + + // Update each call that can longjmp so it can return to a setjmp where + // relevant. + + // Because we are creating new BBs while processing and don't want to make + // all these newly created BBs candidates again for longjmp processing, we + // first make the vector of candidate BBs. + std::vector<BasicBlock *> BBs; + for (BasicBlock &BB : F) + BBs.push_back(&BB); + + // BBs.size() will change within the loop, so we query it every time + for (unsigned i = 0; i < BBs.size(); i++) { + BasicBlock *BB = BBs[i]; + for (Instruction &I : *BB) { + assert(!isa<InvokeInst>(&I)); + auto *CI = dyn_cast<CallInst>(&I); + if (!CI) + continue; + + const Value *Callee = CI->getCalledValue(); + if (!canLongjmp(M, Callee)) + continue; + + Value *Threw = nullptr; + BasicBlock *Tail; + if (Callee->getName().startswith(InvokePrefix)) { + // If invoke wrapper has already been generated for this call in + // previous EH phase, search for the load instruction + // %__THREW__.val = __THREW__; + // in postamble after the invoke wrapper call + LoadInst *ThrewLI = nullptr; + StoreInst *ThrewResetSI = nullptr; + for (auto I = std::next(BasicBlock::iterator(CI)), IE = BB->end(); + I != IE; ++I) { + if (auto *LI = dyn_cast<LoadInst>(I)) + if (auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand())) + if (GV == ThrewGV) { + Threw = ThrewLI = LI; + break; + } + } + // Search for the store instruction after the load above + // __THREW__ = 0; + for (auto I = std::next(BasicBlock::iterator(ThrewLI)), IE = BB->end(); + I != IE; ++I) { + if (auto *SI = 
dyn_cast<StoreInst>(I)) + if (auto *GV = dyn_cast<GlobalVariable>(SI->getPointerOperand())) + if (GV == ThrewGV && SI->getValueOperand() == IRB.getInt32(0)) { + ThrewResetSI = SI; + break; + } + } + assert(Threw && ThrewLI && "Cannot find __THREW__ load after invoke"); + assert(ThrewResetSI && "Cannot find __THREW__ store after invoke"); + Tail = SplitBlock(BB, ThrewResetSI->getNextNode()); + + } else { + // Wrap call with invoke wrapper and generate preamble/postamble + Threw = wrapInvoke(CI); + ToErase.push_back(CI); + Tail = SplitBlock(BB, CI->getNextNode()); + } + + // We need to replace the terminator in Tail - SplitBlock makes BB go + // straight to Tail, we need to check if a longjmp occurred, and go to the + // right setjmp-tail if so + ToErase.push_back(BB->getTerminator()); + + // Generate a function call to testSetjmp function and preamble/postamble + // code to figure out (1) whether longjmp occurred (2) if longjmp + // occurred, which setjmp it corresponds to + Value *Label = nullptr; + Value *LongjmpResult = nullptr; + BasicBlock *EndBB = nullptr; + wrapTestSetjmp(BB, CI, Threw, SetjmpTable, SetjmpTableSize, Label, + LongjmpResult, EndBB); + assert(Label && LongjmpResult && EndBB); + + // Create switch instruction + IRB.SetInsertPoint(EndBB); + SwitchInst *SI = IRB.CreateSwitch(Label, Tail, SetjmpRetPHIs.size()); + // -1 means no longjmp happened, continue normally (will hit the default + // switch case). 0 means a longjmp that is not ours to handle, needs a + // rethrow. Otherwise the index is the same as the index in P+1 (to avoid + // 0). + for (unsigned i = 0; i < SetjmpRetPHIs.size(); i++) { + SI->addCase(IRB.getInt32(i + 1), SetjmpRetPHIs[i]->getParent()); + SetjmpRetPHIs[i]->addIncoming(LongjmpResult, EndBB); + } + + // We are splitting the block here, and must continue to find other calls + // in the block - which is now split. 
so continue to traverse in the Tail + BBs.push_back(Tail); + } + } + + // Erase everything we no longer need in this function + for (Instruction *I : ToErase) + I->eraseFromParent(); + + // Free setjmpTable buffer before each return instruction + for (BasicBlock &BB : F) { + TerminatorInst *TI = BB.getTerminator(); + if (isa<ReturnInst>(TI)) + CallInst::CreateFree(SetjmpTable, TI); + } + + // Every call to saveSetjmp can change setjmpTable and setjmpTableSize + // (when buffer reallocation occurs) + // entry: + // setjmpTableSize = 4; + // setjmpTable = (int *) malloc(40); + // setjmpTable[0] = 0; + // ... + // somebb: + // setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize); + // setjmpTableSize = __tempRet0; + // So we need to make sure the SSA for these variables is valid so that every + // saveSetjmp and testSetjmp calls have the correct arguments. + SSAUpdater SetjmpTableSSA; + SSAUpdater SetjmpTableSizeSSA; + SetjmpTableSSA.Initialize(Type::getInt32PtrTy(C), "setjmpTable"); + SetjmpTableSizeSSA.Initialize(Type::getInt32Ty(C), "setjmpTableSize"); + for (Instruction *I : SetjmpTableInsts) + SetjmpTableSSA.AddAvailableValue(I->getParent(), I); + for (Instruction *I : SetjmpTableSizeInsts) + SetjmpTableSizeSSA.AddAvailableValue(I->getParent(), I); + + for (auto UI = SetjmpTable->use_begin(), UE = SetjmpTable->use_end(); + UI != UE;) { + // Grab the use before incrementing the iterator. + Use &U = *UI; + // Increment the iterator before removing the use from the list. + ++UI; + if (Instruction *I = dyn_cast<Instruction>(U.getUser())) + if (I->getParent() != &EntryBB) + SetjmpTableSSA.RewriteUse(U); + } + for (auto UI = SetjmpTableSize->use_begin(), UE = SetjmpTableSize->use_end(); + UI != UE;) { + Use &U = *UI; + ++UI; + if (Instruction *I = dyn_cast<Instruction>(U.getUser())) + if (I->getParent() != &EntryBB) + SetjmpTableSizeSSA.RewriteUse(U); + } + + // Finally, our modifications to the cfg can break dominance of SSA variables. 
+ // For example, in this code, + // if (x()) { .. setjmp() .. } + // if (y()) { .. longjmp() .. } + // We must split the longjmp block, and it can jump into the block splitted + // from setjmp one. But that means that when we split the setjmp block, it's + // first part no longer dominates its second part - there is a theoretically + // possible control flow path where x() is false, then y() is true and we + // reach the second part of the setjmp block, without ever reaching the first + // part. So, we rebuild SSA form here. + rebuildSSA(F); + return true; +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp index 225c5d3..ccf6a18 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp @@ -14,6 +14,9 @@ //===----------------------------------------------------------------------===// #include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblyISelLowering.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/Analysis.h" using namespace llvm; WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() {} @@ -23,3 +26,37 @@ void WebAssemblyFunctionInfo::initWARegs() { unsigned Reg = UnusedReg; WARegs.resize(MF.getRegInfo().getNumVirtRegs(), Reg); } + +void llvm::ComputeLegalValueVTs(const Function &F, const TargetMachine &TM, + Type *Ty, SmallVectorImpl<MVT> &ValueVTs) { + const DataLayout &DL(F.getParent()->getDataLayout()); + const WebAssemblyTargetLowering &TLI = + *TM.getSubtarget<WebAssemblySubtarget>(F).getTargetLowering(); + SmallVector<EVT, 4> VTs; + ComputeValueVTs(TLI, DL, Ty, VTs); + + for (EVT VT : VTs) { + unsigned NumRegs = TLI.getNumRegisters(F.getContext(), VT); + MVT RegisterVT = TLI.getRegisterType(F.getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) + ValueVTs.push_back(RegisterVT); + } +} + +void 
llvm::ComputeSignatureVTs(const Function &F, const TargetMachine &TM, + SmallVectorImpl<MVT> &Params, + SmallVectorImpl<MVT> &Results) { + ComputeLegalValueVTs(F, TM, F.getReturnType(), Results); + + if (Results.size() > 1) { + // WebAssembly currently can't lower returns of multiple values without + // demoting to sret (see WebAssemblyTargetLowering::CanLowerReturn). So + // replace multiple return values with a pointer parameter. + Results.clear(); + Params.push_back( + MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits())); + } + + for (auto &Arg : F.args()) + ComputeLegalValueVTs(F, TM, Arg.getType(), Params); +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h index 89f607d..756619b 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -27,6 +27,8 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo { MachineFunction &MF; std::vector<MVT> Params; + std::vector<MVT> Results; + std::vector<MVT> Locals; /// A mapping from CodeGen vreg index to WebAssembly register number. std::vector<unsigned> WARegs; @@ -44,6 +46,10 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo { // TLI::LowerVASTART unsigned VarargVreg = -1U; + // A virtual register holding the base pointer for functions that have + // overaligned values on the user stack. 
+ unsigned BasePtrVreg = -1U; + public: explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {} ~WebAssemblyFunctionInfo() override; @@ -51,15 +57,28 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo { void addParam(MVT VT) { Params.push_back(VT); } const std::vector<MVT> &getParams() const { return Params; } + void addResult(MVT VT) { Results.push_back(VT); } + const std::vector<MVT> &getResults() const { return Results; } + + void addLocal(MVT VT) { Locals.push_back(VT); } + const std::vector<MVT> &getLocals() const { return Locals; } + unsigned getVarargBufferVreg() const { assert(VarargVreg != -1U && "Vararg vreg hasn't been set"); return VarargVreg; } void setVarargBufferVreg(unsigned Reg) { VarargVreg = Reg; } + unsigned getBasePointerVreg() const { + assert(BasePtrVreg != -1U && "Base ptr vreg hasn't been set"); + return BasePtrVreg; + } + void setBasePointerVreg(unsigned Reg) { BasePtrVreg = Reg; } + static const unsigned UnusedReg = -1u; void stackifyVReg(unsigned VReg) { + assert(MF.getRegInfo().getUniqueVRegDef(VReg)); if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size()) VRegStackified.resize(TargetRegisterInfo::virtReg2Index(VReg) + 1); VRegStackified.set(TargetRegisterInfo::virtReg2Index(VReg)); @@ -88,6 +107,13 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo { } }; +void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM, + Type *Ty, SmallVectorImpl<MVT> &ValueVTs); + +void ComputeSignatureVTs(const Function &F, const TargetMachine &TM, + SmallVectorImpl<MVT> &Params, + SmallVectorImpl<MVT> &Results); + } // end namespace llvm #endif diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp index 473de7d..5a3a741 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp +++ 
b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp @@ -34,7 +34,7 @@ using namespace llvm; namespace { class WebAssemblyOptimizeLiveIntervals final : public MachineFunctionPass { - const char *getPassName() const override { + StringRef getPassName() const override { return "WebAssembly Optimize Live Intervals"; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp index 4dc401a..96520aa 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp @@ -24,7 +24,7 @@ using namespace llvm; namespace { class OptimizeReturned final : public FunctionPass, public InstVisitor<OptimizeReturned> { - const char *getPassName() const override { + StringRef getPassName() const override { return "WebAssembly Optimize Returned"; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp index 56d44e6..32dde88 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp @@ -31,7 +31,7 @@ static cl::opt<bool> DisableWebAssemblyFallthroughReturnOpt( namespace { class WebAssemblyPeephole final : public MachineFunctionPass { - const char *getPassName() const override { + StringRef getPassName() const override { return "WebAssembly late peephole optimizer"; } @@ -83,8 +83,8 @@ static bool MaybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB, if (&MI != &MBB.back()) return false; - // If the operand isn't stackified, insert a COPY_LOCAL to read the operand - // and stackify it. + // If the operand isn't stackified, insert a COPY to read the operand and + // stackify it. 
MachineOperand &MO = MI.getOperand(0); unsigned Reg = MO.getReg(); if (!MFI.isVRegStackified(Reg)) { @@ -119,25 +119,6 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { switch (MI.getOpcode()) { default: break; - case WebAssembly::STORE8_I32: - case WebAssembly::STORE16_I32: - case WebAssembly::STORE8_I64: - case WebAssembly::STORE16_I64: - case WebAssembly::STORE32_I64: - case WebAssembly::STORE_F32: - case WebAssembly::STORE_F64: - case WebAssembly::STORE_I32: - case WebAssembly::STORE_I64: { - // Store instructions return their value operand. If we ended up using - // the same register for both, replace it with a dead def so that it - // can use $drop instead. - MachineOperand &MO = MI.getOperand(0); - unsigned OldReg = MO.getReg(); - unsigned NewReg = - MI.getOperand(WebAssembly::StoreValueOperandNo).getReg(); - Changed |= MaybeRewriteToDrop(OldReg, NewReg, MO, MFI, MRI); - break; - } case WebAssembly::CALL_I32: case WebAssembly::CALL_I64: { MachineOperand &Op1 = MI.getOperand(1); @@ -169,22 +150,42 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { case WebAssembly::RETURN_I32: Changed |= MaybeRewriteToFallthrough( MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I32, - WebAssembly::COPY_LOCAL_I32); + WebAssembly::COPY_I32); break; case WebAssembly::RETURN_I64: Changed |= MaybeRewriteToFallthrough( MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I64, - WebAssembly::COPY_LOCAL_I64); + WebAssembly::COPY_I64); break; case WebAssembly::RETURN_F32: Changed |= MaybeRewriteToFallthrough( MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F32, - WebAssembly::COPY_LOCAL_F32); + WebAssembly::COPY_F32); break; case WebAssembly::RETURN_F64: Changed |= MaybeRewriteToFallthrough( MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F64, - WebAssembly::COPY_LOCAL_F64); + WebAssembly::COPY_F64); + break; + case WebAssembly::RETURN_v16i8: + Changed |= MaybeRewriteToFallthrough( + MI, MBB, MF, 
MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v16i8, + WebAssembly::COPY_V128); + break; + case WebAssembly::RETURN_v8i16: + Changed |= MaybeRewriteToFallthrough( + MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v8i16, + WebAssembly::COPY_V128); + break; + case WebAssembly::RETURN_v4i32: + Changed |= MaybeRewriteToFallthrough( + MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4i32, + WebAssembly::COPY_V128); + break; + case WebAssembly::RETURN_v4f32: + Changed |= MaybeRewriteToFallthrough( + MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4f32, + WebAssembly::COPY_V128); break; case WebAssembly::RETURN_VOID: if (!DisableWebAssemblyFallthroughReturnOpt && diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp index 30444ac..473dcb7 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp @@ -23,6 +23,7 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" +#include "WebAssemblyUtilities.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -40,7 +41,7 @@ public: WebAssemblyPrepareForLiveIntervals() : MachineFunctionPass(ID) {} private: - const char *getPassName() const override { + StringRef getPassName() const override { return "WebAssembly Prepare For LiveIntervals"; } @@ -58,23 +59,10 @@ FunctionPass *llvm::createWebAssemblyPrepareForLiveIntervals() { return new WebAssemblyPrepareForLiveIntervals(); } -/// Test whether the given instruction is an ARGUMENT. 
-static bool IsArgument(const MachineInstr *MI) { - switch (MI->getOpcode()) { - case WebAssembly::ARGUMENT_I32: - case WebAssembly::ARGUMENT_I64: - case WebAssembly::ARGUMENT_F32: - case WebAssembly::ARGUMENT_F64: - return true; - default: - return false; - } -} - // Test whether the given register has an ARGUMENT def. static bool HasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) { - for (auto &Def : MRI.def_instructions(Reg)) - if (IsArgument(&Def)) + for (const auto &Def : MRI.def_instructions(Reg)) + if (WebAssembly::isArgument(Def)) return true; return false; } @@ -122,10 +110,10 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(MachineFunction &M // Move ARGUMENT_* instructions to the top of the entry block, so that their // liveness reflects the fact that these really are live-in values. for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE; ) { - MachineInstr *MI = &*MII++; - if (IsArgument(MI)) { - MI->removeFromParent(); - Entry.insert(Entry.begin(), MI); + MachineInstr &MI = *MII++; + if (WebAssembly::isArgument(MI)) { + MI.removeFromParent(); + Entry.insert(Entry.begin(), &MI); } } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp index dedd910..5fd4a8d 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp @@ -35,7 +35,7 @@ public: static char ID; // Pass identification, replacement for typeid WebAssemblyRegColoring() : MachineFunctionPass(ID) {} - const char *getPassName() const override { + StringRef getPassName() const override { return "WebAssembly Register Coloring"; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp index 4a8fd96..e347082 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp +++ 
b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" +#include "WebAssemblyUtilities.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -32,7 +33,7 @@ using namespace llvm; namespace { class WebAssemblyRegNumbering final : public MachineFunctionPass { - const char *getPassName() const override { + StringRef getPassName() const override { return "WebAssembly Register Numbering"; } @@ -68,20 +69,13 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) { // variables. Assign the numbers for them first. MachineBasicBlock &EntryMBB = MF.front(); for (MachineInstr &MI : EntryMBB) { - switch (MI.getOpcode()) { - case WebAssembly::ARGUMENT_I32: - case WebAssembly::ARGUMENT_I64: - case WebAssembly::ARGUMENT_F32: - case WebAssembly::ARGUMENT_F64: { - int64_t Imm = MI.getOperand(1).getImm(); - DEBUG(dbgs() << "Arg VReg " << MI.getOperand(0).getReg() << " -> WAReg " - << Imm << "\n"); - MFI.setWAReg(MI.getOperand(0).getReg(), Imm); + if (!WebAssembly::isArgument(MI)) break; - } - default: - break; - } + + int64_t Imm = MI.getOperand(1).getImm(); + DEBUG(dbgs() << "Arg VReg " << MI.getOperand(0).getReg() << " -> WAReg " + << Imm << "\n"); + MFI.setWAReg(MI.getOperand(0).getReg(), Imm); } // Then assign regular WebAssembly registers for all remaining used diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 0aa3b62..32ee09e 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -13,10 +13,10 @@ /// This pass reorders instructions to put register uses and defs in an order /// such that they form single-use expression trees. 
Registers fitting this form /// are then marked as "stackified", meaning references to them are replaced by -/// "push" and "pop" from the stack. +/// "push" and "pop" from the value stack. /// /// This is primarily a code size optimization, since temporary values on the -/// expression don't need to be named. +/// value stack don't need to be named. /// //===----------------------------------------------------------------------===// @@ -24,6 +24,7 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_* #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" +#include "WebAssemblyUtilities.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -39,7 +40,7 @@ using namespace llvm; namespace { class WebAssemblyRegStackify final : public MachineFunctionPass { - const char *getPassName() const override { + StringRef getPassName() const override { return "WebAssembly Register Stackify"; } @@ -73,19 +74,50 @@ FunctionPass *llvm::createWebAssemblyRegStackify() { // expression stack ordering constraints for an instruction which is on // the expression stack. static void ImposeStackOrdering(MachineInstr *MI) { - // Write the opaque EXPR_STACK register. - if (!MI->definesRegister(WebAssembly::EXPR_STACK)) - MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK, + // Write the opaque VALUE_STACK register. + if (!MI->definesRegister(WebAssembly::VALUE_STACK)) + MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK, /*isDef=*/true, /*isImp=*/true)); - // Also read the opaque EXPR_STACK register. - if (!MI->readsRegister(WebAssembly::EXPR_STACK)) - MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK, + // Also read the opaque VALUE_STACK register. 
+ if (!MI->readsRegister(WebAssembly::VALUE_STACK)) + MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK, /*isDef=*/false, /*isImp=*/true)); } +// Convert an IMPLICIT_DEF instruction into an instruction which defines +// a constant zero value. +static void ConvertImplicitDefToConstZero(MachineInstr *MI, + MachineRegisterInfo &MRI, + const TargetInstrInfo *TII, + MachineFunction &MF) { + assert(MI->getOpcode() == TargetOpcode::IMPLICIT_DEF); + + const auto *RegClass = + MRI.getRegClass(MI->getOperand(0).getReg()); + if (RegClass == &WebAssembly::I32RegClass) { + MI->setDesc(TII->get(WebAssembly::CONST_I32)); + MI->addOperand(MachineOperand::CreateImm(0)); + } else if (RegClass == &WebAssembly::I64RegClass) { + MI->setDesc(TII->get(WebAssembly::CONST_I64)); + MI->addOperand(MachineOperand::CreateImm(0)); + } else if (RegClass == &WebAssembly::F32RegClass) { + MI->setDesc(TII->get(WebAssembly::CONST_F32)); + ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue( + Type::getFloatTy(MF.getFunction()->getContext()))); + MI->addOperand(MachineOperand::CreateFPImm(Val)); + } else if (RegClass == &WebAssembly::F64RegClass) { + MI->setDesc(TII->get(WebAssembly::CONST_F64)); + ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue( + Type::getDoubleTy(MF.getFunction()->getContext()))); + MI->addOperand(MachineOperand::CreateFPImm(Val)); + } else { + llvm_unreachable("Unexpected reg class"); + } +} + // Determine whether a call to the callee referenced by // MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side // effects. @@ -130,7 +162,7 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read, return; // Check for loads. - if (MI.mayLoad() && !MI.isInvariantLoad(&AA)) + if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(&AA)) Read = true; // Check for stores. 
@@ -255,7 +287,7 @@ static bool HasOneUse(unsigned Reg, MachineInstr *Def, const VNInfo *DefVNI = LI.getVNInfoAt( LIS.getInstructionIndex(*Def).getRegSlot()); assert(DefVNI); - for (auto I : MRI.use_nodbg_operands(Reg)) { + for (auto &I : MRI.use_nodbg_operands(Reg)) { const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent())); if (Result.valueIn() == DefVNI) { if (!Result.isKill()) @@ -274,11 +306,11 @@ static bool HasOneUse(unsigned Reg, MachineInstr *Def, // TODO: Compute memory dependencies in a way that uses AliasAnalysis to be // more precise. static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, - AliasAnalysis &AA, const LiveIntervals &LIS, - const MachineRegisterInfo &MRI) { + AliasAnalysis &AA, const MachineRegisterInfo &MRI) { assert(Def->getParent() == Insert->getParent()); // Check for register dependencies. + SmallVector<unsigned, 4> MutableRegisters; for (const MachineOperand &MO : Def->operands()) { if (!MO.isReg() || MO.isUndef()) continue; @@ -301,21 +333,11 @@ static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, return false; } - // Ask LiveIntervals whether moving this virtual register use or def to - // Insert will change which value numbers are seen. - // - // If the operand is a use of a register that is also defined in the same - // instruction, test that the newly defined value reaches the insert point, - // since the operand will be moving along with the def. - const LiveInterval &LI = LIS.getInterval(Reg); - VNInfo *DefVNI = - (MO.isDef() || Def->definesRegister(Reg)) ? 
- LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot()) : - LI.getVNInfoBefore(LIS.getInstructionIndex(*Def)); - assert(DefVNI && "Instruction input missing value number"); - VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*Insert)); - if (InsVNI && DefVNI != InsVNI) - return false; + // If one of the operands isn't in SSA form, it has different values at + // different times, and we need to make sure we don't move our use across + // a different def. + if (!MO.isDef() && !MRI.hasOneDef(Reg)) + MutableRegisters.push_back(Reg); } bool Read = false, Write = false, Effects = false, StackPointer = false; @@ -323,7 +345,8 @@ static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, // If the instruction does not access memory and has no side effects, it has // no additional dependencies. - if (!Read && !Write && !Effects && !StackPointer) + bool HasMutableRegisters = !MutableRegisters.empty(); + if (!Read && !Write && !Effects && !StackPointer && !HasMutableRegisters) return true; // Scan through the intervening instructions between Def and Insert. 
@@ -343,6 +366,11 @@ static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, return false; if (StackPointer && InterveningStackPointer) return false; + + for (unsigned Reg : MutableRegisters) + for (const MachineOperand &MO : I->operands()) + if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) + return false; } return true; @@ -360,7 +388,7 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse, const MachineInstr *OneUseInst = OneUse.getParent(); VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst)); - for (const MachineOperand &Use : MRI.use_operands(Reg)) { + for (const MachineOperand &Use : MRI.use_nodbg_operands(Reg)) { if (&Use == &OneUse) continue; @@ -384,7 +412,7 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse, // // This is needed as a consequence of using implicit get_locals for // uses and implicit set_locals for defs. - if (UseInst->getDesc().getNumDefs() == 0) + if (UseInst->getDesc().getNumDefs() == 0) return false; const MachineOperand &MO = UseInst->getOperand(0); if (!MO.isReg()) @@ -408,16 +436,18 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse, return true; } -/// Get the appropriate tee_local opcode for the given register class. -static unsigned GetTeeLocalOpcode(const TargetRegisterClass *RC) { +/// Get the appropriate tee opcode for the given register class. 
+static unsigned GetTeeOpcode(const TargetRegisterClass *RC) { if (RC == &WebAssembly::I32RegClass) - return WebAssembly::TEE_LOCAL_I32; + return WebAssembly::TEE_I32; if (RC == &WebAssembly::I64RegClass) - return WebAssembly::TEE_LOCAL_I64; + return WebAssembly::TEE_I64; if (RC == &WebAssembly::F32RegClass) - return WebAssembly::TEE_LOCAL_F32; + return WebAssembly::TEE_F32; if (RC == &WebAssembly::F64RegClass) - return WebAssembly::TEE_LOCAL_F64; + return WebAssembly::TEE_F64; + if (RC == &WebAssembly::V128RegClass) + return WebAssembly::TEE_V128; llvm_unreachable("Unexpected register class"); } @@ -515,8 +545,8 @@ static MachineInstr *RematerializeCheapDef( /// A multiple-use def in the same block with no intervening memory or register /// dependencies; move the def down, nest it with the current instruction, and -/// insert a tee_local to satisfy the rest of the uses. As an illustration, -/// rewrite this: +/// insert a tee to satisfy the rest of the uses. As an illustration, rewrite +/// this: /// /// Reg = INST ... // Def /// INST ..., Reg, ... // Insert @@ -526,7 +556,7 @@ static MachineInstr *RematerializeCheapDef( /// to this: /// /// DefReg = INST ... // Def (to become the new Insert) -/// TeeReg, Reg = TEE_LOCAL_... DefReg +/// TeeReg, Reg = TEE_... DefReg /// INST ..., TeeReg, ... // Insert /// INST ..., Reg, ... /// INST ..., Reg, ... 
@@ -549,7 +579,7 @@ static MachineInstr *MoveAndTeeForMultiUse( unsigned DefReg = MRI.createVirtualRegister(RegClass); MachineOperand &DefMO = Def->getOperand(0); MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(), - TII->get(GetTeeLocalOpcode(RegClass)), TeeReg) + TII->get(GetTeeOpcode(RegClass)), TeeReg) .addReg(Reg, RegState::Define) .addReg(DefReg, getUndefRegState(DefMO.isDead())); Op.setReg(TeeReg); @@ -749,8 +779,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { if (TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - // Identify the definition for this register at this point. Most - // registers are in SSA form here so we try a quick MRI query first. + // Identify the definition for this register at this point. MachineInstr *Def = GetVRegDef(Reg, Insert, MRI, LIS); if (!Def) continue; @@ -762,20 +791,17 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { // Argument instructions represent live-in registers and not real // instructions. - if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 || - Def->getOpcode() == WebAssembly::ARGUMENT_I64 || - Def->getOpcode() == WebAssembly::ARGUMENT_F32 || - Def->getOpcode() == WebAssembly::ARGUMENT_F64) + if (WebAssembly::isArgument(*Def)) continue; // Decide which strategy to take. Prefer to move a single-use value - // over cloning it, and prefer cloning over introducing a tee_local. + // over cloning it, and prefer cloning over introducing a tee. // For moving, we require the def to be in the same block as the use; // this makes things simpler (LiveIntervals' handleMove function only // supports intra-block moves) and it's MachineSink's job to catch all // the sinking opportunities anyway. 
bool SameBlock = Def->getParent() == &MBB; - bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, LIS, MRI) && + bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, MRI) && !TreeWalker.IsOnStack(Reg); if (CanMove && HasOneUse(Reg, Def, MRI, MDT, LIS)) { Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI); @@ -796,6 +822,12 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { continue; } + // If the instruction we just stackified is an IMPLICIT_DEF, convert it + // to a constant 0 so that the def is explicit, and the push/pop + // correspondence is maintained. + if (Insert->getOpcode() == TargetOpcode::IMPLICIT_DEF) + ConvertImplicitDefToConstZero(Insert, MRI, TII, MF); + // We stackified an operand. Add the defining instruction's operands to // the worklist stack now to continue to build an ever deeper tree. Commuting.Reset(); @@ -806,19 +838,18 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { // the next instruction we can build a tree on. if (Insert != &*MII) { ImposeStackOrdering(&*MII); - MII = std::prev( - llvm::make_reverse_iterator(MachineBasicBlock::iterator(Insert))); + MII = MachineBasicBlock::iterator(Insert).getReverse(); Changed = true; } } } - // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere so + // If we used VALUE_STACK anywhere, add it to the live-in sets everywhere so // that it never looks like a use-before-def. 
if (Changed) { - MF.getRegInfo().addLiveIn(WebAssembly::EXPR_STACK); + MF.getRegInfo().addLiveIn(WebAssembly::VALUE_STACK); for (MachineBasicBlock &MBB : MF) - MBB.addLiveIn(WebAssembly::EXPR_STACK); + MBB.addLiveIn(WebAssembly::VALUE_STACK); } #ifndef NDEBUG diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index 239fe89..9367464 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -61,19 +61,25 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( MachineFunction &MF = *MBB.getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); - const MachineFrameInfo &MFI = *MF.getFrameInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); int64_t FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex); + assert(MFI.getObjectSize(FrameIndex) != 0 && + "We assume that variable-sized objects have already been lowered, " + "and don't use FrameIndex operands."); + unsigned FrameRegister = getFrameRegister(MF); + // If this is the address operand of a load or store, make it relative to SP // and fold the frame offset directly in. 
- if (MI.mayLoadOrStore() && FIOperandNum == WebAssembly::MemOpAddressOperandNo) { - assert(FrameOffset >= 0 && MI.getOperand(1).getImm() >= 0); - int64_t Offset = MI.getOperand(1).getImm() + FrameOffset; + if ((MI.mayLoad() && FIOperandNum == WebAssembly::LoadAddressOperandNo) || + (MI.mayStore() && FIOperandNum == WebAssembly::StoreAddressOperandNo)) { + assert(FrameOffset >= 0 && MI.getOperand(FIOperandNum - 1).getImm() >= 0); + int64_t Offset = MI.getOperand(FIOperandNum - 1).getImm() + FrameOffset; if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) { MI.getOperand(FIOperandNum - 1).setImm(Offset); MI.getOperand(FIOperandNum) - .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); + .ChangeToRegister(FrameRegister, /*IsDef=*/false); return; } } @@ -94,7 +100,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( MachineOperand &ImmMO = Def->getOperand(1); ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset)); MI.getOperand(FIOperandNum) - .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); + .ChangeToRegister(FrameRegister, /*IsDef=*/false); return; } } @@ -104,7 +110,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( // Otherwise create an i32.add SP, offset and make it the operand. const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); - unsigned FIRegOperand = WebAssembly::SP32; + unsigned FIRegOperand = FrameRegister; if (FrameOffset) { // Create i32.add SP, offset and make it the operand. 
const TargetRegisterClass *PtrRC = @@ -116,7 +122,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( FIRegOperand = MRI.createVirtualRegister(PtrRC); BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32), FIRegOperand) - .addReg(WebAssembly::SP32) + .addReg(FrameRegister) .addReg(OffsetOp); } MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false); diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td index 80a83fa..9088810 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td @@ -39,9 +39,11 @@ def SP64 : WebAssemblyReg<"%SP64">; def F32_0 : WebAssemblyReg<"%f32.0">; def F64_0 : WebAssemblyReg<"%f64.0">; -// The expression stack "register". This is an opaque entity which serves to -// order uses and defs that must remain in LIFO order. -def EXPR_STACK : WebAssemblyReg<"STACK">; +def V128_0: WebAssemblyReg<"%v128">; + +// The value stack "register". This is an opaque entity which serves to order +// uses and defs that must remain in LIFO order. +def VALUE_STACK : WebAssemblyReg<"STACK">; // The incoming arguments "register". 
This is an opaque entity which serves to // order the ARGUMENT instructions that are emulating live-in registers and @@ -56,3 +58,5 @@ def I32 : WebAssemblyRegClass<[i32], 32, (add FP32, SP32)>; def I64 : WebAssemblyRegClass<[i64], 64, (add FP64, SP64)>; def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>; def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>; +def V128 : WebAssemblyRegClass<[v4f32, v4i32, v16i8, v8i16], 128, (add V128_0)>; + diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp index 11bda47..9e944df 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp @@ -39,7 +39,7 @@ public: WebAssemblyReplacePhysRegs() : MachineFunctionPass(ID) {} private: - const char *getPassName() const override { + StringRef getPassName() const override { return "WebAssembly Replace Physical Registers"; } @@ -76,7 +76,7 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) { for (unsigned PReg = WebAssembly::NoRegister + 1; PReg < WebAssembly::NUM_TARGET_REGS; ++PReg) { // Skip fake registers that are never used explicitly. - if (PReg == WebAssembly::EXPR_STACK || PReg == WebAssembly::ARGUMENTS) + if (PReg == WebAssembly::VALUE_STACK || PReg == WebAssembly::ARGUMENTS) continue; // Replace explicit uses of the physical register with a virtual register. 
@@ -88,6 +88,8 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) { if (VReg == WebAssembly::NoRegister) VReg = MRI.createVirtualRegister(RC); MO.setReg(VReg); + if (MO.getParent()->isDebugValue()) + MO.setIsDebug(); Changed = true; } } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp index 4ebea68..2441ead 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -30,7 +30,7 @@ public: static char ID; // Pass identification, replacement for typeid WebAssemblySetP2AlignOperands() : MachineFunctionPass(ID) {} - const char *getPassName() const override { + StringRef getPassName() const override { return "WebAssembly Set p2align Operands"; } @@ -50,6 +50,27 @@ FunctionPass *llvm::createWebAssemblySetP2AlignOperands() { return new WebAssemblySetP2AlignOperands(); } +static void RewriteP2Align(MachineInstr &MI, unsigned OperandNo) { + assert(MI.getOperand(OperandNo).getImm() == 0 && + "ISel should set p2align operands to 0"); + assert(MI.hasOneMemOperand() && + "Load and store instructions have exactly one mem operand"); + assert((*MI.memoperands_begin())->getSize() == + (UINT64_C(1) + << WebAssembly::GetDefaultP2Align(MI.getOpcode())) && + "Default p2align value should be natural"); + assert(MI.getDesc().OpInfo[OperandNo].OperandType == + WebAssembly::OPERAND_P2ALIGN && + "Load and store instructions should have a p2align operand"); + uint64_t P2Align = Log2_64((*MI.memoperands_begin())->getAlignment()); + + // WebAssembly does not currently support supernatural alignment. 
+ P2Align = std::min( + P2Align, uint64_t(WebAssembly::GetDefaultP2Align(MI.getOpcode()))); + + MI.getOperand(OperandNo).setImm(P2Align); +} + bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) { DEBUG({ dbgs() << "********** Set p2align Operands **********\n" @@ -75,6 +96,8 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) { case WebAssembly::LOAD16_U_I64: case WebAssembly::LOAD32_S_I64: case WebAssembly::LOAD32_U_I64: + RewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo); + break; case WebAssembly::STORE_I32: case WebAssembly::STORE_I64: case WebAssembly::STORE_F32: @@ -83,27 +106,9 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) { case WebAssembly::STORE16_I32: case WebAssembly::STORE8_I64: case WebAssembly::STORE16_I64: - case WebAssembly::STORE32_I64: { - assert(MI.getOperand(3).getImm() == 0 && - "ISel should set p2align operands to 0"); - assert(MI.hasOneMemOperand() && - "Load and store instructions have exactly one mem operand"); - assert((*MI.memoperands_begin())->getSize() == - (UINT64_C(1) - << WebAssembly::GetDefaultP2Align(MI.getOpcode())) && - "Default p2align value should be natural"); - assert(MI.getDesc().OpInfo[3].OperandType == - WebAssembly::OPERAND_P2ALIGN && - "Load and store instructions should have a p2align operand"); - uint64_t P2Align = Log2_64((*MI.memoperands_begin())->getAlignment()); - - // WebAssembly does not currently support supernatural alignment. 
- P2Align = std::min( - P2Align, uint64_t(WebAssembly::GetDefaultP2Align(MI.getOpcode()))); - - MI.getOperand(3).setImm(P2Align); + case WebAssembly::STORE32_I64: + RewriteP2Align(MI, WebAssembly::StoreP2AlignOperandNo); break; - } default: break; } diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp index 1e9a773..34ec6f2 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -46,9 +46,7 @@ public: static char ID; // Pass identification, replacement for typeid WebAssemblyStoreResults() : MachineFunctionPass(ID) {} - const char *getPassName() const override { - return "WebAssembly Store Results"; - } + StringRef getPassName() const override { return "WebAssembly Store Results"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -90,7 +88,7 @@ static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI, SmallVector<SlotIndex, 4> Indices; - for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) { + for (auto I = MRI.use_nodbg_begin(FromReg), E = MRI.use_nodbg_end(); I != E;) { MachineOperand &O = *I++; MachineInstr *Where = O.getParent(); @@ -139,15 +137,6 @@ static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI, return Changed; } -static bool optimizeStore(MachineBasicBlock &MBB, MachineInstr &MI, - const MachineRegisterInfo &MRI, - MachineDominatorTree &MDT, - LiveIntervals &LIS) { - unsigned ToReg = MI.getOperand(0).getReg(); - unsigned FromReg = MI.getOperand(WebAssembly::StoreValueOperandNo).getReg(); - return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS); -} - static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI, const MachineRegisterInfo &MRI, MachineDominatorTree &MDT, @@ -202,17 +191,6 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { switch 
(MI.getOpcode()) { default: break; - case WebAssembly::STORE8_I32: - case WebAssembly::STORE16_I32: - case WebAssembly::STORE8_I64: - case WebAssembly::STORE16_I64: - case WebAssembly::STORE32_I64: - case WebAssembly::STORE_F32: - case WebAssembly::STORE_F64: - case WebAssembly::STORE_I32: - case WebAssembly::STORE_I64: - Changed |= optimizeStore(MBB, MI, MRI, MDT, LIS); - break; case WebAssembly::CALL_I32: case WebAssembly::CALL_I64: Changed |= optimizeCall(MBB, MI, MRI, MDT, LIS, TLI, LibInfo); diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 32154af..f5ef35a 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -29,10 +29,28 @@ using namespace llvm; #define DEBUG_TYPE "wasm" +// Emscripten's asm.js-style exception handling +static cl::opt<bool> EnableEmException( + "enable-emscripten-cxx-exceptions", + cl::desc("WebAssembly Emscripten-style exception handling"), + cl::init(false)); + +// Emscripten's asm.js-style setjmp/longjmp handling +static cl::opt<bool> EnableEmSjLj( + "enable-emscripten-sjlj", + cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"), + cl::init(false)); + extern "C" void LLVMInitializeWebAssemblyTarget() { // Register the target. 
- RegisterTargetMachine<WebAssemblyTargetMachine> X(TheWebAssemblyTarget32); - RegisterTargetMachine<WebAssemblyTargetMachine> Y(TheWebAssemblyTarget64); + RegisterTargetMachine<WebAssemblyTargetMachine> X( + getTheWebAssemblyTarget32()); + RegisterTargetMachine<WebAssemblyTargetMachine> Y( + getTheWebAssemblyTarget64()); + + // Register exception handling pass to opt + initializeWebAssemblyLowerEmscriptenEHSjLjPass( + *PassRegistry::getPassRegistry()); } //===----------------------------------------------------------------------===// @@ -57,10 +75,10 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine( TT, CPU, FS, Options, getEffectiveRelocModel(RM), CM, OL), TLOF(make_unique<WebAssemblyTargetObjectFile>()) { - // WebAssembly type-checks expressions, but a noreturn function with a return + // WebAssembly type-checks instructions, but a noreturn function with a return // type that doesn't match the context will cause a check failure. So we lower // LLVM 'unreachable' to ISD::TRAP and then lower that to WebAssembly's - // 'unreachable' expression which is meant for that case. + // 'unreachable' instructions which is meant for that case. this->Options.TrapUnreachable = true; initAsmInfo(); @@ -145,10 +163,31 @@ void WebAssemblyPassConfig::addIRPasses() { // control specifically what gets lowered. addPass(createAtomicExpandPass(TM)); + // Fix function bitcasts, as WebAssembly requires caller and callee signatures + // to match. + addPass(createWebAssemblyFixFunctionBitcasts()); + // Optimize "returned" function attributes. if (getOptLevel() != CodeGenOpt::None) addPass(createWebAssemblyOptimizeReturned()); + // If exception handling is not enabled and setjmp/longjmp handling is + // enabled, we lower invokes into calls and delete unreachable landingpad + // blocks. 
Lowering invokes when there is no EH support is done in + // TargetPassConfig::addPassesToHandleExceptions, but this runs after this + // function and SjLj handling expects all invokes to be lowered before. + if (!EnableEmException) { + addPass(createLowerInvokePass()); + // The lower invoke pass may create unreachable code. Remove it in order not + // to process dead blocks in setjmp/longjmp handling. + addPass(createUnreachableBlockEliminationPass()); + } + + // Handle exceptions and setjmp/longjmp if enabled. + if (EnableEmException || EnableEmSjLj) + addPass(createWebAssemblyLowerEmscriptenEHSjLj(EnableEmException, + EnableEmSjLj)); + TargetPassConfig::addIRPasses(); } @@ -175,7 +214,7 @@ void WebAssemblyPassConfig::addPostRegAlloc() { // Has no asserts of its own, but was not written to handle virtual regs. disablePass(&ShrinkWrapID); - // These functions all require the AllVRegsAllocated property. + // These functions all require the NoVRegs property. disablePass(&MachineCopyPropagationID); disablePass(&PostRASchedulerID); disablePass(&FuncletLayoutID); @@ -194,6 +233,11 @@ void WebAssemblyPassConfig::addPreEmitPass() { // colored, and numbered with the rest of the registers. addPass(createWebAssemblyReplacePhysRegs()); + // Rewrite pseudo call_indirect instructions as real instructions. + // This needs to run before register stackification, because we change the + // order of the arguments. + addPass(createWebAssemblyCallIndirectFixup()); + if (getOptLevel() != CodeGenOpt::None) { // LiveIntervals isn't commonly run this late. Re-establish preconditions. addPass(createWebAssemblyPrepareForLiveIntervals()); @@ -204,7 +248,7 @@ void WebAssemblyPassConfig::addPreEmitPass() { // Prepare store instructions for register stackifying. addPass(createWebAssemblyStoreResults()); - // Mark registers as representing wasm's expression stack. This is a key + // Mark registers as representing wasm's value stack. This is a key // code-compression technique in WebAssembly. 
We run this pass (and // StoreResults above) very late, so that it sees as much code as possible, // including code emitted by PEI and expanded by late tail duplication. @@ -216,6 +260,9 @@ void WebAssemblyPassConfig::addPreEmitPass() { addPass(createWebAssemblyRegColoring()); } + // Insert explicit get_local and set_local operators. + addPass(createWebAssemblyExplicitLocals()); + // Eliminate multiple-entry loops. addPass(createWebAssemblyFixIrreducibleControlFlow()); diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index bf546da..47aadf9 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -46,7 +46,7 @@ unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) { unsigned WebAssemblyTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo) { + TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) { unsigned Cost = BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost( Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index fe99e96..f658609 100644 --- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -42,13 +42,6 @@ public: : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} - // Provide value semantics. MSVC requires that we spell all of these out. 
- WebAssemblyTTIImpl(const WebAssemblyTTIImpl &Arg) - : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} - WebAssemblyTTIImpl(WebAssemblyTTIImpl &&Arg) - : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), - TLI(std::move(Arg.TLI)) {} - /// \name Scalar TTI Implementations /// @{ @@ -68,7 +61,8 @@ public: TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, - TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); + TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, + ArrayRef<const Value *> Args = ArrayRef<const Value *>()); unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); /// @} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp new file mode 100644 index 0000000..a0049c1 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp @@ -0,0 +1,71 @@ +//===-- WebAssemblyUtilities.cpp - WebAssembly Utility Functions ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements several utility functions for WebAssembly. 
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyUtilities.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +using namespace llvm; + +bool WebAssembly::isArgument(const MachineInstr &MI) { + switch (MI.getOpcode()) { + case WebAssembly::ARGUMENT_I32: + case WebAssembly::ARGUMENT_I64: + case WebAssembly::ARGUMENT_F32: + case WebAssembly::ARGUMENT_F64: + case WebAssembly::ARGUMENT_v16i8: + case WebAssembly::ARGUMENT_v8i16: + case WebAssembly::ARGUMENT_v4i32: + case WebAssembly::ARGUMENT_v4f32: + return true; + default: + return false; + } +} + +bool WebAssembly::isCopy(const MachineInstr &MI) { + switch (MI.getOpcode()) { + case WebAssembly::COPY_I32: + case WebAssembly::COPY_I64: + case WebAssembly::COPY_F32: + case WebAssembly::COPY_F64: + return true; + default: + return false; + } +} + +bool WebAssembly::isTee(const MachineInstr &MI) { + switch (MI.getOpcode()) { + case WebAssembly::TEE_I32: + case WebAssembly::TEE_I64: + case WebAssembly::TEE_F32: + case WebAssembly::TEE_F64: + return true; + default: + return false; + } +} + +/// Test whether MI is a child of some other node in an expression tree. 
+bool WebAssembly::isChild(const MachineInstr &MI, + const WebAssemblyFunctionInfo &MFI) { + if (MI.getNumOperands() == 0) + return false; + const MachineOperand &MO = MI.getOperand(0); + if (!MO.isReg() || MO.isImplicit() || !MO.isDef()) + return false; + unsigned Reg = MO.getReg(); + return TargetRegisterInfo::isVirtualRegister(Reg) && + MFI.isVRegStackified(Reg); +} diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h new file mode 100644 index 0000000..eb11440 --- /dev/null +++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h @@ -0,0 +1,34 @@ +//===-- WebAssemblyUtilities - WebAssembly Utility Functions ---*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the declaration of the WebAssembly-specific +/// utility functions. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYUTILITIES_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYUTILITIES_H + +namespace llvm { + +class MachineInstr; +class WebAssemblyFunctionInfo; + +namespace WebAssembly { + +bool isArgument(const MachineInstr &MI); +bool isCopy(const MachineInstr &MI); +bool isTee(const MachineInstr &MI); +bool isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI); + +} // end namespace WebAssembly +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt index f074000..8dd5e8a 100644 --- a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt +++ b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -66,4 +66,3 @@ pr41935.c 920728-1.c pr28865.c widechar-2.c -pr41463.c |