diff options
35 files changed, 3283 insertions, 1502 deletions
diff --git a/contrib/gcc/ChangeLog.gcc43 b/contrib/gcc/ChangeLog.gcc43 index 3f893ac..05f0107 100644 --- a/contrib/gcc/ChangeLog.gcc43 +++ b/contrib/gcc/ChangeLog.gcc43 @@ -1,3 +1,9 @@ +2007-06-05 Joerg Wunsch <j.gnu@uriah.heep.sax.de> (r23479) + + PR preprocessor/23479 + * doc/extend.texi: Document the 0b-prefixed binary integer + constant extension. + 2007-05-01 Dwarakanath Rajagopal <dwarak.rajagopal@amd.com> (r124341) * doc/invoke.texi: Fix typo, 'AMD Family 10h core' instead of diff --git a/contrib/gcc/config/arm/libunwind.S b/contrib/gcc/config/arm/libunwind.S index 06e1310..81e4236 100644 --- a/contrib/gcc/config/arm/libunwind.S +++ b/contrib/gcc/config/arm/libunwind.S @@ -116,5 +116,6 @@ UNWIND_WRAPPER _Unwind_RaiseException 1 UNWIND_WRAPPER _Unwind_Resume 1 UNWIND_WRAPPER _Unwind_Resume_or_Rethrow 1 UNWIND_WRAPPER _Unwind_ForcedUnwind 3 +UNWIND_WRAPPER _Unwind_Backtrace 2 -#endif /* __symbian__ */ +#endif /* ndef __symbian__ */ diff --git a/contrib/gcc/config/arm/unwind-arm.c b/contrib/gcc/config/arm/unwind-arm.c index 9d2513b..47354b4 100644 --- a/contrib/gcc/config/arm/unwind-arm.c +++ b/contrib/gcc/config/arm/unwind-arm.c @@ -747,6 +747,66 @@ _Unwind_DeleteException (_Unwind_Exception * exc) } +/* Perform stack backtrace through unwind data. */ +_Unwind_Reason_Code +__gnu_Unwind_Backtrace(_Unwind_Trace_Fn trace, void * trace_argument, + phase2_vrs * entry_vrs); +_Unwind_Reason_Code +__gnu_Unwind_Backtrace(_Unwind_Trace_Fn trace, void * trace_argument, + phase2_vrs * entry_vrs) +{ + phase1_vrs saved_vrs; + _Unwind_Reason_Code code; + + _Unwind_Control_Block ucb; + _Unwind_Control_Block *ucbp = &ucb; + + /* Set the pc to the call site. */ + entry_vrs->core.r[R_PC] = entry_vrs->core.r[R_LR]; + + /* Save the core registers. */ + saved_vrs.core = entry_vrs->core; + /* Set demand-save flags. */ + saved_vrs.demand_save_flags = ~(_uw) 0; + + do + { + /* Find the entry for this routine. 
*/ + if (get_eit_entry (ucbp, saved_vrs.core.r[R_PC]) != _URC_OK) + { + code = _URC_FAILURE; + break; + } + + /* The dwarf unwinder assumes the context structure holds things + like the function and LSDA pointers. The ARM implementation + caches these in the exception header (UCB). To avoid + rewriting everything we make the virtual IP register point at + the UCB. */ + _Unwind_SetGR((_Unwind_Context *)&saved_vrs, 12, (_Unwind_Ptr) ucbp); + + /* Call trace function. */ + if ((*trace) ((_Unwind_Context *) &saved_vrs, trace_argument) + != _URC_NO_REASON) + { + code = _URC_FAILURE; + break; + } + + /* Call the pr to decide what to do. */ + code = ((personality_routine) UCB_PR_ADDR (ucbp)) + (_US_VIRTUAL_UNWIND_FRAME | _US_FORCE_UNWIND, + ucbp, (void *) &saved_vrs); + } + while (code != _URC_END_OF_STACK + && code != _URC_FAILURE); + + finish: + restore_non_core_regs (&saved_vrs); + return code; +} + + /* Common implementation for ARM ABI defined personality routines. ID is the index of the personality routine, other arguments are as defined by __aeabi_unwind_cpp_pr{0,1,2}. 
*/ @@ -1014,3 +1074,19 @@ _Unwind_GetTextRelBase (_Unwind_Context *context __attribute__ ((unused))) { abort (); } + +#ifdef __FreeBSD__ +/* FreeBSD expects these to be functions */ +_Unwind_Ptr +_Unwind_GetIP (struct _Unwind_Context *context) +{ + return _Unwind_GetGR (context, 15) & ~(_Unwind_Word)1; +} + +_Unwind_Ptr +_Unwind_GetIPInfo (struct _Unwind_Context *context, int *ip_before_insn) +{ + *ip_before_insn = 0; + return _Unwind_GetGR (context, 15) & ~(_Unwind_Word)1; +} +#endif diff --git a/contrib/gcc/config/arm/unwind-arm.h b/contrib/gcc/config/arm/unwind-arm.h index 0811f2c..8a5c5ce 100644 --- a/contrib/gcc/config/arm/unwind-arm.h +++ b/contrib/gcc/config/arm/unwind-arm.h @@ -205,6 +205,13 @@ extern "C" { _Unwind_Control_Block *, struct _Unwind_Context *, void *); _Unwind_Reason_Code _Unwind_ForcedUnwind (_Unwind_Control_Block *, _Unwind_Stop_Fn, void *); + /* @@@ Use unwind data to perform a stack backtrace. The trace callback + is called for every stack frame in the call chain, but no cleanup + actions are performed. */ + typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn) (_Unwind_Context *, void *); + _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, + void*); + _Unwind_Word _Unwind_GetCFA (struct _Unwind_Context *); void _Unwind_Complete(_Unwind_Control_Block *ucbp); void _Unwind_DeleteException (_Unwind_Exception *); @@ -246,12 +253,17 @@ extern "C" { return val; } +#ifndef __FreeBSD__ /* Return the address of the instruction, not the actual IP value. 
*/ #define _Unwind_GetIP(context) \ (_Unwind_GetGR (context, 15) & ~(_Unwind_Word)1) #define _Unwind_GetIPInfo(context, ip_before_insn) \ (*ip_before_insn = 0, _Unwind_GetGR (context, 15) & ~(_Unwind_Word)1) +#else + _Unwind_Ptr _Unwind_GetIP (struct _Unwind_Context *); + _Unwind_Ptr _Unwind_GetIPInfo (struct _Unwind_Context *, int *); +#endif static inline void _Unwind_SetGR (_Unwind_Context *context, int regno, _Unwind_Word val) diff --git a/contrib/gcc/doc/extend.texi b/contrib/gcc/doc/extend.texi index d7a1494..d27af10 100644 --- a/contrib/gcc/doc/extend.texi +++ b/contrib/gcc/doc/extend.texi @@ -81,6 +81,7 @@ extensions, accepted by GCC in C89 mode and in C++. * Pragmas:: Pragmas accepted by GCC. * Unnamed Fields:: Unnamed struct/union fields within structs/unions. * Thread-Local:: Per-thread variables. +* Binary constants:: Binary constants using the @samp{0b} prefix. @end menu @node Statement Exprs @@ -10424,6 +10425,28 @@ Non-@code{static} members shall not be @code{__thread}. @end quotation @end itemize +@node Binary constants +@section Binary constants using the @samp{0b} prefix +@cindex Binary constants using the @samp{0b} prefix + +Integer constants can be written as binary constants, consisting of a +sequence of @samp{0} and @samp{1} digits, prefixed by @samp{0b} or +@samp{0B}. This is particularly useful in environments that operate a +lot on the bit-level (like microcontrollers). + +The following statements are identical: + +@smallexample +i = 42; +i = 0x2a; +i = 052; +i = 0b101010; +@end smallexample + +The type of these constants follows the same rules as for octal or +hexadecimal integer constants, so suffixes like @samp{L} or @samp{UL} +can be applied. 
+ @node C++ Extensions @chapter Extensions to the C++ Language @cindex extensions, C++ language diff --git a/contrib/gcclibs/libcpp/expr.c b/contrib/gcclibs/libcpp/expr.c index bf8baaf..24fcb1b8 100644 --- a/contrib/gcclibs/libcpp/expr.c +++ b/contrib/gcclibs/libcpp/expr.c @@ -188,6 +188,11 @@ cpp_classify_number (cpp_reader *pfile, const cpp_token *token) radix = 16; str++; } + else if ((*str == 'b' || *str == 'B') && (str[1] == '0' || str[1] == '1')) + { + radix = 2; + str++; + } } /* Now scan for a well-formed integer or float. */ @@ -226,10 +231,22 @@ cpp_classify_number (cpp_reader *pfile, const cpp_token *token) radix = 10; if (max_digit >= radix) - SYNTAX_ERROR2 ("invalid digit \"%c\" in octal constant", '0' + max_digit); + { + if (radix == 2) + SYNTAX_ERROR2 ("invalid digit \"%c\" in binary constant", '0' + max_digit); + else + SYNTAX_ERROR2 ("invalid digit \"%c\" in octal constant", '0' + max_digit); + } if (float_flag != NOT_FLOAT) { + if (radix == 2) + { + cpp_error (pfile, CPP_DL_ERROR, + "invalid prefix \"0b\" for floating constant"); + return CPP_N_INVALID; + } + if (radix == 16 && CPP_PEDANTIC (pfile) && !CPP_OPTION (pfile, c99)) cpp_error (pfile, CPP_DL_PEDWARN, "use of C99 hexadecimal floating constant"); @@ -321,11 +338,16 @@ cpp_classify_number (cpp_reader *pfile, const cpp_token *token) if ((result & CPP_N_IMAGINARY) && CPP_PEDANTIC (pfile)) cpp_error (pfile, CPP_DL_PEDWARN, "imaginary constants are a GCC extension"); + if (radix == 2 && CPP_PEDANTIC (pfile)) + cpp_error (pfile, CPP_DL_PEDWARN, + "binary constants are a GCC extension"); if (radix == 10) result |= CPP_N_DECIMAL; else if (radix == 16) result |= CPP_N_HEX; + else if (radix == 2) + result |= CPP_N_BINARY; else result |= CPP_N_OCTAL; @@ -376,6 +398,11 @@ cpp_interpret_integer (cpp_reader *pfile, const cpp_token *token, base = 16; p += 2; } + else if ((type & CPP_N_RADIX) == CPP_N_BINARY) + { + base = 2; + p += 2; + } /* We can add a digit to numbers strictly less than this without 
needing the precision and slowness of double integers. */ @@ -431,12 +458,25 @@ static cpp_num append_digit (cpp_num num, int digit, int base, size_t precision) { cpp_num result; - unsigned int shift = 3 + (base == 16); + unsigned int shift; bool overflow; cpp_num_part add_high, add_low; - /* Multiply by 8 or 16. Catching this overflow here means we don't + /* Multiply by 2, 8 or 16. Catching this overflow here means we don't need to worry about add_high overflowing. */ + switch (base) + { + case 2: + shift = 1; + break; + + case 16: + shift = 4; + break; + + default: + shift = 3; + } overflow = !!(num.high >> (PART_PRECISION - shift)); result.high = num.high << shift; result.low = num.low << shift; diff --git a/contrib/gcclibs/libcpp/include/cpplib.h b/contrib/gcclibs/libcpp/include/cpplib.h index 851a2e3..7fd73b8 100644 --- a/contrib/gcclibs/libcpp/include/cpplib.h +++ b/contrib/gcclibs/libcpp/include/cpplib.h @@ -745,6 +745,7 @@ struct cpp_num #define CPP_N_DECIMAL 0x0100 #define CPP_N_HEX 0x0200 #define CPP_N_OCTAL 0x0400 +#define CPP_N_BINARY 0x0800 #define CPP_N_UNSIGNED 0x1000 /* Properties. 
*/ #define CPP_N_IMAGINARY 0x2000 diff --git a/lib/libc/gen/errlst.c b/lib/libc/gen/errlst.c index 7b4fd62..f8fe968 100644 --- a/lib/libc/gen/errlst.c +++ b/lib/libc/gen/errlst.c @@ -34,6 +34,7 @@ static char sccsid[] = "@(#)errlst.c 8.2 (Berkeley) 11/16/93"; __FBSDID("$FreeBSD$"); #include <stdio.h> +#include "errlst.h" const char *const sys_errlist[] = { "No error: 0", /* 0 - ENOERROR */ @@ -156,3 +157,8 @@ const char *const sys_errlist[] = { "Previous owner died", /* 96 - EOWNERDEAD */ }; const int sys_nerr = sizeof(sys_errlist) / sizeof(sys_errlist[0]); + +#ifdef PIC +__strong_reference(sys_errlist, __hidden_sys_errlist); +__strong_reference(sys_nerr, __hidden_sys_nerr); +#endif diff --git a/lib/libc/include/errlst.h b/lib/libc/include/errlst.h new file mode 100644 index 0000000..4e9e29f --- /dev/null +++ b/lib/libc/include/errlst.h @@ -0,0 +1,43 @@ +/*- + * Copyright (c) 2013 Jilles Tjoelker + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __ERRLST_H__ +#define __ERRLST_H__ + +#include <sys/cdefs.h> + +#ifdef PIC +/* If the main executable imports these, do not use its copy from libc.so. */ +extern const char *const __hidden_sys_errlist[] __hidden; +extern const int __hidden_sys_nerr __hidden; +#else +#define __hidden_sys_errlist sys_errlist +#define __hidden_sys_nerr sys_nerr +#endif + +#endif /* __ERRLST_H__ */ diff --git a/lib/libc/stdio/xprintf_errno.c b/lib/libc/stdio/xprintf_errno.c index 0c2be46..3c831d1 100644 --- a/lib/libc/stdio/xprintf_errno.c +++ b/lib/libc/stdio/xprintf_errno.c @@ -34,6 +34,7 @@ #include <vis.h> #include <assert.h> #include <sys/time.h> +#include "errlst.h" #include "printf.h" int @@ -54,7 +55,7 @@ __printf_render_errno(struct __printf_io *io, const struct printf_info *pi __unu ret = 0; error = *((const int *)arg[0]); - if (error >= 0 && error < sys_nerr) { + if (error >= 0 && error < __hidden_sys_nerr) { p = strerror(error); return (__printf_out(io, pi, p, strlen(p))); } diff --git a/lib/libc/string/strerror.c b/lib/libc/string/strerror.c index e11b351..1d7a385 100644 --- a/lib/libc/string/strerror.c +++ b/lib/libc/string/strerror.c @@ -42,6 +42,8 @@ __FBSDID("$FreeBSD$"); #include <string.h> #include <stdio.h> +#include "errlst.h" + #define UPREFIX "Unknown error" /* @@ -87,7 +89,7 @@ strerror_r(int errnum, char *strerrbuf, size_t buflen) catd = catopen("libc", NL_CAT_LOCALE); #endif - if (errnum < 0 || errnum >= 
sys_nerr) { + if (errnum < 0 || errnum >= __hidden_sys_nerr) { errstr(errnum, #if defined(NLS) catgets(catd, 1, 0xffff, UPREFIX), @@ -99,9 +101,9 @@ strerror_r(int errnum, char *strerrbuf, size_t buflen) } else { if (strlcpy(strerrbuf, #if defined(NLS) - catgets(catd, 1, errnum, sys_errlist[errnum]), + catgets(catd, 1, errnum, __hidden_sys_errlist[errnum]), #else - sys_errlist[errnum], + __hidden_sys_errlist[errnum], #endif buflen) >= buflen) retval = ERANGE; diff --git a/lib/libelf/libelf_data.c b/lib/libelf/libelf_data.c index 3fbb067..17808ef 100644 --- a/lib/libelf/libelf_data.c +++ b/lib/libelf/libelf_data.c @@ -84,13 +84,21 @@ _libelf_xlate_shtype(uint32_t sht) case SHT_SUNW_dof: return (ELF_T_BYTE); #endif + case SHT_ARM_PREEMPTMAP: + /* FALLTHROUGH */ + case SHT_ARM_ATTRIBUTES: + /* FALLTHROUGH */ + case SHT_ARM_DEBUGOVERLAY: + /* FALLTHROUGH */ + case SHT_ARM_OVERLAYSECTION: + /* FALLTHROUGH */ case SHT_MIPS_DWARF: /* FALLTHROUGH */ case SHT_MIPS_REGINFO: /* FALLTHROUGH */ case SHT_MIPS_OPTIONS: /* FALLTHROUGH */ - case SHT_AMD64_UNWIND: /* == SHT_IA_64_UNWIND */ + case SHT_AMD64_UNWIND: /* == SHT_IA_64_UNWIND == SHT_ARM_EXIDX */ return (ELF_T_BYTE); default: return (-1); diff --git a/share/examples/scsi_target/scsi_target.c b/share/examples/scsi_target/scsi_target.c index 1a7a061..0609ce1 100644 --- a/share/examples/scsi_target/scsi_target.c +++ b/share/examples/scsi_target/scsi_target.c @@ -365,7 +365,7 @@ init_ccbs() for (i = 0; i < MAX_INITIATORS; i++) { struct ccb_accept_tio *atio; struct atio_descr *a_descr; - struct ccb_immed_notify *inot; + struct ccb_immediate_notify *inot; atio = (struct ccb_accept_tio *)malloc(sizeof(*atio)); if (atio == NULL) { @@ -382,7 +382,7 @@ init_ccbs() atio->ccb_h.targ_descr = a_descr; send_ccb((union ccb *)atio, /*priority*/1); - inot = (struct ccb_immed_notify *)malloc(sizeof(*inot)); + inot = (struct ccb_immediate_notify *)malloc(sizeof(*inot)); if (inot == NULL) { warn("malloc INOT"); return (-1); @@ -593,7 +593,7 @@ 
handle_read() oo += run_queue(c_descr->atio); break; } - case XPT_IMMED_NOTIFY: + case XPT_IMMEDIATE_NOTIFY: /* INOTs are handled with priority */ TAILQ_INSERT_HEAD(&work_queue, &ccb->ccb_h, periph_links.tqe); @@ -903,7 +903,7 @@ free_ccb(union ccb *ccb) case XPT_ACCEPT_TARGET_IO: free(ccb->ccb_h.targ_descr); /* FALLTHROUGH */ - case XPT_IMMED_NOTIFY: + case XPT_IMMEDIATE_NOTIFY: default: free(ccb); break; diff --git a/share/man/man4/vtnet.4 b/share/man/man4/vtnet.4 index 8d4d202..c7b2189 100644 --- a/share/man/man4/vtnet.4 +++ b/share/man/man4/vtnet.4 @@ -69,14 +69,30 @@ prompt before booting the kernel or stored in .Xr loader.conf 5 . .Bl -tag -width "xxxxxx" .It Va hw.vtnet.csum_disable +.It Va hw.vtnet. Ns Ar X Ns Va .csum_disable This tunable disables receive and send checksum offload. The default value is 0. .It Va hw.vtnet.tso_disable +.It Va hw.vtnet. Ns Ar X Ns Va .tso_disable This tunable disables TSO. The default value is 0. .It Va hw.vtnet.lro_disable +.It Va hw.vtnet. Ns Ar X Ns Va .lro_disable This tunable disables LRO. The default value is 0. +.It Va hw.vtnet.mq_disable +.It Va hw.vtnet. Ns Ar X Ns Va .mq_disable +This tunable disables multiqueue. +The default value is 0. +.It Va hw.vtnet.mq_max_pairs +.It Va hw.vtnet. Ns Ar X Ns Va .mq_max_pairs +This tunable sets the maximum number of transmit and receive queue pairs. +Multiple queues are only supported when the Multiqueue feature is negotiated. +This driver supports a maximum of 8 queue pairs. +The number of queue pairs used is the lesser of the maximum supported by the +driver and the hypervisor, the number of CPUs present in the guest, and this +tunable if not zero. +The default value is 0. 
.El .Sh SEE ALSO .Xr arp 4 , diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index e868cf5..79ec5ed 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -160,11 +160,11 @@ IDTVEC(xen_intr_upcall) SUPERALIGN_TEXT global_invltlb: - movl %cr4,%eax - andl $~0x80,%eax - movl %eax,%cr4 - orl $0x80,%eax - movl %eax,%cr4 + movq %cr4,%rax + andq $~0x80,%rax /* PGE */ + movq %rax,%cr4 + orq $0x80,%rax + movq %rax,%cr4 invltlb_ret_clear_pm_save: movq smp_tlb_pmap,%rdx testq %rdx,%rdx diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index a134e10..d905961 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -762,7 +762,6 @@ pmap_bootstrap(vm_paddr_t *firstaddr) /* Initialize the PAT MSR. */ pmap_init_pat(); -#ifdef SMP /* Initialize TLB Context Id. */ TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled); if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) { @@ -773,8 +772,10 @@ pmap_bootstrap(vm_paddr_t *firstaddr) invpcid_works = (cpu_stdext_feature & CPUID_STDEXT_INVPCID) != 0; kernel_pmap->pm_pcid = 0; - } else +#ifndef SMP + pmap_pcid_enabled = 0; #endif + } else pmap_pcid_enabled = 0; } diff --git a/sys/cam/ctl/scsi_ctl.c b/sys/cam/ctl/scsi_ctl.c index 97200ca..45acdba 100644 --- a/sys/cam/ctl/scsi_ctl.c +++ b/sys/cam/ctl/scsi_ctl.c @@ -961,23 +961,23 @@ ctlfestart(struct cam_periph *periph, union ccb *start_ccb) /* * Valid combinations: - * - CAM_SEND_STATUS, SCATTER_VALID = 0, dxfer_len = 0, + * - CAM_SEND_STATUS, CAM_DATA_SG = 0, dxfer_len = 0, * sglist_cnt = 0 - * - CAM_SEND_STATUS = 0, SCATTER_VALID = 0, dxfer_len != 0, + * - CAM_SEND_STATUS = 0, CAM_DATA_SG = 0, dxfer_len != 0, * sglist_cnt = 0 - * - CAM_SEND_STATUS = 0, SCATTER_VALID, dxfer_len != 0, + * - CAM_SEND_STATUS = 0, CAM_DATA_SG, dxfer_len != 0, * sglist_cnt != 0 */ #ifdef CTLFEDEBUG if (((flags & CAM_SEND_STATUS) - && (((flags & CAM_SCATTER_VALID) != 0) + && (((flags & CAM_DATA_SG) != 0) || (dxfer_len != 
0) || (csio->sglist_cnt != 0))) || (((flags & CAM_SEND_STATUS) == 0) && (dxfer_len == 0)) - || ((flags & CAM_SCATTER_VALID) + || ((flags & CAM_DATA_SG) && (csio->sglist_cnt == 0)) - || (((flags & CAM_SCATTER_VALID) == 0) + || (((flags & CAM_DATA_SG) == 0) && (csio->sglist_cnt != 0))) { printf("%s: tag %04x cdb %02x flags %#x dxfer_len " "%d sg %u\n", __func__, atio->tag_id, diff --git a/sys/cam/scsi/scsi_enc.c b/sys/cam/scsi/scsi_enc.c index bb7a1a0..775d8f6 100644 --- a/sys/cam/scsi/scsi_enc.c +++ b/sys/cam/scsi/scsi_enc.c @@ -56,6 +56,8 @@ __FBSDID("$FreeBSD$"); #include <cam/scsi/scsi_enc.h> #include <cam/scsi/scsi_enc_internal.h> +#include <opt_ses.h> + MALLOC_DEFINE(M_SCSIENC, "SCSI ENC", "SCSI ENC buffers"); /* Enclosure type independent driver */ @@ -719,12 +721,12 @@ enc_type(struct ccb_getdev *cgd) return (ENC_NONE); } -#ifdef ENC_ENABLE_PASSTHROUGH +#ifdef SES_ENABLE_PASSTHROUGH if ((iqd[6] & 0x40) && (iqd[2] & 0x7) >= 2) { /* * PassThrough Device. */ - return (ENC_ENC_PASSTHROUGH); + return (ENC_SES_PASSTHROUGH); } #endif diff --git a/sys/cam/scsi/scsi_targ_bh.c b/sys/cam/scsi/scsi_targ_bh.c index 92a7ac8..bcf4eea 100644 --- a/sys/cam/scsi/scsi_targ_bh.c +++ b/sys/cam/scsi/scsi_targ_bh.c @@ -283,16 +283,13 @@ targbhenlun(struct cam_periph *periph) xpt_setup_ccb(&atio->ccb_h, periph->path, CAM_PRIORITY_NORMAL); atio->ccb_h.func_code = XPT_ACCEPT_TARGET_IO; atio->ccb_h.cbfcnp = targbhdone; - xpt_action((union ccb *)atio); - status = atio->ccb_h.status; - if (status != CAM_REQ_INPROG) { - targbhfreedescr(atio->ccb_h.ccb_descr); - free(atio, M_SCSIBH); - break; - } ((struct targbh_cmd_desc*)atio->ccb_h.ccb_descr)->atio_link = softc->accept_tio_list; softc->accept_tio_list = atio; + xpt_action((union ccb *)atio); + status = atio->ccb_h.status; + if (status != CAM_REQ_INPROG) + break; } if (i == 0) { @@ -308,10 +305,10 @@ targbhenlun(struct cam_periph *periph) * so the SIM can tell us of asynchronous target mode events. 
*/ for (i = 0; i < MAX_ACCEPT; i++) { - struct ccb_immed_notify *inot; + struct ccb_immediate_notify *inot; - inot = (struct ccb_immed_notify*)malloc(sizeof(*inot), M_SCSIBH, - M_NOWAIT); + inot = (struct ccb_immediate_notify*)malloc(sizeof(*inot), + M_SCSIBH, M_NOWAIT); if (inot == NULL) { status = CAM_RESRC_UNAVAIL; @@ -319,16 +316,14 @@ targbhenlun(struct cam_periph *periph) } xpt_setup_ccb(&inot->ccb_h, periph->path, CAM_PRIORITY_NORMAL); - inot->ccb_h.func_code = XPT_IMMED_NOTIFY; + inot->ccb_h.func_code = XPT_IMMEDIATE_NOTIFY; inot->ccb_h.cbfcnp = targbhdone; + SLIST_INSERT_HEAD(&softc->immed_notify_slist, &inot->ccb_h, + periph_links.sle); xpt_action((union ccb *)inot); status = inot->ccb_h.status; - if (status != CAM_REQ_INPROG) { - free(inot, M_SCSIBH); + if (status != CAM_REQ_INPROG) break; - } - SLIST_INSERT_HEAD(&softc->immed_notify_slist, &inot->ccb_h, - periph_links.sle); } if (i == 0) { @@ -413,7 +408,9 @@ targbhctor(struct cam_periph *periph, void *arg) periph->softc = softc; softc->init_level++; - return (targbhenlun(periph)); + if (targbhenlun(periph) != CAM_REQ_CMP) + cam_periph_invalidate(periph); + return (CAM_REQ_CMP); } static void @@ -715,7 +712,7 @@ targbhdone(struct cam_periph *periph, union ccb *done_ccb) } break; } - case XPT_IMMED_NOTIFY: + case XPT_IMMEDIATE_NOTIFY: { int frozen; diff --git a/sys/cam/scsi/scsi_target.c b/sys/cam/scsi/scsi_target.c index 78e96fb..4b4ad78 100644 --- a/sys/cam/scsi/scsi_target.c +++ b/sys/cam/scsi/scsi_target.c @@ -551,6 +551,7 @@ targwrite(struct cdev *dev, struct uio *uio, int ioflag) switch (func_code) { case XPT_ACCEPT_TARGET_IO: case XPT_IMMED_NOTIFY: + case XPT_IMMEDIATE_NOTIFY: cam_periph_lock(softc->periph); ccb = targgetccb(softc, func_code, priority); descr = (struct targ_cmd_descr *)ccb->ccb_h.targ_descr; @@ -781,6 +782,7 @@ targdone(struct cam_periph *periph, union ccb *done_ccb) switch (done_ccb->ccb_h.func_code) { /* All FC_*_QUEUED CCBs go back to userland */ case XPT_IMMED_NOTIFY: + case 
XPT_IMMEDIATE_NOTIFY: case XPT_ACCEPT_TARGET_IO: case XPT_CONT_TARGET_IO: TAILQ_INSERT_TAIL(&softc->user_ccb_queue, &done_ccb->ccb_h, @@ -961,6 +963,7 @@ targfreeccb(struct targ_softc *softc, union ccb *ccb) switch (ccb->ccb_h.func_code) { case XPT_ACCEPT_TARGET_IO: case XPT_IMMED_NOTIFY: + case XPT_IMMEDIATE_NOTIFY: CAM_DEBUG_PRINT(CAM_DEBUG_PERIPH, ("freeing ccb %p\n", ccb)); free(ccb, M_TARG); break; @@ -1131,6 +1134,9 @@ targccblen(xpt_opcode func_code) case XPT_IMMED_NOTIFY: len = sizeof(struct ccb_immed_notify); break; + case XPT_IMMEDIATE_NOTIFY: + len = sizeof(struct ccb_immediate_notify); + break; case XPT_REL_SIMQ: len = sizeof(struct ccb_relsim); break; diff --git a/sys/cddl/dev/dtrace/powerpc/dtrace_isa.c b/sys/cddl/dev/dtrace/powerpc/dtrace_isa.c index 3793adf..9582c97 100644 --- a/sys/cddl/dev/dtrace/powerpc/dtrace_isa.c +++ b/sys/cddl/dev/dtrace/powerpc/dtrace_isa.c @@ -349,50 +349,84 @@ zero: uint64_t dtrace_getarg(int arg, int aframes) { - return (0); -} - -#ifdef notyet -{ - int depth = 0; - register_t sp; - vm_offset_t callpc; - pc_t caller = (pc_t) solaris_cpu[curcpu].cpu_dtrace_caller; - - if (intrpc != 0) - pcstack[depth++] = (pc_t) intrpc; - - aframes++; - - sp = dtrace_getfp(); + uintptr_t val; + uintptr_t *fp = (uintptr_t *)dtrace_getfp(); + uintptr_t *stack; + int i; - while (depth < pcstack_limit) { - if (!INKERNEL((long) frame)) - break; + /* + * A total of 8 arguments are passed via registers; any argument with + * index of 7 or lower is therefore in a register. + */ + int inreg = 7; - callpc = *(void **)(sp + RETURN_OFFSET); + for (i = 1; i <= aframes; i++) { + fp = (uintptr_t *)*fp; - if (!INKERNEL(callpc)) - break; + /* + * On ppc32 AIM, and booke, trapexit() is the immediately following + * label. On ppc64 AIM trapexit() follows a nop. 
+ */ + if (((long)(fp[1]) == (long)trapexit) || + (((long)(fp[1]) + 4 == (long)trapexit))) { + /* + * In the case of powerpc, we will use the pointer to the regs + * structure that was pushed when we took the trap. To get this + * structure, we must increment beyond the frame structure. If the + * argument that we're seeking is passed on the stack, we'll pull + * the true stack pointer out of the saved registers and decrement + * our argument by the number of arguments passed in registers; if + * the argument we're seeking is passed in regsiters, we can just + * load it directly. + */ +#ifdef __powerpc64__ + struct reg *rp = (struct reg *)((uintptr_t)fp[0] + 48); +#else + struct reg *rp = (struct reg *)((uintptr_t)fp[0] + 8); +#endif - if (aframes > 0) { - aframes--; - if ((aframes == 0) && (caller != 0)) { - pcstack[depth++] = caller; + if (arg <= inreg) { + stack = &rp->fixreg[3]; + } else { + stack = (uintptr_t *)(rp->fixreg[1]); + arg -= inreg; } - } - else { - pcstack[depth++] = callpc; + goto load; } - sp = *(void **)sp; } - for (; depth < pcstack_limit; depth++) { - pcstack[depth] = 0; + /* + * We know that we did not come through a trap to get into + * dtrace_probe() -- the provider simply called dtrace_probe() + * directly. As this is the case, we need to shift the argument + * that we're looking for: the probe ID is the first argument to + * dtrace_probe(), so the argument n will actually be found where + * one would expect to find argument (n + 1). + */ + arg++; + + if (arg <= inreg) { + /* + * This shouldn't happen. If the argument is passed in a + * register then it should have been, well, passed in a + * register... 
+ */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); } + + arg -= (inreg + 1); + stack = fp + 2; + +load: + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + val = stack[arg]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + + return (val); + return (0); } -#endif int dtrace_getstackdepth(int aframes) diff --git a/sys/cddl/dev/dtrace/powerpc/dtrace_subr.c b/sys/cddl/dev/dtrace/powerpc/dtrace_subr.c index e6f1ec0..d22f207 100644 --- a/sys/cddl/dev/dtrace/powerpc/dtrace_subr.c +++ b/sys/cddl/dev/dtrace/powerpc/dtrace_subr.c @@ -51,6 +51,8 @@ extern int dtrace_in_probe; extern dtrace_id_t dtrace_probeid_error; extern int (*dtrace_invop_jump_addr)(struct trapframe *); +extern void dtrace_getnanotime(struct timespec *tsp); + int dtrace_invop(uintptr_t, uintptr_t *, uintptr_t); void dtrace_invop_init(void); void dtrace_invop_uninit(void); @@ -63,13 +65,13 @@ typedef struct dtrace_invop_hdlr { dtrace_invop_hdlr_t *dtrace_invop_hdlr; int -dtrace_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax) +dtrace_invop(uintptr_t addr, uintptr_t *stack, uintptr_t arg0) { dtrace_invop_hdlr_t *hdlr; int rval; for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next) - if ((rval = hdlr->dtih_func(addr, stack, eax)) != 0) + if ((rval = hdlr->dtih_func(addr, stack, arg0)) != 0) return (rval); return (0); @@ -134,7 +136,7 @@ dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) CPU_SETOF(cpu, &cpus); smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, - smp_no_rendevous_barrier, arg); + smp_no_rendevous_barrier, arg); } static void @@ -145,9 +147,82 @@ dtrace_sync_func(void) void dtrace_sync(void) { - dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL); + dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL); } +static int64_t tgt_cpu_tsc; +static int64_t hst_cpu_tsc; +static int64_t timebase_skew[MAXCPU]; +static uint64_t nsec_scale; + +/* See below for the explanation of this macro. 
*/ +/* This is taken from the amd64 dtrace_subr, to provide a synchronized timer + * between multiple processors in dtrace. Since PowerPC Timebases can be much + * lower than x86, the scale shift is 26 instead of 28, allowing for a 15.63MHz + * timebase. + */ +#define SCALE_SHIFT 26 + +static void +dtrace_gethrtime_init_cpu(void *arg) +{ + uintptr_t cpu = (uintptr_t) arg; + + if (cpu == curcpu) + tgt_cpu_tsc = mftb(); + else + hst_cpu_tsc = mftb(); +} + +static void +dtrace_gethrtime_init(void *arg) +{ + struct pcpu *pc; + uint64_t tb_f; + cpuset_t map; + int i; + + tb_f = cpu_tickrate(); + + /* + * The following line checks that nsec_scale calculated below + * doesn't overflow 32-bit unsigned integer, so that it can multiply + * another 32-bit integer without overflowing 64-bit. + * Thus minimum supported Timebase frequency is 15.63MHz. + */ + KASSERT(tb_f > (NANOSEC >> (32 - SCALE_SHIFT)), ("Timebase frequency is too low")); + + /* + * We scale up NANOSEC/tb_f ratio to preserve as much precision + * as possible. + * 2^26 factor was chosen quite arbitrarily from practical + * considerations: + * - it supports TSC frequencies as low as 15.63MHz (see above); + */ + nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / tb_f; + + /* The current CPU is the reference one. 
*/ + sched_pin(); + timebase_skew[curcpu] = 0; + CPU_FOREACH(i) { + if (i == curcpu) + continue; + + pc = pcpu_find(i); + CPU_SETOF(PCPU_GET(cpuid), &map); + CPU_SET(pc->pc_cpuid, &map); + + smp_rendezvous_cpus(map, NULL, + dtrace_gethrtime_init_cpu, + smp_no_rendevous_barrier, (void *)(uintptr_t) i); + + timebase_skew[i] = tgt_cpu_tsc - hst_cpu_tsc; + } + sched_unpin(); +} + +SYSINIT(dtrace_gethrtime_init, SI_SUB_SMP, SI_ORDER_ANY, dtrace_gethrtime_init, NULL); + /* * DTrace needs a high resolution time function which can * be called from a probe context and guaranteed not to have @@ -158,12 +233,21 @@ dtrace_sync(void) uint64_t dtrace_gethrtime() { - struct timespec curtime; - - nanouptime(&curtime); - - return (curtime.tv_sec * 1000000000UL + curtime.tv_nsec); + uint64_t timebase; + uint32_t lo; + uint32_t hi; + /* + * We split timebase value into lower and higher 32-bit halves and separately + * scale them with nsec_scale, then we scale them down by 2^28 + * (see nsec_scale calculations) taking into account 32-bit shift of + * the higher half and finally add. + */ + timebase = mftb() - timebase_skew[curcpu]; + lo = timebase; + hi = timebase >> 32; + return (((lo * nsec_scale) >> SCALE_SHIFT) + + ((hi * nsec_scale) << (32 - SCALE_SHIFT))); } uint64_t @@ -171,12 +255,12 @@ dtrace_gethrestime(void) { struct timespec curtime; - getnanotime(&curtime); + dtrace_getnanotime(&curtime); return (curtime.tv_sec * 1000000000UL + curtime.tv_nsec); } -/* Function to handle DTrace traps during probes. See amd64/amd64/trap.c */ +/* Function to handle DTrace traps during probes. See powerpc/powerpc/trap.c */ int dtrace_trap(struct trapframe *frame, u_int type) { @@ -196,34 +280,34 @@ dtrace_trap(struct trapframe *frame, u_int type) * All the rest will be handled in the usual way. */ switch (type) { - /* Page fault. */ - case EXC_DSI: - case EXC_DSE: - /* Flag a bad address. 
*/ - cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR; - cpu_core[curcpu].cpuc_dtrace_illval = frame->cpu.aim.dar; - - /* - * Offset the instruction pointer to the instruction - * following the one causing the fault. - */ - frame->srr0 += sizeof(int); - return (1); - case EXC_ISI: - case EXC_ISE: - /* Flag a bad address. */ - cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR; - cpu_core[curcpu].cpuc_dtrace_illval = frame->srr0; - - /* - * Offset the instruction pointer to the instruction - * following the one causing the fault. - */ - frame->srr0 += sizeof(int); - return (1); - default: - /* Handle all other traps in the usual way. */ - break; + /* Page fault. */ + case EXC_DSI: + case EXC_DSE: + /* Flag a bad address. */ + cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR; + cpu_core[curcpu].cpuc_dtrace_illval = frame->cpu.aim.dar; + + /* + * Offset the instruction pointer to the instruction + * following the one causing the fault. + */ + frame->srr0 += sizeof(int); + return (1); + case EXC_ISI: + case EXC_ISE: + /* Flag a bad address. */ + cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR; + cpu_core[curcpu].cpuc_dtrace_illval = frame->srr0; + + /* + * Offset the instruction pointer to the instruction + * following the one causing the fault. + */ + frame->srr0 += sizeof(int); + return (1); + default: + /* Handle all other traps in the usual way. 
*/ + break; } } @@ -237,28 +321,29 @@ dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which, { dtrace_probe(dtrace_probeid_error, (uint64_t)(uintptr_t)state, - (uintptr_t)epid, - (uintptr_t)which, (uintptr_t)fault, (uintptr_t)fltoffs); + (uintptr_t)epid, + (uintptr_t)which, (uintptr_t)fault, (uintptr_t)fltoffs); } static int dtrace_invop_start(struct trapframe *frame) { switch (dtrace_invop(frame->srr0, (uintptr_t *)frame, frame->fixreg[3])) { - case DTRACE_INVOP_JUMP: - break; - case DTRACE_INVOP_BCTR: - frame->srr0 = frame->ctr; - break; - case DTRACE_INVOP_BLR: - frame->srr0 = frame->lr; - break; - case DTRACE_INVOP_MFLR_R0: - frame->fixreg[0] = frame->lr ; - break; - default: - return (-1); - break; + case DTRACE_INVOP_JUMP: + break; + case DTRACE_INVOP_BCTR: + frame->srr0 = frame->ctr; + break; + case DTRACE_INVOP_BLR: + frame->srr0 = frame->lr; + break; + case DTRACE_INVOP_MFLR_R0: + frame->fixreg[0] = frame->lr; + frame->srr0 = frame->srr0 + 4; + break; + default: + return (-1); + break; } return (0); diff --git a/sys/cddl/dev/fbt/fbt_powerpc.c b/sys/cddl/dev/fbt/fbt_powerpc.c index bee3dc7..cdaa06a 100644 --- a/sys/cddl/dev/fbt/fbt_powerpc.c +++ b/sys/cddl/dev/fbt/fbt_powerpc.c @@ -57,6 +57,7 @@ #include <sys/sysproto.h> #include <sys/uio.h> #include <sys/unistd.h> +#include <machine/md_var.h> #include <machine/stdarg.h> #include <sys/dtrace.h> @@ -172,7 +173,11 @@ fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval) tmp = fbt->fbtp_savedval & FBT_BR_MASK; /* Sign extend. */ if (tmp & 0x02000000) - tmp |= 0xFC000000; +#ifdef __powerpc64__ + tmp |= 0xfffffffffc000000ULL; +#else + tmp |= 0xfc000000UL; +#endif frame->srr0 += tmp; } cpu->cpu_dtrace_caller = 0; @@ -193,9 +198,12 @@ fbt_provide_module_function(linker_file_t lf, int symindx, const char *name = symval->name; fbt_probe_t *fbt, *retfbt; int j; - int size; u_int32_t *instr, *limit; + /* PowerPC64 uses '.' prefixes on symbol names, ignore it. 
*/ + if (name[0] == '.') + name++; + if (strncmp(name, "dtrace_", 7) == 0 && strncmp(name, "dtrace_safe_", 12) != 0) { /* @@ -210,8 +218,6 @@ fbt_provide_module_function(linker_file_t lf, int symindx, if (name[0] == '_' && name[1] == '_') return (0); - size = symval->size; - instr = (u_int32_t *) symval->value; limit = (u_int32_t *) symval->value + symval->size; @@ -219,7 +225,7 @@ fbt_provide_module_function(linker_file_t lf, int symindx, if (*instr == FBT_MFLR_R0) break; - if (*instr != FBT_MFLR_R0); + if (*instr != FBT_MFLR_R0) return (0); fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO); @@ -264,9 +270,6 @@ again: } } - if (*instr == FBT_MFLR_R0) - return (0); - if (*instr != FBT_MTLR_R0) { instr++; goto again; @@ -291,7 +294,7 @@ again: if (retfbt == NULL) { fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, - name, FBT_RETURN, 3, fbt); + name, FBT_RETURN, 5, fbt); } else { retfbt->fbtp_next = fbt; fbt->fbtp_id = retfbt->fbtp_id; @@ -317,7 +320,7 @@ again: lf->fbt_nentries++; - instr += size; + instr += 4; goto again; } @@ -434,6 +437,7 @@ fbt_enable(void *arg, dtrace_id_t id, void *parg) for (; fbt != NULL; fbt = fbt->fbtp_next) { *fbt->fbtp_patchpoint = fbt->fbtp_patchval; + __syncicache(fbt->fbtp_patchpoint, 4); } } @@ -449,8 +453,10 @@ fbt_disable(void *arg, dtrace_id_t id, void *parg) if ((ctl->loadcnt != fbt->fbtp_loadcnt)) return; - for (; fbt != NULL; fbt = fbt->fbtp_next) + for (; fbt != NULL; fbt = fbt->fbtp_next) { *fbt->fbtp_patchpoint = fbt->fbtp_savedval; + __syncicache(fbt->fbtp_patchpoint, 4); + } } static void @@ -464,8 +470,10 @@ fbt_suspend(void *arg, dtrace_id_t id, void *parg) if ((ctl->loadcnt != fbt->fbtp_loadcnt)) return; - for (; fbt != NULL; fbt = fbt->fbtp_next) + for (; fbt != NULL; fbt = fbt->fbtp_next) { *fbt->fbtp_patchpoint = fbt->fbtp_savedval; + __syncicache(fbt->fbtp_patchpoint, 4); + } } static void @@ -479,15 +487,16 @@ fbt_resume(void *arg, dtrace_id_t id, void *parg) if ((ctl->loadcnt != fbt->fbtp_loadcnt)) 
return; - for (; fbt != NULL; fbt = fbt->fbtp_next) + for (; fbt != NULL; fbt = fbt->fbtp_next) { *fbt->fbtp_patchpoint = fbt->fbtp_patchval; + __syncicache(fbt->fbtp_patchpoint, 4); + } } static int fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc) { const Elf_Sym *symp = lc->symtab;; - const char *name; const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab; const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t); int i; @@ -519,11 +528,6 @@ fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc) continue; } - if (symp->st_name < lc->strcnt) - name = lc->strtab + symp->st_name; - else - name = "(?)"; - switch (ELF_ST_TYPE(symp->st_info)) { case STT_OBJECT: if (objtoff >= hp->cth_funcoff || @@ -690,6 +694,8 @@ fbt_typoff_init(linker_ctf_t *lc) pop[kind]++; } + /* account for a sentinel value below */ + ctf_typemax++; *lc->typlenp = ctf_typemax; if ((xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER, M_ZERO | M_WAITOK)) == NULL) @@ -1171,6 +1177,11 @@ fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_a uint32_t offset; ushort_t info, kind, n; + if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) { + (void) strcpy(desc->dtargd_native, "int"); + return; + } + desc->dtargd_ndx = DTRACE_ARGNONE; /* Get a pointer to the CTF data and it's length. */ @@ -1221,12 +1232,19 @@ fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_a return; } - /* Check if the requested argument doesn't exist. */ - if (ndx >= n) - return; + if (fbt->fbtp_roffset != 0) { + /* Only return type is available for args[1] in return probe. */ + if (ndx > 1) + return; + ASSERT(ndx == 1); + } else { + /* Check if the requested argument doesn't exist. */ + if (ndx >= n) + return; - /* Skip the return type and arguments up to the one requested. */ - dp += ndx + 1; + /* Skip the return type and arguments up to the one requested. 
*/ + dp += ndx + 1; + } if (fbt_type_name(&lc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0) desc->dtargd_ndx = ndx; @@ -1234,6 +1252,15 @@ fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_a return; } +static int +fbt_linker_file_cb(linker_file_t lf, void *arg) +{ + + fbt_provide_module(arg, lf); + + return (0); +} + static void fbt_load(void *dummy) { @@ -1257,6 +1284,9 @@ fbt_load(void *dummy) if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER, NULL, &fbt_pops, NULL, &fbt_id) != 0) return; + + /* Create probes for the kernel and already-loaded modules. */ + linker_file_foreach(fbt_linker_file_cb, NULL); } diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c index 89604d1..f757394 100644 --- a/sys/dev/virtio/network/if_vtnet.c +++ b/sys/dev/virtio/network/if_vtnet.c @@ -29,10 +29,6 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#ifdef HAVE_KERNEL_OPTION_HEADERS -#include "opt_device_polling.h" -#endif - #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> @@ -46,6 +42,9 @@ __FBSDID("$FreeBSD$"); #include <sys/sglist.h> #include <sys/lock.h> #include <sys/mutex.h> +#include <sys/taskqueue.h> +#include <sys/smp.h> +#include <machine/smp.h> #include <vm/uma.h> @@ -63,6 +62,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip6.h> +#include <netinet6/ip6_var.h> #include <netinet/udp.h> #include <netinet/tcp.h> #include <netinet/sctp.h> @@ -79,6 +79,9 @@ __FBSDID("$FreeBSD$"); #include "virtio_if.h" +#include "opt_inet.h" +#include "opt_inet6.h" + static int vtnet_modevent(module_t, int, void *); static int vtnet_probe(device_t); @@ -87,82 +90,139 @@ static int vtnet_detach(device_t); static int vtnet_suspend(device_t); static int vtnet_resume(device_t); static int vtnet_shutdown(device_t); +static int vtnet_attach_completed(device_t); static int vtnet_config_change(device_t); static void vtnet_negotiate_features(struct 
vtnet_softc *); +static void vtnet_setup_features(struct vtnet_softc *); +static int vtnet_init_rxq(struct vtnet_softc *, int); +static int vtnet_init_txq(struct vtnet_softc *, int); +static int vtnet_alloc_rxtx_queues(struct vtnet_softc *); +static void vtnet_free_rxtx_queues(struct vtnet_softc *); +static int vtnet_alloc_rx_filters(struct vtnet_softc *); +static void vtnet_free_rx_filters(struct vtnet_softc *); static int vtnet_alloc_virtqueues(struct vtnet_softc *); -static void vtnet_get_hwaddr(struct vtnet_softc *); -static void vtnet_set_hwaddr(struct vtnet_softc *); -static int vtnet_is_link_up(struct vtnet_softc *); -static void vtnet_update_link_status(struct vtnet_softc *); -static void vtnet_watchdog(struct vtnet_softc *); +static int vtnet_setup_interface(struct vtnet_softc *); static int vtnet_change_mtu(struct vtnet_softc *, int); static int vtnet_ioctl(struct ifnet *, u_long, caddr_t); -static int vtnet_init_rx_vq(struct vtnet_softc *); -static void vtnet_free_rx_mbufs(struct vtnet_softc *); -static void vtnet_free_tx_mbufs(struct vtnet_softc *); -static void vtnet_free_ctrl_vq(struct vtnet_softc *); - -#ifdef DEVICE_POLLING -static poll_handler_t vtnet_poll; -#endif - -static struct mbuf * vtnet_alloc_rxbuf(struct vtnet_softc *, int, - struct mbuf **); -static int vtnet_replace_rxbuf(struct vtnet_softc *, +static int vtnet_rxq_populate(struct vtnet_rxq *); +static void vtnet_rxq_free_mbufs(struct vtnet_rxq *); +static struct mbuf * + vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **); +static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *, struct mbuf *, int); -static int vtnet_newbuf(struct vtnet_softc *); -static void vtnet_discard_merged_rxbuf(struct vtnet_softc *, int); -static void vtnet_discard_rxbuf(struct vtnet_softc *, struct mbuf *); -static int vtnet_enqueue_rxbuf(struct vtnet_softc *, struct mbuf *); -static void vtnet_vlan_tag_remove(struct mbuf *); -static int vtnet_rx_csum(struct vtnet_softc *, struct mbuf *, 
+static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int); +static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *); +static int vtnet_rxq_new_buf(struct vtnet_rxq *); +static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *, + struct virtio_net_hdr *); +static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int); +static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *); +static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int); +static void vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *, struct virtio_net_hdr *); -static int vtnet_rxeof_merged(struct vtnet_softc *, struct mbuf *, int); -static int vtnet_rxeof(struct vtnet_softc *, int, int *); +static int vtnet_rxq_eof(struct vtnet_rxq *); static void vtnet_rx_vq_intr(void *); +static void vtnet_rxq_tq_intr(void *, int); -static void vtnet_txeof(struct vtnet_softc *); -static struct mbuf * vtnet_tx_offload(struct vtnet_softc *, struct mbuf *, +static void vtnet_txq_free_mbufs(struct vtnet_txq *); +static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *, + int *, int *, int *); +static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int, + int, struct virtio_net_hdr *); +static struct mbuf * + vtnet_txq_offload(struct vtnet_txq *, struct mbuf *, struct virtio_net_hdr *); -static int vtnet_enqueue_txbuf(struct vtnet_softc *, struct mbuf **, +static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **, struct vtnet_tx_header *); -static int vtnet_encap(struct vtnet_softc *, struct mbuf **); -static void vtnet_start_locked(struct ifnet *); +static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **); +#ifdef VTNET_LEGACY_TX +static void vtnet_start_locked(struct vtnet_txq *, struct ifnet *); static void vtnet_start(struct ifnet *); -static void vtnet_tick(void *); +#else +static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *); +static int vtnet_txq_mq_start(struct ifnet *, struct mbuf *); +static 
void vtnet_txq_tq_deferred(void *, int); +#endif +static void vtnet_txq_tq_intr(void *, int); +static void vtnet_txq_eof(struct vtnet_txq *); static void vtnet_tx_vq_intr(void *); +static void vtnet_tx_start_all(struct vtnet_softc *); + +#ifndef VTNET_LEGACY_TX +static void vtnet_qflush(struct ifnet *); +#endif + +static int vtnet_watchdog(struct vtnet_txq *); +static void vtnet_rxq_accum_stats(struct vtnet_rxq *, + struct vtnet_rxq_stats *); +static void vtnet_txq_accum_stats(struct vtnet_txq *, + struct vtnet_txq_stats *); +static void vtnet_accumulate_stats(struct vtnet_softc *); +static void vtnet_tick(void *); +static void vtnet_start_taskqueues(struct vtnet_softc *); +static void vtnet_free_taskqueues(struct vtnet_softc *); +static void vtnet_drain_taskqueues(struct vtnet_softc *); + +static void vtnet_drain_rxtx_queues(struct vtnet_softc *); +static void vtnet_stop_rendezvous(struct vtnet_softc *); static void vtnet_stop(struct vtnet_softc *); +static int vtnet_virtio_reinit(struct vtnet_softc *); +static void vtnet_init_rx_filters(struct vtnet_softc *); +static int vtnet_init_rx_queues(struct vtnet_softc *); +static int vtnet_init_tx_queues(struct vtnet_softc *); +static int vtnet_init_rxtx_queues(struct vtnet_softc *); +static void vtnet_set_active_vq_pairs(struct vtnet_softc *); static int vtnet_reinit(struct vtnet_softc *); static void vtnet_init_locked(struct vtnet_softc *); static void vtnet_init(void *); +static void vtnet_free_ctrl_vq(struct vtnet_softc *); static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *, struct sglist *, int, int); - -static void vtnet_rx_filter(struct vtnet_softc *sc); +static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *); +static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t); static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int); static int vtnet_set_promisc(struct vtnet_softc *, int); static int vtnet_set_allmulti(struct vtnet_softc *, int); +static void vtnet_attach_disable_promisc(struct 
vtnet_softc *); +static void vtnet_rx_filter(struct vtnet_softc *); static void vtnet_rx_filter_mac(struct vtnet_softc *); - static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t); static void vtnet_rx_filter_vlan(struct vtnet_softc *); -static void vtnet_set_vlan_filter(struct vtnet_softc *, int, uint16_t); +static void vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t); static void vtnet_register_vlan(void *, struct ifnet *, uint16_t); static void vtnet_unregister_vlan(void *, struct ifnet *, uint16_t); +static int vtnet_is_link_up(struct vtnet_softc *); +static void vtnet_update_link_status(struct vtnet_softc *); static int vtnet_ifmedia_upd(struct ifnet *); static void vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *); +static void vtnet_get_hwaddr(struct vtnet_softc *); +static void vtnet_set_hwaddr(struct vtnet_softc *); +static void vtnet_vlan_tag_remove(struct mbuf *); -static void vtnet_add_statistics(struct vtnet_softc *); - -static int vtnet_enable_rx_intr(struct vtnet_softc *); -static int vtnet_enable_tx_intr(struct vtnet_softc *); -static void vtnet_disable_rx_intr(struct vtnet_softc *); -static void vtnet_disable_tx_intr(struct vtnet_softc *); +static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *, + struct sysctl_oid_list *, struct vtnet_rxq *); +static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *, + struct sysctl_oid_list *, struct vtnet_txq *); +static void vtnet_setup_queue_sysctl(struct vtnet_softc *); +static void vtnet_setup_sysctl(struct vtnet_softc *); + +static int vtnet_rxq_enable_intr(struct vtnet_rxq *); +static void vtnet_rxq_disable_intr(struct vtnet_rxq *); +static int vtnet_txq_enable_intr(struct vtnet_txq *); +static void vtnet_txq_disable_intr(struct vtnet_txq *); +static void vtnet_enable_rx_interrupts(struct vtnet_softc *); +static void vtnet_enable_tx_interrupts(struct vtnet_softc *); +static void vtnet_enable_interrupts(struct vtnet_softc *); +static void 
vtnet_disable_rx_interrupts(struct vtnet_softc *); +static void vtnet_disable_tx_interrupts(struct vtnet_softc *); +static void vtnet_disable_interrupts(struct vtnet_softc *); + +static int vtnet_tunable_int(struct vtnet_softc *, const char *, int); /* Tunables. */ static int vtnet_csum_disable = 0; @@ -171,16 +231,25 @@ static int vtnet_tso_disable = 0; TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable); static int vtnet_lro_disable = 0; TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable); +static int vtnet_mq_disable = 0; +TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable); +static int vtnet_mq_max_pairs = 0; +TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs); +static int vtnet_rx_process_limit = 512; +TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit); /* - * Reducing the number of transmit completed interrupts can - * improve performance. To do so, the define below keeps the - * Tx vq interrupt disabled and adds calls to vtnet_txeof() - * in the start and watchdog paths. The price to pay for this - * is the m_free'ing of transmitted mbufs may be delayed until - * the watchdog fires. + * Reducing the number of transmit completed interrupts can improve + * performance. To do so, the define below keeps the Tx vq interrupt + * disabled and adds calls to vtnet_txeof() in the start and watchdog + * paths. The price to pay for this is the m_free'ing of transmitted + * mbufs may be delayed until the watchdog fires. + * + * BMV: Reintroduce this later as a run-time option, if it makes + * sense after the EVENT_IDX feature is supported. 
+ * + * #define VTNET_TX_INTR_MODERATION */ -#define VTNET_TX_INTR_MODERATION static uma_zone_t vtnet_tx_header_zone; @@ -203,21 +272,25 @@ static struct virtio_feature_desc vtnet_feature_desc[] = { { VIRTIO_NET_F_CTRL_RX, "RxMode" }, { VIRTIO_NET_F_CTRL_VLAN, "VLanFilter" }, { VIRTIO_NET_F_CTRL_RX_EXTRA, "RxModeExtra" }, + { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" }, + { VIRTIO_NET_F_MQ, "Multiqueue" }, + { VIRTIO_NET_F_CTRL_MAC_ADDR, "SetMacAddress" }, { 0, NULL } }; static device_method_t vtnet_methods[] = { /* Device methods. */ - DEVMETHOD(device_probe, vtnet_probe), - DEVMETHOD(device_attach, vtnet_attach), - DEVMETHOD(device_detach, vtnet_detach), - DEVMETHOD(device_suspend, vtnet_suspend), - DEVMETHOD(device_resume, vtnet_resume), - DEVMETHOD(device_shutdown, vtnet_shutdown), + DEVMETHOD(device_probe, vtnet_probe), + DEVMETHOD(device_attach, vtnet_attach), + DEVMETHOD(device_detach, vtnet_detach), + DEVMETHOD(device_suspend, vtnet_suspend), + DEVMETHOD(device_resume, vtnet_resume), + DEVMETHOD(device_shutdown, vtnet_shutdown), /* VirtIO methods. */ - DEVMETHOD(virtio_config_change, vtnet_config_change), + DEVMETHOD(virtio_attach_completed, vtnet_attach_completed), + DEVMETHOD(virtio_config_change, vtnet_config_change), DEVMETHOD_END }; @@ -282,56 +355,31 @@ static int vtnet_attach(device_t dev) { struct vtnet_softc *sc; - struct ifnet *ifp; - int tx_size, error; + int error; sc = device_get_softc(dev); sc->vtnet_dev = dev; - VTNET_LOCK_INIT(sc); - callout_init_mtx(&sc->vtnet_tick_ch, VTNET_MTX(sc), 0); - - ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd, - vtnet_ifmedia_sts); - ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL); - ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE); - - vtnet_add_statistics(sc); - + /* Register our feature descriptions. 
*/ virtio_set_feature_desc(dev, vtnet_feature_desc); - vtnet_negotiate_features(sc); - - if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) { - sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS; - sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); - } else - sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr); - - sc->vtnet_rx_mbuf_size = MCLBYTES; - sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc); - - if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) { - sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ; - if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX)) { - sc->vtnet_mac_filter = malloc( - sizeof(struct vtnet_mac_filter), M_DEVBUF, - M_NOWAIT | M_ZERO); - if (sc->vtnet_mac_filter == NULL) { - device_printf(dev, - "cannot allocate mac filter table\n"); - error = ENOMEM; - goto fail; - } + VTNET_CORE_LOCK_INIT(sc); + callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0); - sc->vtnet_flags |= VTNET_FLAG_CTRL_RX; - } + vtnet_setup_sysctl(sc); + vtnet_setup_features(sc); - if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN)) - sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER; + error = vtnet_alloc_rx_filters(sc); + if (error) { + device_printf(dev, "cannot allocate Rx filters\n"); + goto fail; } - vtnet_get_hwaddr(sc); + error = vtnet_alloc_rxtx_queues(sc); + if (error) { + device_printf(dev, "cannot allocate queues\n"); + goto fail; + } error = vtnet_alloc_virtqueues(sc); if (error) { @@ -339,111 +387,21 @@ vtnet_attach(device_t dev) goto fail; } - ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER); - if (ifp == NULL) { - device_printf(dev, "cannot allocate ifnet structure\n"); - error = ENOSPC; + error = vtnet_setup_interface(sc); + if (error) { + device_printf(dev, "cannot setup interface\n"); goto fail; } - ifp->if_softc = sc; - if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_init = vtnet_init; - ifp->if_start = vtnet_start; - ifp->if_ioctl = vtnet_ioctl; - - sc->vtnet_rx_size = 
virtqueue_size(sc->vtnet_rx_vq); - sc->vtnet_rx_process_limit = sc->vtnet_rx_size; - - tx_size = virtqueue_size(sc->vtnet_tx_vq); - sc->vtnet_tx_size = tx_size; - IFQ_SET_MAXLEN(&ifp->if_snd, tx_size - 1); - ifp->if_snd.ifq_drv_maxlen = tx_size - 1; - IFQ_SET_READY(&ifp->if_snd); - - ether_ifattach(ifp, sc->vtnet_hwaddr); - - if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) - ifp->if_capabilities |= IFCAP_LINKSTATE; - - /* Tell the upper layer(s) we support long frames. */ - ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); - ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU; - - if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) { - ifp->if_capabilities |= IFCAP_TXCSUM; - - if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4)) - ifp->if_capabilities |= IFCAP_TSO4; - if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) - ifp->if_capabilities |= IFCAP_TSO6; - if (ifp->if_capabilities & IFCAP_TSO) - ifp->if_capabilities |= IFCAP_VLAN_HWTSO; - - if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN)) - sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; - } - - if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) { - ifp->if_capabilities |= IFCAP_RXCSUM; - - if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) || - virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6)) - ifp->if_capabilities |= IFCAP_LRO; - } - - if (ifp->if_capabilities & IFCAP_HWCSUM) { - /* - * VirtIO does not support VLAN tagging, but we can fake - * it by inserting and removing the 802.1Q header during - * transmit and receive. We are then able to do checksum - * offloading of VLAN frames. - */ - ifp->if_capabilities |= - IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; - } - - ifp->if_capenable = ifp->if_capabilities; - - /* - * Capabilities after here are not enabled by default. 
- */ - - if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { - ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; - - sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, - vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST); - sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, - vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); - } - -#ifdef DEVICE_POLLING - ifp->if_capabilities |= IFCAP_POLLING; -#endif - error = virtio_setup_intr(dev, INTR_TYPE_NET); if (error) { device_printf(dev, "cannot setup virtqueue interrupts\n"); - ether_ifdetach(ifp); + /* BMV: This will crash if during boot! */ + ether_ifdetach(sc->vtnet_ifp); goto fail; } - /* - * Device defaults to promiscuous mode for backwards - * compatibility. Turn it off if possible. - */ - if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { - VTNET_LOCK(sc); - if (vtnet_set_promisc(sc, 0) != 0) { - ifp->if_flags |= IFF_PROMISC; - device_printf(dev, - "cannot disable promiscuous mode\n"); - } - VTNET_UNLOCK(sc); - } else - ifp->if_flags |= IFF_PROMISC; + vtnet_start_taskqueues(sc); fail: if (error) @@ -461,24 +419,19 @@ vtnet_detach(device_t dev) sc = device_get_softc(dev); ifp = sc->vtnet_ifp; - KASSERT(mtx_initialized(VTNET_MTX(sc)), - ("vtnet mutex not initialized")); - -#ifdef DEVICE_POLLING - if (ifp != NULL && ifp->if_capenable & IFCAP_POLLING) - ether_poll_deregister(ifp); -#endif - if (device_is_attached(dev)) { - VTNET_LOCK(sc); + VTNET_CORE_LOCK(sc); vtnet_stop(sc); - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); callout_drain(&sc->vtnet_tick_ch); + vtnet_drain_taskqueues(sc); ether_ifdetach(ifp); } + vtnet_free_taskqueues(sc); + if (sc->vtnet_vlan_attach != NULL) { EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach); sc->vtnet_vlan_attach = NULL; @@ -488,25 +441,20 @@ vtnet_detach(device_t dev) sc->vtnet_vlan_detach = NULL; } - if (sc->vtnet_mac_filter != NULL) { - free(sc->vtnet_mac_filter, M_DEVBUF); - sc->vtnet_mac_filter = NULL; - } + ifmedia_removeall(&sc->vtnet_media); if (ifp != NULL) { 
if_free(ifp); sc->vtnet_ifp = NULL; } - if (sc->vtnet_rx_vq != NULL) - vtnet_free_rx_mbufs(sc); - if (sc->vtnet_tx_vq != NULL) - vtnet_free_tx_mbufs(sc); + vtnet_free_rxtx_queues(sc); + vtnet_free_rx_filters(sc); + if (sc->vtnet_ctrl_vq != NULL) vtnet_free_ctrl_vq(sc); - ifmedia_removeall(&sc->vtnet_media); - VTNET_LOCK_DESTROY(sc); + VTNET_CORE_LOCK_DESTROY(sc); return (0); } @@ -518,10 +466,10 @@ vtnet_suspend(device_t dev) sc = device_get_softc(dev); - VTNET_LOCK(sc); + VTNET_CORE_LOCK(sc); vtnet_stop(sc); sc->vtnet_flags |= VTNET_FLAG_SUSPENDED; - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); return (0); } @@ -535,11 +483,11 @@ vtnet_resume(device_t dev) sc = device_get_softc(dev); ifp = sc->vtnet_ifp; - VTNET_LOCK(sc); + VTNET_CORE_LOCK(sc); if (ifp->if_flags & IFF_UP) vtnet_init_locked(sc); sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED; - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); return (0); } @@ -556,15 +504,26 @@ vtnet_shutdown(device_t dev) } static int +vtnet_attach_completed(device_t dev) +{ + + vtnet_attach_disable_promisc(device_get_softc(dev)); + + return (0); +} + +static int vtnet_config_change(device_t dev) { struct vtnet_softc *sc; sc = device_get_softc(dev); - VTNET_LOCK(sc); + VTNET_CORE_LOCK(sc); vtnet_update_link_status(sc); - VTNET_UNLOCK(sc); + if (sc->vtnet_link_active != 0) + vtnet_tx_start_all(sc); + VTNET_CORE_UNLOCK(sc); return (0); } @@ -578,188 +537,491 @@ vtnet_negotiate_features(struct vtnet_softc *sc) dev = sc->vtnet_dev; mask = 0; - if (vtnet_csum_disable) + /* + * TSO and LRO are only available when their corresponding checksum + * offload feature is also negotiated. 
+ */ + if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) { mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM; + mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES; + } + if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable)) + mask |= VTNET_TSO_FEATURES; + if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable)) + mask |= VTNET_LRO_FEATURES; + if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable)) + mask |= VIRTIO_NET_F_MQ; +#ifdef VTNET_LEGACY_TX + mask |= VIRTIO_NET_F_MQ; +#endif + + features = VTNET_FEATURES & ~mask; + sc->vtnet_features = virtio_negotiate_features(dev, features); + + if (virtio_with_feature(dev, VTNET_LRO_FEATURES) == 0) + return; + if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) + return; /* - * TSO and LRO are only available when their corresponding - * checksum offload feature is also negotiated. + * LRO without mergeable buffers requires special care. This is not + * ideal because every receive buffer must be large enough to hold + * the maximum TCP packet, the Ethernet header, and the header. This + * requires up to 34 descriptors with MCLBYTES clusters. If we do + * not have indirect descriptors, LRO is disabled since the virtqueue + * will not contain very many receive buffers. 
*/ + if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0) { + device_printf(dev, + "LRO disabled due to both mergeable buffers and indirect " + "descriptors not negotiated\n"); - if (vtnet_csum_disable || vtnet_tso_disable) - mask |= VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 | - VIRTIO_NET_F_HOST_ECN; + features &= ~VTNET_LRO_FEATURES; + sc->vtnet_features = virtio_negotiate_features(dev, features); + } else + sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG; +} - if (vtnet_csum_disable || vtnet_lro_disable) - mask |= VTNET_LRO_FEATURES; +static void +vtnet_setup_features(struct vtnet_softc *sc) +{ + device_t dev; + int max_pairs, max; - features = VTNET_FEATURES & ~mask; -#ifdef VTNET_TX_INTR_MODERATION - features |= VIRTIO_F_NOTIFY_ON_EMPTY; -#endif - sc->vtnet_features = virtio_negotiate_features(dev, features); + dev = sc->vtnet_dev; + + vtnet_negotiate_features(sc); + + if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) { + /* This feature should always be negotiated. */ + sc->vtnet_flags |= VTNET_FLAG_MAC; + } + + if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) { + sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS; + sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); + } else + sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr); + + if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) { + sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ; - if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0 && - virtio_with_feature(dev, VTNET_LRO_FEATURES)) { + if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX)) + sc->vtnet_flags |= VTNET_FLAG_CTRL_RX; + if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN)) + sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER; + if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR)) + sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC; + } + + if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) && + sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { + max_pairs = virtio_read_dev_config_2(dev, + offsetof(struct virtio_net_config, max_virtqueue_pairs)); + if (max_pairs < 
VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || + max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) + max_pairs = 1; + } else + max_pairs = 1; + + if (max_pairs > 1) { /* - * LRO without mergeable buffers requires special care. This - * is not ideal because every receive buffer must be large - * enough to hold the maximum TCP packet, the Ethernet header, - * and the vtnet_rx_header. This requires up to 34 descriptors - * when using MCLBYTES clusters. If we do not have indirect - * descriptors, LRO is disabled since the virtqueue will not - * be able to contain very many receive buffers. + * Limit the maximum number of queue pairs to the number of + * CPUs or the configured maximum. The actual number of + * queues that get used may be less. */ - if (virtio_with_feature(dev, - VIRTIO_RING_F_INDIRECT_DESC) == 0) { - device_printf(dev, - "LRO disabled due to lack of both mergeable " - "buffers and indirect descriptors\n"); - - sc->vtnet_features = virtio_negotiate_features(dev, - features & ~VTNET_LRO_FEATURES); - } else - sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG; + max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); + if (max > 0 && max_pairs > max) + max_pairs = max; + if (max_pairs > mp_ncpus) + max_pairs = mp_ncpus; + if (max_pairs > VTNET_MAX_QUEUE_PAIRS) + max_pairs = VTNET_MAX_QUEUE_PAIRS; + if (max_pairs > 1) + sc->vtnet_flags |= VTNET_FLAG_MULTIQ; } + + sc->vtnet_max_vq_pairs = max_pairs; } static int -vtnet_alloc_virtqueues(struct vtnet_softc *sc) +vtnet_init_rxq(struct vtnet_softc *sc, int id) { - device_t dev; - struct vq_alloc_info vq_info[3]; - int nvqs, rxsegs; + struct vtnet_rxq *rxq; - dev = sc->vtnet_dev; - nvqs = 2; + rxq = &sc->vtnet_rxqs[id]; - /* - * Indirect descriptors are not needed for the Rx - * virtqueue when mergeable buffers are negotiated. - * The header is placed inline with the data, not - * in a separate descriptor, and mbuf clusters are - * always physically contiguous. 
- */ - if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { - rxsegs = sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG ? - VTNET_MAX_RX_SEGS : VTNET_MIN_RX_SEGS; - } else - rxsegs = 0; + snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d", + device_get_nameunit(sc->vtnet_dev), id); + mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF); - VQ_ALLOC_INFO_INIT(&vq_info[0], rxsegs, - vtnet_rx_vq_intr, sc, &sc->vtnet_rx_vq, - "%s receive", device_get_nameunit(dev)); + rxq->vtnrx_sc = sc; + rxq->vtnrx_id = id; - VQ_ALLOC_INFO_INIT(&vq_info[1], VTNET_MAX_TX_SEGS, - vtnet_tx_vq_intr, sc, &sc->vtnet_tx_vq, - "%s transmit", device_get_nameunit(dev)); + TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq); + rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT, + taskqueue_thread_enqueue, &rxq->vtnrx_tq); - if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { - nvqs++; + return (rxq->vtnrx_tq == NULL ? ENOMEM : 0); +} + +static int +vtnet_init_txq(struct vtnet_softc *sc, int id) +{ + struct vtnet_txq *txq; + + txq = &sc->vtnet_txqs[id]; + + snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d", + device_get_nameunit(sc->vtnet_dev), id); + mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF); + + txq->vtntx_sc = sc; + txq->vtntx_id = id; + +#ifndef VTNET_LEGACY_TX + txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF, + M_NOWAIT, &txq->vtntx_mtx); + if (txq->vtntx_br == NULL) + return (ENOMEM); + + TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq); +#endif + TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq); + txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT, + taskqueue_thread_enqueue, &txq->vtntx_tq); + if (txq->vtntx_tq == NULL) + return (ENOMEM); + + return (0); +} - VQ_ALLOC_INFO_INIT(&vq_info[2], 0, NULL, NULL, - &sc->vtnet_ctrl_vq, "%s control", - device_get_nameunit(dev)); +static int +vtnet_alloc_rxtx_queues(struct vtnet_softc *sc) +{ + int i, npairs, error; + + npairs = sc->vtnet_max_vq_pairs; + + 
sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF, + M_NOWAIT | M_ZERO); + sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF, + M_NOWAIT | M_ZERO); + if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL) + return (ENOMEM); + + for (i = 0; i < npairs; i++) { + error = vtnet_init_rxq(sc, i); + if (error) + return (error); + error = vtnet_init_txq(sc, i); + if (error) + return (error); } - return (virtio_alloc_virtqueues(dev, 0, nvqs, vq_info)); + vtnet_setup_queue_sysctl(sc); + + return (0); } static void -vtnet_get_hwaddr(struct vtnet_softc *sc) +vtnet_destroy_rxq(struct vtnet_rxq *rxq) { - device_t dev; - dev = sc->vtnet_dev; + rxq->vtnrx_sc = NULL; + rxq->vtnrx_id = -1; - if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) { - virtio_read_device_config(dev, - offsetof(struct virtio_net_config, mac), - sc->vtnet_hwaddr, ETHER_ADDR_LEN); - } else { - /* Generate random locally administered unicast address. */ - sc->vtnet_hwaddr[0] = 0xB2; - arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); + if (mtx_initialized(&rxq->vtnrx_mtx) != 0) + mtx_destroy(&rxq->vtnrx_mtx); +} - vtnet_set_hwaddr(sc); +static void +vtnet_destroy_txq(struct vtnet_txq *txq) +{ + + txq->vtntx_sc = NULL; + txq->vtntx_id = -1; + +#ifndef VTNET_LEGACY_TX + if (txq->vtntx_br != NULL) { + buf_ring_free(txq->vtntx_br, M_DEVBUF); + txq->vtntx_br = NULL; } +#endif + + if (mtx_initialized(&txq->vtntx_mtx) != 0) + mtx_destroy(&txq->vtntx_mtx); } static void -vtnet_set_hwaddr(struct vtnet_softc *sc) +vtnet_free_rxtx_queues(struct vtnet_softc *sc) { - device_t dev; + int i; - dev = sc->vtnet_dev; + if (sc->vtnet_rxqs != NULL) { + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) + vtnet_destroy_rxq(&sc->vtnet_rxqs[i]); + free(sc->vtnet_rxqs, M_DEVBUF); + sc->vtnet_rxqs = NULL; + } - virtio_write_device_config(dev, - offsetof(struct virtio_net_config, mac), - sc->vtnet_hwaddr, ETHER_ADDR_LEN); + if (sc->vtnet_txqs != NULL) { + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) + 
vtnet_destroy_txq(&sc->vtnet_txqs[i]); + free(sc->vtnet_txqs, M_DEVBUF); + sc->vtnet_txqs = NULL; + } } static int -vtnet_is_link_up(struct vtnet_softc *sc) +vtnet_alloc_rx_filters(struct vtnet_softc *sc) +{ + + if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { + sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter), + M_DEVBUF, M_NOWAIT | M_ZERO); + if (sc->vtnet_mac_filter == NULL) + return (ENOMEM); + } + + if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { + sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) * + VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO); + if (sc->vtnet_vlan_filter == NULL) + return (ENOMEM); + } + + return (0); +} + +static void +vtnet_free_rx_filters(struct vtnet_softc *sc) +{ + + if (sc->vtnet_mac_filter != NULL) { + free(sc->vtnet_mac_filter, M_DEVBUF); + sc->vtnet_mac_filter = NULL; + } + + if (sc->vtnet_vlan_filter != NULL) { + free(sc->vtnet_vlan_filter, M_DEVBUF); + sc->vtnet_vlan_filter = NULL; + } +} + +static int +vtnet_alloc_virtqueues(struct vtnet_softc *sc) { device_t dev; - struct ifnet *ifp; - uint16_t status; + struct vq_alloc_info *info; + struct vtnet_rxq *rxq; + struct vtnet_txq *txq; + int i, idx, flags, nvqs, rxsegs, error; dev = sc->vtnet_dev; - ifp = sc->vtnet_ifp; + flags = 0; + + /* + * Indirect descriptors are not needed for the Rx virtqueue when + * mergeable buffers are negotiated. The header is placed inline + * with the data, not in a separate descriptor, and mbuf clusters + * are always physically contiguous. 
+ */ + if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) + rxsegs = 0; + else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) + rxsegs = VTNET_MAX_RX_SEGS; + else + rxsegs = VTNET_MIN_RX_SEGS; - VTNET_LOCK_ASSERT(sc); + nvqs = sc->vtnet_max_vq_pairs * 2; + if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) + nvqs++; - if ((ifp->if_capenable & IFCAP_LINKSTATE) == 0) - return (1); + info = malloc(sizeof(struct vq_alloc_info) * nvqs , M_TEMP, M_NOWAIT); + if (info == NULL) + return (ENOMEM); - status = virtio_read_dev_config_2(dev, - offsetof(struct virtio_net_config, status)); + for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) { + rxq = &sc->vtnet_rxqs[i]; + VQ_ALLOC_INFO_INIT(&info[idx], rxsegs, + vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq, + "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id); - return ((status & VIRTIO_NET_S_LINK_UP) != 0); + txq = &sc->vtnet_txqs[i]; + VQ_ALLOC_INFO_INIT(&info[idx+1], VTNET_MAX_TX_SEGS, + vtnet_tx_vq_intr, txq, &txq->vtntx_vq, + "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id); + } + + if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { + VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL, + &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev)); + } + + /* + * Enable interrupt binding if this is multiqueue. This only matters + * when per-vq MSIX is available. 
+ */ + if (sc->vtnet_flags & VTNET_FLAG_MULTIQ) + flags |= 0; + + error = virtio_alloc_virtqueues(dev, flags, nvqs, info); + free(info, M_TEMP); + + return (error); } -static void -vtnet_update_link_status(struct vtnet_softc *sc) +static int +vtnet_setup_interface(struct vtnet_softc *sc) { + device_t dev; struct ifnet *ifp; - int link; + int limit; - ifp = sc->vtnet_ifp; + dev = sc->vtnet_dev; - link = vtnet_is_link_up(sc); + ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER); + if (ifp == NULL) { + device_printf(dev, "cannot allocate ifnet structure\n"); + return (ENOSPC); + } - if (link && ((sc->vtnet_flags & VTNET_FLAG_LINK) == 0)) { - sc->vtnet_flags |= VTNET_FLAG_LINK; - if_link_state_change(ifp, LINK_STATE_UP); - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - vtnet_start_locked(ifp); - } else if (!link && (sc->vtnet_flags & VTNET_FLAG_LINK)) { - sc->vtnet_flags &= ~VTNET_FLAG_LINK; - if_link_state_change(ifp, LINK_STATE_DOWN); + if_initname(ifp, device_get_name(dev), device_get_unit(dev)); + if_initbaudrate(ifp, IF_Gbps(10)); /* Approx. */ + ifp->if_softc = sc; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_init = vtnet_init; + ifp->if_ioctl = vtnet_ioctl; + +#ifndef VTNET_LEGACY_TX + ifp->if_transmit = vtnet_txq_mq_start; + ifp->if_qflush = vtnet_qflush; +#else + struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq; + ifp->if_start = vtnet_start; + IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1); + ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1; + IFQ_SET_READY(&ifp->if_snd); +#endif + + ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd, + vtnet_ifmedia_sts); + ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL); + ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE); + + /* Read (or generate) the MAC address for the adapter. 
*/ + vtnet_get_hwaddr(sc); + + ether_ifattach(ifp, sc->vtnet_hwaddr); + + if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) + ifp->if_capabilities |= IFCAP_LINKSTATE; + + /* Tell the upper layer(s) we support long frames. */ + ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); + ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU; + + if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) { + ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6; + + if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) { + ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6; + sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; + } else { + if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4)) + ifp->if_capabilities |= IFCAP_TSO4; + if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) + ifp->if_capabilities |= IFCAP_TSO6; + if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN)) + sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; + } + + if (ifp->if_capabilities & IFCAP_TSO) + ifp->if_capabilities |= IFCAP_VLAN_HWTSO; + } + + if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) + ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; + + if (ifp->if_capabilities & IFCAP_HWCSUM) { + /* + * VirtIO does not support VLAN tagging, but we can fake + * it by inserting and removing the 802.1Q header during + * transmit and receive. We are then able to do checksum + * offloading of VLAN frames. + */ + ifp->if_capabilities |= + IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; + } + + ifp->if_capenable = ifp->if_capabilities; + + /* + * Capabilities after here are not enabled by default. 
+ */ + + if (ifp->if_capabilities & IFCAP_RXCSUM) { + if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) || + virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6)) + ifp->if_capabilities |= IFCAP_LRO; + } + + if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { + ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; + + sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, + vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST); + sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, + vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); } + + limit = vtnet_tunable_int(sc, "rx_process_limit", + vtnet_rx_process_limit); + if (limit < 0) + limit = INT_MAX; + sc->vtnet_rx_process_limit = limit; + + return (0); } -static void -vtnet_watchdog(struct vtnet_softc *sc) +static int +vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu) { struct ifnet *ifp; + int frame_size, clsize; ifp = sc->vtnet_ifp; -#ifdef VTNET_TX_INTR_MODERATION - vtnet_txeof(sc); -#endif + if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU) + return (EINVAL); - if (sc->vtnet_watchdog_timer == 0 || --sc->vtnet_watchdog_timer) - return; + frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) + + new_mtu; - if_printf(ifp, "watchdog timeout -- resetting\n"); -#ifdef VTNET_DEBUG - virtqueue_dump(sc->vtnet_tx_vq); -#endif - ifp->if_oerrors++; - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - vtnet_init_locked(sc); + /* + * Based on the new MTU (and hence frame size) determine which + * cluster size is most appropriate for the receive queues. + */ + if (frame_size <= MCLBYTES) { + clsize = MCLBYTES; + } else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { + /* Avoid going past 9K jumbos. 
*/ + if (frame_size > MJUM9BYTES) + return (EINVAL); + clsize = MJUM9BYTES; + } else + clsize = MJUMPAGESIZE; + + ifp->if_mtu = new_mtu; + sc->vtnet_rx_new_clsize = clsize; + + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + vtnet_init_locked(sc); + } + + return (0); } static int @@ -771,22 +1033,19 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) sc = ifp->if_softc; ifr = (struct ifreq *) data; - reinit = 0; error = 0; switch (cmd) { case SIOCSIFMTU: - if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VTNET_MAX_MTU) - error = EINVAL; - else if (ifp->if_mtu != ifr->ifr_mtu) { - VTNET_LOCK(sc); + if (ifp->if_mtu != ifr->ifr_mtu) { + VTNET_CORE_LOCK(sc); error = vtnet_change_mtu(sc, ifr->ifr_mtu); - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); } break; case SIOCSIFFLAGS: - VTNET_LOCK(sc); + VTNET_CORE_LOCK(sc); if ((ifp->if_flags & IFF_UP) == 0) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) vtnet_stop(sc); @@ -803,16 +1062,17 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) if (error == 0) sc->vtnet_if_flags = ifp->if_flags; - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: - VTNET_LOCK(sc); - if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) && - (ifp->if_drv_flags & IFF_DRV_RUNNING)) + if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) + break; + VTNET_CORE_LOCK(sc); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) vtnet_rx_filter_mac(sc); - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); break; case SIOCSIFMEDIA: @@ -821,68 +1081,36 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; case SIOCSIFCAP: + VTNET_CORE_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; -#ifdef DEVICE_POLLING - if (mask & IFCAP_POLLING) { - if (ifr->ifr_reqcap & IFCAP_POLLING) { - error = ether_poll_register(vtnet_poll, ifp); - if (error) - break; - - VTNET_LOCK(sc); - vtnet_disable_rx_intr(sc); - vtnet_disable_tx_intr(sc); - ifp->if_capenable |= IFCAP_POLLING; - VTNET_UNLOCK(sc); - } else { - error 
= ether_poll_deregister(ifp); - - /* Enable interrupts even in error case. */ - VTNET_LOCK(sc); - vtnet_enable_tx_intr(sc); - vtnet_enable_rx_intr(sc); - ifp->if_capenable &= ~IFCAP_POLLING; - VTNET_UNLOCK(sc); - } - } -#endif - VTNET_LOCK(sc); - - if (mask & IFCAP_TXCSUM) { + if (mask & IFCAP_TXCSUM) ifp->if_capenable ^= IFCAP_TXCSUM; - if (ifp->if_capenable & IFCAP_TXCSUM) - ifp->if_hwassist |= VTNET_CSUM_OFFLOAD; - else - ifp->if_hwassist &= ~VTNET_CSUM_OFFLOAD; - } - - if (mask & IFCAP_TSO4) { + if (mask & IFCAP_TXCSUM_IPV6) + ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; + if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; - if (ifp->if_capenable & IFCAP_TSO4) - ifp->if_hwassist |= CSUM_TSO; - else - ifp->if_hwassist &= ~CSUM_TSO; - } - - if (mask & IFCAP_RXCSUM) { - ifp->if_capenable ^= IFCAP_RXCSUM; - reinit = 1; - } + if (mask & IFCAP_TSO6) + ifp->if_capenable ^= IFCAP_TSO6; - if (mask & IFCAP_LRO) { - ifp->if_capenable ^= IFCAP_LRO; + if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO | + IFCAP_VLAN_HWFILTER)) { + /* These Rx features require us to renegotiate. 
*/ reinit = 1; - } - if (mask & IFCAP_VLAN_HWFILTER) { - ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; - reinit = 1; - } + if (mask & IFCAP_RXCSUM) + ifp->if_capenable ^= IFCAP_RXCSUM; + if (mask & IFCAP_RXCSUM_IPV6) + ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; + if (mask & IFCAP_LRO) + ifp->if_capenable ^= IFCAP_LRO; + if (mask & IFCAP_VLAN_HWFILTER) + ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; + } else + reinit = 0; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; - if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; @@ -890,9 +1118,10 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vtnet_init_locked(sc); } + + VTNET_CORE_UNLOCK(sc); VLAN_CAPABILITIES(ifp); - VTNET_UNLOCK(sc); break; default: @@ -900,80 +1129,32 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; } - VTNET_LOCK_ASSERT_NOTOWNED(sc); + VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc); return (error); } static int -vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu) -{ - struct ifnet *ifp; - int new_frame_size, clsize; - - ifp = sc->vtnet_ifp; - - if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { - new_frame_size = sizeof(struct vtnet_rx_header) + - sizeof(struct ether_vlan_header) + new_mtu; - - if (new_frame_size > MJUM9BYTES) - return (EINVAL); - - if (new_frame_size <= MCLBYTES) - clsize = MCLBYTES; - else - clsize = MJUM9BYTES; - } else { - new_frame_size = sizeof(struct virtio_net_hdr_mrg_rxbuf) + - sizeof(struct ether_vlan_header) + new_mtu; - - if (new_frame_size <= MCLBYTES) - clsize = MCLBYTES; - else - clsize = MJUMPAGESIZE; - } - - sc->vtnet_rx_mbuf_size = clsize; - sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc); - KASSERT(sc->vtnet_rx_mbuf_count < VTNET_MAX_RX_SEGS, - ("too many rx mbufs: %d", sc->vtnet_rx_mbuf_count)); - - ifp->if_mtu = new_mtu; - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - vtnet_init_locked(sc); - } - - return (0); -} - -static 
int -vtnet_init_rx_vq(struct vtnet_softc *sc) +vtnet_rxq_populate(struct vtnet_rxq *rxq) { struct virtqueue *vq; int nbufs, error; - vq = sc->vtnet_rx_vq; - nbufs = 0; + vq = rxq->vtnrx_vq; error = ENOSPC; - while (!virtqueue_full(vq)) { - if ((error = vtnet_newbuf(sc)) != 0) + for (nbufs = 0; !virtqueue_full(vq); nbufs++) { + error = vtnet_rxq_new_buf(rxq); + if (error) break; - nbufs++; } if (nbufs > 0) { virtqueue_notify(vq); - /* * EMSGSIZE signifies the virtqueue did not have enough * entries available to hold the last mbuf. This is not - * an error. We should not get ENOSPC since we check if - * the virtqueue is full before attempting to add a - * buffer. + * an error. */ if (error == EMSGSIZE) error = 0; @@ -983,86 +1164,32 @@ vtnet_init_rx_vq(struct vtnet_softc *sc) } static void -vtnet_free_rx_mbufs(struct vtnet_softc *sc) +vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq) { struct virtqueue *vq; struct mbuf *m; int last; - vq = sc->vtnet_rx_vq; + vq = rxq->vtnrx_vq; last = 0; while ((m = virtqueue_drain(vq, &last)) != NULL) m_freem(m); - KASSERT(virtqueue_empty(vq), ("mbufs remaining in Rx Vq")); -} - -static void -vtnet_free_tx_mbufs(struct vtnet_softc *sc) -{ - struct virtqueue *vq; - struct vtnet_tx_header *txhdr; - int last; - - vq = sc->vtnet_tx_vq; - last = 0; - - while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { - m_freem(txhdr->vth_mbuf); - uma_zfree(vtnet_tx_header_zone, txhdr); - } - - KASSERT(virtqueue_empty(vq), ("mbufs remaining in Tx Vq")); -} - -static void -vtnet_free_ctrl_vq(struct vtnet_softc *sc) -{ - - /* - * The control virtqueue is only polled, therefore - * it should already be empty. 
- */ - KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq), - ("Ctrl Vq not empty")); -} - -#ifdef DEVICE_POLLING -static int -vtnet_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) -{ - struct vtnet_softc *sc; - int rx_done; - - sc = ifp->if_softc; - rx_done = 0; - - VTNET_LOCK(sc); - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - if (cmd == POLL_AND_CHECK_STATUS) - vtnet_update_link_status(sc); - - if (virtqueue_nused(sc->vtnet_rx_vq) > 0) - vtnet_rxeof(sc, count, &rx_done); - - vtnet_txeof(sc); - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - vtnet_start_locked(ifp); - } - VTNET_UNLOCK(sc); - - return (rx_done); + KASSERT(virtqueue_empty(vq), + ("%s: mbufs remaining in rx queue %p", __func__, rxq)); } -#endif /* DEVICE_POLLING */ static struct mbuf * -vtnet_alloc_rxbuf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) +vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) { struct mbuf *m_head, *m_tail, *m; int i, clsize; - clsize = sc->vtnet_rx_mbuf_size; + clsize = sc->vtnet_rx_clsize; + + KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG, + ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs)); m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize); if (m_head == NULL) @@ -1071,19 +1198,15 @@ vtnet_alloc_rxbuf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) m_head->m_len = clsize; m_tail = m_head; - if (nbufs > 1) { - KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG, - ("chained Rx mbuf requested without LRO_NOMRG")); - - for (i = 1; i < nbufs; i++) { - m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize); - if (m == NULL) - goto fail; + /* Allocate the rest of the chain. 
*/ + for (i = 1; i < nbufs; i++) { + m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize); + if (m == NULL) + goto fail; - m->m_len = clsize; - m_tail->m_next = m; - m_tail = m; - } + m->m_len = clsize; + m_tail->m_next = m; + m_tail = m; } if (m_tailp != NULL) @@ -1098,43 +1221,48 @@ fail: return (NULL); } +/* + * Slow path for when LRO without mergeable buffers is negotiated. + */ static int -vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0) +vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0, + int len0) { + struct vtnet_softc *sc; struct mbuf *m, *m_prev; struct mbuf *m_new, *m_tail; int len, clsize, nreplace, error; - m = m0; - m_prev = NULL; - len = len0; + sc = rxq->vtnrx_sc; + clsize = sc->vtnet_rx_clsize; + m_prev = NULL; m_tail = NULL; - clsize = sc->vtnet_rx_mbuf_size; nreplace = 0; - KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || - m->m_next == NULL, ("chained Rx mbuf without LRO_NOMRG")); + m = m0; + len = len0; /* - * Since LRO_NOMRG mbuf chains are so large, we want to avoid - * allocating an entire chain for each received frame. When - * the received frame's length is less than that of the chain, - * the unused mbufs are reassigned to the new chain. + * Since these mbuf chains are so large, we avoid allocating an + * entire replacement chain if possible. When the received frame + * did not consume the entire chain, the unused mbufs are moved + * to the replacement chain. */ while (len > 0) { /* - * Something is seriously wrong if we received - * a frame larger than the mbuf chain. Drop it. + * Something is seriously wrong if we received a frame + * larger than the chain. Drop it. */ if (m == NULL) { sc->vtnet_stats.rx_frame_too_large++; return (EMSGSIZE); } + /* We always allocate the same cluster size. 
*/ KASSERT(m->m_len == clsize, - ("mbuf length not expected cluster size: %d", - m->m_len)); + ("%s: mbuf size %d is not the cluster size %d", + __func__, m->m_len, clsize)); m->m_len = MIN(m->m_len, len); len -= m->m_len; @@ -1144,27 +1272,26 @@ vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0) nreplace++; } - KASSERT(m_prev != NULL, ("m_prev == NULL")); - KASSERT(nreplace <= sc->vtnet_rx_mbuf_count, - ("too many replacement mbufs: %d/%d", nreplace, - sc->vtnet_rx_mbuf_count)); + KASSERT(nreplace <= sc->vtnet_rx_nmbufs, + ("%s: too many replacement mbufs %d max %d", __func__, nreplace, + sc->vtnet_rx_nmbufs)); - m_new = vtnet_alloc_rxbuf(sc, nreplace, &m_tail); + m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail); if (m_new == NULL) { m_prev->m_len = clsize; return (ENOBUFS); } /* - * Move unused mbufs, if any, from the original chain - * onto the end of the new chain. + * Move any unused mbufs from the received chain onto the end + * of the new chain. */ if (m_prev->m_next != NULL) { m_tail->m_next = m_prev->m_next; m_prev->m_next = NULL; } - error = vtnet_enqueue_rxbuf(sc, m_new); + error = vtnet_rxq_enqueue_buf(rxq, m_new); if (error) { /* * BAD! We could not enqueue the replacement mbuf chain. 
We @@ -1189,343 +1316,321 @@ vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0) } static int -vtnet_newbuf(struct vtnet_softc *sc) +vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len) { - struct mbuf *m; + struct vtnet_softc *sc; + struct mbuf *m_new; int error; - m = vtnet_alloc_rxbuf(sc, sc->vtnet_rx_mbuf_count, NULL); - if (m == NULL) - return (ENOBUFS); + sc = rxq->vtnrx_sc; - error = vtnet_enqueue_rxbuf(sc, m); - if (error) - m_freem(m); + KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, + ("%s: chained mbuf without LRO_NOMRG", __func__)); - return (error); -} - -static void -vtnet_discard_merged_rxbuf(struct vtnet_softc *sc, int nbufs) -{ - struct virtqueue *vq; - struct mbuf *m; - - vq = sc->vtnet_rx_vq; + if (m->m_next == NULL) { + /* Fast-path for the common case of just one mbuf. */ + if (m->m_len < len) + return (EINVAL); - while (--nbufs > 0) { - if ((m = virtqueue_dequeue(vq, NULL)) == NULL) - break; - vtnet_discard_rxbuf(sc, m); - } -} + m_new = vtnet_rx_alloc_buf(sc, 1, NULL); + if (m_new == NULL) + return (ENOBUFS); -static void -vtnet_discard_rxbuf(struct vtnet_softc *sc, struct mbuf *m) -{ - int error; + error = vtnet_rxq_enqueue_buf(rxq, m_new); + if (error) { + /* + * The new mbuf is suppose to be an identical + * copy of the one just dequeued so this is an + * unexpected error. + */ + m_freem(m_new); + sc->vtnet_stats.rx_enq_replacement_failed++; + } else + m->m_len = len; + } else + error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len); - /* - * Requeue the discarded mbuf. This should always be - * successful since it was just dequeued. 
- */ - error = vtnet_enqueue_rxbuf(sc, m); - KASSERT(error == 0, ("cannot requeue discarded mbuf")); + return (error); } static int -vtnet_enqueue_rxbuf(struct vtnet_softc *sc, struct mbuf *m) +vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m) { struct sglist sg; struct sglist_seg segs[VTNET_MAX_RX_SEGS]; + struct vtnet_softc *sc; struct vtnet_rx_header *rxhdr; - struct virtio_net_hdr *hdr; uint8_t *mdata; int offset, error; - VTNET_LOCK_ASSERT(sc); - KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || - m->m_next == NULL, ("chained Rx mbuf without LRO_NOMRG")); - - sglist_init(&sg, VTNET_MAX_RX_SEGS, segs); - + sc = rxq->vtnrx_sc; mdata = mtod(m, uint8_t *); - offset = 0; + VTNET_RXQ_LOCK_ASSERT(rxq); + KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, + ("%s: chained mbuf without LRO_NOMRG", __func__)); + KASSERT(m->m_len == sc->vtnet_rx_clsize, + ("%s: unexpected cluster size %d/%d", __func__, m->m_len, + sc->vtnet_rx_clsize)); + + sglist_init(&sg, VTNET_MAX_RX_SEGS, segs); if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { + MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr)); rxhdr = (struct vtnet_rx_header *) mdata; - hdr = &rxhdr->vrh_hdr; - offset += sizeof(struct vtnet_rx_header); - - error = sglist_append(&sg, hdr, sc->vtnet_hdr_size); - KASSERT(error == 0, ("cannot add header to sglist")); - } - - error = sglist_append(&sg, mdata + offset, m->m_len - offset); - if (error) - return (error); + sglist_append(&sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size); + offset = sizeof(struct vtnet_rx_header); + } else + offset = 0; + sglist_append(&sg, mdata + offset, m->m_len - offset); if (m->m_next != NULL) { error = sglist_append_mbuf(&sg, m->m_next); - if (error) - return (error); + MPASS(error == 0); } - return (virtqueue_enqueue(sc->vtnet_rx_vq, m, &sg, 0, sg.sg_nseg)); + error = virtqueue_enqueue(rxq->vtnrx_vq, m, &sg, 0, sg.sg_nseg); + + return (error); } -static void -vtnet_vlan_tag_remove(struct mbuf *m) +static int 
+vtnet_rxq_new_buf(struct vtnet_rxq *rxq) { - struct ether_vlan_header *evl; + struct vtnet_softc *sc; + struct mbuf *m; + int error; - evl = mtod(m, struct ether_vlan_header *); + sc = rxq->vtnrx_sc; - m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); - m->m_flags |= M_VLANTAG; + m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL); + if (m == NULL) + return (ENOBUFS); - /* Strip the 802.1Q header. */ - bcopy((char *) evl, (char *) evl + ETHER_VLAN_ENCAP_LEN, - ETHER_HDR_LEN - ETHER_TYPE_LEN); - m_adj(m, ETHER_VLAN_ENCAP_LEN); + error = vtnet_rxq_enqueue_buf(rxq, m); + if (error) + m_freem(m); + + return (error); } -#ifdef notyet +/* + * Use the checksum offset in the VirtIO header to set the + * correct CSUM_* flags. + */ static int -vtnet_rx_csum(struct vtnet_softc *sc, struct mbuf *m, - struct virtio_net_hdr *hdr) +vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m, + uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) { - struct ether_header *eh; - struct ether_vlan_header *evh; - struct ip *ip; - struct ip6_hdr *ip6; - struct udphdr *udp; - int ip_offset, csum_start, csum_offset, hlen; - uint16_t eth_type; - uint8_t ip_proto; - - /* - * Convert the VirtIO checksum interface to FreeBSD's interface. - * The host only provides us with the offset at which to start - * checksumming, and the offset from that to place the completed - * checksum. While this maps well with how Linux does checksums, - * for FreeBSD, we must parse the received packet in order to set - * the appropriate CSUM_* flags. - */ - - /* - * Every mbuf added to the receive virtqueue is always at least - * MCLBYTES big, so assume something is amiss if the first mbuf - * does not contain both the Ethernet and protocol headers. 
- */ - ip_offset = sizeof(struct ether_header); - if (m->m_len < ip_offset) - return (1); + struct vtnet_softc *sc; +#if defined(INET) || defined(INET6) + int offset = hdr->csum_start + hdr->csum_offset; +#endif - eh = mtod(m, struct ether_header *); - eth_type = ntohs(eh->ether_type); - if (eth_type == ETHERTYPE_VLAN) { - ip_offset = sizeof(struct ether_vlan_header); - if (m->m_len < ip_offset) - return (1); - evh = mtod(m, struct ether_vlan_header *); - eth_type = ntohs(evh->evl_proto); - } + sc = rxq->vtnrx_sc; + /* Only do a basic sanity check on the offset. */ switch (eth_type) { +#if defined(INET) case ETHERTYPE_IP: - if (m->m_len < ip_offset + sizeof(struct ip)) - return (1); - - ip = (struct ip *)(mtod(m, uint8_t *) + ip_offset); - /* Sanity check the IP header. */ - if (ip->ip_v != IPVERSION) - return (1); - hlen = ip->ip_hl << 2; - if (hlen < sizeof(struct ip)) - return (1); - if (ntohs(ip->ip_len) < hlen) + if (__predict_false(offset < ip_start + sizeof(struct ip))) return (1); - if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) - return (1); - - ip_proto = ip->ip_p; - csum_start = ip_offset + hlen; break; - +#endif +#if defined(INET6) case ETHERTYPE_IPV6: - if (m->m_len < ip_offset + sizeof(struct ip6_hdr)) + if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr))) return (1); - - /* - * XXX FreeBSD does not handle any IPv6 checksum offloading - * at the moment. - */ - - ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + ip_offset); - /* XXX Assume no extension headers are present. */ - ip_proto = ip6->ip6_nxt; - csum_start = ip_offset + sizeof(struct ip6_hdr); break; - +#endif default: sc->vtnet_stats.rx_csum_bad_ethtype++; return (1); } - /* Assume checksum begins right after the IP header. 
*/ - if (hdr->csum_start != csum_start) { - sc->vtnet_stats.rx_csum_bad_start++; - return (1); - } - - switch (ip_proto) { - case IPPROTO_TCP: - csum_offset = offsetof(struct tcphdr, th_sum); - break; - - case IPPROTO_UDP: - csum_offset = offsetof(struct udphdr, uh_sum); + /* + * Use the offset to determine the appropriate CSUM_* flags. This is + * a bit dirty, but we can get by with it since the checksum offsets + * happen to be different. We assume the host host does not do IPv4 + * header checksum offloading. + */ + switch (hdr->csum_offset) { + case offsetof(struct udphdr, uh_sum): + case offsetof(struct tcphdr, th_sum): + m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + m->m_pkthdr.csum_data = 0xFFFF; break; - - case IPPROTO_SCTP: - csum_offset = offsetof(struct sctphdr, checksum); + case offsetof(struct sctphdr, checksum): + m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; break; - default: - sc->vtnet_stats.rx_csum_bad_ipproto++; - return (1); - } - - if (hdr->csum_offset != csum_offset) { sc->vtnet_stats.rx_csum_bad_offset++; return (1); } - /* - * The IP header checksum is almost certainly valid but I'm - * uncertain if that is guaranteed. 
- * - * m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; - */ + return (0); +} - switch (ip_proto) { - case IPPROTO_UDP: - if (m->m_len < csum_start + sizeof(struct udphdr)) - return (1); +static int +vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m, + uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) +{ + struct vtnet_softc *sc; + int offset, proto; - udp = (struct udphdr *)(mtod(m, uint8_t *) + csum_start); - if (udp->uh_sum == 0) - return (0); + sc = rxq->vtnrx_sc; - /* FALLTHROUGH */ + switch (eth_type) { +#if defined(INET) + case ETHERTYPE_IP: { + struct ip *ip; + if (__predict_false(m->m_len < ip_start + sizeof(struct ip))) + return (1); + ip = (struct ip *)(m->m_data + ip_start); + proto = ip->ip_p; + offset = ip_start + (ip->ip_hl << 2); + break; + } +#endif +#if defined(INET6) + case ETHERTYPE_IPV6: + if (__predict_false(m->m_len < ip_start + + sizeof(struct ip6_hdr))) + return (1); + offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto); + if (__predict_false(offset < 0)) + return (1); + break; +#endif + default: + sc->vtnet_stats.rx_csum_bad_ethtype++; + return (1); + } + switch (proto) { case IPPROTO_TCP: + if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) + return (1); + m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + m->m_pkthdr.csum_data = 0xFFFF; + break; + case IPPROTO_UDP: + if (__predict_false(m->m_len < offset + sizeof(struct udphdr))) + return (1); m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; - case IPPROTO_SCTP: + if (__predict_false(m->m_len < offset + sizeof(struct sctphdr))) + return (1); m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; break; + default: + /* + * For the remaining protocols, FreeBSD does not support + * checksum offloading, so the checksum will be recomputed. 
+ */ +#if 0 + if_printf(sc->vtnet_ifp, "cksum offload of unsupported " + "protocol eth_type=%#x proto=%d csum_start=%d + "csum_offset=%d\n", __func__, eth_type, proto, + hdr->csum_start, hdr->csum_offset); +#endif + break; } - sc->vtnet_stats.rx_csum_offloaded++; - return (0); } -#endif /* - * Alternative method of doing receive checksum offloading. Rather - * than parsing the received frame down to the IP header, use the - * csum_offset to determine which CSUM_* flags are appropriate. We - * can get by with doing this only because the checksum offsets are - * unique for the things we care about. + * Set the appropriate CSUM_* flags. Unfortunately, the information + * provided is not directly useful to us. The VirtIO header gives the + * offset of the checksum, which is all Linux needs, but this is not + * how FreeBSD does things. We are forced to peek inside the packet + * a bit. + * + * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD + * could accept the offsets and let the stack figure it out. */ static int -vtnet_rx_csum(struct vtnet_softc *sc, struct mbuf *m, +vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct ether_header *eh; struct ether_vlan_header *evh; - struct udphdr *udp; - int csum_len; uint16_t eth_type; - - csum_len = hdr->csum_start + hdr->csum_offset; - - if (csum_len < sizeof(struct ether_header) + sizeof(struct ip)) - return (1); - if (m->m_len < csum_len) - return (1); + int offset, error; eh = mtod(m, struct ether_header *); eth_type = ntohs(eh->ether_type); if (eth_type == ETHERTYPE_VLAN) { + /* BMV: We should handle nested VLAN tags too. */ evh = mtod(m, struct ether_vlan_header *); eth_type = ntohs(evh->evl_proto); - } - - if (eth_type != ETHERTYPE_IP && eth_type != ETHERTYPE_IPV6) { - sc->vtnet_stats.rx_csum_bad_ethtype++; - return (1); - } - - /* Use the offset to determine the appropriate CSUM_* flags. 
*/ - switch (hdr->csum_offset) { - case offsetof(struct udphdr, uh_sum): - if (m->m_len < hdr->csum_start + sizeof(struct udphdr)) - return (1); - udp = (struct udphdr *)(mtod(m, uint8_t *) + hdr->csum_start); - if (udp->uh_sum == 0) - return (0); + offset = sizeof(struct ether_vlan_header); + } else + offset = sizeof(struct ether_header); - /* FALLTHROUGH */ + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) + error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr); + else + error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr); - case offsetof(struct tcphdr, th_sum): - m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; - m->m_pkthdr.csum_data = 0xFFFF; - break; + return (error); +} - case offsetof(struct sctphdr, checksum): - m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; - break; +static void +vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs) +{ + struct mbuf *m; - default: - sc->vtnet_stats.rx_csum_bad_offset++; - return (1); + while (--nbufs > 0) { + m = virtqueue_dequeue(rxq->vtnrx_vq, NULL); + if (m == NULL) + break; + vtnet_rxq_discard_buf(rxq, m); } +} - sc->vtnet_stats.rx_csum_offloaded++; +static void +vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m) +{ + int error; - return (0); + /* + * Requeue the discarded mbuf. This should always be successful + * since it was just dequeued. 
+ */ + error = vtnet_rxq_enqueue_buf(rxq, m); + KASSERT(error == 0, + ("%s: cannot requeue discarded mbuf %d", __func__, error)); } static int -vtnet_rxeof_merged(struct vtnet_softc *sc, struct mbuf *m_head, int nbufs) +vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs) { + struct vtnet_softc *sc; struct ifnet *ifp; struct virtqueue *vq; struct mbuf *m, *m_tail; int len; + sc = rxq->vtnrx_sc; + vq = rxq->vtnrx_vq; ifp = sc->vtnet_ifp; - vq = sc->vtnet_rx_vq; m_tail = m_head; while (--nbufs > 0) { m = virtqueue_dequeue(vq, &len); if (m == NULL) { - ifp->if_ierrors++; + rxq->vtnrx_stats.vrxs_ierrors++; goto fail; } - if (vtnet_newbuf(sc) != 0) { - ifp->if_iqdrops++; - vtnet_discard_rxbuf(sc, m); + if (vtnet_rxq_new_buf(rxq) != 0) { + rxq->vtnrx_stats.vrxs_iqdrops++; + vtnet_rxq_discard_buf(rxq, m); if (nbufs > 1) - vtnet_discard_merged_rxbuf(sc, nbufs); + vtnet_rxq_discard_merged_bufs(rxq, nbufs); goto fail; } @@ -1549,35 +1654,83 @@ fail: return (1); } +static void +vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m, + struct virtio_net_hdr *hdr) +{ + struct vtnet_softc *sc; + struct ifnet *ifp; + struct ether_header *eh; + + sc = rxq->vtnrx_sc; + ifp = sc->vtnet_ifp; + + if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { + eh = mtod(m, struct ether_header *); + if (eh->ether_type == htons(ETHERTYPE_VLAN)) { + vtnet_vlan_tag_remove(m); + /* + * With the 802.1Q header removed, update the + * checksum starting location accordingly. + */ + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) + hdr->csum_start -= ETHER_VLAN_ENCAP_LEN; + } + } + + m->m_pkthdr.flowid = rxq->vtnrx_id; + m->m_flags |= M_FLOWID; + + /* + * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum + * distinction that Linux does. Need to reevaluate if performing + * offloading for the NEEDS_CSUM case is really appropriate. 
+ */ + if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM | + VIRTIO_NET_HDR_F_DATA_VALID)) { + if (vtnet_rxq_csum(rxq, m, hdr) == 0) + rxq->vtnrx_stats.vrxs_csum++; + else + rxq->vtnrx_stats.vrxs_csum_failed++; + } + + rxq->vtnrx_stats.vrxs_ipackets++; + rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len; + + /* VTNET_RXQ_UNLOCK(rxq); */ + (*ifp->if_input)(ifp, m); + /* VTNET_RXQ_LOCK(rxq); */ +} + static int -vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp) +vtnet_rxq_eof(struct vtnet_rxq *rxq) { - struct virtio_net_hdr lhdr; + struct virtio_net_hdr lhdr, *hdr; + struct vtnet_softc *sc; struct ifnet *ifp; struct virtqueue *vq; struct mbuf *m; - struct ether_header *eh; - struct virtio_net_hdr *hdr; struct virtio_net_hdr_mrg_rxbuf *mhdr; - int len, deq, nbufs, adjsz, rx_npkts; + int len, deq, nbufs, adjsz, count; + sc = rxq->vtnrx_sc; + vq = rxq->vtnrx_vq; ifp = sc->vtnet_ifp; - vq = sc->vtnet_rx_vq; hdr = &lhdr; deq = 0; - rx_npkts = 0; + count = sc->vtnet_rx_process_limit; - VTNET_LOCK_ASSERT(sc); + VTNET_RXQ_LOCK_ASSERT(rxq); - while (--count >= 0) { + while (count-- > 0) { m = virtqueue_dequeue(vq, &len); if (m == NULL) break; deq++; if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) { - ifp->if_ierrors++; - vtnet_discard_rxbuf(sc, m); + rxq->vtnrx_stats.vrxs_ierrors++; + vtnet_rxq_discard_buf(rxq, m); continue; } @@ -1585,8 +1738,8 @@ vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp) nbufs = 1; adjsz = sizeof(struct vtnet_rx_header); /* - * Account for our pad between the header and - * the actual start of the frame. + * Account for our pad inserted between the header + * and the actual start of the frame. 
*/ len += VTNET_RX_HEADER_PAD; } else { @@ -1595,11 +1748,11 @@ vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp) adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf); } - if (vtnet_replace_rxbuf(sc, m, len) != 0) { - ifp->if_iqdrops++; - vtnet_discard_rxbuf(sc, m); + if (vtnet_rxq_replace_buf(rxq, m, len) != 0) { + rxq->vtnrx_stats.vrxs_iqdrops++; + vtnet_rxq_discard_buf(rxq, m); if (nbufs > 1) - vtnet_discard_merged_rxbuf(sc, nbufs); + vtnet_rxq_discard_merged_bufs(rxq, nbufs); continue; } @@ -1608,263 +1761,297 @@ vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp) m->m_pkthdr.csum_flags = 0; if (nbufs > 1) { - if (vtnet_rxeof_merged(sc, m, nbufs) != 0) + /* Dequeue the rest of chain. */ + if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0) continue; } - ifp->if_ipackets++; - /* * Save copy of header before we strip it. For both mergeable - * and non-mergeable, the VirtIO header is placed first in the - * mbuf's data. We no longer need num_buffers, so always use a - * virtio_net_hdr. + * and non-mergeable, the header is at the beginning of the + * mbuf data. We no longer need num_buffers, so always use a + * regular header. + * + * BMV: Is this memcpy() expensive? We know the mbuf data is + * still valid even after the m_adj(). */ memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr)); m_adj(m, adjsz); - if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { - eh = mtod(m, struct ether_header *); - if (eh->ether_type == htons(ETHERTYPE_VLAN)) { - vtnet_vlan_tag_remove(m); - - /* - * With the 802.1Q header removed, update the - * checksum starting location accordingly. 
- */ - if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) - hdr->csum_start -= - ETHER_VLAN_ENCAP_LEN; - } - } - - if (ifp->if_capenable & IFCAP_RXCSUM && - hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { - if (vtnet_rx_csum(sc, m, hdr) != 0) - sc->vtnet_stats.rx_csum_failed++; - } - - VTNET_UNLOCK(sc); - rx_npkts++; - (*ifp->if_input)(ifp, m); - VTNET_LOCK(sc); - - /* - * The interface may have been stopped while we were - * passing the packet up the network stack. - */ - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - break; + vtnet_rxq_input(rxq, m, hdr); } if (deq > 0) virtqueue_notify(vq); - if (rx_npktsp != NULL) - *rx_npktsp = rx_npkts; - return (count > 0 ? 0 : EAGAIN); } static void -vtnet_rx_vq_intr(void *xsc) +vtnet_rx_vq_intr(void *xrxq) { struct vtnet_softc *sc; + struct vtnet_rxq *rxq; struct ifnet *ifp; - int more; + int tries, more; - sc = xsc; + rxq = xrxq; + sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; + tries = 0; + + if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) { + /* + * Ignore this interrupt. Either this is a spurious interrupt + * or multiqueue without per-VQ MSIX so every queue needs to + * be polled (a brain dead configuration we could try harder + * to avoid). + */ + vtnet_rxq_disable_intr(rxq); + return; + } again: - VTNET_LOCK(sc); + VTNET_RXQ_LOCK(rxq); -#ifdef DEVICE_POLLING - if (ifp->if_capenable & IFCAP_POLLING) { - VTNET_UNLOCK(sc); + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { + VTNET_RXQ_UNLOCK(rxq); return; } -#endif + + more = vtnet_rxq_eof(rxq); + if (more || vtnet_rxq_enable_intr(rxq) != 0) { + if (!more) + vtnet_rxq_disable_intr(rxq); + /* + * This is an occasional condition or race (when !more), + * so retry a few times before scheduling the taskqueue. 
+ */ + rxq->vtnrx_stats.vrxs_rescheduled++; + VTNET_RXQ_UNLOCK(rxq); + if (tries++ < VTNET_INTR_DISABLE_RETRIES) + goto again; + taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); + } else + VTNET_RXQ_UNLOCK(rxq); +} + +static void +vtnet_rxq_tq_intr(void *xrxq, int pending) +{ + struct vtnet_softc *sc; + struct vtnet_rxq *rxq; + struct ifnet *ifp; + int more; + + rxq = xrxq; + sc = rxq->vtnrx_sc; + ifp = sc->vtnet_ifp; + + VTNET_RXQ_LOCK(rxq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { - vtnet_enable_rx_intr(sc); - VTNET_UNLOCK(sc); + VTNET_RXQ_UNLOCK(rxq); return; } - more = vtnet_rxeof(sc, sc->vtnet_rx_process_limit, NULL); - if (more || vtnet_enable_rx_intr(sc) != 0) { + more = vtnet_rxq_eof(rxq); + if (more || vtnet_rxq_enable_intr(rxq) != 0) { if (!more) - vtnet_disable_rx_intr(sc); - sc->vtnet_stats.rx_task_rescheduled++; - VTNET_UNLOCK(sc); - goto again; + vtnet_rxq_disable_intr(rxq); + rxq->vtnrx_stats.vrxs_rescheduled++; + taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); } - VTNET_UNLOCK(sc); + VTNET_RXQ_UNLOCK(rxq); } static void -vtnet_txeof(struct vtnet_softc *sc) +vtnet_txq_free_mbufs(struct vtnet_txq *txq) { struct virtqueue *vq; - struct ifnet *ifp; struct vtnet_tx_header *txhdr; - int deq; - - vq = sc->vtnet_tx_vq; - ifp = sc->vtnet_ifp; - deq = 0; + int last; - VTNET_LOCK_ASSERT(sc); + vq = txq->vtntx_vq; + last = 0; - while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { - deq++; - ifp->if_opackets++; + while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { m_freem(txhdr->vth_mbuf); uma_zfree(vtnet_tx_header_zone, txhdr); } - if (deq > 0) { - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - if (virtqueue_empty(vq)) - sc->vtnet_watchdog_timer = 0; - } + KASSERT(virtqueue_empty(vq), + ("%s: mbufs remaining in tx queue %p", __func__, txq)); } -static struct mbuf * -vtnet_tx_offload(struct vtnet_softc *sc, struct mbuf *m, - struct virtio_net_hdr *hdr) +/* + * BMV: Much of this can go away once we finally have offsets in + * the mbuf 
packet header. Bug andre@. + */ +static int +vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, + int *etype, int *proto, int *start) { - struct ifnet *ifp; - struct ether_header *eh; + struct vtnet_softc *sc; struct ether_vlan_header *evh; - struct ip *ip; - struct ip6_hdr *ip6; - struct tcphdr *tcp; - int ip_offset; - uint16_t eth_type, csum_start; - uint8_t ip_proto, gso_type; + int offset; - ifp = sc->vtnet_ifp; + sc = txq->vtntx_sc; - ip_offset = sizeof(struct ether_header); - if (m->m_len < ip_offset) { - if ((m = m_pullup(m, ip_offset)) == NULL) - return (NULL); + evh = mtod(m, struct ether_vlan_header *); + if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + /* BMV: We should handle nested VLAN tags too. */ + *etype = ntohs(evh->evl_proto); + offset = sizeof(struct ether_vlan_header); + } else { + *etype = ntohs(evh->evl_encap_proto); + offset = sizeof(struct ether_header); + } + + switch (*etype) { +#if defined(INET) + case ETHERTYPE_IP: { + struct ip *ip, iphdr; + if (__predict_false(m->m_len < offset + sizeof(struct ip))) { + m_copydata(m, offset, sizeof(struct ip), + (caddr_t) &iphdr); + ip = &iphdr; + } else + ip = (struct ip *)(m->m_data + offset); + *proto = ip->ip_p; + *start = offset + (ip->ip_hl << 2); + break; } - - eh = mtod(m, struct ether_header *); - eth_type = ntohs(eh->ether_type); - if (eth_type == ETHERTYPE_VLAN) { - ip_offset = sizeof(struct ether_vlan_header); - if (m->m_len < ip_offset) { - if ((m = m_pullup(m, ip_offset)) == NULL) - return (NULL); - } - evh = mtod(m, struct ether_vlan_header *); - eth_type = ntohs(evh->evl_proto); +#endif +#if defined(INET6) + case ETHERTYPE_IPV6: + *proto = -1; + *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); + /* Assert the network stack sent us a valid packet. 
*/ + KASSERT(*start > offset, + ("%s: mbuf %p start %d offset %d proto %d", __func__, m, + *start, offset, *proto)); + break; +#endif + default: + sc->vtnet_stats.tx_csum_bad_ethtype++; + return (EINVAL); } - switch (eth_type) { - case ETHERTYPE_IP: - if (m->m_len < ip_offset + sizeof(struct ip)) { - m = m_pullup(m, ip_offset + sizeof(struct ip)); - if (m == NULL) - return (NULL); - } + return (0); +} - ip = (struct ip *)(mtod(m, uint8_t *) + ip_offset); - ip_proto = ip->ip_p; - csum_start = ip_offset + (ip->ip_hl << 2); - gso_type = VIRTIO_NET_HDR_GSO_TCPV4; - break; +static int +vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type, + int offset, struct virtio_net_hdr *hdr) +{ + static struct timeval lastecn; + static int curecn; + struct vtnet_softc *sc; + struct tcphdr *tcp, tcphdr; - case ETHERTYPE_IPV6: - if (m->m_len < ip_offset + sizeof(struct ip6_hdr)) { - m = m_pullup(m, ip_offset + sizeof(struct ip6_hdr)); - if (m == NULL) - return (NULL); - } + sc = txq->vtntx_sc; + + if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) { + m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); + tcp = &tcphdr; + } else + tcp = (struct tcphdr *)(m->m_data + offset); + + hdr->hdr_len = offset + (tcp->th_off << 2); + hdr->gso_size = m->m_pkthdr.tso_segsz; + hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 : + VIRTIO_NET_HDR_GSO_TCPV6; - ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + ip_offset); + if (tcp->th_flags & TH_CWR) { /* - * XXX Assume no extension headers are present. Presently, - * this will always be true in the case of TSO, and FreeBSD - * does not perform checksum offloading of IPv6 yet. + * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD, + * ECN support is not on a per-interface basis, but globally via + * the net.inet.tcp.ecn.enable sysctl knob. The default is off. 
*/ - ip_proto = ip6->ip6_nxt; - csum_start = ip_offset + sizeof(struct ip6_hdr); - gso_type = VIRTIO_NET_HDR_GSO_TCPV6; - break; - - default: - return (m); + if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) { + if (ppsratecheck(&lastecn, &curecn, 1)) + if_printf(sc->vtnet_ifp, + "TSO with ECN not negotiated with host\n"); + return (ENOTSUP); + } + hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; } - if (m->m_pkthdr.csum_flags & VTNET_CSUM_OFFLOAD) { - hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; - hdr->csum_start = csum_start; - hdr->csum_offset = m->m_pkthdr.csum_data; + txq->vtntx_stats.vtxs_tso++; - sc->vtnet_stats.tx_csum_offloaded++; - } + return (0); +} - if (m->m_pkthdr.csum_flags & CSUM_TSO) { - if (ip_proto != IPPROTO_TCP) - return (m); +static struct mbuf * +vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m, + struct virtio_net_hdr *hdr) +{ + struct vtnet_softc *sc; + int flags, etype, csum_start, proto, error; - if (m->m_len < csum_start + sizeof(struct tcphdr)) { - m = m_pullup(m, csum_start + sizeof(struct tcphdr)); - if (m == NULL) - return (NULL); - } + sc = txq->vtntx_sc; + flags = m->m_pkthdr.csum_flags; - tcp = (struct tcphdr *)(mtod(m, uint8_t *) + csum_start); - hdr->gso_type = gso_type; - hdr->hdr_len = csum_start + (tcp->th_off << 2); - hdr->gso_size = m->m_pkthdr.tso_segsz; + error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start); + if (error) + goto drop; - if (tcp->th_flags & TH_CWR) { - /* - * Drop if we did not negotiate VIRTIO_NET_F_HOST_ECN. - * ECN support is only configurable globally with the - * net.inet.tcp.ecn.enable sysctl knob. - */ - if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) { - if_printf(ifp, "TSO with ECN not supported " - "by host\n"); - m_freem(m); - return (NULL); - } + if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) || + (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) { + /* + * We could compare the IP protocol vs the CSUM_ flag too, + * but that really should not be necessary. 
+ */ + hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; + hdr->csum_start = csum_start; + hdr->csum_offset = m->m_pkthdr.csum_data; + txq->vtntx_stats.vtxs_csum++; + } - hdr->flags |= VIRTIO_NET_HDR_GSO_ECN; + if (flags & CSUM_TSO) { + if (__predict_false(proto != IPPROTO_TCP)) { + /* Likely failed to correctly parse the mbuf. */ + sc->vtnet_stats.tx_tso_not_tcp++; + goto drop; } - sc->vtnet_stats.tx_tso_offloaded++; + KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM, + ("%s: mbuf %p TSO without checksum offload", __func__, m)); + + error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr); + if (error) + goto drop; } return (m); + +drop: + m_freem(m); + return (NULL); } static int -vtnet_enqueue_txbuf(struct vtnet_softc *sc, struct mbuf **m_head, +vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, struct vtnet_tx_header *txhdr) { struct sglist sg; struct sglist_seg segs[VTNET_MAX_TX_SEGS]; + struct vtnet_softc *sc; struct virtqueue *vq; struct mbuf *m; int collapsed, error; - vq = sc->vtnet_tx_vq; + vq = txq->vtntx_vq; + sc = txq->vtntx_sc; m = *m_head; collapsed = 0; sglist_init(&sg, VTNET_MAX_TX_SEGS, segs); error = sglist_append(&sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size); KASSERT(error == 0 && sg.sg_nseg == 1, - ("%s: cannot add header to sglist error %d", __func__, error)); + ("%s: error %d adding header to sglist", __func__, error)); again: error = sglist_append_mbuf(&sg, m); @@ -1878,12 +2065,14 @@ again: *m_head = m; collapsed = 1; + txq->vtntx_stats.vtxs_collapsed++; goto again; } txhdr->vth_mbuf = m; + error = virtqueue_enqueue(vq, txhdr, &sg, sg.sg_nseg, 0); - return (virtqueue_enqueue(vq, txhdr, &sg, sg.sg_nseg, 0)); + return (error); fail: m_freem(*m_head); @@ -1893,28 +2082,29 @@ fail: } static int -vtnet_encap(struct vtnet_softc *sc, struct mbuf **m_head) +vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head) { + struct vtnet_softc *sc; struct vtnet_tx_header *txhdr; struct virtio_net_hdr *hdr; struct mbuf *m; int error; + sc = 
txq->vtntx_sc; m = *m_head; M_ASSERTPKTHDR(m); txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO); if (txhdr == NULL) { - *m_head = NULL; m_freem(m); + *m_head = NULL; return (ENOMEM); } /* - * Always use the non-mergeable header to simplify things. When - * the mergeable feature is negotiated, the num_buffers field - * must be set to zero. We use vtnet_hdr_size later to enqueue - * the correct header size to the host. + * Always use the non-mergeable header, regardless if the feature + * was negotiated. For transmit, num_buffers is always zero. The + * vtnet_hdr_size is used to enqueue the correct header size. */ hdr = &txhdr->vth_uhdr.hdr; @@ -1927,72 +2117,55 @@ vtnet_encap(struct vtnet_softc *sc, struct mbuf **m_head) m->m_flags &= ~M_VLANTAG; } - if (m->m_pkthdr.csum_flags != 0) { - m = vtnet_tx_offload(sc, m, hdr); + if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) { + m = vtnet_txq_offload(txq, m, hdr); if ((*m_head = m) == NULL) { error = ENOBUFS; goto fail; } } - error = vtnet_enqueue_txbuf(sc, m_head, txhdr); + error = vtnet_txq_enqueue_buf(txq, m_head, txhdr); + if (error == 0) + return (0); + fail: - if (error) - uma_zfree(vtnet_tx_header_zone, txhdr); + uma_zfree(vtnet_tx_header_zone, txhdr); return (error); } -static void -vtnet_start(struct ifnet *ifp) -{ - struct vtnet_softc *sc; - - sc = ifp->if_softc; - - VTNET_LOCK(sc); - vtnet_start_locked(ifp); - VTNET_UNLOCK(sc); -} +#ifdef VTNET_LEGACY_TX static void -vtnet_start_locked(struct ifnet *ifp) +vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp) { struct vtnet_softc *sc; struct virtqueue *vq; struct mbuf *m0; int enq; - sc = ifp->if_softc; - vq = sc->vtnet_tx_vq; + sc = txq->vtntx_sc; + vq = txq->vtntx_vq; enq = 0; - VTNET_LOCK_ASSERT(sc); + VTNET_TXQ_LOCK_ASSERT(txq); - if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != - IFF_DRV_RUNNING || ((sc->vtnet_flags & VTNET_FLAG_LINK) == 0)) + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || + sc->vtnet_link_active == 
0) return; -#ifdef VTNET_TX_INTR_MODERATION - if (virtqueue_nused(vq) >= sc->vtnet_tx_size / 2) - vtnet_txeof(sc); -#endif - while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { - if (virtqueue_full(vq)) { - ifp->if_drv_flags |= IFF_DRV_OACTIVE; + if (virtqueue_full(vq)) break; - } IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) break; - if (vtnet_encap(sc, &m0) != 0) { - if (m0 == NULL) - break; - IFQ_DRV_PREPEND(&ifp->if_snd, m0); - ifp->if_drv_flags |= IFF_DRV_OACTIVE; + if (vtnet_txq_encap(txq, &m0) != 0) { + if (m0 != NULL) + IFQ_DRV_PREPEND(&ifp->if_snd, m0); break; } @@ -2002,65 +2175,529 @@ vtnet_start_locked(struct ifnet *ifp) if (enq > 0) { virtqueue_notify(vq); - sc->vtnet_watchdog_timer = VTNET_WATCHDOG_TIMEOUT; + txq->vtntx_watchdog = VTNET_TX_TIMEOUT; } } static void -vtnet_tick(void *xsc) +vtnet_start(struct ifnet *ifp) { struct vtnet_softc *sc; + struct vtnet_txq *txq; - sc = xsc; + sc = ifp->if_softc; + txq = &sc->vtnet_txqs[0]; - VTNET_LOCK_ASSERT(sc); -#ifdef VTNET_DEBUG - virtqueue_dump(sc->vtnet_rx_vq); - virtqueue_dump(sc->vtnet_tx_vq); -#endif + VTNET_TXQ_LOCK(txq); + vtnet_start_locked(txq, ifp); + VTNET_TXQ_UNLOCK(txq); +} - vtnet_watchdog(sc); - callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); +#else /* !VTNET_LEGACY_TX */ + +static int +vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m) +{ + struct vtnet_softc *sc; + struct virtqueue *vq; + struct buf_ring *br; + struct ifnet *ifp; + int enq, error; + + sc = txq->vtntx_sc; + vq = txq->vtntx_vq; + br = txq->vtntx_br; + ifp = sc->vtnet_ifp; + enq = 0; + error = 0; + + VTNET_TXQ_LOCK_ASSERT(txq); + + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || + sc->vtnet_link_active == 0) { + if (m != NULL) + error = drbr_enqueue(ifp, br, m); + return (error); + } + + if (m != NULL) { + error = drbr_enqueue(ifp, br, m); + if (error) + return (error); + } + + while ((m = drbr_peek(ifp, br)) != NULL) { + error = vtnet_txq_encap(txq, &m); + if (error) { + if (m != NULL) + drbr_putback(ifp, 
br, m); + else + drbr_advance(ifp, br); + break; + } + drbr_advance(ifp, br); + + enq++; + ETHER_BPF_MTAP(ifp, m); + } + + if (enq > 0) { + virtqueue_notify(vq); + txq->vtntx_watchdog = VTNET_TX_TIMEOUT; + } + + return (error); +} + +static int +vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m) +{ + struct vtnet_softc *sc; + struct vtnet_txq *txq; + int i, npairs, error; + + sc = ifp->if_softc; + npairs = sc->vtnet_act_vq_pairs; + + if (m->m_flags & M_FLOWID) + i = m->m_pkthdr.flowid % npairs; + else + i = curcpu % npairs; + + txq = &sc->vtnet_txqs[i]; + + if (VTNET_TXQ_TRYLOCK(txq) != 0) { + error = vtnet_txq_mq_start_locked(txq, m); + VTNET_TXQ_UNLOCK(txq); + } else { + error = drbr_enqueue(ifp, txq->vtntx_br, m); + taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask); + } + + return (error); } static void -vtnet_tx_vq_intr(void *xsc) +vtnet_txq_tq_deferred(void *xtxq, int pending) { struct vtnet_softc *sc; + struct vtnet_txq *txq; + + txq = xtxq; + sc = txq->vtntx_sc; + + VTNET_TXQ_LOCK(txq); + if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br)) + vtnet_txq_mq_start_locked(txq, NULL); + VTNET_TXQ_UNLOCK(txq); +} + +#endif /* VTNET_LEGACY_TX */ + +static void +vtnet_txq_tq_intr(void *xtxq, int pending) +{ + struct vtnet_softc *sc; + struct vtnet_txq *txq; struct ifnet *ifp; - sc = xsc; + txq = xtxq; + sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; -again: - VTNET_LOCK(sc); + VTNET_TXQ_LOCK(txq); -#ifdef DEVICE_POLLING - if (ifp->if_capenable & IFCAP_POLLING) { - VTNET_UNLOCK(sc); + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { + VTNET_TXQ_UNLOCK(txq); return; } + + vtnet_txq_eof(txq); + +#ifdef VTNET_LEGACY_TX + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + vtnet_start_locked(txq, ifp); +#else + if (!drbr_empty(ifp, txq->vtntx_br)) + vtnet_txq_mq_start_locked(txq, NULL); #endif + if (vtnet_txq_enable_intr(txq) != 0) { + vtnet_txq_disable_intr(txq); + txq->vtntx_stats.vtxs_rescheduled++; + taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); + } + + 
VTNET_TXQ_UNLOCK(txq); +} + +static void +vtnet_txq_eof(struct vtnet_txq *txq) +{ + struct virtqueue *vq; + struct vtnet_tx_header *txhdr; + struct mbuf *m; + + vq = txq->vtntx_vq; + VTNET_TXQ_LOCK_ASSERT(txq); + + while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { + m = txhdr->vth_mbuf; + + txq->vtntx_stats.vtxs_opackets++; + txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len; + if (m->m_flags & M_MCAST) + txq->vtntx_stats.vtxs_omcasts++; + + m_freem(m); + uma_zfree(vtnet_tx_header_zone, txhdr); + } + + if (virtqueue_empty(vq)) + txq->vtntx_watchdog = 0; +} + +static void +vtnet_tx_vq_intr(void *xtxq) +{ + struct vtnet_softc *sc; + struct vtnet_txq *txq; + struct ifnet *ifp; + int tries; + + txq = xtxq; + sc = txq->vtntx_sc; + ifp = sc->vtnet_ifp; + tries = 0; + + if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) { + /* + * Ignore this interrupt. Either this is a spurious interrupt + * or multiqueue without per-VQ MSIX so every queue needs to + * be polled (a brain dead configuration we could try harder + * to avoid). + */ + vtnet_txq_disable_intr(txq); + return; + } + +again: + VTNET_TXQ_LOCK(txq); + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { - vtnet_enable_tx_intr(sc); - VTNET_UNLOCK(sc); + VTNET_TXQ_UNLOCK(txq); return; } - vtnet_txeof(sc); + vtnet_txq_eof(txq); +#ifdef VTNET_LEGACY_TX if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - vtnet_start_locked(ifp); + vtnet_start_locked(txq, ifp); +#else + if (!drbr_empty(ifp, txq->vtntx_br)) + vtnet_txq_mq_start_locked(txq, NULL); +#endif - if (vtnet_enable_tx_intr(sc) != 0) { - vtnet_disable_tx_intr(sc); - sc->vtnet_stats.tx_task_rescheduled++; - VTNET_UNLOCK(sc); - goto again; + if (vtnet_txq_enable_intr(txq) != 0) { + vtnet_txq_disable_intr(txq); + /* + * This is an occasional race, so retry a few times + * before scheduling the taskqueue. 
+ */ + VTNET_TXQ_UNLOCK(txq); + if (tries++ < VTNET_INTR_DISABLE_RETRIES) + goto again; + txq->vtntx_stats.vtxs_rescheduled++; + taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); + } else + VTNET_TXQ_UNLOCK(txq); +} + +static void +vtnet_tx_start_all(struct vtnet_softc *sc) +{ + struct ifnet *ifp; + struct vtnet_txq *txq; + int i; + + ifp = sc->vtnet_ifp; + VTNET_CORE_LOCK_ASSERT(sc); + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { + txq = &sc->vtnet_txqs[i]; + + VTNET_TXQ_LOCK(txq); +#ifdef VTNET_LEGACY_TX + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + vtnet_start_locked(txq, ifp); +#else + if (!drbr_empty(ifp, txq->vtntx_br)) + vtnet_txq_mq_start_locked(txq, NULL); +#endif + VTNET_TXQ_UNLOCK(txq); + } +} + +#ifndef VTNET_LEGACY_TX +static void +vtnet_qflush(struct ifnet *ifp) +{ + struct vtnet_softc *sc; + struct vtnet_txq *txq; + struct mbuf *m; + int i; + + sc = ifp->if_softc; + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { + txq = &sc->vtnet_txqs[i]; + + VTNET_TXQ_LOCK(txq); + while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL) + m_freem(m); + VTNET_TXQ_UNLOCK(txq); + } + + if_qflush(ifp); +} +#endif + +static int +vtnet_watchdog(struct vtnet_txq *txq) +{ + struct vtnet_softc *sc; + + sc = txq->vtntx_sc; + + VTNET_TXQ_LOCK(txq); + if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) { + VTNET_TXQ_UNLOCK(txq); + return (0); + } + VTNET_TXQ_UNLOCK(txq); + + if_printf(sc->vtnet_ifp, "watchdog timeout on queue %d\n", + txq->vtntx_id); + return (1); +} + +static void +vtnet_rxq_accum_stats(struct vtnet_rxq *rxq, struct vtnet_rxq_stats *accum) +{ + struct vtnet_rxq_stats *st; + + st = &rxq->vtnrx_stats; + + accum->vrxs_ipackets += st->vrxs_ipackets; + accum->vrxs_ibytes += st->vrxs_ibytes; + accum->vrxs_iqdrops += st->vrxs_iqdrops; + accum->vrxs_csum += st->vrxs_csum; + accum->vrxs_csum_failed += st->vrxs_csum_failed; + accum->vrxs_rescheduled += st->vrxs_rescheduled; +} + +static void +vtnet_txq_accum_stats(struct vtnet_txq *txq, struct vtnet_txq_stats 
*accum) +{ + struct vtnet_txq_stats *st; + + st = &txq->vtntx_stats; + + accum->vtxs_opackets += st->vtxs_opackets; + accum->vtxs_obytes += st->vtxs_obytes; + accum->vtxs_csum += st->vtxs_csum; + accum->vtxs_tso += st->vtxs_tso; + accum->vtxs_collapsed += st->vtxs_collapsed; + accum->vtxs_rescheduled += st->vtxs_rescheduled; +} + +static void +vtnet_accumulate_stats(struct vtnet_softc *sc) +{ + struct ifnet *ifp; + struct vtnet_statistics *st; + struct vtnet_rxq_stats rxaccum; + struct vtnet_txq_stats txaccum; + int i; + + ifp = sc->vtnet_ifp; + st = &sc->vtnet_stats; + bzero(&rxaccum, sizeof(struct vtnet_rxq_stats)); + bzero(&txaccum, sizeof(struct vtnet_txq_stats)); + + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + vtnet_rxq_accum_stats(&sc->vtnet_rxqs[i], &rxaccum); + vtnet_txq_accum_stats(&sc->vtnet_txqs[i], &txaccum); + } + + st->rx_csum_offloaded = rxaccum.vrxs_csum; + st->rx_csum_failed = rxaccum.vrxs_csum_failed; + st->rx_task_rescheduled = rxaccum.vrxs_rescheduled; + st->tx_csum_offloaded = txaccum.vtxs_csum; + st->tx_tso_offloaded = txaccum.vtxs_tso; + st->tx_task_rescheduled = txaccum.vtxs_rescheduled; + + /* + * With the exception of if_ierrors, these ifnet statistics are + * only updated in the driver, so just set them to our accumulated + * values. if_ierrors is updated in ether_input() for malformed + * frames that we should have already discarded. 
+ */ + ifp->if_ipackets = rxaccum.vrxs_ipackets; + ifp->if_iqdrops = rxaccum.vrxs_iqdrops; + ifp->if_ierrors = rxaccum.vrxs_ierrors; + ifp->if_opackets = txaccum.vtxs_opackets; +#ifndef VTNET_LEGACY_TX + ifp->if_obytes = txaccum.vtxs_obytes; + ifp->if_omcasts = txaccum.vtxs_omcasts; +#endif +} + +static void +vtnet_tick(void *xsc) +{ + struct vtnet_softc *sc; + struct ifnet *ifp; + int i, timedout; + + sc = xsc; + ifp = sc->vtnet_ifp; + timedout = 0; + + VTNET_CORE_LOCK_ASSERT(sc); + vtnet_accumulate_stats(sc); + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) + timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]); + + if (timedout != 0) { + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + vtnet_init_locked(sc); + } else + callout_schedule(&sc->vtnet_tick_ch, hz); +} + +static void +vtnet_start_taskqueues(struct vtnet_softc *sc) +{ + device_t dev; + struct vtnet_rxq *rxq; + struct vtnet_txq *txq; + int i, error; + + dev = sc->vtnet_dev; + + /* + * Errors here are very difficult to recover from - we cannot + * easily fail because, if this is during boot, we will hang + * when freeing any successfully started taskqueues because + * the scheduler isn't up yet. + * + * Most drivers just ignore the return value - it only fails + * with ENOMEM so an error is not likely. 
+ */ + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + rxq = &sc->vtnet_rxqs[i]; + error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET, + "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id); + if (error) { + device_printf(dev, "failed to start rx taskq %d\n", + rxq->vtnrx_id); + } + + txq = &sc->vtnet_txqs[i]; + error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET, + "%s txq %d", device_get_nameunit(dev), txq->vtntx_id); + if (error) { + device_printf(dev, "failed to start tx taskq %d\n", + txq->vtntx_id); + } + } +} + +static void +vtnet_free_taskqueues(struct vtnet_softc *sc) +{ + struct vtnet_rxq *rxq; + struct vtnet_txq *txq; + int i; + + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + rxq = &sc->vtnet_rxqs[i]; + if (rxq->vtnrx_tq != NULL) { + taskqueue_free(rxq->vtnrx_tq); + rxq->vtnrx_vq = NULL; + } + + txq = &sc->vtnet_txqs[i]; + if (txq->vtntx_tq != NULL) { + taskqueue_free(txq->vtntx_tq); + txq->vtntx_tq = NULL; + } } +} - VTNET_UNLOCK(sc); +static void +vtnet_drain_taskqueues(struct vtnet_softc *sc) +{ + struct vtnet_rxq *rxq; + struct vtnet_txq *txq; + int i; + + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + rxq = &sc->vtnet_rxqs[i]; + if (rxq->vtnrx_tq != NULL) + taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); + + txq = &sc->vtnet_txqs[i]; + if (txq->vtntx_tq != NULL) { + taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask); +#ifndef VTNET_LEGACY_TX + taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask); +#endif + } + } +} + +static void +vtnet_drain_rxtx_queues(struct vtnet_softc *sc) +{ + struct vtnet_rxq *rxq; + struct vtnet_txq *txq; + int i; + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { + rxq = &sc->vtnet_rxqs[i]; + vtnet_rxq_free_mbufs(rxq); + + txq = &sc->vtnet_txqs[i]; + vtnet_txq_free_mbufs(txq); + } +} + +static void +vtnet_stop_rendezvous(struct vtnet_softc *sc) +{ + struct vtnet_rxq *rxq; + struct vtnet_txq *txq; + int i; + + /* + * Lock and unlock the per-queue mutex so we known the stop + * state is visible. 
Doing only the active queues should be + * sufficient, but it does not cost much extra to do all the + * queues. Note we hold the core mutex here too. + */ + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + rxq = &sc->vtnet_rxqs[i]; + VTNET_RXQ_LOCK(rxq); + VTNET_RXQ_UNLOCK(rxq); + + txq = &sc->vtnet_txqs[i]; + VTNET_TXQ_LOCK(txq); + VTNET_TXQ_UNLOCK(txq); + } } static void @@ -2072,46 +2709,60 @@ vtnet_stop(struct vtnet_softc *sc) dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; - VTNET_LOCK_ASSERT(sc); + VTNET_CORE_LOCK_ASSERT(sc); - sc->vtnet_watchdog_timer = 0; + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + sc->vtnet_link_active = 0; callout_stop(&sc->vtnet_tick_ch); - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); - vtnet_disable_rx_intr(sc); - vtnet_disable_tx_intr(sc); + /* Only advisory. */ + vtnet_disable_interrupts(sc); /* - * Stop the host VirtIO adapter. Note this will reset the host - * adapter's state back to the pre-initialized state, so in - * order to make the device usable again, we must drive it - * through virtio_reinit() and virtio_reinit_complete(). + * Stop the host adapter. This resets it to the pre-initialized + * state. It will not generate any interrupts until after it is + * reinitialized. */ virtio_stop(dev); + vtnet_stop_rendezvous(sc); - sc->vtnet_flags &= ~VTNET_FLAG_LINK; - - vtnet_free_rx_mbufs(sc); - vtnet_free_tx_mbufs(sc); + /* Free any mbufs left in the virtqueues. */ + vtnet_drain_rxtx_queues(sc); } static int -vtnet_reinit(struct vtnet_softc *sc) +vtnet_virtio_reinit(struct vtnet_softc *sc) { + device_t dev; struct ifnet *ifp; uint64_t features; + int mask, error; + dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; features = sc->vtnet_features; + mask = 0; +#if defined(INET) + mask |= IFCAP_RXCSUM; +#endif +#if defined (INET6) + mask |= IFCAP_RXCSUM_IPV6; +#endif + /* * Re-negotiate with the host, removing any disabled receive * features. Transmit features are disabled only on our side * via if_capenable and if_hwassist. 
*/ - if (ifp->if_capabilities & IFCAP_RXCSUM) { - if ((ifp->if_capenable & IFCAP_RXCSUM) == 0) + if (ifp->if_capabilities & mask) { + /* + * We require both IPv4 and IPv6 offloading to be enabled + * in order to negotiated it: VirtIO does not distinguish + * between the two. + */ + if ((ifp->if_capenable & mask) != mask) features &= ~VIRTIO_NET_F_GUEST_CSUM; } @@ -2125,86 +2776,205 @@ vtnet_reinit(struct vtnet_softc *sc) features &= ~VIRTIO_NET_F_CTRL_VLAN; } - return (virtio_reinit(sc->vtnet_dev, features)); + error = virtio_reinit(dev, features); + if (error) + device_printf(dev, "virtio reinit error %d\n", error); + + return (error); } static void -vtnet_init_locked(struct vtnet_softc *sc) +vtnet_init_rx_filters(struct vtnet_softc *sc) { - device_t dev; struct ifnet *ifp; - int error; - dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; - VTNET_LOCK_ASSERT(sc); + if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { + /* Restore promiscuous and all-multicast modes. */ + vtnet_rx_filter(sc); + /* Restore filtered MAC addresses. */ + vtnet_rx_filter_mac(sc); + } - if (ifp->if_drv_flags & IFF_DRV_RUNNING) + if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) + vtnet_rx_filter_vlan(sc); +} + +static int +vtnet_init_rx_queues(struct vtnet_softc *sc) +{ + device_t dev; + struct vtnet_rxq *rxq; + int i, clsize, error; + + dev = sc->vtnet_dev; + + /* + * Use the new cluster size if one has been set (via a MTU + * change). Otherwise, use the standard 2K clusters. + * + * BMV: It might make sense to use page sized clusters as + * the default (depending on the features negotiated). + */ + if (sc->vtnet_rx_new_clsize != 0) { + clsize = sc->vtnet_rx_new_clsize; + sc->vtnet_rx_new_clsize = 0; + } else + clsize = MCLBYTES; + + sc->vtnet_rx_clsize = clsize; + sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize); + + /* The first segment is reserved for the header. 
*/ + KASSERT(sc->vtnet_rx_nmbufs < VTNET_MAX_RX_SEGS, + ("%s: too many rx mbufs %d", __func__, sc->vtnet_rx_nmbufs)); + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { + rxq = &sc->vtnet_rxqs[i]; + + /* Hold the lock to satisfy asserts. */ + VTNET_RXQ_LOCK(rxq); + error = vtnet_rxq_populate(rxq); + VTNET_RXQ_UNLOCK(rxq); + + if (error) { + device_printf(dev, + "cannot allocate mbufs for Rx queue %d\n", i); + return (error); + } + } + + return (0); +} + +static int +vtnet_init_tx_queues(struct vtnet_softc *sc) +{ + struct vtnet_txq *txq; + int i; + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { + txq = &sc->vtnet_txqs[i]; + txq->vtntx_watchdog = 0; + } + + return (0); +} + +static int +vtnet_init_rxtx_queues(struct vtnet_softc *sc) +{ + int error; + + error = vtnet_init_rx_queues(sc); + if (error) + return (error); + + error = vtnet_init_tx_queues(sc); + if (error) + return (error); + + return (0); +} + +static void +vtnet_set_active_vq_pairs(struct vtnet_softc *sc) +{ + device_t dev; + int npairs; + + dev = sc->vtnet_dev; + + if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) { + MPASS(sc->vtnet_max_vq_pairs == 1); + sc->vtnet_act_vq_pairs = 1; return; + } - /* Stop host's adapter, cancel any pending I/O. */ - vtnet_stop(sc); + /* BMV: Just use the maximum configured for now. */ + npairs = sc->vtnet_max_vq_pairs; - /* Reinitialize the host device. */ - error = vtnet_reinit(sc); - if (error) { + if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { device_printf(dev, - "reinitialization failed, stopping device...\n"); - vtnet_stop(sc); - return; + "cannot set active queue pairs to %d\n", npairs); + npairs = 1; } - /* Update host with assigned MAC address. */ + sc->vtnet_act_vq_pairs = npairs; +} + +static int +vtnet_reinit(struct vtnet_softc *sc) +{ + device_t dev; + struct ifnet *ifp; + int error; + + dev = sc->vtnet_dev; + ifp = sc->vtnet_ifp; + + /* Use the current MAC address. 
*/ bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN); vtnet_set_hwaddr(sc); + vtnet_set_active_vq_pairs(sc); + ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= VTNET_CSUM_OFFLOAD; + if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) + ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_TSO; + if (ifp->if_capenable & IFCAP_TSO6) + ifp->if_hwassist |= CSUM_TSO; /* No CSUM_TSO_IPV6. */ - error = vtnet_init_rx_vq(sc); - if (error) { - device_printf(dev, - "cannot allocate mbufs for Rx virtqueue\n"); - vtnet_stop(sc); - return; - } + if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) + vtnet_init_rx_filters(sc); - if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { - if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { - /* Restore promiscuous and all-multicast modes. */ - vtnet_rx_filter(sc); + error = vtnet_init_rxtx_queues(sc); + if (error) + return (error); - /* Restore filtered MAC addresses. */ - vtnet_rx_filter_mac(sc); - } + vtnet_enable_interrupts(sc); + ifp->if_drv_flags |= IFF_DRV_RUNNING; - /* Restore VLAN filters. */ - if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) - vtnet_rx_filter_vlan(sc); - } + return (0); +} -#ifdef DEVICE_POLLING - if (ifp->if_capenable & IFCAP_POLLING) { - vtnet_disable_rx_intr(sc); - vtnet_disable_tx_intr(sc); - } else -#endif - { - vtnet_enable_rx_intr(sc); - vtnet_enable_tx_intr(sc); - } +static void +vtnet_init_locked(struct vtnet_softc *sc) +{ + device_t dev; + struct ifnet *ifp; - ifp->if_drv_flags |= IFF_DRV_RUNNING; - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + dev = sc->vtnet_dev; + ifp = sc->vtnet_ifp; + + VTNET_CORE_LOCK_ASSERT(sc); + + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + return; + + vtnet_stop(sc); + + /* Reinitialize with the host. 
*/ + if (vtnet_virtio_reinit(sc) != 0) + goto fail; + + if (vtnet_reinit(sc) != 0) + goto fail; virtio_reinit_complete(dev); vtnet_update_link_status(sc); callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); + + return; + +fail: + vtnet_stop(sc); } static void @@ -2214,9 +2984,24 @@ vtnet_init(void *xsc) sc = xsc; - VTNET_LOCK(sc); + VTNET_CORE_LOCK(sc); vtnet_init_locked(sc); - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); +} + +static void +vtnet_free_ctrl_vq(struct vtnet_softc *sc) +{ + struct virtqueue *vq; + + vq = sc->vtnet_ctrl_vq; + + /* + * The control virtqueue is only polled and therefore it should + * already be empty. + */ + KASSERT(virtqueue_empty(vq), + ("%s: ctrl vq %p not empty", __func__, vq)); } static void @@ -2224,87 +3009,117 @@ vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie, struct sglist *sg, int readable, int writable) { struct virtqueue *vq; - void *c; vq = sc->vtnet_ctrl_vq; - VTNET_LOCK_ASSERT(sc); + VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ, - ("no control virtqueue")); - KASSERT(virtqueue_empty(vq), - ("control command already enqueued")); + ("%s: CTRL_VQ feature not negotiated", __func__)); + if (!virtqueue_empty(vq)) + return; if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0) return; - virtqueue_notify(vq); - /* - * Poll until the command is complete. Previously, we would - * sleep until the control virtqueue interrupt handler woke - * us up, but dropping the VTNET_MTX leads to serialization - * difficulties. - * - * Furthermore, it appears QEMU/KVM only allocates three MSIX - * vectors. Two of those vectors are needed for the Rx and Tx - * virtqueues. We do not support sharing both a Vq and config - * changed notification on the same MSIX vector. + * Poll for the response, but the command is likely already + * done when we return from the notify. 
*/ - c = virtqueue_poll(vq, NULL); - KASSERT(c == cookie, ("unexpected control command response")); + virtqueue_notify(vq); + virtqueue_poll(vq, NULL); } -static void -vtnet_rx_filter(struct vtnet_softc *sc) +static int +vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) { - device_t dev; - struct ifnet *ifp; + struct virtio_net_ctrl_hdr hdr; + struct sglist_seg segs[3]; + struct sglist sg; + uint8_t ack; + int error; - dev = sc->vtnet_dev; - ifp = sc->vtnet_ifp; + hdr.class = VIRTIO_NET_CTRL_MAC; + hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; + ack = VIRTIO_NET_ERR; - VTNET_LOCK_ASSERT(sc); - KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, - ("CTRL_RX feature not negotiated")); + sglist_init(&sg, 3, segs); + error = 0; + error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); + error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN); + error |= sglist_append(&sg, &ack, sizeof(uint8_t)); + KASSERT(error == 0 && sg.sg_nseg == 3, + ("%s: error %d adding set MAC msg to sglist", __func__, error)); - if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) - device_printf(dev, "cannot %s promiscuous mode\n", - ifp->if_flags & IFF_PROMISC ? "enable" : "disable"); + vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); - if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) - device_printf(dev, "cannot %s all-multicast mode\n", - ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable"); + return (ack == VIRTIO_NET_OK ? 
0 : EIO); } static int -vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on) +vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs) { - struct virtio_net_ctrl_hdr hdr; struct sglist_seg segs[3]; struct sglist sg; - uint8_t onoff, ack; + struct { + struct virtio_net_ctrl_hdr hdr; + uint8_t pad1; + struct virtio_net_ctrl_mq mq; + uint8_t pad2; + uint8_t ack; + } s; int error; - if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) - return (ENOTSUP); + s.hdr.class = VIRTIO_NET_CTRL_MQ; + s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; + s.mq.virtqueue_pairs = npairs; + s.ack = VIRTIO_NET_ERR; + sglist_init(&sg, 3, segs); error = 0; + error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); + error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); + error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); + KASSERT(error == 0 && sg.sg_nseg == 3, + ("%s: error %d adding MQ message to sglist", __func__, error)); - hdr.class = VIRTIO_NET_CTRL_RX; - hdr.cmd = cmd; - onoff = !!on; - ack = VIRTIO_NET_ERR; + vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); + + return (s.ack == VIRTIO_NET_OK ? 
0 : EIO); +} + +static int +vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on) +{ + struct sglist_seg segs[3]; + struct sglist sg; + struct { + struct virtio_net_ctrl_hdr hdr; + uint8_t pad1; + uint8_t onoff; + uint8_t pad2; + uint8_t ack; + } s; + int error; + + KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, + ("%s: CTRL_RX feature not negotiated", __func__)); + + s.hdr.class = VIRTIO_NET_CTRL_RX; + s.hdr.cmd = cmd; + s.onoff = !!on; + s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); - error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); - error |= sglist_append(&sg, &onoff, sizeof(uint8_t)); - error |= sglist_append(&sg, &ack, sizeof(uint8_t)); + error = 0; + error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); + error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); + error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, - ("error adding Rx filter message to sglist")); + ("%s: error %d adding Rx message to sglist", __func__, error)); - vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); + vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); - return (ack == VIRTIO_NET_OK ? 0 : EIO); + return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int @@ -2321,6 +3136,48 @@ vtnet_set_allmulti(struct vtnet_softc *sc, int on) return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on)); } +/* + * The device defaults to promiscuous mode for backwards compatibility. + * Turn it off at attach time if possible. 
+ */ +static void +vtnet_attach_disable_promisc(struct vtnet_softc *sc) +{ + struct ifnet *ifp; + + ifp = sc->vtnet_ifp; + + VTNET_CORE_LOCK(sc); + if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) { + ifp->if_flags |= IFF_PROMISC; + } else if (vtnet_set_promisc(sc, 0) != 0) { + ifp->if_flags |= IFF_PROMISC; + device_printf(sc->vtnet_dev, + "cannot disable default promiscuous mode\n"); + } + VTNET_CORE_UNLOCK(sc); +} + +static void +vtnet_rx_filter(struct vtnet_softc *sc) +{ + device_t dev; + struct ifnet *ifp; + + dev = sc->vtnet_dev; + ifp = sc->vtnet_ifp; + + VTNET_CORE_LOCK_ASSERT(sc); + + if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) + device_printf(dev, "cannot %s promiscuous mode\n", + ifp->if_flags & IFF_PROMISC ? "enable" : "disable"); + + if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) + device_printf(dev, "cannot %s all-multicast mode\n", + ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable"); +} + static void vtnet_rx_filter_mac(struct vtnet_softc *sc) { @@ -2340,19 +3197,23 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) mcnt = 0; promisc = 0; allmulti = 0; - error = 0; - VTNET_LOCK_ASSERT(sc); + VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, - ("CTRL_RX feature not negotiated")); + ("%s: CTRL_RX feature not negotiated", __func__)); /* Unicast MAC addresses: */ if_addr_rlock(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; - else if (ucnt == VTNET_MAX_MAC_ENTRIES) + else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), + sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0) + continue; + else if (ucnt == VTNET_MAX_MAC_ENTRIES) { + promisc = 1; break; + } bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN); @@ -2360,10 +3221,8 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) } if_addr_runlock(ifp); - if (ucnt >= VTNET_MAX_MAC_ENTRIES) { - promisc = 1; + if (promisc != 0) { filter->vmf_unicast.nentries = 
0; - if_printf(ifp, "more than %d MAC addresses assigned, " "falling back to promiscuous mode\n", VTNET_MAX_MAC_ENTRIES); @@ -2375,8 +3234,10 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; - else if (mcnt == VTNET_MAX_MAC_ENTRIES) + else if (mcnt == VTNET_MAX_MAC_ENTRIES) { + allmulti = 1; break; + } bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN); @@ -2384,17 +3245,15 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) } if_maddr_runlock(ifp); - if (mcnt >= VTNET_MAX_MAC_ENTRIES) { - allmulti = 1; + if (allmulti != 0) { filter->vmf_multicast.nentries = 0; - if_printf(ifp, "more than %d multicast MAC addresses " "assigned, falling back to all-multicast mode\n", VTNET_MAX_MAC_ENTRIES); } else filter->vmf_multicast.nentries = mcnt; - if (promisc && allmulti) + if (promisc != 0 && allmulti != 0) goto out; hdr.class = VIRTIO_NET_CTRL_MAC; @@ -2402,6 +3261,7 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) ack = VIRTIO_NET_ERR; sglist_init(&sg, 4, segs); + error = 0; error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &filter->vmf_unicast, sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN); @@ -2409,7 +3269,7 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN); error |= sglist_append(&sg, &ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 4, - ("error adding MAC filtering message to sglist")); + ("%s: error %d adding MAC filter msg to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); @@ -2417,111 +3277,99 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) if_printf(ifp, "error setting host MAC filter table\n"); out: - if (promisc) - if (vtnet_set_promisc(sc, 1) != 0) - if_printf(ifp, "cannot enable promiscuous mode\n"); - if (allmulti) - if 
(vtnet_set_allmulti(sc, 1) != 0) - if_printf(ifp, "cannot enable all-multicast mode\n"); + if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0) + if_printf(ifp, "cannot enable promiscuous mode\n"); + if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0) + if_printf(ifp, "cannot enable all-multicast mode\n"); } static int vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { - struct virtio_net_ctrl_hdr hdr; struct sglist_seg segs[3]; struct sglist sg; - uint8_t ack; + struct { + struct virtio_net_ctrl_hdr hdr; + uint8_t pad1; + uint16_t tag; + uint8_t pad2; + uint8_t ack; + } s; int error; - hdr.class = VIRTIO_NET_CTRL_VLAN; - hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; - ack = VIRTIO_NET_ERR; - error = 0; + s.hdr.class = VIRTIO_NET_CTRL_VLAN; + s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; + s.tag = tag; + s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); - error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); - error |= sglist_append(&sg, &tag, sizeof(uint16_t)); - error |= sglist_append(&sg, &ack, sizeof(uint8_t)); + error = 0; + error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); + error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); + error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, - ("error adding VLAN control message to sglist")); + ("%s: error %d adding VLAN message to sglist", __func__, error)); - vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); + vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); - return (ack == VIRTIO_NET_OK ? 0 : EIO); + return (s.ack == VIRTIO_NET_OK ? 
0 : EIO); } static void vtnet_rx_filter_vlan(struct vtnet_softc *sc) { - device_t dev; - uint32_t w, mask; + uint32_t w; uint16_t tag; - int i, nvlans, error; + int i, bit; - VTNET_LOCK_ASSERT(sc); + VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER, - ("VLAN_FILTER feature not negotiated")); + ("%s: VLAN_FILTER feature not negotiated", __func__)); - dev = sc->vtnet_dev; - nvlans = sc->vtnet_nvlans; - error = 0; + /* Enable the filter for each configured VLAN. */ + for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) { + w = sc->vtnet_vlan_filter[i]; + + while ((bit = ffs(w) - 1) != -1) { + w &= ~(1 << bit); + tag = sizeof(w) * CHAR_BIT * i + bit; - /* Enable filtering for each configured VLAN. */ - for (i = 0; i < VTNET_VLAN_SHADOW_SIZE && nvlans > 0; i++) { - w = sc->vtnet_vlan_shadow[i]; - for (mask = 1, tag = i * 32; w != 0; mask <<= 1, tag++) { - if ((w & mask) != 0) { - w &= ~mask; - nvlans--; - if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) - error++; + if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) { + device_printf(sc->vtnet_dev, + "cannot enable VLAN %d filter\n", tag); } } } - - KASSERT(nvlans == 0, ("VLAN count incorrect")); - if (error) - device_printf(dev, "cannot restore VLAN filter table\n"); } static void -vtnet_set_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) +vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { struct ifnet *ifp; int idx, bit; - KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER, - ("VLAN_FILTER feature not negotiated")); - - if ((tag == 0) || (tag > 4095)) - return; - ifp = sc->vtnet_ifp; idx = (tag >> 5) & 0x7F; bit = tag & 0x1F; - VTNET_LOCK(sc); + if (tag == 0 || tag > 4095) + return; + + VTNET_CORE_LOCK(sc); - /* Update shadow VLAN table. 
*/ - if (add) { - sc->vtnet_nvlans++; - sc->vtnet_vlan_shadow[idx] |= (1 << bit); - } else { - sc->vtnet_nvlans--; - sc->vtnet_vlan_shadow[idx] &= ~(1 << bit); - } + if (add) + sc->vtnet_vlan_filter[idx] |= (1 << bit); + else + sc->vtnet_vlan_filter[idx] &= ~(1 << bit); - if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { - if (vtnet_exec_vlan_filter(sc, add, tag) != 0) { - device_printf(sc->vtnet_dev, - "cannot %s VLAN %d %s the host filter table\n", - add ? "add" : "remove", tag, - add ? "to" : "from"); - } + if (ifp->if_capenable & IFCAP_VLAN_HWFILTER && + vtnet_exec_vlan_filter(sc, add, tag) != 0) { + device_printf(sc->vtnet_dev, + "cannot %s VLAN %d %s the host filter table\n", + add ? "add" : "remove", tag, add ? "to" : "from"); } - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); } static void @@ -2531,7 +3379,7 @@ vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag) if (ifp->if_softc != arg) return; - vtnet_set_vlan_filter(arg, 1, tag); + vtnet_update_vlan_filter(arg, 1, tag); } static void @@ -2541,7 +3389,47 @@ vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag) if (ifp->if_softc != arg) return; - vtnet_set_vlan_filter(arg, 0, tag); + vtnet_update_vlan_filter(arg, 0, tag); +} + +static int +vtnet_is_link_up(struct vtnet_softc *sc) +{ + device_t dev; + struct ifnet *ifp; + uint16_t status; + + dev = sc->vtnet_dev; + ifp = sc->vtnet_ifp; + + if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0) + status = VIRTIO_NET_S_LINK_UP; + else + status = virtio_read_dev_config_2(dev, + offsetof(struct virtio_net_config, status)); + + return ((status & VIRTIO_NET_S_LINK_UP) != 0); +} + +static void +vtnet_update_link_status(struct vtnet_softc *sc) +{ + struct ifnet *ifp; + int link; + + ifp = sc->vtnet_ifp; + + VTNET_CORE_LOCK_ASSERT(sc); + link = vtnet_is_link_up(sc); + + /* Notify if the link status has changed. 
*/ + if (link != 0 && sc->vtnet_link_active == 0) { + sc->vtnet_link_active = 1; + if_link_state_change(ifp, LINK_STATE_UP); + } else if (link == 0 && sc->vtnet_link_active != 0) { + sc->vtnet_link_active = 0; + if_link_state_change(ifp, LINK_STATE_DOWN); + } } static int @@ -2569,112 +3457,334 @@ vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; - VTNET_LOCK(sc); + VTNET_CORE_LOCK(sc); if (vtnet_is_link_up(sc) != 0) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= VTNET_MEDIATYPE; } else ifmr->ifm_active |= IFM_NONE; - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); } static void -vtnet_add_statistics(struct vtnet_softc *sc) +vtnet_set_hwaddr(struct vtnet_softc *sc) { device_t dev; - struct vtnet_statistics *stats; - struct sysctl_ctx_list *ctx; + + dev = sc->vtnet_dev; + + if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) { + if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0) + device_printf(dev, "unable to set MAC address\n"); + } else if (sc->vtnet_flags & VTNET_FLAG_MAC) { + virtio_write_device_config(dev, + offsetof(struct virtio_net_config, mac), + sc->vtnet_hwaddr, ETHER_ADDR_LEN); + } +} + +static void +vtnet_get_hwaddr(struct vtnet_softc *sc) +{ + device_t dev; + + dev = sc->vtnet_dev; + + if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) { + /* + * Generate a random locally administered unicast address. + * + * It would be nice to generate the same MAC address across + * reboots, but it seems all the hosts currently available + * support the MAC feature, so this isn't too important. 
+ */ + sc->vtnet_hwaddr[0] = 0xB2; + arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); + vtnet_set_hwaddr(sc); + return; + } + + virtio_read_device_config(dev, offsetof(struct virtio_net_config, mac), + sc->vtnet_hwaddr, ETHER_ADDR_LEN); +} + +static void +vtnet_vlan_tag_remove(struct mbuf *m) +{ + struct ether_vlan_header *evh; + + evh = mtod(m, struct ether_vlan_header *); + m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag); + m->m_flags |= M_VLANTAG; + + /* Strip the 802.1Q header. */ + bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN, + ETHER_HDR_LEN - ETHER_TYPE_LEN); + m_adj(m, ETHER_VLAN_ENCAP_LEN); +} + +static void +vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, + struct sysctl_oid_list *child, struct vtnet_rxq *rxq) +{ + struct sysctl_oid *node; + struct sysctl_oid_list *list; + struct vtnet_rxq_stats *stats; + char namebuf[16]; + + snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id); + node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, + CTLFLAG_RD, NULL, "Receive Queue"); + list = SYSCTL_CHILDREN(node); + + stats = &rxq->vtnrx_stats; + + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD, + &stats->vrxs_ipackets, "Receive packets"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD, + &stats->vrxs_ibytes, "Receive bytes"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD, + &stats->vrxs_iqdrops, "Receive drops"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD, + &stats->vrxs_ierrors, "Receive errors"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, + &stats->vrxs_csum, "Receive checksum offloaded"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD, + &stats->vrxs_csum_failed, "Receive checksum offload failed"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, + &stats->vrxs_rescheduled, + "Receive interrupt handler rescheduled"); +} + +static void +vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx, + struct sysctl_oid_list *child, 
struct vtnet_txq *txq) +{ + struct sysctl_oid *node; + struct sysctl_oid_list *list; + struct vtnet_txq_stats *stats; + char namebuf[16]; + + snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id); + node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, + CTLFLAG_RD, NULL, "Transmit Queue"); + list = SYSCTL_CHILDREN(node); + + stats = &txq->vtntx_stats; + + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD, + &stats->vtxs_opackets, "Transmit packets"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD, + &stats->vtxs_obytes, "Transmit bytes"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD, + &stats->vtxs_omcasts, "Transmit multicasts"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, + &stats->vtxs_csum, "Transmit checksum offloaded"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, + &stats->vtxs_tso, "Transmit segmentation offloaded"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "collapsed", CTLFLAG_RD, + &stats->vtxs_collapsed, "Transmit mbufs collapsed"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, + &stats->vtxs_rescheduled, + "Transmit interrupt handler rescheduled"); +} + +static void +vtnet_setup_queue_sysctl(struct vtnet_softc *sc) +{ + device_t dev; + struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; + int i; dev = sc->vtnet_dev; - stats = &sc->vtnet_stats; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_failed", + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]); + vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]); + } +} + +static void +vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx, + struct sysctl_oid_list *child, struct vtnet_softc *sc) +{ + struct vtnet_statistics *stats; + + stats = &sc->vtnet_stats; + + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed", 
CTLFLAG_RD, &stats->mbuf_alloc_failed, "Mbuf cluster allocation failures"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_frame_too_large", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large", CTLFLAG_RD, &stats->rx_frame_too_large, "Received frame larger than the mbuf chain"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_enq_replacement_failed", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed", CTLFLAG_RD, &stats->rx_enq_replacement_failed, "Enqueuing the replacement receive mbuf failed"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_mergeable_failed", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed", CTLFLAG_RD, &stats->rx_mergeable_failed, "Mergeable buffers receive failures"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", CTLFLAG_RD, &stats->rx_csum_bad_ethtype, "Received checksum offloaded buffer with unsupported " "Ethernet type"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_start", - CTLFLAG_RD, &stats->rx_csum_bad_start, - "Received checksum offloaded buffer with incorrect start offset"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", CTLFLAG_RD, &stats->rx_csum_bad_ipproto, "Received checksum offloaded buffer with incorrect IP protocol"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_offset", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset", CTLFLAG_RD, &stats->rx_csum_bad_offset, "Received checksum offloaded buffer with incorrect offset"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_failed", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto", + CTLFLAG_RD, &stats->rx_csum_bad_proto, + "Received checksum offloaded buffer with incorrect protocol"); + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed", CTLFLAG_RD, &stats->rx_csum_failed, "Received buffer checksum offload failed"); - SYSCTL_ADD_ULONG(ctx, 
child, OID_AUTO, "rx_csum_offloaded", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded", CTLFLAG_RD, &stats->rx_csum_offloaded, "Received buffer checksum offload succeeded"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_task_rescheduled", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled", CTLFLAG_RD, &stats->rx_task_rescheduled, "Times the receive interrupt task rescheduled itself"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_csum_offloaded", - CTLFLAG_RD, &stats->tx_csum_offloaded, - "Offloaded checksum of transmitted buffer"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_tso_offloaded", - CTLFLAG_RD, &stats->tx_tso_offloaded, - "Segmentation offload of transmitted buffer"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_csum_bad_ethtype", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype", CTLFLAG_RD, &stats->tx_csum_bad_ethtype, "Aborted transmit of checksum offloaded buffer with unknown " "Ethernet type"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_tso_bad_ethtype", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype", CTLFLAG_RD, &stats->tx_tso_bad_ethtype, "Aborted transmit of TSO buffer with unknown Ethernet type"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_task_rescheduled", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp", + CTLFLAG_RD, &stats->tx_tso_not_tcp, + "Aborted transmit of TSO buffer with non TCP protocol"); + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded", + CTLFLAG_RD, &stats->tx_csum_offloaded, + "Offloaded checksum of transmitted buffer"); + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded", + CTLFLAG_RD, &stats->tx_tso_offloaded, + "Segmentation offload of transmitted buffer"); + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled", CTLFLAG_RD, &stats->tx_task_rescheduled, "Times the transmit interrupt task rescheduled itself"); } +static void +vtnet_setup_sysctl(struct vtnet_softc *sc) +{ + device_t dev; + struct sysctl_ctx_list *ctx; + struct sysctl_oid 
*tree; + struct sysctl_oid_list *child; + + dev = sc->vtnet_dev; + ctx = device_get_sysctl_ctx(dev); + tree = device_get_sysctl_tree(dev); + child = SYSCTL_CHILDREN(tree); + + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs", + CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0, + "Maximum number of supported virtqueue pairs"); + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs", + CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0, + "Number of active virtqueue pairs"); + + vtnet_setup_stat_sysctl(ctx, child, sc); +} + static int -vtnet_enable_rx_intr(struct vtnet_softc *sc) +vtnet_rxq_enable_intr(struct vtnet_rxq *rxq) { - return (virtqueue_enable_intr(sc->vtnet_rx_vq)); + return (virtqueue_enable_intr(rxq->vtnrx_vq)); } static void -vtnet_disable_rx_intr(struct vtnet_softc *sc) +vtnet_rxq_disable_intr(struct vtnet_rxq *rxq) { - virtqueue_disable_intr(sc->vtnet_rx_vq); + virtqueue_disable_intr(rxq->vtnrx_vq); } static int -vtnet_enable_tx_intr(struct vtnet_softc *sc) +vtnet_txq_enable_intr(struct vtnet_txq *txq) { -#ifdef VTNET_TX_INTR_MODERATION - return (0); -#else - return (virtqueue_enable_intr(sc->vtnet_tx_vq)); -#endif + return (virtqueue_postpone_intr(txq->vtntx_vq, VQ_POSTPONE_LONG)); +} + +static void +vtnet_txq_disable_intr(struct vtnet_txq *txq) +{ + + virtqueue_disable_intr(txq->vtntx_vq); +} + +static void +vtnet_enable_rx_interrupts(struct vtnet_softc *sc) +{ + int i; + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) + vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]); } static void -vtnet_disable_tx_intr(struct vtnet_softc *sc) +vtnet_enable_tx_interrupts(struct vtnet_softc *sc) { + int i; + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) + vtnet_txq_enable_intr(&sc->vtnet_txqs[i]); +} + +static void +vtnet_enable_interrupts(struct vtnet_softc *sc) +{ + + vtnet_enable_rx_interrupts(sc); + vtnet_enable_tx_interrupts(sc); +} + +static void +vtnet_disable_rx_interrupts(struct vtnet_softc *sc) +{ + int i; + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) + 
vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]); +} + +static void +vtnet_disable_tx_interrupts(struct vtnet_softc *sc) +{ + int i; + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) + vtnet_txq_disable_intr(&sc->vtnet_txqs[i]); +} + +static void +vtnet_disable_interrupts(struct vtnet_softc *sc) +{ + + vtnet_disable_rx_interrupts(sc); + vtnet_disable_tx_interrupts(sc); +} + +static int +vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def) +{ + char path[64]; + + snprintf(path, sizeof(path), + "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob); + TUNABLE_INT_FETCH(path, &def); - virtqueue_disable_intr(sc->vtnet_tx_vq); + return (def); } diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h index d870436..5921103 100644 --- a/sys/dev/virtio/network/if_vtnetvar.h +++ b/sys/dev/virtio/network/if_vtnetvar.h @@ -29,83 +29,165 @@ #ifndef _IF_VTNETVAR_H #define _IF_VTNETVAR_H +struct vtnet_softc; + struct vtnet_statistics { - unsigned long mbuf_alloc_failed; - - unsigned long rx_frame_too_large; - unsigned long rx_enq_replacement_failed; - unsigned long rx_mergeable_failed; - unsigned long rx_csum_bad_ethtype; - unsigned long rx_csum_bad_start; - unsigned long rx_csum_bad_ipproto; - unsigned long rx_csum_bad_offset; - unsigned long rx_csum_failed; - unsigned long rx_csum_offloaded; - unsigned long rx_task_rescheduled; - - unsigned long tx_csum_offloaded; - unsigned long tx_tso_offloaded; - unsigned long tx_csum_bad_ethtype; - unsigned long tx_tso_bad_ethtype; - unsigned long tx_task_rescheduled; + uint64_t mbuf_alloc_failed; + + uint64_t rx_frame_too_large; + uint64_t rx_enq_replacement_failed; + uint64_t rx_mergeable_failed; + uint64_t rx_csum_bad_ethtype; + uint64_t rx_csum_bad_ipproto; + uint64_t rx_csum_bad_offset; + uint64_t rx_csum_bad_proto; + uint64_t tx_csum_bad_ethtype; + uint64_t tx_tso_bad_ethtype; + uint64_t tx_tso_not_tcp; + + /* + * These are accumulated from each Rx/Tx queue. 
+ */ + uint64_t rx_csum_failed; + uint64_t rx_csum_offloaded; + uint64_t rx_task_rescheduled; + uint64_t tx_csum_offloaded; + uint64_t tx_tso_offloaded; + uint64_t tx_task_rescheduled; +}; + +struct vtnet_rxq_stats { + uint64_t vrxs_ipackets; /* if_ipackets */ + uint64_t vrxs_ibytes; /* if_ibytes */ + uint64_t vrxs_iqdrops; /* if_iqdrops */ + uint64_t vrxs_ierrors; /* if_ierrors */ + uint64_t vrxs_csum; + uint64_t vrxs_csum_failed; + uint64_t vrxs_rescheduled; }; +struct vtnet_rxq { + struct mtx vtnrx_mtx; + struct vtnet_softc *vtnrx_sc; + struct virtqueue *vtnrx_vq; + int vtnrx_id; + int vtnrx_process_limit; + struct vtnet_rxq_stats vtnrx_stats; + struct taskqueue *vtnrx_tq; + struct task vtnrx_intrtask; + char vtnrx_name[16]; +} __aligned(CACHE_LINE_SIZE); + +#define VTNET_RXQ_LOCK(_rxq) mtx_lock(&(_rxq)->vtnrx_mtx) +#define VTNET_RXQ_UNLOCK(_rxq) mtx_unlock(&(_rxq)->vtnrx_mtx) +#define VTNET_RXQ_LOCK_ASSERT(_rxq) \ + mtx_assert(&(_rxq)->vtnrx_mtx, MA_OWNED) +#define VTNET_RXQ_LOCK_ASSERT_NOTOWNED(_rxq) \ + mtx_assert(&(_rxq)->vtnrx_mtx, MA_NOTOWNED) + +struct vtnet_txq_stats { + uint64_t vtxs_opackets; /* if_opackets */ + uint64_t vtxs_obytes; /* if_obytes */ + uint64_t vtxs_omcasts; /* if_omcasts */ + uint64_t vtxs_csum; + uint64_t vtxs_tso; + uint64_t vtxs_collapsed; + uint64_t vtxs_rescheduled; +}; + +struct vtnet_txq { + struct mtx vtntx_mtx; + struct vtnet_softc *vtntx_sc; + struct virtqueue *vtntx_vq; +#ifndef VTNET_LEGACY_TX + struct buf_ring *vtntx_br; +#endif + int vtntx_id; + int vtntx_watchdog; + struct vtnet_txq_stats vtntx_stats; + struct taskqueue *vtntx_tq; + struct task vtntx_intrtask; +#ifndef VTNET_LEGACY_TX + struct task vtntx_defrtask; +#endif + char vtntx_name[16]; +} __aligned(CACHE_LINE_SIZE); + +#define VTNET_TXQ_LOCK(_txq) mtx_lock(&(_txq)->vtntx_mtx) +#define VTNET_TXQ_TRYLOCK(_txq) mtx_trylock(&(_txq)->vtntx_mtx) +#define VTNET_TXQ_UNLOCK(_txq) mtx_unlock(&(_txq)->vtntx_mtx) +#define VTNET_TXQ_LOCK_ASSERT(_txq) \ + 
mtx_assert(&(_txq)->vtntx_mtx, MA_OWNED) +#define VTNET_TXQ_LOCK_ASSERT_NOTOWNED(_txq) \ + mtx_assert(&(_txq)->vtntx_mtx, MA_NOTOWNED) + struct vtnet_softc { device_t vtnet_dev; struct ifnet *vtnet_ifp; - struct mtx vtnet_mtx; + struct vtnet_rxq *vtnet_rxqs; + struct vtnet_txq *vtnet_txqs; uint32_t vtnet_flags; -#define VTNET_FLAG_LINK 0x0001 -#define VTNET_FLAG_SUSPENDED 0x0002 +#define VTNET_FLAG_SUSPENDED 0x0001 +#define VTNET_FLAG_MAC 0x0002 #define VTNET_FLAG_CTRL_VQ 0x0004 #define VTNET_FLAG_CTRL_RX 0x0008 -#define VTNET_FLAG_VLAN_FILTER 0x0010 -#define VTNET_FLAG_TSO_ECN 0x0020 -#define VTNET_FLAG_MRG_RXBUFS 0x0040 -#define VTNET_FLAG_LRO_NOMRG 0x0080 - - struct virtqueue *vtnet_rx_vq; - struct virtqueue *vtnet_tx_vq; - struct virtqueue *vtnet_ctrl_vq; +#define VTNET_FLAG_CTRL_MAC 0x0010 +#define VTNET_FLAG_VLAN_FILTER 0x0020 +#define VTNET_FLAG_TSO_ECN 0x0040 +#define VTNET_FLAG_MRG_RXBUFS 0x0080 +#define VTNET_FLAG_LRO_NOMRG 0x0100 +#define VTNET_FLAG_MULTIQ 0x0200 + int vtnet_link_active; int vtnet_hdr_size; - int vtnet_tx_size; - int vtnet_rx_size; int vtnet_rx_process_limit; - int vtnet_rx_mbuf_size; - int vtnet_rx_mbuf_count; + int vtnet_rx_nmbufs; + int vtnet_rx_clsize; + int vtnet_rx_new_clsize; int vtnet_if_flags; - int vtnet_watchdog_timer; - uint64_t vtnet_features; + int vtnet_act_vq_pairs; + int vtnet_max_vq_pairs; - struct vtnet_statistics vtnet_stats; + struct virtqueue *vtnet_ctrl_vq; + struct vtnet_mac_filter *vtnet_mac_filter; + uint32_t *vtnet_vlan_filter; + uint64_t vtnet_features; + struct vtnet_statistics vtnet_stats; struct callout vtnet_tick_ch; - + struct ifmedia vtnet_media; eventhandler_tag vtnet_vlan_attach; eventhandler_tag vtnet_vlan_detach; - struct ifmedia vtnet_media; - /* - * Fake media type; the host does not provide us with - * any real media information. 
- */ -#define VTNET_MEDIATYPE (IFM_ETHER | IFM_1000_T | IFM_FDX) + struct mtx vtnet_mtx; + char vtnet_mtx_name[16]; char vtnet_hwaddr[ETHER_ADDR_LEN]; +}; - struct vtnet_mac_filter *vtnet_mac_filter; - /* - * During reset, the host's VLAN filtering table is lost. The - * array below is used to restore all the VLANs configured on - * this interface after a reset. - */ -#define VTNET_VLAN_SHADOW_SIZE (4096 / 32) - int vtnet_nvlans; - uint32_t vtnet_vlan_shadow[VTNET_VLAN_SHADOW_SIZE]; +/* + * Maximum number of queue pairs we will autoconfigure to. + */ +#define VTNET_MAX_QUEUE_PAIRS 8 - char vtnet_mtx_name[16]; -}; +/* + * Additional completed entries can appear in a virtqueue before we can + * reenable interrupts. Number of times to retry before scheduling the + * taskqueue to process the completed entries. + */ +#define VTNET_INTR_DISABLE_RETRIES 4 + +/* + * Fake the media type. The host does not provide us with any real media + * information. + */ +#define VTNET_MEDIATYPE (IFM_ETHER | IFM_10G_T | IFM_FDX) + +/* + * Number of words to allocate for the VLAN shadow table. There is one + * bit for each VLAN. + */ +#define VTNET_VLAN_FILTER_NWORDS (4096 / 32) /* * When mergeable buffers are not negotiated, the vtnet_rx_header structure @@ -161,8 +243,12 @@ struct vtnet_mac_filter { */ CTASSERT(sizeof(struct vtnet_mac_filter) <= PAGE_SIZE); -#define VTNET_WATCHDOG_TIMEOUT 5 +#define VTNET_TX_TIMEOUT 5 #define VTNET_CSUM_OFFLOAD (CSUM_TCP | CSUM_UDP | CSUM_SCTP) +#define VTNET_CSUM_OFFLOAD_IPV6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6) + +#define VTNET_CSUM_ALL_OFFLOAD \ + (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6 | CSUM_TSO) /* Features desired/implemented by this driver. 
*/ #define VTNET_FEATURES \ @@ -170,8 +256,10 @@ CTASSERT(sizeof(struct vtnet_mac_filter) <= PAGE_SIZE); VIRTIO_NET_F_STATUS | \ VIRTIO_NET_F_CTRL_VQ | \ VIRTIO_NET_F_CTRL_RX | \ + VIRTIO_NET_F_CTRL_MAC_ADDR | \ VIRTIO_NET_F_CTRL_VLAN | \ VIRTIO_NET_F_CSUM | \ + VIRTIO_NET_F_GSO | \ VIRTIO_NET_F_HOST_TSO4 | \ VIRTIO_NET_F_HOST_TSO6 | \ VIRTIO_NET_F_HOST_ECN | \ @@ -180,9 +268,18 @@ CTASSERT(sizeof(struct vtnet_mac_filter) <= PAGE_SIZE); VIRTIO_NET_F_GUEST_TSO6 | \ VIRTIO_NET_F_GUEST_ECN | \ VIRTIO_NET_F_MRG_RXBUF | \ + VIRTIO_NET_F_MQ | \ + VIRTIO_RING_F_EVENT_IDX | \ VIRTIO_RING_F_INDIRECT_DESC) /* + * The VIRTIO_NET_F_HOST_TSO[46] features permit us to send the host + * frames larger than 1514 bytes. + */ +#define VTNET_TSO_FEATURES (VIRTIO_NET_F_GSO | VIRTIO_NET_F_HOST_TSO4 | \ + VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_ECN) + +/* * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us * frames larger than 1514 bytes. We do not yet support software LRO * via tcp_lro_rx(). @@ -209,27 +306,34 @@ CTASSERT(((VTNET_MAX_RX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_RX_SIZE); CTASSERT(((VTNET_MAX_TX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_MTU); /* + * Number of slots in the Tx bufrings. This value matches most other + * multiqueue drivers. + */ +#define VTNET_DEFAULT_BUFRING_SIZE 4096 + +/* * Determine how many mbufs are in each receive buffer. For LRO without * mergeable descriptors, we must allocate an mbuf chain large enough to * hold both the vtnet_rx_header and the maximum receivable data. */ -#define VTNET_NEEDED_RX_MBUFS(_sc) \ +#define VTNET_NEEDED_RX_MBUFS(_sc, _clsize) \ ((_sc)->vtnet_flags & VTNET_FLAG_LRO_NOMRG) == 0 ? 
1 : \ howmany(sizeof(struct vtnet_rx_header) + VTNET_MAX_RX_SIZE, \ - (_sc)->vtnet_rx_mbuf_size) + (_clsize)) -#define VTNET_MTX(_sc) &(_sc)->vtnet_mtx -#define VTNET_LOCK(_sc) mtx_lock(VTNET_MTX((_sc))) -#define VTNET_UNLOCK(_sc) mtx_unlock(VTNET_MTX((_sc))) -#define VTNET_LOCK_DESTROY(_sc) mtx_destroy(VTNET_MTX((_sc))) -#define VTNET_LOCK_ASSERT(_sc) mtx_assert(VTNET_MTX((_sc)), MA_OWNED) -#define VTNET_LOCK_ASSERT_NOTOWNED(_sc) \ - mtx_assert(VTNET_MTX((_sc)), MA_NOTOWNED) +#define VTNET_CORE_MTX(_sc) &(_sc)->vtnet_mtx +#define VTNET_CORE_LOCK(_sc) mtx_lock(VTNET_CORE_MTX((_sc))) +#define VTNET_CORE_UNLOCK(_sc) mtx_unlock(VTNET_CORE_MTX((_sc))) +#define VTNET_CORE_LOCK_DESTROY(_sc) mtx_destroy(VTNET_CORE_MTX((_sc))) +#define VTNET_CORE_LOCK_ASSERT(_sc) \ + mtx_assert(VTNET_CORE_MTX((_sc)), MA_OWNED) +#define VTNET_CORE_LOCK_ASSERT_NOTOWNED(_sc) \ + mtx_assert(VTNET_CORE_MTX((_sc)), MA_NOTOWNED) -#define VTNET_LOCK_INIT(_sc) do { \ +#define VTNET_CORE_LOCK_INIT(_sc) do { \ snprintf((_sc)->vtnet_mtx_name, sizeof((_sc)->vtnet_mtx_name), \ "%s", device_get_nameunit((_sc)->vtnet_dev)); \ - mtx_init(VTNET_MTX((_sc)), (_sc)->vtnet_mtx_name, \ + mtx_init(VTNET_CORE_MTX((_sc)), (_sc)->vtnet_mtx_name, \ "VTNET Core Lock", MTX_DEF); \ } while (0) diff --git a/sys/dev/virtio/network/virtio_net.h b/sys/dev/virtio/network/virtio_net.h index 15a73cc..f4f9feb 100644 --- a/sys/dev/virtio/network/virtio_net.h +++ b/sys/dev/virtio/network/virtio_net.h @@ -50,14 +50,22 @@ #define VIRTIO_NET_F_CTRL_RX 0x40000 /* Control channel RX mode support */ #define VIRTIO_NET_F_CTRL_VLAN 0x80000 /* Control channel VLAN filtering */ #define VIRTIO_NET_F_CTRL_RX_EXTRA 0x100000 /* Extra RX mode control support */ +#define VIRTIO_NET_F_GUEST_ANNOUNCE 0x200000 /* Announce device on network */ +#define VIRTIO_NET_F_MQ 0x400000 /* Device supports RFS */ +#define VIRTIO_NET_F_CTRL_MAC_ADDR 0x800000 /* Set MAC address */ #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ struct virtio_net_config { /* The 
config defining mac address (if VIRTIO_NET_F_MAC) */ - uint8_t mac[ETHER_ADDR_LEN]; + uint8_t mac[ETHER_ADDR_LEN]; /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ uint16_t status; + /* Maximum number of each of transmit and receive queues; + * see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ. + * Legal values are between 1 and 0x8000. + */ + uint16_t max_virtqueue_pairs; } __packed; /* @@ -66,6 +74,7 @@ struct virtio_net_config { */ struct virtio_net_hdr { #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start,csum_offset*/ +#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ uint8_t flags; #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */ #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ @@ -100,8 +109,6 @@ struct virtio_net_ctrl_hdr { uint8_t cmd; } __packed; -typedef uint8_t virtio_net_ctrl_ack; - #define VIRTIO_NET_OK 0 #define VIRTIO_NET_ERR 1 @@ -134,6 +141,10 @@ typedef uint8_t virtio_net_ctrl_ack; * first sg list contains unicast addresses, the second is for multicast. * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature * is available. + * + * The ADDR_SET command requests one out scatterlist, it contains a + * 6-byte MAC address. This functionality is present if the + * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available. */ struct virtio_net_ctrl_mac { uint32_t entries; @@ -142,6 +153,7 @@ struct virtio_net_ctrl_mac { #define VIRTIO_NET_CTRL_MAC 1 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 +#define VIRTIO_NET_CTRL_MAC_ADDR_SET 1 /* * Control VLAN filtering @@ -156,4 +168,35 @@ struct virtio_net_ctrl_mac { #define VIRTIO_NET_CTRL_VLAN_ADD 0 #define VIRTIO_NET_CTRL_VLAN_DEL 1 +/* + * Control link announce acknowledgement + * + * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that + * driver has received the notification; device would clear the + * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives + * this command. 
+ */ +#define VIRTIO_NET_CTRL_ANNOUNCE 3 +#define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0 + +/* + * Control Receive Flow Steering + * + * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET enables Receive Flow + * Steering, specifying the number of the transmit and receive queues + * that will be used. After the command is consumed and acked by the + * device, the device will not steer new packets on receive virtqueues + * other than specified nor read from transmit virtqueues other than + * specified. Accordingly, driver should not transmit new packets on + * virtqueues other than specified. + */ +struct virtio_net_ctrl_mq { + uint16_t virtqueue_pairs; +} __packed; + +#define VIRTIO_NET_CTRL_MQ 4 +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 + #endif /* _VIRTIO_NET_H */ diff --git a/sys/dev/virtio/pci/virtio_pci.c b/sys/dev/virtio/pci/virtio_pci.c index b3df3d9..dcd82ec 100644 --- a/sys/dev/virtio/pci/virtio_pci.c +++ b/sys/dev/virtio/pci/virtio_pci.c @@ -757,8 +757,10 @@ vtpci_probe_and_attach_child(struct vtpci_softc *sc) vtpci_release_child_resources(sc); /* Reset status for future attempt. 
*/ vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK); - } else + } else { vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK); + VIRTIO_ATTACH_COMPLETED(child); + } } static int diff --git a/sys/dev/virtio/virtio_if.m b/sys/dev/virtio/virtio_if.m index 9a99d37..521f4b8 100644 --- a/sys/dev/virtio/virtio_if.m +++ b/sys/dev/virtio/virtio_if.m @@ -31,6 +31,18 @@ INTERFACE virtio; CODE { static int + virtio_default_attach_completed(device_t dev) + { + return (0); + } +}; + +METHOD int attach_completed { + device_t dev; +} DEFAULT virtio_default_attach_completed; + +CODE { + static int virtio_default_config_change(device_t dev) { return (0); diff --git a/sys/dev/virtio/virtqueue.c b/sys/dev/virtio/virtqueue.c index a82426e..beff14c 100644 --- a/sys/dev/virtio/virtqueue.c +++ b/sys/dev/virtio/virtqueue.c @@ -127,7 +127,7 @@ static uint16_t vq_ring_enqueue_segments(struct virtqueue *, static int vq_ring_use_indirect(struct virtqueue *, int); static void vq_ring_enqueue_indirect(struct virtqueue *, void *, struct sglist *, int, int); -static int vq_ring_enable_interrupt(struct virtqueue *, uint16_t); +static int vq_ring_enable_interrupt(struct virtqueue *, uint16_t); static int vq_ring_must_notify_host(struct virtqueue *); static void vq_ring_notify_host(struct virtqueue *); static void vq_ring_free_chain(struct virtqueue *, uint16_t); @@ -440,28 +440,38 @@ virtqueue_enable_intr(struct virtqueue *vq) } int -virtqueue_postpone_intr(struct virtqueue *vq) +virtqueue_postpone_intr(struct virtqueue *vq, vq_postpone_t hint) { uint16_t ndesc, avail_idx; - /* - * Request the next interrupt be postponed until at least half - * of the available descriptors have been consumed. 
- */ avail_idx = vq->vq_ring.avail->idx; - ndesc = (uint16_t)(avail_idx - vq->vq_used_cons_idx) / 2; + ndesc = (uint16_t)(avail_idx - vq->vq_used_cons_idx); + + switch (hint) { + case VQ_POSTPONE_SHORT: + ndesc /= 4; + break; + case VQ_POSTPONE_LONG: + ndesc = (ndesc * 3) / 4; + break; + case VQ_POSTPONE_EMPTIED: + break; + } return (vq_ring_enable_interrupt(vq, ndesc)); } +/* + * Note this is only considered a hint to the host. + */ void virtqueue_disable_intr(struct virtqueue *vq) { - /* - * Note this is only considered a hint to the host. - */ - if ((vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) == 0) + if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) { + vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx - + vq->vq_nentries - 1; + } else vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; } diff --git a/sys/dev/virtio/virtqueue.h b/sys/dev/virtio/virtqueue.h index 128a10a..0d4ed94 100644 --- a/sys/dev/virtio/virtqueue.h +++ b/sys/dev/virtio/virtqueue.h @@ -41,6 +41,16 @@ struct sglist; /* Device callback for a virtqueue interrupt. */ typedef void virtqueue_intr_t(void *); +/* + * Hint on how long the next interrupt should be postponed. This is + * only used when the EVENT_IDX feature is negotiated. + */ +typedef enum { + VQ_POSTPONE_SHORT, + VQ_POSTPONE_LONG, + VQ_POSTPONE_EMPTIED /* Until all available desc are used. */ +} vq_postpone_t; + #define VIRTQUEUE_MAX_NAME_SZ 32 /* One for each virtqueue the device wishes to allocate. */ @@ -73,7 +83,7 @@ int virtqueue_reinit(struct virtqueue *vq, uint16_t size); int virtqueue_intr_filter(struct virtqueue *vq); void virtqueue_intr(struct virtqueue *vq); int virtqueue_enable_intr(struct virtqueue *vq); -int virtqueue_postpone_intr(struct virtqueue *vq); +int virtqueue_postpone_intr(struct virtqueue *vq, vq_postpone_t hint); void virtqueue_disable_intr(struct virtqueue *vq); /* Get physical address of the virtqueue ring. 
*/ diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index 90994cc..ebb6935 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -2914,11 +2914,92 @@ pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) } /* - * This function is advisory. + * Apply the given advice to the specified range of addresses within the + * given pmap. Depending on the advice, clear the referenced and/or + * modified flags in each mapping and set the mapped page's dirty field. */ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { + pd_entry_t *pde, *pdpe; + pt_entry_t *pte; + vm_offset_t va, va_next; + vm_paddr_t pa; + vm_page_t m; + + if (advice != MADV_DONTNEED && advice != MADV_FREE) + return; + rw_wlock(&pvh_global_lock); + PMAP_LOCK(pmap); + for (; sva < eva; sva = va_next) { + pdpe = pmap_segmap(pmap, sva); +#ifdef __mips_n64 + if (*pdpe == 0) { + va_next = (sva + NBSEG) & ~SEGMASK; + if (va_next < sva) + va_next = eva; + continue; + } +#endif + va_next = (sva + NBPDR) & ~PDRMASK; + if (va_next < sva) + va_next = eva; + + pde = pmap_pdpe_to_pde(pdpe, sva); + if (*pde == NULL) + continue; + + /* + * Limit our scan to either the end of the va represented + * by the current page table page, or to the end of the + * range being write protected. + */ + if (va_next > eva) + va_next = eva; + + va = va_next; + for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, + sva += PAGE_SIZE) { + if (!pte_test(pte, PTE_MANAGED | PTE_V)) { + if (va != va_next) { + pmap_invalidate_range(pmap, va, sva); + va = va_next; + } + continue; + } + pa = TLBLO_PTE_TO_PA(*pte); + m = PHYS_TO_VM_PAGE(pa); + m->md.pv_flags &= ~PV_TABLE_REF; + if (pte_test(pte, PTE_D)) { + if (advice == MADV_DONTNEED) { + /* + * Future calls to pmap_is_modified() + * can be avoided by making the page + * dirty now. 
+ */ + vm_page_dirty(m); + } else { + pte_clear(pte, PTE_D); + if (va == va_next) + va = sva; + } + } else { + /* + * Unless PTE_D is set, any TLB entries + * mapping "sva" don't allow write access, so + * they needn't be invalidated. + */ + if (va != va_next) { + pmap_invalidate_range(pmap, va, sva); + va = va_next; + } + } + } + if (va != va_next) + pmap_invalidate_range(pmap, va, sva); + } + rw_wunlock(&pvh_global_lock); + PMAP_UNLOCK(pmap); } /* diff --git a/sys/modules/virtio/network/Makefile b/sys/modules/virtio/network/Makefile index 8463309c..f124d99 100644 --- a/sys/modules/virtio/network/Makefile +++ b/sys/modules/virtio/network/Makefile @@ -23,14 +23,29 @@ # SUCH DAMAGE. # +.include <bsd.own.mk> + .PATH: ${.CURDIR}/../../../dev/virtio/network KMOD= if_vtnet SRCS= if_vtnet.c SRCS+= virtio_bus_if.h virtio_if.h SRCS+= bus_if.h device_if.h +SRCS+= opt_inet.h opt_inet6.h MFILES= kern/bus_if.m kern/device_if.m \ dev/virtio/virtio_bus_if.m dev/virtio/virtio_if.m +.if !defined(KERNBUILDDIR) +.if ${MK_INET_SUPPORT} != "no" +opt_inet.h: + @echo "#define INET 1" > ${.TARGET} +.endif + +.if ${MK_INET6_SUPPORT} != "no" +opt_inet6.h: + @echo "#define INET6 1" > ${.TARGET} +.endif +.endif + .include <bsd.kmod.mk> diff --git a/sys/powerpc/powermac/atibl.c b/sys/powerpc/powermac/atibl.c index fff76d0..f4ac9b0 100644 --- a/sys/powerpc/powermac/atibl.c +++ b/sys/powerpc/powermac/atibl.c @@ -86,6 +86,8 @@ DRIVER_MODULE(atibl, vgapci, atibl_driver, atibl_devclass, 0, 0); static void atibl_identify(driver_t *driver, device_t parent) { + if (OF_finddevice("mac-io/backlight") == -1) + return; if (device_find_child(parent, "backlight", -1) == NULL) device_add_child(parent, "backlight", -1); } diff --git a/sys/powerpc/powermac/nvbl.c b/sys/powerpc/powermac/nvbl.c index 1f89881..033f972 100644 --- a/sys/powerpc/powermac/nvbl.c +++ b/sys/powerpc/powermac/nvbl.c @@ -82,6 +82,8 @@ DRIVER_MODULE(nvbl, vgapci, nvbl_driver, nvbl_devclass, 0, 0); static void nvbl_identify(driver_t 
*driver, device_t parent) { + if (OF_finddevice("mac-io/backlight") == -1) + return; if (device_find_child(parent, "backlight", -1) == NULL) device_add_child(parent, "backlight", -1); } diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 6627a07..e39654e 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -780,7 +780,7 @@ finished: while ((slab = SLIST_FIRST(&freeslabs)) != NULL) { SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink); - keg_free_slab(keg, slab, 0); + keg_free_slab(keg, slab, keg->uk_ipers); } } |