20 files changed, 237 insertions, 81 deletions
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index e05e366..9c8b9c9 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -2706,6 +2706,8 @@ device		uvisor
 # USB serial support for DDI pocket's PHS
 device		uvscom
 #
+# USB ethernet support
+device		uether
 # ADMtek USB ethernet. Supports the LinkSys USB100TX,
 # the Billionton USB100, the Melco LU-ATX, the D-Link DSB-650TX
 # and the SMC 2202USB. Also works with the ADMtek AN986 Pegasus
diff --git a/sys/conf/files b/sys/conf/files
index e124bf4..f6456a7 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -2529,9 +2529,9 @@ dev/usb/net/if_udav.c		optional udav
 dev/usb/net/if_usie.c		optional usie
 dev/usb/net/if_urndis.c		optional urndis
 dev/usb/net/ruephy.c		optional rue
-dev/usb/net/usb_ethernet.c	optional aue | axe | axge | cdce | cue | kue | \
-					 mos | rue | smsc | udav | ipheth | \
-					 urndis
+dev/usb/net/usb_ethernet.c	optional uether | aue | axe | axge | cdce | \
+					 cue | ipheth | kue | mos | rue | \
+					 smsc | udav | urndis
 dev/usb/net/uhso.c		optional uhso
 #
 # USB WLAN drivers
diff --git a/sys/dev/filemon/filemon.c b/sys/dev/filemon/filemon.c
index f8a698f..b302de9 100644
--- a/sys/dev/filemon/filemon.c
+++ b/sys/dev/filemon/filemon.c
@@ -43,7 +43,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
-#include <sys/mutex.h>
 #include <sys/poll.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
@@ -195,9 +194,6 @@ filemon_open(struct cdev *dev, int oflags __unused, int devtype __unused,
 	if (filemon == NULL) {
 		filemon = malloc(sizeof(struct filemon), M_FILEMON,
 		    M_WAITOK | M_ZERO);
-
-		filemon->fp = NULL;
-
 		sx_init(&filemon->lock, "filemon");
 	}
 
diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c
index bc0609d..60e448a 100644
--- a/sys/dev/xen/control/control.c
+++ b/sys/dev/xen/control/control.c
@@ -127,6 +127,7 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/_inttypes.h>
 #include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
@@ -403,6 +404,8 @@ xctrl_suspend()
 	gnttab_resume();
 
 #ifdef SMP
+	/* Send an IPI_BITMAP in case there are pending bitmap IPIs. */
+	lapic_ipi_vectored(IPI_BITMAP_VECTOR, APIC_IPI_DEST_ALL);
 	if (smp_started && !CPU_EMPTY(&cpu_suspend_map)) {
 		/*
 		 * Now that event channels have been initialized,
diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h
index 2abd7e4..d540dc9 100644
--- a/sys/fs/nfs/nfs_var.h
+++ b/sys/fs/nfs/nfs_var.h
@@ -135,6 +135,7 @@ int nfsrv_checksequence(struct nfsrv_descript *, uint32_t, uint32_t *,
     uint32_t *, int, uint32_t *, NFSPROC_T *);
 int nfsrv_checkreclaimcomplete(struct nfsrv_descript *);
 void nfsrv_cache_session(uint8_t *, uint32_t, int, struct mbuf **);
+void nfsrv_freeallbackchannel_xprts(void);
 
 /* nfs_nfsdserv.c */
 int nfsrvd_access(struct nfsrv_descript *, int,
diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c
index bfe6fa3..429cfcc 100644
--- a/sys/fs/nfsclient/nfs_clrpcops.c
+++ b/sys/fs/nfsclient/nfs_clrpcops.c
@@ -3087,6 +3087,25 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 			*eofp = eof;
 	}
 
+	/*
+	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
+	 */
+	while (uio_uio_resid(uiop) > 0 && ((size_t)(uio_uio_resid(uiop))) != tresid) {
+		dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
+		dp->d_type = DT_UNKNOWN;
+		dp->d_fileno = 0;
+		dp->d_namlen = 0;
+		dp->d_name[0] = '\0';
+		tl = (u_int32_t *)&dp->d_name[4];
+		*tl++ = cookie.lval[0];
+		*tl = cookie.lval[1];
+		dp->d_reclen = DIRBLKSIZ;
+		uio_iov_base_add(uiop, DIRBLKSIZ);
+		uio_iov_len_add(uiop, -(DIRBLKSIZ));
+		uio_uio_resid_add(uiop, -(DIRBLKSIZ));
+		uiop->uio_offset += DIRBLKSIZ;
+	}
+
 nfsmout:
 	if (nd->nd_mrep != NULL)
 		mbuf_freem(nd->nd_mrep);
@@ -3561,6 +3580,25 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
 			*eofp = eof;
 	}
 
+	/*
+	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
+	 */
+	while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) {
+		dp = (struct dirent *)uio_iov_base(uiop);
+		dp->d_type = DT_UNKNOWN;
+		dp->d_fileno = 0;
+		dp->d_namlen = 0;
+		dp->d_name[0] = '\0';
+		tl = (u_int32_t *)&dp->d_name[4];
+		*tl++ = cookie.lval[0];
+		*tl = cookie.lval[1];
+		dp->d_reclen = DIRBLKSIZ;
+		uio_iov_base_add(uiop, DIRBLKSIZ);
+		uio_iov_len_add(uiop, -(DIRBLKSIZ));
+		uio_uio_resid_add(uiop, -(DIRBLKSIZ));
+		uiop->uio_offset += DIRBLKSIZ;
+	}
+
 nfsmout:
 	if (nd->nd_mrep != NULL)
 		mbuf_freem(nd->nd_mrep);
diff --git a/sys/fs/nfsserver/nfs_nfsdkrpc.c b/sys/fs/nfsserver/nfs_nfsdkrpc.c
index e68a18b..7326038 100644
--- a/sys/fs/nfsserver/nfs_nfsdkrpc.c
+++ b/sys/fs/nfsserver/nfs_nfsdkrpc.c
@@ -547,6 +547,7 @@ nfsrvd_init(int terminating)
 	if (terminating) {
 		nfsd_master_proc = NULL;
 		NFSD_UNLOCK();
+		nfsrv_freeallbackchannel_xprts();
 		svcpool_destroy(nfsrvd_pool);
 		nfsrvd_pool = NULL;
 		NFSD_LOCK();
diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c
index c6d9448..37fb3b6 100644
--- a/sys/fs/nfsserver/nfs_nfsdstate.c
+++ b/sys/fs/nfsserver/nfs_nfsdstate.c
@@ -4188,10 +4188,23 @@ nfsrv_docallback(struct nfsclient *clp, int procnum,
 	if (!error) {
 		if ((nd->nd_flag & ND_NFSV41) != 0) {
 			KASSERT(sep != NULL, ("sep NULL"));
-			error = newnfs_request(nd, NULL, clp, &clp->lc_req,
-			    NULL, NULL, cred, clp->lc_program,
-			    clp->lc_req.nr_vers, NULL, 1, NULL,
-			    &sep->sess_cbsess);
+			if (sep->sess_cbsess.nfsess_xprt != NULL)
+				error = newnfs_request(nd, NULL, clp,
+				    &clp->lc_req, NULL, NULL, cred,
+				    clp->lc_program, clp->lc_req.nr_vers, NULL,
+				    1, NULL, &sep->sess_cbsess);
+			else {
+				/*
+				 * This should probably never occur, but if a
+				 * client somehow does an RPC without a
+				 * SequenceID Op that causes a callback just
+				 * after the nfsd threads have been terminated
+				 * and restarted we could conceivably get here
+				 * without a backchannel xprt.
+				 */
+				printf("nfsrv_docallback: no xprt\n");
+				error = ECONNREFUSED;
+			}
 			nfsrv_freesession(sep, NULL);
 		} else
 			error = newnfs_request(nd, NULL, clp, &clp->lc_req,
@@ -5776,14 +5789,16 @@ nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
 	 * If this session handles the backchannel, save the nd_xprt for this
 	 * RPC, since this is the one being used.
 	 */
-	if (sep->sess_cbsess.nfsess_xprt != NULL &&
+	if (sep->sess_clp->lc_req.nr_client != NULL &&
 	    (sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) {
 		savxprt = sep->sess_cbsess.nfsess_xprt;
 		SVC_ACQUIRE(nd->nd_xprt);
-		nd->nd_xprt->xp_p2 = savxprt->xp_p2;
+		nd->nd_xprt->xp_p2 =
+		    sep->sess_clp->lc_req.nr_client->cl_private;
 		nd->nd_xprt->xp_idletimeout = 0;	/* Disable timeout. */
 		sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
-		SVC_RELEASE(savxprt);
+		if (savxprt != NULL)
+			SVC_RELEASE(savxprt);
 	}
 
 	*sflagsp = 0;
@@ -6042,3 +6057,29 @@ nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
 	return (0);
 }
 
+/*
+ * Free up all backchannel xprts.  This needs to be done when the nfsd threads
+ * exit, since those transports will all be going away.
+ * This is only called after all the nfsd threads are done performing RPCs,
+ * so locking shouldn't be an issue.
+ */
+APPLESTATIC void
+nfsrv_freeallbackchannel_xprts(void)
+{
+	struct nfsdsession *sep;
+	struct nfsclient *clp;
+	SVCXPRT *xprt;		/* xprt just detached from a session */
+	int i;			/* index into nfsclienthash[] */
+
+	for (i = 0; i < nfsrv_clienthashsize; i++) {	/* every bucket */
+		LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
+			LIST_FOREACH(sep, &clp->lc_session, sess_list) {
+				xprt = sep->sess_cbsess.nfsess_xprt;
+				sep->sess_cbsess.nfsess_xprt = NULL;
+				if (xprt != NULL)	/* session had one */
+					SVC_RELEASE(xprt);
+			}
+		}
+	}
+}
+
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index b5f5b42..2df1e25 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -2425,6 +2425,10 @@ vdrop(struct vnode *vp)
  * Drop the hold count of the vnode.  If this is the last reference to
  * the vnode we place it on the free list unless it has been vgone'd
  * (marked VI_DOOMED) in which case we will free it.
+ *
+ * Because the vnode vm object keeps a hold reference on the vnode if
+ * there is at least one resident non-cached page, the vnode cannot
+ * leave the active list without the page cleanup done.
  */
 void
 vdropl(struct vnode *vp)
@@ -2540,11 +2544,13 @@ vinactive(struct vnode *vp, struct thread *td)
 	VI_UNLOCK(vp);
 	/*
 	 * Before moving off the active list, we must be sure that any
-	 * modified pages are on the vnode's dirty list since these will
-	 * no longer be checked once the vnode is on the inactive list.
-	 * Because the vnode vm object keeps a hold reference on the vnode
-	 * if there is at least one resident non-cached page, the vnode
-	 * cannot leave the active list without the page cleanup done.
+	 * modified pages are converted into the vnode's dirty
+	 * buffers, since these will no longer be checked once the
+	 * vnode is on the inactive list.
+	 *
+	 * The write-out of the dirty pages is asynchronous.  At the
+	 * point that VOP_INACTIVE() is called, there could still be
+	 * pending I/O and dirty pages in the object.
 	 */
 	obj = vp->v_object;
 	if (obj != NULL && (obj->flags & OBJ_MIGHTBEDIRTY) != 0) {
diff --git a/sys/modules/Makefile.inc b/sys/modules/Makefile.inc
index 9dc38af..b20b99b 100644
--- a/sys/modules/Makefile.inc
+++ b/sys/modules/Makefile.inc
@@ -4,3 +4,4 @@
 CFLAGS+= -DPC98
 .endif
 
+SUBDIR_PARALLEL=	yes
diff --git a/sys/modules/drm2/Makefile b/sys/modules/drm2/Makefile
index 3671c1a..e4e19fb 100644
--- a/sys/modules/drm2/Makefile
+++ b/sys/modules/drm2/Makefile
@@ -2,8 +2,6 @@
 
 .include <bsd.own.mk>
 
-SUBDIR_PARALLEL=
-
 .if ${MACHINE_CPUARCH} == "amd64"
 _i915kms=	i915kms
 _radeonkms=	radeonkms
diff --git a/sys/modules/drm2/radeonkmsfw/Makefile b/sys/modules/drm2/radeonkmsfw/Makefile
index f885da9..167743c 100644
--- a/sys/modules/drm2/radeonkmsfw/Makefile
+++ b/sys/modules/drm2/radeonkmsfw/Makefile
@@ -1,7 +1,5 @@
 # $FreeBSD$
 
-SUBDIR_PARALLEL=
-
 SUBDIR=									\
 	ARUBA_me							\
 	ARUBA_pfp							\
diff --git a/sys/modules/netgraph/Makefile b/sys/modules/netgraph/Makefile
index 03873e4..dc44ac7 100644
--- a/sys/modules/netgraph/Makefile
+++ b/sys/modules/netgraph/Makefile
@@ -62,6 +62,4 @@ _bluetooth=	bluetooth
 _mppc=		mppc
 .endif
 
-SUBDIR_PARALLEL=
-
 .include <bsd.subdir.mk>
diff --git a/sys/netinet/igmp.c b/sys/netinet/igmp.c
index c138f14..34a60a8 100644
--- a/sys/netinet/igmp.c
+++ b/sys/netinet/igmp.c
@@ -3327,6 +3327,15 @@ igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
 	KASSERT(igi->igi_version == IGMP_VERSION_3,
 	    ("%s: called when version %d", __func__, igi->igi_version));
 
+	/*
+	 * Check whether there are packets still queued.  If so, send them
+	 * first.  For a large number of groups the reply to a general query
+	 * can take many packets; we should finish sending them before we
+	 * start queuing the new reply.
+	 */
+	if (igi->igi_gq.ifq_head != NULL)
+		goto send;
+
 	ifp = igi->igi_ifp;
 
 	IF_ADDR_RLOCK(ifp);
@@ -3362,6 +3371,7 @@ igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
+send:
 	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
 	igmp_dispatch_queue(&igi->igi_gq, IGMP_MAX_RESPONSE_BURST, loop);
 
diff --git a/sys/netinet6/mld6.c b/sys/netinet6/mld6.c
index 77b19bf..6e0f95d 100644
--- a/sys/netinet6/mld6.c
+++ b/sys/netinet6/mld6.c
@@ -2989,6 +2989,15 @@ mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
 	KASSERT(mli->mli_version == MLD_VERSION_2,
 	    ("%s: called when version %d", __func__, mli->mli_version));
 
+	/*
+	 * Check whether there are packets still queued.  If so, send them
+	 * first.  For a large number of groups the reply to a general query
+	 * can take many packets; we should finish sending them before we
+	 * start queuing the new reply.
+	 */
+	if (mli->mli_gq.ifq_head != NULL)
+		goto send;
+
 	ifp = mli->mli_ifp;
 
 	IF_ADDR_RLOCK(ifp);
@@ -3024,6 +3033,7 @@ mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
+send:
 	mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST);
 
 	/*
diff --git a/sys/netpfil/ipfw/ip_fw_dynamic.c b/sys/netpfil/ipfw/ip_fw_dynamic.c
index 694362a..b6cfa62 100644
--- a/sys/netpfil/ipfw/ip_fw_dynamic.c
+++ b/sys/netpfil/ipfw/ip_fw_dynamic.c
@@ -715,6 +715,9 @@ ipfw_install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
 		id.fib = M_GETFIB(args->m);
 
 		if (IS_IP6_FLOW_ID (&(args->f_id))) {
+			bzero(&id.src_ip6, sizeof(id.src_ip6));
+			bzero(&id.dst_ip6, sizeof(id.dst_ip6));
+
 			if (limit_mask & DYN_SRC_ADDR)
 				id.src_ip6 = args->f_id.src_ip6;
 			if (limit_mask & DYN_DST_ADDR)
diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c
index 60c1beb..fd110c7 100644
--- a/sys/vm/device_pager.c
+++ b/sys/vm/device_pager.c
@@ -60,10 +60,8 @@ static vm_object_t dev_pager_alloc(void *, vm_ooffset_t, vm_prot_t,
     vm_ooffset_t, struct ucred *);
 static void dev_pager_dealloc(vm_object_t);
 static int dev_pager_getpages(vm_object_t, vm_page_t *, int, int);
-static void dev_pager_putpages(vm_object_t, vm_page_t *, int, 
-		boolean_t, int *);
-static boolean_t dev_pager_haspage(vm_object_t, vm_pindex_t, int *,
-		int *);
+static void dev_pager_putpages(vm_object_t, vm_page_t *, int, int, int *);
+static boolean_t dev_pager_haspage(vm_object_t, vm_pindex_t, int *, int *);
 static void dev_pager_free_page(vm_object_t object, vm_page_t m);
 
 /* list of device pager objects */
@@ -101,8 +99,9 @@ static struct cdev_pager_ops old_dev_pager_ops = {
 };
 
 static void
-dev_pager_init()
+dev_pager_init(void)
 {
+
 	TAILQ_INIT(&dev_pager_object_list);
 	mtx_init(&dev_pager_mtx, "dev_pager list", NULL, MTX_DEF);
 }
@@ -231,8 +230,7 @@ dev_pager_free_page(vm_object_t object, vm_page_t m)
 }
 
 static void
-dev_pager_dealloc(object)
-	vm_object_t object;
+dev_pager_dealloc(vm_object_t object)
 {
 	vm_page_t m;
 
@@ -362,24 +360,18 @@ old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot,
 }
 
 static void
-dev_pager_putpages(object, m, count, sync, rtvals)
-	vm_object_t object;
-	vm_page_t *m;
-	int count;
-	boolean_t sync;
-	int *rtvals;
+dev_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags,
+    int *rtvals)
 {
 
 	panic("dev_pager_putpage called");
 }
 
 static boolean_t
-dev_pager_haspage(object, pindex, before, after)
-	vm_object_t object;
-	vm_pindex_t pindex;
-	int *before;
-	int *after;
+dev_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
+    int *after)
 {
+
 	if (before != NULL)
 		*before = 0;
 	if (after != NULL)
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index accf517..7ecb6c7 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -227,6 +227,7 @@ struct vm_domain {
 	long vmd_segs;	/* bitmask of the segments */
 	boolean_t vmd_oom;
 	int vmd_pass;	/* local pagedaemon pass */
+	int vmd_oom_seq;
 	int vmd_last_active_scan;
 	struct vm_page vmd_marker; /* marker for pagedaemon private use */
 };
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 2cc738d..156d11a 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -121,7 +121,8 @@ static void vm_pageout(void);
 static void vm_pageout_init(void);
 static int vm_pageout_clean(vm_page_t);
 static void vm_pageout_scan(struct vm_domain *vmd, int pass);
-static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass);
+static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
+    int starting_page_shortage);
 
 SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init,
     NULL);
@@ -158,6 +159,7 @@ int vm_pages_needed;		/* Event on which pageout daemon sleeps */
 int vm_pageout_deficit;		/* Estimated number of pages deficit */
 int vm_pageout_pages_needed;	/* flag saying that the pageout daemon needs pages */
 int vm_pageout_wakeup_thresh;
+static int vm_pageout_oom_seq = 12;
 
 #if !defined(NO_SWAPPING)
 static int vm_pageout_req_swapout;	/* XXX */
@@ -217,6 +219,10 @@ static int pageout_lock_miss;
 SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss,
 	CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout");
 
+SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq,
+	CTLFLAG_RW, &vm_pageout_oom_seq, 0,
+	"back-to-back calls to oom detector to start OOM");
+
 #define VM_PAGEOUT_PAGE_COUNT 16
 int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;
 
@@ -941,7 +947,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
 	long min_scan;
 	int act_delta, addl_page_shortage, deficit, maxscan, page_shortage;
 	int vnodes_skipped = 0;
-	int maxlaunder, scan_tick, scanned;
+	int maxlaunder, scan_tick, scanned, starting_page_shortage;
 	int lockmode;
 	boolean_t queues_locked;
 
@@ -981,6 +987,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
 		page_shortage = vm_paging_target() + deficit;
 	} else
 		page_shortage = deficit = 0;
+	starting_page_shortage = page_shortage;
 
 	/*
 	 * maxlaunder limits the number of dirty pages we flush per scan.
@@ -1358,6 +1365,12 @@ relock_queues:
 		(void)speedup_syncer();
 
 	/*
+	 * If the inactive queue scan fails repeatedly to meet its
+	 * target, kill the largest process.
+	 */
+	vm_pageout_mightbe_oom(vmd, page_shortage, starting_page_shortage);
+
+	/*
 	 * Compute the number of pages we want to try to move from the
 	 * active queue to the inactive queue.
 	 */
@@ -1469,15 +1482,6 @@ relock_queues:
 		}
 	}
 #endif
-
-	/*
-	 * If we are critically low on one of RAM or swap and low on
-	 * the other, kill the largest process.  However, we avoid
-	 * doing this on the first pass in order to give ourselves a
-	 * chance to flush out dirty vnode-backed pages and to allow
-	 * active pages to be moved to the inactive queue and reclaimed.
-	 */
-	vm_pageout_mightbe_oom(vmd, pass);
 }
 
 static int vm_pageout_oom_vote;
@@ -1488,12 +1492,17 @@ static int vm_pageout_oom_vote;
  * failed to reach free target is premature.
  */
 static void
-vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass)
+vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
+    int starting_page_shortage)
 {
 	int old_vote;
 
-	if (pass <= 1 || !((swap_pager_avail < 64 && vm_page_count_min()) ||
-	    (swap_pager_full && vm_paging_target() > 0))) {
+	if (starting_page_shortage <= 0 || starting_page_shortage !=
+	    page_shortage)
+		vmd->vmd_oom_seq = 0;
+	else
+		vmd->vmd_oom_seq++;
+	if (vmd->vmd_oom_seq < vm_pageout_oom_seq) {
 		if (vmd->vmd_oom) {
 			vmd->vmd_oom = FALSE;
 			atomic_subtract_int(&vm_pageout_oom_vote, 1);
@@ -1501,6 +1510,12 @@ vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass)
 		return;
 	}
 
+	/*
+	 * Do not follow the call sequence until OOM condition is
+	 * cleared.
+	 */
+	vmd->vmd_oom_seq = 0;
+
 	if (vmd->vmd_oom)
 		return;
 
@@ -1526,6 +1541,65 @@ vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass)
 	atomic_subtract_int(&vm_pageout_oom_vote, 1);
 }
 
+/*
+ * The OOM killer is the page daemon's action of last resort when
+ * memory allocation requests have been stalled for a prolonged period
+ * of time because it cannot reclaim memory.  This function computes
+ * the approximate number of physical pages that could be reclaimed if
+ * the specified address space is destroyed.
+ *
+ * Private, anonymous memory owned by the address space is the
+ * principal resource that we expect to recover after an OOM kill.
+ * Since the physical pages mapped by the address space's COW entries
+ * are typically shared pages, they are unlikely to be released and so
+ * they are not counted.
+ *
+ * To get to the point where the page daemon runs the OOM killer, its
+ * efforts to write-back vnode-backed pages may have stalled.  This
+ * could be caused by a memory allocation deadlock in the write path
+ * that might be resolved by an OOM kill.  Therefore, physical pages
+ * belonging to vnode-backed objects are counted, because they might
+ * be freed without being written out first if the address space holds
+ * the last reference to an unlinked vnode.
+ *
+ * Similarly, physical pages belonging to OBJT_PHYS objects are
+ * counted because the address space might hold the last reference to
+ * the object.
+ */
+static long
+vm_pageout_oom_pagecount(struct vmspace *vmspace)
+{
+	vm_map_t map;
+	vm_map_entry_t entry;
+	vm_object_t obj;
+	long res;		/* running sum of resident_page_count */
+
+	map = &vmspace->vm_map;
+	KASSERT(!map->system_map, ("system map"));
+	sx_assert(&map->lock, SA_LOCKED);	/* caller holds the map lock */
+	res = 0;
+	for (entry = map->header.next; entry != &map->header;
+	    entry = entry->next) {
+		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
+			continue;	/* sub-map entry: skipped */
+		obj = entry->object.vm_object;
+		if (obj == NULL)
+			continue;	/* no backing object */
+		if ((entry->eflags & MAP_ENTRY_NEEDS_COPY) != 0 &&
+		    obj->ref_count != 1)
+			continue;	/* COW entry, object likely shared */
+		switch (obj->type) {
+		case OBJT_DEFAULT:
+		case OBJT_SWAP:
+		case OBJT_PHYS:
+		case OBJT_VNODE:
+			res += obj->resident_page_count;
+			break;
+		}			/* other object types: not counted */
+	}
+	return (res);
+}
+
 void
 vm_pageout_oom(int shortage)
 {
@@ -1570,7 +1644,8 @@ vm_pageout_oom(int shortage)
 			if (!TD_ON_RUNQ(td) &&
 			    !TD_IS_RUNNING(td) &&
 			    !TD_IS_SLEEPING(td) &&
-			    !TD_IS_SUSPENDED(td)) {
+			    !TD_IS_SUSPENDED(td) &&
+			    !TD_IS_SWAPPED(td)) {
 				thread_unlock(td);
 				breakout = 1;
 				break;
@@ -1598,12 +1673,13 @@ vm_pageout_oom(int shortage)
 		}
 		PROC_UNLOCK(p);
 		size = vmspace_swap_count(vm);
-		vm_map_unlock_read(&vm->vm_map);
 		if (shortage == VM_OOM_MEM)
-			size += vmspace_resident_count(vm);
+			size += vm_pageout_oom_pagecount(vm);
+		vm_map_unlock_read(&vm->vm_map);
 		vmspace_free(vm);
+
 		/*
-		 * if the this process is bigger than the biggest one
+		 * If this process is bigger than the biggest one,
 		 * remember it.
 		 */
 		if (size > bigsize) {
diff --git a/sys/x86/xen/hvm.c b/sys/x86/xen/hvm.c
index 1986f6f..6c6f153 100644
--- a/sys/x86/xen/hvm.c
+++ b/sys/x86/xen/hvm.c
@@ -72,7 +72,6 @@ static driver_filter_t xen_cpustop_handler;
 static driver_filter_t xen_cpususpend_handler;
 static driver_filter_t xen_cpustophard_handler;
 static void xen_ipi_vectored(u_int vector, int dest);
-static void xen_hvm_cpu_resume(void);
 #endif
 static void xen_hvm_cpu_init(void);
 
@@ -84,9 +83,6 @@ extern void pmap_lazyfix_action(void);
 extern int pmap_pcid_enabled;
 #endif
 
-/* Variables used by mp_machdep to perform the bitmap IPI */
-extern volatile u_int cpu_ipi_pending[MAXCPU];
-
 /*---------------------------------- Macros ----------------------------------*/
 #define	IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS)
 
@@ -110,7 +106,7 @@ enum xen_domain_type xen_domain_type = XEN_NATIVE;
 struct cpu_ops xen_hvm_cpu_ops = {
 	.ipi_vectored	= lapic_ipi_vectored,
 	.cpu_init	= xen_hvm_cpu_init,
-	.cpu_resume	= xen_hvm_cpu_resume
+	.cpu_resume	= xen_hvm_cpu_init
 };
 #endif
 
@@ -312,21 +308,6 @@ xen_ipi_vectored(u_int vector, int dest)
 
 /*---------------------- XEN diverged cpu operations -------------------------*/
 static void
-xen_hvm_cpu_resume(void)
-{
-	u_int cpuid = PCPU_GET(cpuid);
-
-	/*
-	 * Reset pending bitmap IPIs, because Xen doesn't preserve pending
-	 * event channels on migration.
-	 */
-	cpu_ipi_pending[cpuid] = 0;
-
-	/* register vcpu_info area */
-	xen_hvm_cpu_init();
-}
-
-static void
 xen_cpu_ipi_init(int cpu)
 {
 	xen_intr_handle_t *ipi_handle;