Diffstat (limited to 'sys')
-rw-r--r--  sys/conf/NOTES                         |   2
-rw-r--r--  sys/conf/files                         |   6
-rw-r--r--  sys/dev/filemon/filemon.c              |   4
-rw-r--r--  sys/dev/xen/control/control.c          |   3
-rw-r--r--  sys/fs/nfs/nfs_var.h                   |   1
-rw-r--r--  sys/fs/nfsclient/nfs_clrpcops.c        |  38
-rw-r--r--  sys/fs/nfsserver/nfs_nfsdkrpc.c        |   1
-rw-r--r--  sys/fs/nfsserver/nfs_nfsdstate.c       |  55
-rw-r--r--  sys/kern/vfs_subr.c                    |  16
-rw-r--r--  sys/modules/Makefile.inc               |   1
-rw-r--r--  sys/modules/drm2/Makefile              |   2
-rw-r--r--  sys/modules/drm2/radeonkmsfw/Makefile  |   2
-rw-r--r--  sys/modules/netgraph/Makefile          |   2
-rw-r--r--  sys/netinet/igmp.c                     |  10
-rw-r--r--  sys/netinet6/mld6.c                    |  10
-rw-r--r--  sys/netpfil/ipfw/ip_fw_dynamic.c       |   3
-rw-r--r--  sys/vm/device_pager.c                  |  28
-rw-r--r--  sys/vm/vm_page.h                       |   1
-rw-r--r--  sys/vm/vm_pageout.c                    | 112
-rw-r--r--  sys/x86/xen/hvm.c                      |  21
20 files changed, 237 insertions, 81 deletions
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index e05e366..9c8b9c9 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -2706,6 +2706,8 @@ device uvisor
# USB serial support for DDI pocket's PHS
device uvscom
#
+# USB ethernet support
+device uether
# ADMtek USB ethernet. Supports the LinkSys USB100TX,
# the Billionton USB100, the Melco LU-ATX, the D-Link DSB-650TX
# and the SMC 2202USB. Also works with the ADMtek AN986 Pegasus
diff --git a/sys/conf/files b/sys/conf/files
index e124bf4..f6456a7 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -2529,9 +2529,9 @@ dev/usb/net/if_udav.c optional udav
dev/usb/net/if_usie.c optional usie
dev/usb/net/if_urndis.c optional urndis
dev/usb/net/ruephy.c optional rue
-dev/usb/net/usb_ethernet.c optional aue | axe | axge | cdce | cue | kue | \
- mos | rue | smsc | udav | ipheth | \
- urndis
+dev/usb/net/usb_ethernet.c optional uether | aue | axe | axge | cdce | \
+ cue | ipheth | kue | mos | rue | \
+ smsc | udav | urndis
dev/usb/net/uhso.c optional uhso
#
# USB WLAN drivers
diff --git a/sys/dev/filemon/filemon.c b/sys/dev/filemon/filemon.c
index f8a698f..b302de9 100644
--- a/sys/dev/filemon/filemon.c
+++ b/sys/dev/filemon/filemon.c
@@ -43,7 +43,6 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
-#include <sys/mutex.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/queue.h>
@@ -195,9 +194,6 @@ filemon_open(struct cdev *dev, int oflags __unused, int devtype __unused,
if (filemon == NULL) {
filemon = malloc(sizeof(struct filemon), M_FILEMON,
M_WAITOK | M_ZERO);
-
- filemon->fp = NULL;
-
sx_init(&filemon->lock, "filemon");
}
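
The filemon hunks work because malloc(9) called with M_WAITOK | M_ZERO already returns zeroed memory, so the explicit "filemon->fp = NULL" being removed was redundant. A minimal userland sketch of the same idea, using calloc(3) as a stand-in for the kernel allocator; the struct layout and names here are illustrative, not filemon's real ones, and the check relies on all-bits-zero being a null pointer, which holds on every platform FreeBSD supports:

#include <assert.h>
#include <stdlib.h>

struct filemon_like {
	void *fp;	/* file-pointer handle, expected to start NULL */
	int flags;
};

int
main(void)
{
	/* calloc() zeroes the allocation, as malloc(9) does with
	 * M_ZERO, so assigning NULL to fp afterward is redundant. */
	struct filemon_like *fm = calloc(1, sizeof(*fm));

	assert(fm != NULL && fm->fp == NULL && fm->flags == 0);
	free(fm);
	return (0);
}
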
diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c
index bc0609d..60e448a 100644
--- a/sys/dev/xen/control/control.c
+++ b/sys/dev/xen/control/control.c
@@ -127,6 +127,7 @@ __FBSDID("$FreeBSD$");
#include <machine/_inttypes.h>
#include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
@@ -403,6 +404,8 @@ xctrl_suspend()
gnttab_resume();
#ifdef SMP
+ /* Send an IPI_BITMAP in case there are pending bitmap IPIs. */
+ lapic_ipi_vectored(IPI_BITMAP_VECTOR, APIC_IPI_DEST_ALL);
if (smp_started && !CPU_EMPTY(&cpu_suspend_map)) {
/*
* Now that event channels have been initialized,
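
The new IPI_BITMAP broadcast exists because Xen does not preserve pending event channels across save/restore, so a bitmap IPI latched before suspend could otherwise be lost. A rough userland sketch of the replay-after-resume pattern; cpu_pending and resignal are stand-in names, since the real state (cpu_ipi_pending[]) lives in mp_machdep:

#include <stdatomic.h>
#include <stdio.h>

#define NCPU 4

/* Stand-in for the per-CPU cpu_ipi_pending[] bitmap. */
static _Atomic unsigned cpu_pending[NCPU];

/* Stand-in for lapic_ipi_vectored(IPI_BITMAP_VECTOR, cpu). */
static void
resignal(int cpu)
{
	printf("re-sending bitmap IPI to cpu %d\n", cpu);
}

/*
 * On resume, replay the interrupt for any CPU that still has bits
 * set, since the pre-suspend IPI may have been dropped.  The diff
 * does this more bluntly with one APIC_IPI_DEST_ALL broadcast.
 */
static void
resume_replay(void)
{
	for (int cpu = 0; cpu < NCPU; cpu++)
		if (atomic_load(&cpu_pending[cpu]) != 0)
			resignal(cpu);
}

int
main(void)
{
	atomic_store(&cpu_pending[2], 1u << 3);	/* pretend CPU 2 has work */
	resume_replay();
	return (0);
}
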
diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h
index 2abd7e4..d540dc9 100644
--- a/sys/fs/nfs/nfs_var.h
+++ b/sys/fs/nfs/nfs_var.h
@@ -135,6 +135,7 @@ int nfsrv_checksequence(struct nfsrv_descript *, uint32_t, uint32_t *,
uint32_t *, int, uint32_t *, NFSPROC_T *);
int nfsrv_checkreclaimcomplete(struct nfsrv_descript *);
void nfsrv_cache_session(uint8_t *, uint32_t, int, struct mbuf **);
+void nfsrv_freeallbackchannel_xprts(void);
/* nfs_nfsdserv.c */
int nfsrvd_access(struct nfsrv_descript *, int,
diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c
index bfe6fa3..429cfcc 100644
--- a/sys/fs/nfsclient/nfs_clrpcops.c
+++ b/sys/fs/nfsclient/nfs_clrpcops.c
@@ -3087,6 +3087,25 @@ nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
*eofp = eof;
}
+ /*
+ * Add extra empty records to any remaining DIRBLKSIZ chunks.
+ */
+ while (uio_uio_resid(uiop) > 0 && ((size_t)(uio_uio_resid(uiop))) != tresid) {
+ dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
+ dp->d_type = DT_UNKNOWN;
+ dp->d_fileno = 0;
+ dp->d_namlen = 0;
+ dp->d_name[0] = '\0';
+ tl = (u_int32_t *)&dp->d_name[4];
+ *tl++ = cookie.lval[0];
+ *tl = cookie.lval[1];
+ dp->d_reclen = DIRBLKSIZ;
+ uio_iov_base_add(uiop, DIRBLKSIZ);
+ uio_iov_len_add(uiop, -(DIRBLKSIZ));
+ uio_uio_resid_add(uiop, -(DIRBLKSIZ));
+ uiop->uio_offset += DIRBLKSIZ;
+ }
+
nfsmout:
if (nd->nd_mrep != NULL)
mbuf_freem(nd->nd_mrep);
@@ -3561,6 +3580,25 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
*eofp = eof;
}
+ /*
+ * Add extra empty records to any remaining DIRBLKSIZ chunks.
+ */
+ while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) {
+ dp = (struct dirent *)uio_iov_base(uiop);
+ dp->d_type = DT_UNKNOWN;
+ dp->d_fileno = 0;
+ dp->d_namlen = 0;
+ dp->d_name[0] = '\0';
+ tl = (u_int32_t *)&dp->d_name[4];
+ *tl++ = cookie.lval[0];
+ *tl = cookie.lval[1];
+ dp->d_reclen = DIRBLKSIZ;
+ uio_iov_base_add(uiop, DIRBLKSIZ);
+ uio_iov_len_add(uiop, -(DIRBLKSIZ));
+ uio_uio_resid_add(uiop, -(DIRBLKSIZ));
+ uiop->uio_offset += DIRBLKSIZ;
+ }
+
nfsmout:
if (nd->nd_mrep != NULL)
mbuf_freem(nd->nd_mrep);
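
Both padding loops above fill every unconsumed DIRBLKSIZ chunk of the caller's buffer with a single empty record whose d_reclen spans the whole chunk, stashing the directory cookie in the name area so a later readdir can resume from it. A compacted userland sketch of that layout; mini_dirent and pad_dirblks are hypothetical stand-ins for the kernel's struct dirent and uio plumbing:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define DIRBLKSIZ 512

/* Simplified stand-in for struct dirent; fields follow the diff. */
struct mini_dirent {
	uint64_t d_fileno;
	uint16_t d_reclen;
	uint8_t  d_type;
	uint8_t  d_namlen;
	char	 d_name[16];
};

/*
 * Pad the tail of a readdir buffer with one empty record per remaining
 * DIRBLKSIZ chunk, placing the resume cookie after d_name[4], the way
 * the diff's loops in nfsrpc_readdir()/nfsrpc_readdirplus() do.
 * Returns the number of bytes consumed.
 */
static size_t
pad_dirblks(char *buf, size_t resid, uint32_t cookie_lo, uint32_t cookie_hi)
{
	size_t used = 0;

	while (resid - used >= DIRBLKSIZ) {
		struct mini_dirent *dp = (struct mini_dirent *)(buf + used);

		memset(dp, 0, sizeof(*dp));
		dp->d_type = 0;			/* DT_UNKNOWN */
		dp->d_reclen = DIRBLKSIZ;	/* record spans the chunk */
		uint32_t *tl = (uint32_t *)&dp->d_name[4];
		tl[0] = cookie_lo;		/* cookie rides in the name area */
		tl[1] = cookie_hi;
		used += DIRBLKSIZ;
	}
	return (used);
}

int
main(void)
{
	static _Alignas(8) char buf[4 * DIRBLKSIZ];

	printf("padded %zu bytes\n", pad_dirblks(buf, sizeof(buf), 7, 0));
	return (0);
}
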
diff --git a/sys/fs/nfsserver/nfs_nfsdkrpc.c b/sys/fs/nfsserver/nfs_nfsdkrpc.c
index e68a18b..7326038 100644
--- a/sys/fs/nfsserver/nfs_nfsdkrpc.c
+++ b/sys/fs/nfsserver/nfs_nfsdkrpc.c
@@ -547,6 +547,7 @@ nfsrvd_init(int terminating)
if (terminating) {
nfsd_master_proc = NULL;
NFSD_UNLOCK();
+ nfsrv_freeallbackchannel_xprts();
svcpool_destroy(nfsrvd_pool);
nfsrvd_pool = NULL;
NFSD_LOCK();
diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c
index c6d9448..37fb3b6 100644
--- a/sys/fs/nfsserver/nfs_nfsdstate.c
+++ b/sys/fs/nfsserver/nfs_nfsdstate.c
@@ -4188,10 +4188,23 @@ nfsrv_docallback(struct nfsclient *clp, int procnum,
if (!error) {
if ((nd->nd_flag & ND_NFSV41) != 0) {
KASSERT(sep != NULL, ("sep NULL"));
- error = newnfs_request(nd, NULL, clp, &clp->lc_req,
- NULL, NULL, cred, clp->lc_program,
- clp->lc_req.nr_vers, NULL, 1, NULL,
- &sep->sess_cbsess);
+ if (sep->sess_cbsess.nfsess_xprt != NULL)
+ error = newnfs_request(nd, NULL, clp,
+ &clp->lc_req, NULL, NULL, cred,
+ clp->lc_program, clp->lc_req.nr_vers, NULL,
+ 1, NULL, &sep->sess_cbsess);
+ else {
+ /*
+ * This should probably never occur, but if a
+ * client somehow does an RPC without a
+ * SequenceID Op that causes a callback just
+ * after the nfsd threads have been terminated
+ * and restarted, we could conceivably get here
+ * without a backchannel xprt.
+ */
+ printf("nfsrv_docallback: no xprt\n");
+ error = ECONNREFUSED;
+ }
nfsrv_freesession(sep, NULL);
} else
error = newnfs_request(nd, NULL, clp, &clp->lc_req,
@@ -5776,14 +5789,16 @@ nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
* If this session handles the backchannel, save the nd_xprt for this
* RPC, since this is the one being used.
*/
- if (sep->sess_cbsess.nfsess_xprt != NULL &&
+ if (sep->sess_clp->lc_req.nr_client != NULL &&
(sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) {
savxprt = sep->sess_cbsess.nfsess_xprt;
SVC_ACQUIRE(nd->nd_xprt);
- nd->nd_xprt->xp_p2 = savxprt->xp_p2;
+ nd->nd_xprt->xp_p2 =
+ sep->sess_clp->lc_req.nr_client->cl_private;
nd->nd_xprt->xp_idletimeout = 0; /* Disable timeout. */
sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
- SVC_RELEASE(savxprt);
+ if (savxprt != NULL)
+ SVC_RELEASE(savxprt);
}
*sflagsp = 0;
@@ -6042,3 +6057,29 @@ nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
return (0);
}
+/*
+ * Free up all backchannel xprts. This needs to be done when the nfsd threads
+ * exit, since those transports will all be going away.
+ * This is only called after all the nfsd threads are done performing RPCs,
+ * so locking shouldn't be an issue.
+ */
+APPLESTATIC void
+nfsrv_freeallbackchannel_xprts(void)
+{
+ struct nfsdsession *sep;
+ struct nfsclient *clp;
+ SVCXPRT *xprt;
+ int i;
+
+ for (i = 0; i < nfsrv_clienthashsize; i++) {
+ LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
+ LIST_FOREACH(sep, &clp->lc_session, sess_list) {
+ xprt = sep->sess_cbsess.nfsess_xprt;
+ sep->sess_cbsess.nfsess_xprt = NULL;
+ if (xprt != NULL)
+ SVC_RELEASE(xprt);
+ }
+ }
+ }
+}
+
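The teardown order in nfsrv_freeallbackchannel_xprts() matters: the cached pointer is detached before the reference is dropped, so nothing can observe a half-released transport. A toy sketch of that steal-then-release pattern under the same single-threaded assumption the diff's comment states; struct xprt and session are simplified stand-ins for SVCXPRT and struct nfsdsession:

#include <stdio.h>
#include <stdlib.h>

/* Toy refcounted transport standing in for SVCXPRT. */
struct xprt {
	int refs;
};

static void
xprt_release(struct xprt *xp)
{
	if (--xp->refs == 0) {
		printf("transport freed\n");
		free(xp);
	}
}

struct session {
	struct xprt *cb_xprt;	/* backchannel transport, may be NULL */
	struct session *next;
};

/*
 * Mirror of the diff's loop body: clear the cached pointer first,
 * then drop the reference.  Single-threaded by assumption, as the
 * nfsd threads are already done performing RPCs.
 */
static void
free_all_backchannels(struct session *head)
{
	for (struct session *sep = head; sep != NULL; sep = sep->next) {
		struct xprt *xp = sep->cb_xprt;

		sep->cb_xprt = NULL;
		if (xp != NULL)
			xprt_release(xp);
	}
}

int
main(void)
{
	struct xprt *xp = malloc(sizeof(*xp));
	struct session s = { .cb_xprt = xp, .next = NULL };

	xp->refs = 1;
	free_all_backchannels(&s);
	return (0);
}
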
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index b5f5b42..2df1e25 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -2425,6 +2425,10 @@ vdrop(struct vnode *vp)
* Drop the hold count of the vnode. If this is the last reference to
* the vnode we place it on the free list unless it has been vgone'd
* (marked VI_DOOMED) in which case we will free it.
+ *
+ * Because the vnode vm object keeps a hold reference on the vnode if
+ * there is at least one resident non-cached page, the vnode cannot
+ * leave the active list without the page cleanup done.
*/
void
vdropl(struct vnode *vp)
@@ -2540,11 +2544,13 @@ vinactive(struct vnode *vp, struct thread *td)
VI_UNLOCK(vp);
/*
* Before moving off the active list, we must be sure that any
- * modified pages are on the vnode's dirty list since these will
- * no longer be checked once the vnode is on the inactive list.
- * Because the vnode vm object keeps a hold reference on the vnode
- * if there is at least one resident non-cached page, the vnode
- * cannot leave the active list without the page cleanup done.
+ * modified pages are converted into the vnode's dirty
+ * buffers, since these will no longer be checked once the
+ * vnode is on the inactive list.
+ *
+ * The write-out of the dirty pages is asynchronous. At the
+ * point that VOP_INACTIVE() is called, there could still be
+ * pending I/O and dirty pages in the object.
*/
obj = vp->v_object;
if (obj != NULL && (obj->flags & OBJ_MIGHTBEDIRTY) != 0) {
diff --git a/sys/modules/Makefile.inc b/sys/modules/Makefile.inc
index 9dc38af..b20b99b 100644
--- a/sys/modules/Makefile.inc
+++ b/sys/modules/Makefile.inc
@@ -4,3 +4,4 @@
CFLAGS+= -DPC98
.endif
+SUBDIR_PARALLEL= yes
diff --git a/sys/modules/drm2/Makefile b/sys/modules/drm2/Makefile
index 3671c1a..e4e19fb 100644
--- a/sys/modules/drm2/Makefile
+++ b/sys/modules/drm2/Makefile
@@ -2,8 +2,6 @@
.include <bsd.own.mk>
-SUBDIR_PARALLEL=
-
.if ${MACHINE_CPUARCH} == "amd64"
_i915kms= i915kms
_radeonkms= radeonkms
diff --git a/sys/modules/drm2/radeonkmsfw/Makefile b/sys/modules/drm2/radeonkmsfw/Makefile
index f885da9..167743c 100644
--- a/sys/modules/drm2/radeonkmsfw/Makefile
+++ b/sys/modules/drm2/radeonkmsfw/Makefile
@@ -1,7 +1,5 @@
# $FreeBSD$
-SUBDIR_PARALLEL=
-
SUBDIR= \
ARUBA_me \
ARUBA_pfp \
diff --git a/sys/modules/netgraph/Makefile b/sys/modules/netgraph/Makefile
index 03873e4..dc44ac7 100644
--- a/sys/modules/netgraph/Makefile
+++ b/sys/modules/netgraph/Makefile
@@ -62,6 +62,4 @@ _bluetooth= bluetooth
_mppc= mppc
.endif
-SUBDIR_PARALLEL=
-
.include <bsd.subdir.mk>
diff --git a/sys/netinet/igmp.c b/sys/netinet/igmp.c
index c138f14..34a60a8 100644
--- a/sys/netinet/igmp.c
+++ b/sys/netinet/igmp.c
@@ -3327,6 +3327,15 @@ igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
KASSERT(igi->igi_version == IGMP_VERSION_3,
("%s: called when version %d", __func__, igi->igi_version));
+ /*
+ * Check that there are some packets queued. If so, send them first.
+ * For a large number of groups, the reply to a general query can
+ * take many packets; we should finish sending them before starting
+ * to queue the new reply.
+ */
+ if (igi->igi_gq.ifq_head != NULL)
+ goto send;
+
ifp = igi->igi_ifp;
IF_ADDR_RLOCK(ifp);
@@ -3362,6 +3371,7 @@ igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
}
IF_ADDR_RUNLOCK(ifp);
+send:
loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
igmp_dispatch_queue(&igi->igi_gq, IGMP_MAX_RESPONSE_BURST, loop);
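
The goto above changes igmp_v3_dispatch_general_query() so that a still-queued, possibly multi-packet reply is drained in rate-limited bursts before a fresh reply is built; the mld6.c hunk below makes the identical change for MLDv2. A toy sketch of that drain-before-rebuild shape, where the queue type, build_reply, and the burst size are illustrative rather than the kernel's:

#include <stdbool.h>
#include <stdio.h>

/* Toy packet queue standing in for struct ifqueue. */
struct queue {
	int len;
};

static bool queue_empty(const struct queue *q) { return (q->len == 0); }

static void
dispatch_burst(struct queue *q, int burst)
{
	int n = q->len < burst ? q->len : burst;

	q->len -= n;
	printf("sent %d packets, %d left queued\n", n, q->len);
}

static void build_reply(struct queue *q) { q->len += 8; }

/*
 * When a previous reply is still queued, finish draining it before
 * enqueuing a new one, so a large multi-packet reply is not
 * clobbered mid-send.
 */
static void
dispatch_general_query(struct queue *gq)
{
	if (!queue_empty(gq))
		goto send;		/* finish the old reply first */
	build_reply(gq);
send:
	dispatch_burst(gq, 4);		/* IGMP_MAX_RESPONSE_BURST analogue */
}

int
main(void)
{
	struct queue gq = { 0 };

	dispatch_general_query(&gq);	/* builds, sends first burst */
	dispatch_general_query(&gq);	/* drains leftovers, no rebuild */
	return (0);
}
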
diff --git a/sys/netinet6/mld6.c b/sys/netinet6/mld6.c
index 77b19bf..6e0f95d 100644
--- a/sys/netinet6/mld6.c
+++ b/sys/netinet6/mld6.c
@@ -2989,6 +2989,15 @@ mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
KASSERT(mli->mli_version == MLD_VERSION_2,
("%s: called when version %d", __func__, mli->mli_version));
+ /*
+ * Check that there are some packets queued. If so, send them first.
+ * For a large number of groups, the reply to a general query can
+ * take many packets; we should finish sending them before starting
+ * to queue the new reply.
+ */
+ if (mli->mli_gq.ifq_head != NULL)
+ goto send;
+
ifp = mli->mli_ifp;
IF_ADDR_RLOCK(ifp);
@@ -3024,6 +3033,7 @@ mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
}
IF_ADDR_RUNLOCK(ifp);
+send:
mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST);
/*
diff --git a/sys/netpfil/ipfw/ip_fw_dynamic.c b/sys/netpfil/ipfw/ip_fw_dynamic.c
index 694362a..b6cfa62 100644
--- a/sys/netpfil/ipfw/ip_fw_dynamic.c
+++ b/sys/netpfil/ipfw/ip_fw_dynamic.c
@@ -715,6 +715,9 @@ ipfw_install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
id.fib = M_GETFIB(args->m);
if (IS_IP6_FLOW_ID (&(args->f_id))) {
+ bzero(&id.src_ip6, sizeof(id.src_ip6));
+ bzero(&id.dst_ip6, sizeof(id.dst_ip6));
+
if (limit_mask & DYN_SRC_ADDR)
id.src_ip6 = args->f_id.src_ip6;
if (limit_mask & DYN_DST_ADDR)
diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c
index 60c1beb..fd110c7 100644
--- a/sys/vm/device_pager.c
+++ b/sys/vm/device_pager.c
@@ -60,10 +60,8 @@ static vm_object_t dev_pager_alloc(void *, vm_ooffset_t, vm_prot_t,
vm_ooffset_t, struct ucred *);
static void dev_pager_dealloc(vm_object_t);
static int dev_pager_getpages(vm_object_t, vm_page_t *, int, int);
-static void dev_pager_putpages(vm_object_t, vm_page_t *, int,
- boolean_t, int *);
-static boolean_t dev_pager_haspage(vm_object_t, vm_pindex_t, int *,
- int *);
+static void dev_pager_putpages(vm_object_t, vm_page_t *, int, int, int *);
+static boolean_t dev_pager_haspage(vm_object_t, vm_pindex_t, int *, int *);
static void dev_pager_free_page(vm_object_t object, vm_page_t m);
/* list of device pager objects */
@@ -101,8 +99,9 @@ static struct cdev_pager_ops old_dev_pager_ops = {
};
static void
-dev_pager_init()
+dev_pager_init(void)
{
+
TAILQ_INIT(&dev_pager_object_list);
mtx_init(&dev_pager_mtx, "dev_pager list", NULL, MTX_DEF);
}
@@ -231,8 +230,7 @@ dev_pager_free_page(vm_object_t object, vm_page_t m)
}
static void
-dev_pager_dealloc(object)
- vm_object_t object;
+dev_pager_dealloc(vm_object_t object)
{
vm_page_t m;
@@ -362,24 +360,18 @@ old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot,
}
static void
-dev_pager_putpages(object, m, count, sync, rtvals)
- vm_object_t object;
- vm_page_t *m;
- int count;
- boolean_t sync;
- int *rtvals;
+dev_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags,
+ int *rtvals)
{
panic("dev_pager_putpage called");
}
static boolean_t
-dev_pager_haspage(object, pindex, before, after)
- vm_object_t object;
- vm_pindex_t pindex;
- int *before;
- int *after;
+dev_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
+ int *after)
{
+
if (before != NULL)
*before = 0;
if (after != NULL)
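
Most of the device_pager.c churn above converts old-style K&R function definitions to ANSI prototypes (and tracks the putpages sync-to-flags argument change). For reference, the two definition styles side by side in a sketch that builds with pre-C23 compilers, which still accept the K&R form:

#include <stdio.h>

/* Old K&R definition, as device_pager.c had it: parameter types are
 * declared separately, and the compiler cannot check callers. */
static int
krn_style(a, b)
	int a;
	int b;
{
	return (a + b);
}

/* ANSI prototype definition, as the diff converts to: types live in
 * the parameter list, so mismatched calls become compile errors. */
static int
ansi_style(int a, int b)
{
	return (a + b);
}

int
main(void)
{
	printf("%d %d\n", krn_style(1, 2), ansi_style(3, 4));
	return (0);
}
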
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index accf517..7ecb6c7 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -227,6 +227,7 @@ struct vm_domain {
long vmd_segs; /* bitmask of the segments */
boolean_t vmd_oom;
int vmd_pass; /* local pagedaemon pass */
+ int vmd_oom_seq;
int vmd_last_active_scan;
struct vm_page vmd_marker; /* marker for pagedaemon private use */
};
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 2cc738d..156d11a 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -121,7 +121,8 @@ static void vm_pageout(void);
static void vm_pageout_init(void);
static int vm_pageout_clean(vm_page_t);
static void vm_pageout_scan(struct vm_domain *vmd, int pass);
-static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass);
+static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
+ int starting_page_shortage);
SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init,
NULL);
@@ -158,6 +159,7 @@ int vm_pages_needed; /* Event on which pageout daemon sleeps */
int vm_pageout_deficit; /* Estimated number of pages deficit */
int vm_pageout_pages_needed; /* flag saying that the pageout daemon needs pages */
int vm_pageout_wakeup_thresh;
+static int vm_pageout_oom_seq = 12;
#if !defined(NO_SWAPPING)
static int vm_pageout_req_swapout; /* XXX */
@@ -217,6 +219,10 @@ static int pageout_lock_miss;
SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss,
CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout");
+SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq,
+ CTLFLAG_RW, &vm_pageout_oom_seq, 0,
+ "back-to-back calls to oom detector to start OOM");
+
#define VM_PAGEOUT_PAGE_COUNT 16
int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;
@@ -941,7 +947,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
long min_scan;
int act_delta, addl_page_shortage, deficit, maxscan, page_shortage;
int vnodes_skipped = 0;
- int maxlaunder, scan_tick, scanned;
+ int maxlaunder, scan_tick, scanned, starting_page_shortage;
int lockmode;
boolean_t queues_locked;
@@ -981,6 +987,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
page_shortage = vm_paging_target() + deficit;
} else
page_shortage = deficit = 0;
+ starting_page_shortage = page_shortage;
/*
* maxlaunder limits the number of dirty pages we flush per scan.
@@ -1358,6 +1365,12 @@ relock_queues:
(void)speedup_syncer();
/*
+ * If the inactive queue scan fails repeatedly to meet its
+ * target, kill the largest process.
+ */
+ vm_pageout_mightbe_oom(vmd, page_shortage, starting_page_shortage);
+
+ /*
* Compute the number of pages we want to try to move from the
* active queue to the inactive queue.
*/
@@ -1469,15 +1482,6 @@ relock_queues:
}
}
#endif
-
- /*
- * If we are critically low on one of RAM or swap and low on
- * the other, kill the largest process. However, we avoid
- * doing this on the first pass in order to give ourselves a
- * chance to flush out dirty vnode-backed pages and to allow
- * active pages to be moved to the inactive queue and reclaimed.
- */
- vm_pageout_mightbe_oom(vmd, pass);
}
static int vm_pageout_oom_vote;
@@ -1488,12 +1492,17 @@ static int vm_pageout_oom_vote;
* failed to reach free target is premature.
*/
static void
-vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass)
+vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
+ int starting_page_shortage)
{
int old_vote;
- if (pass <= 1 || !((swap_pager_avail < 64 && vm_page_count_min()) ||
- (swap_pager_full && vm_paging_target() > 0))) {
+ if (starting_page_shortage <= 0 || starting_page_shortage !=
+ page_shortage)
+ vmd->vmd_oom_seq = 0;
+ else
+ vmd->vmd_oom_seq++;
+ if (vmd->vmd_oom_seq < vm_pageout_oom_seq) {
if (vmd->vmd_oom) {
vmd->vmd_oom = FALSE;
atomic_subtract_int(&vm_pageout_oom_vote, 1);
@@ -1501,6 +1510,12 @@ vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass)
return;
}
+ /*
+ * Do not follow the call sequence until OOM condition is
+ * cleared.
+ */
+ vmd->vmd_oom_seq = 0;
+
if (vmd->vmd_oom)
return;
@@ -1526,6 +1541,65 @@ vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass)
atomic_subtract_int(&vm_pageout_oom_vote, 1);
}
+/*
+ * The OOM killer is the page daemon's action of last resort when
+ * memory allocation requests have been stalled for a prolonged period
+ * of time because it cannot reclaim memory. This function computes
+ * the approximate number of physical pages that could be reclaimed if
+ * the specified address space is destroyed.
+ *
+ * Private, anonymous memory owned by the address space is the
+ * principal resource that we expect to recover after an OOM kill.
+ * Since the physical pages mapped by the address space's COW entries
+ * are typically shared pages, they are unlikely to be released and so
+ * they are not counted.
+ *
+ * To get to the point where the page daemon runs the OOM killer, its
+ * efforts to write-back vnode-backed pages may have stalled. This
+ * could be caused by a memory allocation deadlock in the write path
+ * that might be resolved by an OOM kill. Therefore, physical pages
+ * belonging to vnode-backed objects are counted, because they might
+ * be freed without being written out first if the address space holds
+ * the last reference to an unlinked vnode.
+ *
+ * Similarly, physical pages belonging to OBJT_PHYS objects are
+ * counted because the address space might hold the last reference to
+ * the object.
+ */
+static long
+vm_pageout_oom_pagecount(struct vmspace *vmspace)
+{
+ vm_map_t map;
+ vm_map_entry_t entry;
+ vm_object_t obj;
+ long res;
+
+ map = &vmspace->vm_map;
+ KASSERT(!map->system_map, ("system map"));
+ sx_assert(&map->lock, SA_LOCKED);
+ res = 0;
+ for (entry = map->header.next; entry != &map->header;
+ entry = entry->next) {
+ if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
+ continue;
+ obj = entry->object.vm_object;
+ if (obj == NULL)
+ continue;
+ if ((entry->eflags & MAP_ENTRY_NEEDS_COPY) != 0 &&
+ obj->ref_count != 1)
+ continue;
+ switch (obj->type) {
+ case OBJT_DEFAULT:
+ case OBJT_SWAP:
+ case OBJT_PHYS:
+ case OBJT_VNODE:
+ res += obj->resident_page_count;
+ break;
+ }
+ }
+ return (res);
+}
+
void
vm_pageout_oom(int shortage)
{
@@ -1570,7 +1644,8 @@ vm_pageout_oom(int shortage)
if (!TD_ON_RUNQ(td) &&
!TD_IS_RUNNING(td) &&
!TD_IS_SLEEPING(td) &&
- !TD_IS_SUSPENDED(td)) {
+ !TD_IS_SUSPENDED(td) &&
+ !TD_IS_SWAPPED(td)) {
thread_unlock(td);
breakout = 1;
break;
@@ -1598,12 +1673,13 @@ vm_pageout_oom(int shortage)
}
PROC_UNLOCK(p);
size = vmspace_swap_count(vm);
- vm_map_unlock_read(&vm->vm_map);
if (shortage == VM_OOM_MEM)
- size += vmspace_resident_count(vm);
+ size += vm_pageout_oom_pagecount(vm);
+ vm_map_unlock_read(&vm->vm_map);
vmspace_free(vm);
+
/*
- * if the this process is bigger than the biggest one
+ * If this process is bigger than the biggest one,
* remember it.
*/
if (size > bigsize) {
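
The reworked trigger replaces the old swap-space heuristic with a streak counter: a scan that reclaims nothing (page_shortage still equal to starting_page_shortage) bumps vmd_oom_seq, any progress resets it, and only vm.pageout_oom_seq consecutive failures (default 12) arm the OOM killer. A self-contained sketch of just that counter logic; struct domain and mightbe_oom are simplified stand-ins for struct vm_domain and vm_pageout_mightbe_oom():

#include <stdbool.h>
#include <stdio.h>

static int oom_seq_limit = 12;	/* vm.pageout_oom_seq analogue */

struct domain {
	int oom_seq;		/* consecutive unproductive scans */
};

/*
 * A scan is "unproductive" when it freed nothing, i.e. the shortage
 * is unchanged from where the scan started.  Any progress resets the
 * streak; only a full streak arms the OOM killer.
 */
static bool
mightbe_oom(struct domain *vmd, int page_shortage,
    int starting_page_shortage)
{
	if (starting_page_shortage <= 0 ||
	    starting_page_shortage != page_shortage)
		vmd->oom_seq = 0;	/* made progress, start over */
	else
		vmd->oom_seq++;
	return (vmd->oom_seq >= oom_seq_limit);
}

int
main(void)
{
	struct domain d = { 0 };

	for (int scan = 1; scan <= 12; scan++)
		if (mightbe_oom(&d, 100, 100))	/* no page reclaimed */
			printf("OOM killer armed on scan %d\n", scan);
	return (0);
}
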
diff --git a/sys/x86/xen/hvm.c b/sys/x86/xen/hvm.c
index 1986f6f..6c6f153 100644
--- a/sys/x86/xen/hvm.c
+++ b/sys/x86/xen/hvm.c
@@ -72,7 +72,6 @@ static driver_filter_t xen_cpustop_handler;
static driver_filter_t xen_cpususpend_handler;
static driver_filter_t xen_cpustophard_handler;
static void xen_ipi_vectored(u_int vector, int dest);
-static void xen_hvm_cpu_resume(void);
#endif
static void xen_hvm_cpu_init(void);
@@ -84,9 +83,6 @@ extern void pmap_lazyfix_action(void);
extern int pmap_pcid_enabled;
#endif
-/* Variables used by mp_machdep to perform the bitmap IPI */
-extern volatile u_int cpu_ipi_pending[MAXCPU];
-
/*---------------------------------- Macros ----------------------------------*/
#define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS)
@@ -110,7 +106,7 @@ enum xen_domain_type xen_domain_type = XEN_NATIVE;
struct cpu_ops xen_hvm_cpu_ops = {
.ipi_vectored = lapic_ipi_vectored,
.cpu_init = xen_hvm_cpu_init,
- .cpu_resume = xen_hvm_cpu_resume
+ .cpu_resume = xen_hvm_cpu_init
};
#endif
@@ -312,21 +308,6 @@ xen_ipi_vectored(u_int vector, int dest)
/*---------------------- XEN diverged cpu operations -------------------------*/
static void
-xen_hvm_cpu_resume(void)
-{
- u_int cpuid = PCPU_GET(cpuid);
-
- /*
- * Reset pending bitmap IPIs, because Xen doesn't preserve pending
- * event channels on migration.
- */
- cpu_ipi_pending[cpuid] = 0;
-
- /* register vcpu_info area */
- xen_hvm_cpu_init();
-}
-
-static void
xen_cpu_ipi_init(int cpu)
{
xen_intr_handle_t *ipi_handle;