Diffstat (limited to 'sys/kern')
-rw-r--r--  sys/kern/init_main.c               2
-rw-r--r--  sys/kern/kern_conf.c               7
-rw-r--r--  sys/kern/kern_descrip.c          129
-rw-r--r--  sys/kern/kern_fork.c               6
-rw-r--r--  sys/kern/kern_jail.c               2
-rw-r--r--  sys/kern/kern_ktr.c                6
-rw-r--r--  sys/kern/kern_lock.c              24
-rw-r--r--  sys/kern/kern_mbuf.c              60
-rw-r--r--  sys/kern/kern_mib.c               11
-rw-r--r--  sys/kern/kern_mutex.c              4
-rw-r--r--  sys/kern/kern_priv.c              18
-rw-r--r--  sys/kern/kern_rmlock.c             5
-rw-r--r--  sys/kern/kern_rwlock.c             9
-rw-r--r--  sys/kern/kern_shutdown.c         142
-rw-r--r--  sys/kern/kern_sig.c              227
-rw-r--r--  sys/kern/kern_switch.c             6
-rw-r--r--  sys/kern/kern_sx.c                 9
-rw-r--r--  sys/kern/kern_time.c               3
-rw-r--r--  sys/kern/kern_timeout.c           88
-rw-r--r--  sys/kern/subr_busdma_bufalloc.c  174
-rw-r--r--  sys/kern/subr_mchain.c            10
-rw-r--r--  sys/kern/subr_param.c              4
-rw-r--r--  sys/kern/subr_smp.c                3
-rw-r--r--  sys/kern/subr_syscall.c           18
-rw-r--r--  sys/kern/subr_uio.c                1
-rw-r--r--  sys/kern/subr_witness.c          134
-rw-r--r--  sys/kern/sys_generic.c             3
-rw-r--r--  sys/kern/uipc_domain.c            57
-rw-r--r--  sys/kern/uipc_mbuf.c              20
-rw-r--r--  sys/kern/uipc_mbuf2.c              6
-rw-r--r--  sys/kern/uipc_sockbuf.c            8
-rw-r--r--  sys/kern/uipc_socket.c           147
-rw-r--r--  sys/kern/uipc_syscalls.c           6
-rw-r--r--  sys/kern/uipc_usrreq.c             1
-rw-r--r--  sys/kern/vfs_bio.c                32
-rw-r--r--  sys/kern/vfs_export.c              2
-rw-r--r--  sys/kern/vfs_mount.c              10
-rw-r--r--  sys/kern/vfs_mountroot.c           9
-rw-r--r--  sys/kern/vfs_subr.c              115
-rw-r--r--  sys/kern/vfs_vnops.c              82
40 files changed, 995 insertions, 605 deletions
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index c30e1a2..00b1c3f 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -498,7 +498,7 @@ proc0_init(void *dummy __unused)
strncpy(p->p_comm, "kernel", sizeof (p->p_comm));
strncpy(td->td_name, "swapper", sizeof (td->td_name));
- callout_init(&p->p_itcallout, CALLOUT_MPSAFE);
+ callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0);
callout_init_mtx(&p->p_limco, &p->p_mtx, 0);
callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
diff --git a/sys/kern/kern_conf.c b/sys/kern/kern_conf.c
index 288fac5..c04d1da 100644
--- a/sys/kern/kern_conf.c
+++ b/sys/kern/kern_conf.c
@@ -698,6 +698,13 @@ prep_devname(struct cdev *dev, const char *fmt, va_list ap)
;
for (to = dev->si_name; *from != '\0'; from++, to++) {
+ /*
+ * Spaces and double quotation marks cause
+ * problems for the devctl(4) protocol.
+ * Reject names containing those characters.
+ */
+ if (isspace(*from) || *from == '"')
+ return (EINVAL);
/* Treat multiple sequential slashes as single. */
while (from[0] == '/' && from[1] == '/')
from++;
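
A minimal sketch of how a driver sees this new check, assuming the make_dev_p(9) interface with MAKEDEV_CHECKNAME (the cdevsw and device name below are hypothetical):

	struct cdev *dev;
	int error;

	/* "bad name" contains a space, so prep_devname() now rejects it. */
	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev,
	    &my_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "bad name");
	if (error != 0)
		return (error);		/* EINVAL with this change */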
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 1eb18f1..6dae173 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -133,12 +133,26 @@ static int fill_socket_info(struct socket *so, struct kinfo_file *kif);
static int fill_vnode_info(struct vnode *vp, struct kinfo_file *kif);
/*
- * A process is initially started out with NDFILE descriptors stored within
- * this structure, selected to be enough for typical applications based on
- * the historical limit of 20 open files (and the usage of descriptors by
- * shells). If these descriptors are exhausted, a larger descriptor table
- * may be allocated, up to a process' resource limit; the internal arrays
- * are then unused.
+ * Each process has:
+ *
+ * - An array of open file descriptors (fd_ofiles)
+ * - An array of file flags (fd_ofileflags)
+ * - A bitmap recording which descriptors are in use (fd_map)
+ *
+ * A process starts out with NDFILE descriptors. The value of NDFILE has
+ * been selected based on the historical limit of 20 open files, and an
+ * assumption that the majority of processes, especially short-lived
+ * processes like shells, will never need more.
+ *
+ * If this initial allocation is exhausted, a larger descriptor table and
+ * map are allocated dynamically, and the pointers in the process's struct
+ * filedesc are updated to point to those. This is repeated every time
+ * the process runs out of file descriptors (provided it hasn't hit its
+ * resource limit).
+ *
+ * Since threads may hold references to individual descriptor table
+ * entries, the tables are never freed. Instead, they are placed on a
+ * linked list and freed only when the struct filedesc is released.
*/
#define NDFILE 20
#define NDSLOTSIZE sizeof(NDSLOTTYPE)
@@ -148,34 +162,23 @@ static int fill_vnode_info(struct vnode *vp, struct kinfo_file *kif);
#define NDSLOTS(x) (((x) + NDENTRIES - 1) / NDENTRIES)
/*
- * Storage required per open file descriptor.
- */
-#define OFILESIZE (sizeof(struct file *) + sizeof(char))
-
-/*
- * Storage to hold unused ofiles that need to be reclaimed.
+ * SLIST entry used to keep track of ofiles which must be reclaimed when
+ * the process exits.
*/
struct freetable {
- struct file **ft_table;
+ struct file **ft_table;
SLIST_ENTRY(freetable) ft_next;
};
/*
- * Basic allocation of descriptors:
- * one of the above, plus arrays for NDFILE descriptors.
+ * Initial allocation: a filedesc structure + the head of the SLIST used to
+ * keep track of old ofiles + enough space for NDFILE descriptors.
*/
struct filedesc0 {
- struct filedesc fd_fd;
- /*
- * ofiles which need to be reclaimed on free.
- */
- SLIST_HEAD(,freetable) fd_free;
- /*
- * These arrays are used when the number of open files is
- * <= NDFILE, and are then pointed to by the pointers above.
- */
- struct file *fd_dfiles[NDFILE];
- char fd_dfileflags[NDFILE];
+ struct filedesc fd_fd;
+ SLIST_HEAD(, freetable) fd_free;
+ struct file *fd_dfiles[NDFILE];
+ char fd_dfileflags[NDFILE];
NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];
};
@@ -1414,58 +1417,74 @@ static void
fdgrowtable(struct filedesc *fdp, int nfd)
{
struct filedesc0 *fdp0;
- struct freetable *fo;
+ struct freetable *ft;
struct file **ntable;
struct file **otable;
- char *nfileflags;
+ char *nfileflags, *ofileflags;
int nnfiles, onfiles;
- NDSLOTTYPE *nmap;
+ NDSLOTTYPE *nmap, *omap;
FILEDESC_XLOCK_ASSERT(fdp);
KASSERT(fdp->fd_nfiles > 0,
("zero-length file table"));
- /* compute the size of the new table */
+ /* save old values */
onfiles = fdp->fd_nfiles;
+ otable = fdp->fd_ofiles;
+ ofileflags = fdp->fd_ofileflags;
+ omap = fdp->fd_map;
+
+ /* compute the size of the new table */
nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */
if (nnfiles <= onfiles)
/* the table is already large enough */
return;
- /* allocate a new table and (if required) new bitmaps */
- ntable = malloc((nnfiles * OFILESIZE) + sizeof(struct freetable),
+ /*
+ * Allocate a new table and map. We need enough space for a) the
+ * file entries themselves, b) the file flags, and c) the struct
+ * freetable we will use when we decommission the table and place
+ * it on the freelist. We place the struct freetable in the
+ * middle so we don't have to worry about padding.
+ */
+ ntable = malloc(nnfiles * sizeof(*ntable) +
+ sizeof(struct freetable) +
+ nnfiles * sizeof(*nfileflags),
+ M_FILEDESC, M_ZERO | M_WAITOK);
+ nfileflags = (char *)&ntable[nnfiles] + sizeof(struct freetable);
+ nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE,
M_FILEDESC, M_ZERO | M_WAITOK);
- nfileflags = (char *)&ntable[nnfiles];
- if (NDSLOTS(nnfiles) > NDSLOTS(onfiles))
- nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE,
- M_FILEDESC, M_ZERO | M_WAITOK);
- else
- nmap = NULL;
- bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
- bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
- otable = fdp->fd_ofiles;
- fdp->fd_ofileflags = nfileflags;
+ /* copy the old data over and point at the new tables */
+ memcpy(ntable, otable, onfiles * sizeof(*otable));
+ memcpy(nfileflags, ofileflags, onfiles * sizeof(*ofileflags));
+ memcpy(nmap, omap, NDSLOTS(onfiles) * sizeof(*omap));
+
+ /* update the pointers and counters */
+ fdp->fd_nfiles = nnfiles;
fdp->fd_ofiles = ntable;
+ fdp->fd_ofileflags = nfileflags;
+ fdp->fd_map = nmap;
+
/*
- * We must preserve ofiles until the process exits because we can't
- * be certain that no threads have references to the old table via
- * _fget().
+ * Do not free the old file table, as some threads may still
+ * reference entries within it. Instead, place it on a freelist
+ * which will be processed when the struct filedesc is released.
+ *
+ * Do, however, free the old map.
+ *
+ * Note that if onfiles == NDFILE, we're dealing with the original
+ * static allocation contained within (struct filedesc0 *)fdp,
+ * which must not be freed.
*/
if (onfiles > NDFILE) {
- fo = (struct freetable *)&otable[onfiles];
+ ft = (struct freetable *)&otable[onfiles];
fdp0 = (struct filedesc0 *)fdp;
- fo->ft_table = otable;
- SLIST_INSERT_HEAD(&fdp0->fd_free, fo, ft_next);
- }
- if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
- bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap));
- if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
- free(fdp->fd_map, M_FILEDESC);
- fdp->fd_map = nmap;
+ ft->ft_table = otable;
+ SLIST_INSERT_HEAD(&fdp0->fd_free, ft, ft_next);
+ free(omap, M_FILEDESC);
}
- fdp->fd_nfiles = nnfiles;
}
/*
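
For reference, a sketch of the single-allocation layout that the new fdgrowtable() builds; placing the struct freetable between the pointer array and the byte-wide flag array means no alignment padding is needed (names as in the hunk above):

	/*
	 * One malloc() provides, in order:
	 *   struct file *ntable[nnfiles]   -- the descriptor table
	 *   struct freetable               -- reused when the table is retired
	 *   char nfileflags[nnfiles]       -- per-descriptor flags
	 */
	ntable = malloc(nnfiles * sizeof(*ntable) + sizeof(struct freetable) +
	    nnfiles * sizeof(*nfileflags), M_FILEDESC, M_ZERO | M_WAITOK);
	nfileflags = (char *)&ntable[nnfiles] + sizeof(struct freetable);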
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 5dc43ca..0d2709f 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -150,11 +150,7 @@ sys_vfork(struct thread *td, struct vfork_args *uap)
int error, flags;
struct proc *p2;
-#ifdef XEN
- flags = RFFDG | RFPROC; /* validate that this is still an issue */
-#else
flags = RFFDG | RFPROC | RFPPWAIT | RFMEM;
-#endif
error = fork1(td, flags, 0, &p2, NULL, 0);
if (error == 0) {
td->td_retval[0] = p2->p_pid;
@@ -591,7 +587,7 @@ do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2,
LIST_INIT(&p2->p_children);
LIST_INIT(&p2->p_orphans);
- callout_init(&p2->p_itcallout, CALLOUT_MPSAFE);
+ callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0);
/*
* If PF_FORK is set, the child process inherits the
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 2ff64d5..c624283 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -4518,6 +4518,8 @@ prison_racct_detach(struct prison *pr)
sx_assert(&allprison_lock, SA_UNLOCKED);
+ if (pr->pr_prison_racct == NULL)
+ return;
prison_racct_free(pr->pr_prison_racct);
pr->pr_prison_racct = NULL;
}
diff --git a/sys/kern/kern_ktr.c b/sys/kern/kern_ktr.c
index 6498498..a83cedf 100644
--- a/sys/kern/kern_ktr.c
+++ b/sys/kern/kern_ktr.c
@@ -112,7 +112,7 @@ static SYSCTL_NODE(_debug, OID_AUTO, ktr, CTLFLAG_RD, 0, "KTR options");
SYSCTL_INT(_debug_ktr, OID_AUTO, version, CTLFLAG_RD,
&ktr_version, 0, "Version of the KTR interface");
-SYSCTL_INT(_debug_ktr, OID_AUTO, compile, CTLFLAG_RD,
+SYSCTL_UINT(_debug_ktr, OID_AUTO, compile, CTLFLAG_RD,
&ktr_compile, 0, "Bitmask of KTR event classes compiled into the kernel");
static void
@@ -190,8 +190,8 @@ sysctl_debug_ktr_mask(SYSCTL_HANDLER_ARGS)
return (error);
}
-SYSCTL_PROC(_debug_ktr, OID_AUTO, mask, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
- sysctl_debug_ktr_mask, "I",
+SYSCTL_PROC(_debug_ktr, OID_AUTO, mask, CTLTYPE_UINT|CTLFLAG_RW, 0, 0,
+ sysctl_debug_ktr_mask, "IU",
"Bitmask of KTR event classes for which logging is enabled");
static int
diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c
index 8b428bd..98f0156 100644
--- a/sys/kern/kern_lock.c
+++ b/sys/kern/kern_lock.c
@@ -35,6 +35,7 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/lock_profile.h>
@@ -477,7 +478,7 @@ __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
__func__, file, line));
- KASSERT(!TD_IS_IDLETHREAD(curthread),
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
lk->lock_object.lo_name, file, line));
@@ -934,9 +935,19 @@ __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
}
break;
case LK_DOWNGRADE:
- _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line);
+ _lockmgr_assert(lk, KA_XLOCKED, file, line);
LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
+
+ /*
+ * Panic if the lock is recursed.
+ */
+ if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
+ if (flags & LK_INTERLOCK)
+ class->lc_unlock(ilk);
+ panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
+ __func__, iwmesg, file, line);
+ }
TD_SLOCKS_INC(curthread);
/*
@@ -1254,7 +1265,14 @@ _lockmgr_disown(struct lock *lk, const char *file, int line)
return;
tid = (uintptr_t)curthread;
- _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line);
+ _lockmgr_assert(lk, KA_XLOCKED, file, line);
+
+ /*
+ * Panic if the lock is recursed.
+ */
+ if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
+ panic("%s: disown a recursed lockmgr @ %s:%d\n",
+ __func__, file, line);
/*
* If the owner is already LK_KERNPROC just skip the whole operation.
diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c
index fe91e3e..3bdfd88 100644
--- a/sys/kern/kern_mbuf.c
+++ b/sys/kern/kern_mbuf.c
@@ -1,6 +1,6 @@
/*-
* Copyright (c) 2004, 2005,
- * Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved.
+ * Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -76,7 +76,7 @@ __FBSDID("$FreeBSD$");
* [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ]
* | \________ |
* [ Cluster Keg ] \ /
- * | [ Mbuf Keg ]
+ * | [ Mbuf Keg ]
* [ Cluster Slabs ] |
* | [ Mbuf Slabs ]
* \____________(VM)_________________/
@@ -137,8 +137,7 @@ tunable_mbinit(void *dummy)
TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
nmbufs = lmax(maxmbufmem / MSIZE / 5,
- nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
-
+ nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
}
SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
@@ -148,7 +147,7 @@ sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
int error, newnmbclusters;
newnmbclusters = nmbclusters;
- error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
+ error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
if (error == 0 && req->newptr) {
if (newnmbclusters > nmbclusters &&
nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
@@ -171,7 +170,7 @@ sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
int error, newnmbjumbop;
newnmbjumbop = nmbjumbop;
- error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
+ error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
if (error == 0 && req->newptr) {
if (newnmbjumbop > nmbjumbop &&
nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
@@ -185,8 +184,7 @@ sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW,
&nmbjumbop, 0, sysctl_nmbjumbop, "IU",
- "Maximum number of mbuf page size jumbo clusters allowed");
-
+ "Maximum number of mbuf page size jumbo clusters allowed");
static int
sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
@@ -194,7 +192,7 @@ sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
int error, newnmbjumbo9;
newnmbjumbo9 = nmbjumbo9;
- error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
+ error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
if (error == 0 && req->newptr) {
if (newnmbjumbo9 > nmbjumbo9 &&
nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
@@ -208,7 +206,7 @@ sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW,
&nmbjumbo9, 0, sysctl_nmbjumbo9, "IU",
- "Maximum number of mbuf 9k jumbo clusters allowed");
+ "Maximum number of mbuf 9k jumbo clusters allowed");
static int
sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
@@ -216,7 +214,7 @@ sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
int error, newnmbjumbo16;
newnmbjumbo16 = nmbjumbo16;
- error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
+ error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
if (error == 0 && req->newptr) {
if (newnmbjumbo16 > nmbjumbo16 &&
nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
@@ -238,7 +236,7 @@ sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
int error, newnmbufs;
newnmbufs = nmbufs;
- error = sysctl_handle_int(oidp, &newnmbufs, 0, req);
+ error = sysctl_handle_int(oidp, &newnmbufs, 0, req);
if (error == 0 && req->newptr) {
if (newnmbufs > nmbufs) {
nmbufs = newnmbufs;
@@ -254,7 +252,6 @@ SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbuf, CTLTYPE_INT|CTLFLAG_RW,
&nmbufs, 0, sysctl_nmbufs, "IU",
"Maximum number of mbufs allowed");
-
SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
"Mbuf general information and statistics");
@@ -307,10 +304,9 @@ mbuf_init(void *dummy)
NULL, NULL,
#endif
MSIZE - 1, UMA_ZONE_MAXBUCKET);
- if (nmbufs > 0) {
- uma_zone_set_max(zone_mbuf, nmbufs);
- nmbufs = uma_zone_get_max(zone_mbuf);
- }
+ if (nmbufs > 0)
+ nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
+ uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached");
zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
mb_ctor_clust, mb_dtor_clust,
@@ -320,10 +316,9 @@ mbuf_init(void *dummy)
NULL, NULL,
#endif
UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
- if (nmbclusters > 0) {
- uma_zone_set_max(zone_clust, nmbclusters);
- nmbclusters = uma_zone_get_max(zone_clust);
- }
+ if (nmbclusters > 0)
+ nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
+ uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached");
zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);
@@ -337,10 +332,9 @@ mbuf_init(void *dummy)
NULL, NULL,
#endif
UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
- if (nmbjumbop > 0) {
- uma_zone_set_max(zone_jumbop, nmbjumbop);
- nmbjumbop = uma_zone_get_max(zone_jumbop);
- }
+ if (nmbjumbop > 0)
+ nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
+ uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached");
zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
mb_ctor_clust, mb_dtor_clust,
@@ -351,10 +345,9 @@ mbuf_init(void *dummy)
#endif
UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
- if (nmbjumbo9 > 0) {
- uma_zone_set_max(zone_jumbo9, nmbjumbo9);
- nmbjumbo9 = uma_zone_get_max(zone_jumbo9);
- }
+ if (nmbjumbo9 > 0)
+ nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
+ uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached");
zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
mb_ctor_clust, mb_dtor_clust,
@@ -365,10 +358,9 @@ mbuf_init(void *dummy)
#endif
UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
- if (nmbjumbo16 > 0) {
- uma_zone_set_max(zone_jumbo16, nmbjumbo16);
- nmbjumbo16 = uma_zone_get_max(zone_jumbo16);
- }
+ if (nmbjumbo16 > 0)
+ nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
+ uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
NULL, NULL,
@@ -488,7 +480,7 @@ static void
mb_dtor_mbuf(void *mem, int size, void *arg)
{
struct mbuf *m;
- unsigned long flags;
+ unsigned long flags;
m = (struct mbuf *)mem;
flags = (unsigned long)arg;
diff --git a/sys/kern/kern_mib.c b/sys/kern/kern_mib.c
index b0bc5c8..29864a8 100644
--- a/sys/kern/kern_mib.c
+++ b/sys/kern/kern_mib.c
@@ -377,15 +377,8 @@ SYSCTL_PROC(_kern, KERN_SECURELVL, securelevel,
/* Actual kernel configuration options. */
extern char kernconfstring[];
-static int
-sysctl_kern_config(SYSCTL_HANDLER_ARGS)
-{
- return (sysctl_handle_string(oidp, kernconfstring,
- strlen(kernconfstring), req));
-}
-
-SYSCTL_PROC(_kern, OID_AUTO, conftxt, CTLTYPE_STRING|CTLFLAG_RW,
- 0, 0, sysctl_kern_config, "", "Kernel configuration file");
+SYSCTL_STRING(_kern, OID_AUTO, conftxt, CTLFLAG_RD, kernconfstring, 0,
+ "Kernel configuration file");
#endif
static int
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 2f13863..39f461e 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -210,7 +210,7 @@ __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
m = mtxlock2mtx(c);
- KASSERT(!TD_IS_IDLETHREAD(curthread),
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("mtx_lock() by idle thread %p on sleep mutex %s @ %s:%d",
curthread, m->lock_object.lo_name, file, line));
KASSERT(m->mtx_lock != MTX_DESTROYED,
@@ -326,7 +326,7 @@ _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line)
m = mtxlock2mtx(c);
- KASSERT(!TD_IS_IDLETHREAD(curthread),
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("mtx_trylock() by idle thread %p on sleep mutex %s @ %s:%d",
curthread, m->lock_object.lo_name, file, line));
KASSERT(m->mtx_lock != MTX_DESTROYED,
diff --git a/sys/kern/kern_priv.c b/sys/kern/kern_priv.c
index fd3a95c..2f70c2b 100644
--- a/sys/kern/kern_priv.c
+++ b/sys/kern/kern_priv.c
@@ -59,6 +59,11 @@ SYSCTL_INT(_security_bsd, OID_AUTO, suser_enabled, CTLFLAG_RW,
&suser_enabled, 0, "processes with uid 0 have privilege");
TUNABLE_INT("security.bsd.suser_enabled", &suser_enabled);
+static int unprivileged_mlock = 1;
+SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_mlock, CTLFLAG_RW|CTLFLAG_TUN,
+ &unprivileged_mlock, 0, "Allow non-root users to call mlock(2)");
+TUNABLE_INT("security.bsd.unprivileged_mlock", &unprivileged_mlock);
+
SDT_PROVIDER_DEFINE(priv);
SDT_PROBE_DEFINE1(priv, kernel, priv_check, priv_ok, priv-ok, "int");
SDT_PROBE_DEFINE1(priv, kernel, priv_check, priv_err, priv-err, "int");
@@ -93,6 +98,19 @@ priv_check_cred(struct ucred *cred, int priv, int flags)
if (error)
goto out;
+ if (unprivileged_mlock) {
+ /*
+ * Allow unprivileged users to call mlock(2)/munlock(2) and
+ * mlockall(2)/munlockall(2).
+ */
+ switch (priv) {
+ case PRIV_VM_MLOCK:
+ case PRIV_VM_MUNLOCK:
+ error = 0;
+ goto out;
+ }
+ }
+
/*
* Having determined if privilege is restricted by various policies,
* now determine if privilege is granted. At this point, any policy
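
The new knob is both a loader tunable and a run-time sysctl; for example, restoring the historical behaviour (mlock(2) requires privilege) would be done with an entry like the following in loader.conf(5), or the equivalent sysctl(8) command:

	security.bsd.unprivileged_mlock=0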
diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c
index 30400b1..ebf0f76 100644
--- a/sys/kern/kern_rmlock.c
+++ b/sys/kern/kern_rmlock.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
@@ -494,7 +495,7 @@ void _rm_wlock_debug(struct rmlock *rm, const char *file, int line)
if (SCHEDULER_STOPPED())
return;
- KASSERT(!TD_IS_IDLETHREAD(curthread),
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("rm_wlock() by idle thread %p on rmlock %s @ %s:%d",
curthread, rm->lock_object.lo_name, file, line));
WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE,
@@ -539,7 +540,7 @@ _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
if (SCHEDULER_STOPPED())
return (1);
- KASSERT(!TD_IS_IDLETHREAD(curthread),
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("rm_rlock() by idle thread %p on rmlock %s @ %s:%d",
curthread, rm->lock_object.lo_name, file, line));
if (!trylock && (rm->lock_object.lo_flags & RM_SLEEPABLE))
diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c
index 60a7faa..38d0654 100644
--- a/sys/kern/kern_rwlock.c
+++ b/sys/kern/kern_rwlock.c
@@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include "opt_no_adaptive_rwlocks.h"
#include <sys/param.h>
+#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
@@ -258,7 +259,7 @@ _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
rw = rwlock2rw(c);
- KASSERT(!TD_IS_IDLETHREAD(curthread),
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
curthread, rw->lock_object.lo_name, file, line));
KASSERT(rw->rw_lock != RW_DESTROYED,
@@ -282,7 +283,7 @@ __rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
rw = rwlock2rw(c);
- KASSERT(!TD_IS_IDLETHREAD(curthread),
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
curthread, rw->lock_object.lo_name, file, line));
KASSERT(rw->rw_lock != RW_DESTROYED,
@@ -364,7 +365,7 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line)
rw = rwlock2rw(c);
- KASSERT(!TD_IS_IDLETHREAD(curthread),
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
curthread, rw->lock_object.lo_name, file, line));
KASSERT(rw->rw_lock != RW_DESTROYED,
@@ -558,7 +559,7 @@ __rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
rw = rwlock2rw(c);
- KASSERT(!TD_IS_IDLETHREAD(curthread),
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
curthread, rw->lock_object.lo_name, file, line));
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index b0e4839..fcbae28 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/kthread.h>
+#include <sys/ktr.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/priv.h>
@@ -150,6 +151,7 @@ static void poweroff_wait(void *, int);
static void shutdown_halt(void *junk, int howto);
static void shutdown_panic(void *junk, int howto);
static void shutdown_reset(void *junk, int howto);
+static void vpanic(const char *fmt, va_list ap) __dead2;
/* register various local shutdown events */
static void
@@ -538,6 +540,134 @@ shutdown_reset(void *junk, int howto)
/* NOTREACHED */ /* assuming reset worked */
}
+#if defined(WITNESS) || defined(INVARIANTS)
+static int kassert_warn_only = 0;
+#ifdef KDB
+static int kassert_do_kdb = 0;
+#endif
+#ifdef KTR
+static int kassert_do_ktr = 0;
+#endif
+static int kassert_do_log = 1;
+static int kassert_log_pps_limit = 4;
+static int kassert_log_mute_at = 0;
+static int kassert_log_panic_at = 0;
+static int kassert_warnings = 0;
+
+SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW, NULL, "kassert options");
+
+SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, CTLFLAG_RW | CTLFLAG_TUN,
+ &kassert_warn_only, 0,
+ "KASSERT triggers a panic (1) or just a warning (0)");
+TUNABLE_INT("debug.kassert.warn_only", &kassert_warn_only);
+
+#ifdef KDB
+SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, CTLFLAG_RW | CTLFLAG_TUN,
+ &kassert_do_kdb, 0, "KASSERT will enter the debugger");
+TUNABLE_INT("debug.kassert.do_kdb", &kassert_do_kdb);
+#endif
+
+#ifdef KTR
+SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, CTLFLAG_RW | CTLFLAG_TUN,
+ &kassert_do_ktr, 0,
+ "KASSERT does a KTR, set this to the KTRMASK you want");
+TUNABLE_INT("debug.kassert.do_ktr", &kassert_do_ktr);
+#endif
+
+SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, CTLFLAG_RW | CTLFLAG_TUN,
+ &kassert_do_log, 0, "KASSERT triggers a panic (1) or just a warning (0)");
+TUNABLE_INT("debug.kassert.do_log", &kassert_do_log);
+
+SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RW | CTLFLAG_TUN,
+ &kassert_warnings, 0, "number of KASSERTs that have been triggered");
+TUNABLE_INT("debug.kassert.warnings", &kassert_warnings);
+
+SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, CTLFLAG_RW | CTLFLAG_TUN,
+    &kassert_log_panic_at, 0, "max number of KASSERTs before we will panic");
+TUNABLE_INT("debug.kassert.log_panic_at", &kassert_log_panic_at);
+
+SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, CTLFLAG_RW | CTLFLAG_TUN,
+ &kassert_log_pps_limit, 0, "limit number of log messages per second");
+TUNABLE_INT("debug.kassert.log_pps_limit", &kassert_log_pps_limit);
+
+SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, CTLFLAG_RW | CTLFLAG_TUN,
+    &kassert_log_mute_at, 0, "max number of KASSERTs to log");
+TUNABLE_INT("debug.kassert.log_mute_at", &kassert_log_mute_at);
+
+static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS);
+
+SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0,
+ kassert_sysctl_kassert, "I", "set to trigger a test kassert");
+
+static int
+kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS)
+{
+ int error, i;
+
+ error = sysctl_wire_old_buffer(req, sizeof(int));
+ if (error == 0) {
+ i = 0;
+ error = sysctl_handle_int(oidp, &i, 0, req);
+ }
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i));
+ return (0);
+}
+
+/*
+ * Called by KASSERT; this decides whether we panic
+ * or merely log via printf(9) and/or KTR.
+ */
+void
+kassert_panic(const char *fmt, ...)
+{
+ static char buf[256];
+ va_list ap;
+
+ va_start(ap, fmt);
+ (void)vsnprintf(buf, sizeof(buf), fmt, ap);
+ va_end(ap);
+
+ /*
+ * panic if we're not just warning, or if we've exceeded
+ * kassert_log_panic_at warnings.
+ */
+ if (!kassert_warn_only ||
+ (kassert_log_panic_at > 0 &&
+ kassert_warnings >= kassert_log_panic_at)) {
+ va_start(ap, fmt);
+ vpanic(fmt, ap);
+ /* NORETURN */
+ }
+#ifdef KTR
+ if (kassert_do_ktr)
+ CTR0(ktr_mask, buf);
+#endif /* KTR */
+ /*
+ * log if we've not yet met the mute limit.
+ */
+ if (kassert_do_log &&
+ (kassert_log_mute_at == 0 ||
+ kassert_warnings < kassert_log_mute_at)) {
+ static struct timeval lasterr;
+ static int curerr;
+
+ if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) {
+ printf("KASSERT failed: %s\n", buf);
+ kdb_backtrace();
+ }
+ }
+#ifdef KDB
+ if (kassert_do_kdb) {
+ kdb_enter(KDB_WHY_KASSERT, buf);
+ }
+#endif
+ atomic_add_int(&kassert_warnings, 1);
+}
+#endif
+
/*
* Panic is called on unresolvable fatal errors. It prints "panic: mesg",
* and then reboots. If we are called twice, then we avoid trying to sync
@@ -546,12 +676,20 @@ shutdown_reset(void *junk, int howto)
void
panic(const char *fmt, ...)
{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vpanic(fmt, ap);
+}
+
+static void
+vpanic(const char *fmt, va_list ap)
+{
#ifdef SMP
cpuset_t other_cpus;
#endif
struct thread *td = curthread;
int bootopt, newpanic;
- va_list ap;
static char buf[256];
spinlock_enter();
@@ -587,7 +725,6 @@ panic(const char *fmt, ...)
newpanic = 1;
}
- va_start(ap, fmt);
if (newpanic) {
(void)vsnprintf(buf, sizeof(buf), fmt, ap);
panicstr = buf;
@@ -598,7 +735,6 @@ panic(const char *fmt, ...)
vprintf(fmt, ap);
printf("\n");
}
- va_end(ap);
#ifdef SMP
printf("cpuid = %d\n", PCPU_GET(cpuid));
#endif
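
Taken together, these knobs turn KASSERT from panic-only into a tunable diagnostic. An illustrative loader.conf(5) for a test machine that logs and backtraces failed assertions instead of panicking, but still gives up after 100 of them, might be:

	debug.kassert.warn_only=1
	debug.kassert.do_log=1
	debug.kassert.log_panic_at=100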
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 541ea2b..9c52707 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -106,7 +106,6 @@ SDT_PROBE_ARGTYPE(proc, kernel, , signal_discard, 1, "struct proc *");
SDT_PROBE_ARGTYPE(proc, kernel, , signal_discard, 2, "int");
static int coredump(struct thread *);
-static char *expand_name(const char *, uid_t, pid_t, struct thread *, int);
static int killpg1(struct thread *td, int sig, int pgid, int all,
ksiginfo_t *ksi);
static int issignal(struct thread *td, int stop_allowed);
@@ -202,37 +201,37 @@ SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag,
#define SA_CANTMASK 0x40 /* non-maskable, catchable */
static int sigproptbl[NSIG] = {
- SA_KILL, /* SIGHUP */
- SA_KILL, /* SIGINT */
- SA_KILL|SA_CORE, /* SIGQUIT */
- SA_KILL|SA_CORE, /* SIGILL */
- SA_KILL|SA_CORE, /* SIGTRAP */
- SA_KILL|SA_CORE, /* SIGABRT */
- SA_KILL|SA_CORE, /* SIGEMT */
- SA_KILL|SA_CORE, /* SIGFPE */
- SA_KILL, /* SIGKILL */
- SA_KILL|SA_CORE, /* SIGBUS */
- SA_KILL|SA_CORE, /* SIGSEGV */
- SA_KILL|SA_CORE, /* SIGSYS */
- SA_KILL, /* SIGPIPE */
- SA_KILL, /* SIGALRM */
- SA_KILL, /* SIGTERM */
- SA_IGNORE, /* SIGURG */
- SA_STOP, /* SIGSTOP */
- SA_STOP|SA_TTYSTOP, /* SIGTSTP */
- SA_IGNORE|SA_CONT, /* SIGCONT */
- SA_IGNORE, /* SIGCHLD */
- SA_STOP|SA_TTYSTOP, /* SIGTTIN */
- SA_STOP|SA_TTYSTOP, /* SIGTTOU */
- SA_IGNORE, /* SIGIO */
- SA_KILL, /* SIGXCPU */
- SA_KILL, /* SIGXFSZ */
- SA_KILL, /* SIGVTALRM */
- SA_KILL, /* SIGPROF */
- SA_IGNORE, /* SIGWINCH */
- SA_IGNORE, /* SIGINFO */
- SA_KILL, /* SIGUSR1 */
- SA_KILL, /* SIGUSR2 */
+ SA_KILL, /* SIGHUP */
+ SA_KILL, /* SIGINT */
+ SA_KILL|SA_CORE, /* SIGQUIT */
+ SA_KILL|SA_CORE, /* SIGILL */
+ SA_KILL|SA_CORE, /* SIGTRAP */
+ SA_KILL|SA_CORE, /* SIGABRT */
+ SA_KILL|SA_CORE, /* SIGEMT */
+ SA_KILL|SA_CORE, /* SIGFPE */
+ SA_KILL, /* SIGKILL */
+ SA_KILL|SA_CORE, /* SIGBUS */
+ SA_KILL|SA_CORE, /* SIGSEGV */
+ SA_KILL|SA_CORE, /* SIGSYS */
+ SA_KILL, /* SIGPIPE */
+ SA_KILL, /* SIGALRM */
+ SA_KILL, /* SIGTERM */
+ SA_IGNORE, /* SIGURG */
+ SA_STOP, /* SIGSTOP */
+ SA_STOP|SA_TTYSTOP, /* SIGTSTP */
+ SA_IGNORE|SA_CONT, /* SIGCONT */
+ SA_IGNORE, /* SIGCHLD */
+ SA_STOP|SA_TTYSTOP, /* SIGTTIN */
+ SA_STOP|SA_TTYSTOP, /* SIGTTOU */
+ SA_IGNORE, /* SIGIO */
+ SA_KILL, /* SIGXCPU */
+ SA_KILL, /* SIGXFSZ */
+ SA_KILL, /* SIGVTALRM */
+ SA_KILL, /* SIGPROF */
+ SA_IGNORE, /* SIGWINCH */
+ SA_IGNORE, /* SIGINFO */
+ SA_KILL, /* SIGUSR1 */
+ SA_KILL, /* SIGUSR2 */
};
static void reschedule_signals(struct proc *p, sigset_t block, int flags);
@@ -3018,11 +3017,11 @@ SYSCTL_PROC(_debug, OID_AUTO, ncores, CTLTYPE_INT|CTLFLAG_RW,
#if defined(COMPRESS_USER_CORES)
int compress_user_cores = 1;
SYSCTL_INT(_kern, OID_AUTO, compress_user_cores, CTLFLAG_RW,
- &compress_user_cores, 0, "");
+ &compress_user_cores, 0, "Compression of user corefiles");
int compress_user_cores_gzlevel = -1; /* default level */
SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_gzlevel, CTLFLAG_RW,
- &compress_user_cores_gzlevel, -1, "user core gz compression level");
+ &compress_user_cores_gzlevel, -1, "Corefile gzip compression level");
#define GZ_SUFFIX ".gz"
#define GZ_SUFFIX_LEN 3
@@ -3031,11 +3030,12 @@ SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_gzlevel, CTLFLAG_RW,
static char corefilename[MAXPATHLEN] = {"%N.core"};
TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename));
SYSCTL_STRING(_kern, OID_AUTO, corefile, CTLFLAG_RW, corefilename,
- sizeof(corefilename), "process corefile name format string");
+ sizeof(corefilename), "Process corefile name format string");
/*
- * expand_name(name, uid, pid, td, compress)
- * Expand the name described in corefilename, using name, uid, and pid.
+ * corefile_open(comm, uid, pid, td, compress, vpp, namep)
+ * Expand the name described in corefilename, using comm, uid, and pid,
+ * and open/create the core file.
* corefilename is a printf-like string, with three format specifiers:
* %N name of process ("name")
* %P process id (pid)
@@ -3044,25 +3044,22 @@ SYSCTL_STRING(_kern, OID_AUTO, corefile, CTLFLAG_RW, corefilename,
* by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P".
* This is controlled by the sysctl variable kern.corefile (see above).
*/
-static char *
-expand_name(const char *name, uid_t uid, pid_t pid, struct thread *td,
- int compress)
+static int
+corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td,
+ int compress, struct vnode **vpp, char **namep)
{
+ struct nameidata nd;
struct sbuf sb;
const char *format;
- char *temp;
- size_t i;
- int indexpos;
- char *hostname;
+ char *hostname, *name;
+ int indexpos, i, error, cmode, flags, oflags;
hostname = NULL;
format = corefilename;
- temp = malloc(MAXPATHLEN, M_TEMP, M_NOWAIT | M_ZERO);
- if (temp == NULL)
- return (NULL);
+ name = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO);
indexpos = -1;
- (void)sbuf_new(&sb, temp, MAXPATHLEN, SBUF_FIXEDLEN);
- for (i = 0; format[i]; i++) {
+ (void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN);
+ for (i = 0; format[i] != '\0'; i++) {
switch (format[i]) {
case '%': /* Format character */
i++;
@@ -3073,27 +3070,18 @@ expand_name(const char *name, uid_t uid, pid_t pid, struct thread *td,
case 'H': /* hostname */
if (hostname == NULL) {
hostname = malloc(MAXHOSTNAMELEN,
- M_TEMP, M_NOWAIT);
- if (hostname == NULL) {
- log(LOG_ERR,
- "pid %ld (%s), uid (%lu): "
- "unable to alloc memory "
- "for corefile hostname\n",
- (long)pid, name,
- (u_long)uid);
- goto nomem;
- }
- }
+ M_TEMP, M_WAITOK);
+ }
getcredhostname(td->td_ucred, hostname,
MAXHOSTNAMELEN);
sbuf_printf(&sb, "%s", hostname);
break;
- case 'I': /* autoincrementing index */
+ case 'I': /* autoincrementing index */
sbuf_printf(&sb, "0");
indexpos = sbuf_len(&sb) - 1;
break;
case 'N': /* process name */
- sbuf_printf(&sb, "%s", name);
+ sbuf_printf(&sb, "%s", comm);
break;
case 'P': /* process id */
sbuf_printf(&sb, "%u", pid);
@@ -3105,6 +3093,7 @@ expand_name(const char *name, uid_t uid, pid_t pid, struct thread *td,
log(LOG_ERR,
"Unknown format character %c in "
"corename `%s'\n", format[i], format);
+ break;
}
break;
default:
@@ -3113,21 +3102,22 @@ expand_name(const char *name, uid_t uid, pid_t pid, struct thread *td,
}
free(hostname, M_TEMP);
#ifdef COMPRESS_USER_CORES
- if (compress) {
+ if (compress)
sbuf_printf(&sb, GZ_SUFFIX);
- }
#endif
if (sbuf_error(&sb) != 0) {
log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too "
- "long\n", (long)pid, name, (u_long)uid);
-nomem:
+ "long\n", (long)pid, comm, (u_long)uid);
sbuf_delete(&sb);
- free(temp, M_TEMP);
- return (NULL);
+ free(name, M_TEMP);
+ return (ENOMEM);
}
sbuf_finish(&sb);
sbuf_delete(&sb);
+ cmode = S_IRUSR | S_IWUSR;
+ oflags = VN_OPEN_NOAUDIT | (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0);
+
/*
* If the core format has a %I in it, then we need to check
* for existing corefiles before returning a name.
@@ -3135,19 +3125,10 @@ nomem:
* non-existing core file name to use.
*/
if (indexpos != -1) {
- struct nameidata nd;
- int error, n;
- int flags = O_CREAT | O_EXCL | FWRITE | O_NOFOLLOW;
- int cmode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
- int oflags = 0;
-
- if (capmode_coredump)
- oflags = VN_OPEN_NOCAPCHECK;
-
- for (n = 0; n < num_cores; n++) {
- temp[indexpos] = '0' + n;
- NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE,
- temp, td);
+ for (i = 0; i < num_cores; i++) {
+ flags = O_CREAT | O_EXCL | FWRITE | O_NOFOLLOW;
+ name[indexpos] = '0' + i;
+ NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, td);
error = vn_open_cred(&nd, &flags, cmode, oflags,
td->td_ucred, NULL);
if (error) {
@@ -3155,27 +3136,28 @@ nomem:
continue;
log(LOG_ERR,
"pid %d (%s), uid (%u): Path `%s' failed "
- "on initial open test, error = %d\n",
- pid, name, uid, temp, error);
- free(temp, M_TEMP);
- return (NULL);
- }
- NDFREE(&nd, NDF_ONLY_PNBUF);
- VOP_UNLOCK(nd.ni_vp, 0);
- error = vn_close(nd.ni_vp, FWRITE, td->td_ucred, td);
- if (error) {
- log(LOG_ERR,
- "pid %d (%s), uid (%u): Path `%s' failed "
- "on close after initial open test, "
- "error = %d\n",
- pid, name, uid, temp, error);
- free(temp, M_TEMP);
- return (NULL);
+ "on initial open test, error = %d\n",
+ pid, comm, uid, name, error);
}
- break;
+ goto out;
}
}
- return (temp);
+
+ flags = O_CREAT | FWRITE | O_NOFOLLOW;
+ NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, td);
+ error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, NULL);
+out:
+ if (error) {
+#ifdef AUDIT
+ audit_proc_coredump(td, name, error);
+#endif
+ free(name, M_TEMP);
+ return (error);
+ }
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ *vpp = nd.ni_vp;
+ *namep = name;
+ return (0);
}
/*
@@ -3190,12 +3172,11 @@ static int
coredump(struct thread *td)
{
struct proc *p = td->td_proc;
- register struct vnode *vp;
- register struct ucred *cred = td->td_ucred;
+ struct ucred *cred = td->td_ucred;
+ struct vnode *vp;
struct flock lf;
- struct nameidata nd;
struct vattr vattr;
- int error, error1, flags, locked;
+ int error, error1, locked;
struct mount *mp;
char *name; /* name of corefile */
off_t limit;
@@ -3210,22 +3191,8 @@ coredump(struct thread *td)
MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td);
_STOPEVENT(p, S_CORE, 0);
- name = expand_name(p->p_comm, td->td_ucred->cr_uid, p->p_pid, td,
- compress);
- if (name == NULL) {
- PROC_UNLOCK(p);
-#ifdef AUDIT
- audit_proc_coredump(td, NULL, EINVAL);
-#endif
- return (EINVAL);
- }
- if (((sugid_coredump == 0) && p->p_flag & P_SUGID) ||
- do_coredump == 0) {
+ if (!do_coredump || (!sugid_coredump && (p->p_flag & P_SUGID) != 0)) {
PROC_UNLOCK(p);
-#ifdef AUDIT
- audit_proc_coredump(td, name, EFAULT);
-#endif
- free(name, M_TEMP);
return (EFAULT);
}
@@ -3240,33 +3207,19 @@ coredump(struct thread *td)
limit = (off_t)lim_cur(p, RLIMIT_CORE);
if (limit == 0 || racct_get_available(p, RACCT_CORE) == 0) {
PROC_UNLOCK(p);
-#ifdef AUDIT
- audit_proc_coredump(td, name, EFBIG);
-#endif
- free(name, M_TEMP);
return (EFBIG);
}
PROC_UNLOCK(p);
restart:
- NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, td);
- flags = O_CREAT | FWRITE | O_NOFOLLOW;
- error = vn_open_cred(&nd, &flags, S_IRUSR | S_IWUSR,
- VN_OPEN_NOAUDIT | (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0),
- cred, NULL);
- if (error) {
-#ifdef AUDIT
- audit_proc_coredump(td, name, error);
-#endif
- free(name, M_TEMP);
+ error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td, compress,
+ &vp, &name);
+ if (error != 0)
return (error);
- }
- NDFREE(&nd, NDF_ONLY_PNBUF);
- vp = nd.ni_vp;
/* Don't dump to non-regular files or files with links. */
- if (vp->v_type != VREG ||
- VOP_GETATTR(vp, &vattr, cred) || vattr.va_nlink != 1) {
+ if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 ||
+ vattr.va_nlink != 1) {
VOP_UNLOCK(vp, 0);
error = EFAULT;
goto close;
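
As the comment above describes, corefilename is a printf-like template; an illustrative setting that stores per-user core files under /cores and uses the %I auto-increment handled by corefile_open() would be:

	kern.corefile=/cores/%U/%N-%P.%I.core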
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
index 885dc22..d0009b1 100644
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -176,6 +176,12 @@ retry:
/*
* Kernel thread preemption implementation. Critical sections mark
* regions of code in which preemptions are not allowed.
+ *
+ * It might seem a good idea to inline critical_enter(), but in order
+ * to prevent instruction reordering by the compiler, a __compiler_membar()
+ * would have to be used here (as in sched_pin()). The performance
+ * penalty imposed by the membar could then produce slower code than
+ * the function call itself, in most cases.
*/
void
critical_enter(void)
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index af2391f..e469b1a 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
@@ -249,7 +250,7 @@ _sx_slock(struct sx *sx, int opts, const char *file, int line)
if (SCHEDULER_STOPPED())
return (0);
- KASSERT(!TD_IS_IDLETHREAD(curthread),
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("sx_slock() by idle thread %p on sx %s @ %s:%d",
curthread, sx->lock_object.lo_name, file, line));
KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
@@ -273,7 +274,7 @@ sx_try_slock_(struct sx *sx, const char *file, int line)
if (SCHEDULER_STOPPED())
return (1);
- KASSERT(!TD_IS_IDLETHREAD(curthread),
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("sx_try_slock() by idle thread %p on sx %s @ %s:%d",
curthread, sx->lock_object.lo_name, file, line));
@@ -302,7 +303,7 @@ _sx_xlock(struct sx *sx, int opts, const char *file, int line)
if (SCHEDULER_STOPPED())
return (0);
- KASSERT(!TD_IS_IDLETHREAD(curthread),
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("sx_xlock() by idle thread %p on sx %s @ %s:%d",
curthread, sx->lock_object.lo_name, file, line));
KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
@@ -328,7 +329,7 @@ sx_try_xlock_(struct sx *sx, const char *file, int line)
if (SCHEDULER_STOPPED())
return (1);
- KASSERT(!TD_IS_IDLETHREAD(curthread),
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("sx_try_xlock() by idle thread %p on sx %s @ %s:%d",
curthread, sx->lock_object.lo_name, file, line));
KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c
index c0e7831..97c288d 100644
--- a/sys/kern/kern_time.c
+++ b/sys/kern/kern_time.c
@@ -788,13 +788,11 @@ realitexpire(void *arg)
struct timeval ctv, ntv;
p = (struct proc *)arg;
- PROC_LOCK(p);
kern_psignal(p, SIGALRM);
if (!timevalisset(&p->p_realtimer.it_interval)) {
timevalclear(&p->p_realtimer.it_value);
if (p->p_flag & P_WEXIT)
wakeup(&p->p_itcallout);
- PROC_UNLOCK(p);
return;
}
for (;;) {
@@ -806,7 +804,6 @@ realitexpire(void *arg)
timevalsub(&ntv, &ctv);
callout_reset(&p->p_itcallout, tvtohz(&ntv) - 1,
realitexpire, p);
- PROC_UNLOCK(p);
return;
}
}
diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c
index e3e1e9d..80933fa 100644
--- a/sys/kern/kern_timeout.c
+++ b/sys/kern/kern_timeout.c
@@ -84,7 +84,7 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
* TODO:
* allocate more timeout table slots when table overflows.
*/
-int callwheelsize, callwheelbits, callwheelmask;
+int callwheelsize, callwheelmask;
/*
* The callout cpu migration entity represents informations necessary for
@@ -218,12 +218,10 @@ kern_timeout_callwheel_alloc(caddr_t v)
timeout_cpu = PCPU_GET(cpuid);
cc = CC_CPU(timeout_cpu);
/*
- * Calculate callout wheel size
+ * Calculate the callout wheel size; it should be the next power of
+ * two higher than 'ncallout'.
*/
- for (callwheelsize = 1, callwheelbits = 0;
- callwheelsize < ncallout;
- callwheelsize <<= 1, ++callwheelbits)
- ;
+ callwheelsize = 1 << fls(ncallout);
callwheelmask = callwheelsize - 1;
cc->cc_callout = (struct callout *)v;
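
A quick worked example of the new computation, where fls(9) returns the one-based index of the most significant set bit: for ncallout = 1000, fls(1000) = 10, so callwheelsize = 1 << 10 = 1024; for ncallout = 1024 (already a power of two), fls(1024) = 11 and the wheel doubles to 2048. Unlike the old loop, which stopped at the first power of two >= ncallout, the new expression is thus always strictly greater than ncallout.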
@@ -441,15 +439,13 @@ static void
callout_cc_del(struct callout *c, struct callout_cpu *cc)
{
- if (cc->cc_next == c)
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
- if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
- c->c_func = NULL;
- SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
- }
+ if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0)
+ return;
+ c->c_func = NULL;
+ SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
}
-static struct callout *
+static void
softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
int *lockcalls, int *gcalls)
{
@@ -471,7 +467,9 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
static timeout_t *lastfunc;
#endif
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+ KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) ==
+ (CALLOUT_PENDING | CALLOUT_ACTIVE),
+ ("softclock_call_cc: pend|act %p %x", c, c->c_flags));
class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1;
c_lock = c->c_lock;
@@ -539,20 +537,7 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
class->lc_unlock(c_lock);
skip:
CC_LOCK(cc);
- /*
- * If the current callout is locally allocated (from
- * timeout(9)) then put it on the freelist.
- *
- * Note: we need to check the cached copy of c_flags because
- * if it was not local, then it's not safe to deref the
- * callout pointer.
- */
- if (c_flags & CALLOUT_LOCAL_ALLOC) {
- KASSERT(c->c_flags == CALLOUT_LOCAL_ALLOC,
- ("corrupted callout"));
- c->c_func = NULL;
- SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
- }
+ KASSERT(cc->cc_curr == c, ("mishandled cc_curr"));
cc->cc_curr = NULL;
if (cc->cc_waiting) {
/*
@@ -561,13 +546,22 @@ skip:
* If the callout was scheduled for
* migration just cancel it.
*/
- if (cc_cme_migrating(cc))
+ if (cc_cme_migrating(cc)) {
cc_cme_cleanup(cc);
+
+ /*
+ * It should be asserted here that the callout is not
+ * destroyed, but that is not easy.
+ */
+ c->c_flags &= ~CALLOUT_DFRMIGRATION;
+ }
cc->cc_waiting = 0;
CC_UNLOCK(cc);
wakeup(&cc->cc_waiting);
CC_LOCK(cc);
} else if (cc_cme_migrating(cc)) {
+ KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0,
+ ("Migrating legacy callout %p", c));
#ifdef SMP
/*
* If the callout was scheduled for
@@ -580,23 +574,20 @@ skip:
cc_cme_cleanup(cc);
/*
- * Handle deferred callout stops
+ * It should be asserted here that the callout is not destroyed,
+ * but that is not easy.
+ *
+ * As a first step, handle deferred callout stops.
*/
if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) {
CTR3(KTR_CALLOUT,
"deferred cancelled %p func %p arg %p",
c, new_func, new_arg);
callout_cc_del(c, cc);
- goto nextc;
+ return;
}
-
c->c_flags &= ~CALLOUT_DFRMIGRATION;
- /*
- * It should be assert here that the
- * callout is not destroyed but that
- * is not easy.
- */
new_cc = callout_cpu_switch(c, cc, new_cpu);
callout_cc_add(c, new_cc, new_ticks, new_func, new_arg,
new_cpu);
@@ -606,10 +597,19 @@ skip:
panic("migration should not happen");
#endif
}
-#ifdef SMP
-nextc:
-#endif
- return (cc->cc_next);
+ /*
+ * If the current callout is locally allocated (from
+ * timeout(9)) then put it on the freelist.
+ *
+ * Note: we need to check the cached copy of c_flags because
+ * if it was not local, then it's not safe to deref the
+ * callout pointer.
+ */
+ KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 ||
+ c->c_flags == CALLOUT_LOCAL_ALLOC,
+ ("corrupted callout"));
+ if (c_flags & CALLOUT_LOCAL_ALLOC)
+ callout_cc_del(c, cc);
}
/*
@@ -676,10 +676,12 @@ softclock(void *arg)
steps = 0;
}
} else {
+ cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
TAILQ_REMOVE(bucket, c, c_links.tqe);
- c = softclock_call_cc(c, cc, &mpcalls,
+ softclock_call_cc(c, cc, &mpcalls,
&lockcalls, &gcalls);
steps = 0;
+ c = cc->cc_next;
}
}
}
@@ -1024,6 +1026,8 @@ again:
CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
c, c->c_func, c->c_arg);
+ if (cc->cc_next == c)
+ cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
c_links.tqe);
callout_cc_del(c, cc);
diff --git a/sys/kern/subr_busdma_bufalloc.c b/sys/kern/subr_busdma_bufalloc.c
new file mode 100644
index 0000000..9406d95
--- /dev/null
+++ b/sys/kern/subr_busdma_bufalloc.c
@@ -0,0 +1,174 @@
+/*-
+ * Copyright (c) 2012 Ian Lepore
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Buffer allocation support routines for bus_dmamem_alloc implementations.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/busdma_bufalloc.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/uma.h>
+
+/*
+ * We manage buffer zones up to a page in size. Buffers larger than a page can
+ * be managed by one of the kernel's page-oriented memory allocation routines as
+ * efficiently as what we can do here. Also, a page is the largest size for
+ * which we can guarantee contiguity when using uma, and contiguity is one of the
+ * requirements we have to fulfill.
+ */
+#define MIN_ZONE_BUFSIZE 32
+#define MAX_ZONE_BUFSIZE PAGE_SIZE
+
+/*
+ * The static array of 12 bufzones is big enough to handle all the zones for the
+ * smallest supported allocation size of 32 through the largest supported page
+ * size of 64K. If you up the biggest page size number, up the array size too.
+ * Basically the size of the array needs to be log2(maxsize)-log2(minsize)+1,
+ * but I don't know of an easy way to express that as a compile-time constant.
+ */
+#if PAGE_SIZE > 65536
+#error Unsupported page size
+#endif
+
+struct busdma_bufalloc {
+ bus_size_t min_size;
+ size_t num_zones;
+ struct busdma_bufzone buf_zones[12];
+};
+
+busdma_bufalloc_t
+busdma_bufalloc_create(const char *name, bus_size_t minimum_alignment,
+ uma_alloc alloc_func, uma_free free_func, u_int32_t zcreate_flags)
+{
+ struct busdma_bufalloc *ba;
+ struct busdma_bufzone *bz;
+ int i;
+ bus_size_t cursize;
+
+ ba = malloc(sizeof(struct busdma_bufalloc), M_DEVBUF,
+ M_ZERO | M_WAITOK);
+
+ ba->min_size = MAX(MIN_ZONE_BUFSIZE, minimum_alignment);
+
+ /*
+ * Each uma zone is created with an alignment of size-1, meaning that
+ * the alignment is equal to the size (i.e., 64-byte buffers are aligned
+ * to 64-byte boundaries, etc.). This allows for a fast, efficient test
+ * when deciding whether a pool buffer meets the constraints of a given
+ * tag used for allocation: the buffer is usable if tag->alignment <=
+ * bufzone->size.
+ */
+ for (i = 0, bz = ba->buf_zones, cursize = ba->min_size;
+ i < nitems(ba->buf_zones) && cursize <= MAX_ZONE_BUFSIZE;
+ ++i, ++bz, cursize <<= 1) {
+ snprintf(bz->name, sizeof(bz->name), "dma %.10s %lu",
+ name, cursize);
+ bz->size = cursize;
+ bz->umazone = uma_zcreate(bz->name, bz->size,
+ NULL, NULL, NULL, NULL, bz->size - 1, zcreate_flags);
+ if (bz->umazone == NULL) {
+ busdma_bufalloc_destroy(ba);
+ return (NULL);
+ }
+ if (alloc_func != NULL)
+ uma_zone_set_allocf(bz->umazone, alloc_func);
+ if (free_func != NULL)
+ uma_zone_set_freef(bz->umazone, free_func);
+ ++ba->num_zones;
+ }
+
+ return (ba);
+}
+
+void
+busdma_bufalloc_destroy(busdma_bufalloc_t ba)
+{
+ struct busdma_bufzone *bz;
+ int i;
+
+ if (ba == NULL)
+ return;
+
+ for (i = 0, bz = ba->buf_zones; i < ba->num_zones; ++i, ++bz) {
+ uma_zdestroy(bz->umazone);
+ }
+
+ free(ba, M_DEVBUF);
+}
+
+struct busdma_bufzone *
+busdma_bufalloc_findzone(busdma_bufalloc_t ba, bus_size_t size)
+{
+ struct busdma_bufzone *bz;
+ int i;
+
+ if (size > MAX_ZONE_BUFSIZE)
+ return (NULL);
+
+ for (i = 0, bz = ba->buf_zones; i < ba->num_zones; ++i, ++bz) {
+ if (bz->size >= size)
+ return (bz);
+ }
+
+ panic("Didn't find a buffer zone of the right size");
+}
+
+void *
+busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, int size, u_int8_t *pflag,
+ int wait)
+{
+#ifdef VM_MEMATTR_UNCACHEABLE
+
+ /* Inform UMA that this allocator uses kernel_map/object. */
+ *pflag = UMA_SLAB_KERNEL;
+
+ return ((void *)kmem_alloc_attr(kernel_map, size, wait, 0,
+ BUS_SPACE_MAXADDR, VM_MEMATTR_UNCACHEABLE));
+
+#else
+
+ panic("VM_MEMATTR_UNCACHEABLE unavailable");
+
+#endif /* VM_MEMATTR_UNCACHEABLE */
+}
+
+void
+busdma_bufalloc_free_uncacheable(void *item, int size, u_int8_t pflag)
+{
+
+ kmem_free(kernel_map, (vm_offset_t)item, size);
+}
+
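
A minimal usage sketch of the new API, assuming a hypothetical busdma backend that keeps a pool of uncacheable DMA buffers (the dmat fields and the surrounding error handling are illustrative, not part of this file):

	static busdma_bufalloc_t coherent_allocator;

	/* Once, at backend initialization time. */
	coherent_allocator = busdma_bufalloc_create("coherent", 1,
	    busdma_bufalloc_alloc_uncacheable,
	    busdma_bufalloc_free_uncacheable, 0);

	/* Per allocation: a zone's buffer size doubles as its alignment. */
	struct busdma_bufzone *bufzone;

	bufzone = busdma_bufalloc_findzone(coherent_allocator, dmat->maxsize);
	if (bufzone != NULL && dmat->alignment <= bufzone->size)
		vaddr = uma_zalloc(bufzone->umazone, M_NOWAIT);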
diff --git a/sys/kern/subr_mchain.c b/sys/kern/subr_mchain.c
index cd2a5f3..e9d7d22 100644
--- a/sys/kern/subr_mchain.c
+++ b/sys/kern/subr_mchain.c
@@ -59,7 +59,7 @@ mb_init(struct mbchain *mbp)
{
struct mbuf *m;
- m = m_gethdr(M_WAIT, MT_DATA);
+ m = m_gethdr(M_WAITOK, MT_DATA);
m->m_len = 0;
mb_initm(mbp, m);
return (0);
@@ -114,7 +114,7 @@ mb_reserve(struct mbchain *mbp, int size)
panic("mb_reserve: size = %d\n", size);
m = mbp->mb_cur;
if (mbp->mb_mleft < size) {
- mn = m_get(M_WAIT, MT_DATA);
+ mn = m_get(M_WAITOK, MT_DATA);
mbp->mb_cur = m->m_next = mn;
m = mn;
m->m_len = 0;
@@ -205,7 +205,7 @@ mb_put_mem(struct mbchain *mbp, c_caddr_t source, int size, int type)
while (size > 0) {
if (mleft == 0) {
if (m->m_next == NULL)
- m = m_getm(m, size, M_WAIT, MT_DATA);
+ m = m_getm(m, size, M_WAITOK, MT_DATA);
else
m = m->m_next;
mleft = M_TRAILINGSPACE(m);
@@ -307,7 +307,7 @@ md_init(struct mdchain *mdp)
{
struct mbuf *m;
- m = m_gethdr(M_WAIT, MT_DATA);
+ m = m_gethdr(M_WAITOK, MT_DATA);
m->m_len = 0;
md_initm(mdp, m);
return (0);
@@ -514,7 +514,7 @@ md_get_mbuf(struct mdchain *mdp, int size, struct mbuf **ret)
{
struct mbuf *m = mdp->md_cur, *rm;
- rm = m_copym(m, mdp->md_pos - mtod(m, u_char*), size, M_WAIT);
+ rm = m_copym(m, mdp->md_pos - mtod(m, u_char*), size, M_WAITOK);
md_get_mem(mdp, NULL, size, MB_MZERO);
*ret = rm;
return (0);
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c
index 2ca0051..510033f 100644
--- a/sys/kern/subr_param.c
+++ b/sys/kern/subr_param.c
@@ -333,8 +333,8 @@ init_param2(long physpages)
* available kernel memory (physical or kmem).
* At most it can be 3/4 of available kernel memory.
*/
- realmem = qmin(physpages * PAGE_SIZE,
- VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS);
+ realmem = qmin((quad_t)physpages * PAGE_SIZE,
+ VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS);
maxmbufmem = realmem / 2;
TUNABLE_QUAD_FETCH("kern.maxmbufmem", &maxmbufmem);
if (maxmbufmem > (realmem / 4) * 3)
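
The added cast matters on 32-bit platforms: with PAGE_SIZE = 4096, physpages * PAGE_SIZE overflows a 32-bit signed long once physpages reaches 524288 (2 GB of RAM), since 524288 * 4096 = 2^31; widening the multiplication to quad_t keeps the product exact before qmin() compares it against the kernel address range.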
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index 3b27dce..3614798 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -766,8 +766,9 @@ quiesce_cpus(cpuset_t map, const char *wmesg, int prio)
thread_unlock(curthread);
while (gen[cpu] == pcpu->pc_idlethread->td_generation) {
error = tsleep(quiesce_cpus, prio, wmesg, 1);
- if (error)
+ if (error != EWOULDBLOCK)
goto out;
+ error = 0;
}
}
out:
diff --git a/sys/kern/subr_syscall.c b/sys/kern/subr_syscall.c
index fe5cd0e..3d6dc5a 100644
--- a/sys/kern/subr_syscall.c
+++ b/sys/kern/subr_syscall.c
@@ -77,13 +77,12 @@ syscallenter(struct thread *td, struct syscall_args *sa)
if (KTRPOINT(td, KTR_SYSCALL))
ktrsyscall(sa->code, sa->narg, sa->args);
#endif
-
- CTR6(KTR_SYSC,
-"syscall: td=%p pid %d %s (%#lx, %#lx, %#lx)",
- td, td->td_proc->p_pid, syscallname(p, sa->code),
- sa->args[0], sa->args[1], sa->args[2]);
+ KTR_START4(KTR_SYSC, "syscall", syscallname(p, sa->code),
+ (uintptr_t)td, "pid:%d", td->td_proc->p_pid, "arg0:%p", sa->args[0],
+ "arg1:%p", sa->args[1], "arg2:%p", sa->args[2]);
if (error == 0) {
+
STOPEVENT(p, S_SCE, sa->narg);
if (p->p_flag & P_TRACED && p->p_stops & S_PT_SCE) {
PROC_LOCK(p);
@@ -150,10 +149,12 @@ syscallenter(struct thread *td, struct syscall_args *sa)
sa->callp, NULL, (error) ? -1 : td->td_retval[0]);
#endif
syscall_thread_exit(td, sa->callp);
- CTR4(KTR_SYSC, "syscall: p=%p error=%d return %#lx %#lx",
- p, error, td->td_retval[0], td->td_retval[1]);
}
retval:
+ KTR_STOP4(KTR_SYSC, "syscall", syscallname(p, sa->code),
+ (uintptr_t)td, "pid:%d", td->td_proc->p_pid, "error:%d", error,
+ "retval0:%#lx", td->td_retval[0], "retval1:%#lx",
+ td->td_retval[1]);
if (traced) {
PROC_LOCK(p);
td->td_dbgflags &= ~TDB_SCE;
@@ -176,9 +177,6 @@ syscallret(struct thread *td, int error, struct syscall_args *sa __unused)
*/
userret(td, td->td_frame);
- CTR4(KTR_SYSC, "syscall %s exit thread %p pid %d proc %s",
- syscallname(p, sa->code), td, td->td_proc->p_pid, td->td_name);
-
#ifdef KTRACE
if (KTRPOINT(td, KTR_SYSRET)) {
ktrsysret(sa->code, (td->td_pflags & TDP_NERRNO) == 0 ?
diff --git a/sys/kern/subr_uio.c b/sys/kern/subr_uio.c
index d518cc3..7b593487 100644
--- a/sys/kern/subr_uio.c
+++ b/sys/kern/subr_uio.c
@@ -389,7 +389,6 @@ again:
case UIO_SYSSPACE:
iov_base = iov->iov_base;
*iov_base = c;
- iov->iov_base = iov_base;
break;
case UIO_NOCOPY:
diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c
index c2aed86..3cbf51a 100644
--- a/sys/kern/subr_witness.c
+++ b/sys/kern/subr_witness.c
@@ -822,16 +822,16 @@ witness_init(struct lock_object *lock, const char *type)
class = LOCK_CLASS(lock);
if ((lock->lo_flags & LO_RECURSABLE) != 0 &&
(class->lc_flags & LC_RECURSABLE) == 0)
- panic("%s: lock (%s) %s can not be recursable", __func__,
- class->lc_name, lock->lo_name);
+ kassert_panic("%s: lock (%s) %s can not be recursable",
+ __func__, class->lc_name, lock->lo_name);
if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
(class->lc_flags & LC_SLEEPABLE) == 0)
- panic("%s: lock (%s) %s can not be sleepable", __func__,
- class->lc_name, lock->lo_name);
+ kassert_panic("%s: lock (%s) %s can not be sleepable",
+ __func__, class->lc_name, lock->lo_name);
if ((lock->lo_flags & LO_UPGRADABLE) != 0 &&
(class->lc_flags & LC_UPGRADABLE) == 0)
- panic("%s: lock (%s) %s can not be upgradable", __func__,
- class->lc_name, lock->lo_name);
+ kassert_panic("%s: lock (%s) %s can not be upgradable",
+ __func__, class->lc_name, lock->lo_name);
/*
* If we shouldn't watch this lock, then just clear lo_witness.
@@ -847,7 +847,8 @@ witness_init(struct lock_object *lock, const char *type)
pending_locks[pending_cnt].wh_lock = lock;
pending_locks[pending_cnt++].wh_type = type;
if (pending_cnt > WITNESS_PENDLIST)
- panic("%s: pending locks list is too small, bump it\n",
+ panic("%s: pending locks list is too small, "
+ "increase WITNESS_PENDLIST\n",
__func__);
} else
lock->lo_witness = enroll(type, class);
@@ -1073,7 +1074,8 @@ witness_checkorder(struct lock_object *lock, int flags, const char *file,
* all spin locks.
*/
if (td->td_critnest != 0 && !kdb_active)
- panic("blockable sleep lock (%s) %s @ %s:%d",
+ kassert_panic("acquiring blockable sleep lock with "
+ "spinlock or critical section held (%s) %s @ %s:%d",
class->lc_name, lock->lo_name,
fixup_filename(file), line);
@@ -1117,7 +1119,7 @@ witness_checkorder(struct lock_object *lock, int flags, const char *file,
fixup_filename(file), line);
printf("while exclusively locked from %s:%d\n",
fixup_filename(lock1->li_file), lock1->li_line);
- panic("share->excl");
+ kassert_panic("share->excl");
}
if ((lock1->li_flags & LI_EXCLUSIVE) == 0 &&
(flags & LOP_EXCLUSIVE) != 0) {
@@ -1126,7 +1128,7 @@ witness_checkorder(struct lock_object *lock, int flags, const char *file,
fixup_filename(file), line);
printf("while share locked from %s:%d\n",
fixup_filename(lock1->li_file), lock1->li_line);
- panic("excl->share");
+ kassert_panic("excl->share");
}
return;
}
@@ -1433,26 +1435,32 @@ witness_upgrade(struct lock_object *lock, int flags, const char *file, int line)
class = LOCK_CLASS(lock);
if (witness_watch) {
if ((lock->lo_flags & LO_UPGRADABLE) == 0)
- panic("upgrade of non-upgradable lock (%s) %s @ %s:%d",
+ kassert_panic(
+ "upgrade of non-upgradable lock (%s) %s @ %s:%d",
class->lc_name, lock->lo_name,
fixup_filename(file), line);
if ((class->lc_flags & LC_SLEEPLOCK) == 0)
- panic("upgrade of non-sleep lock (%s) %s @ %s:%d",
+ kassert_panic(
+ "upgrade of non-sleep lock (%s) %s @ %s:%d",
class->lc_name, lock->lo_name,
fixup_filename(file), line);
}
instance = find_instance(curthread->td_sleeplocks, lock);
- if (instance == NULL)
- panic("upgrade of unlocked lock (%s) %s @ %s:%d",
+ if (instance == NULL) {
+ kassert_panic("upgrade of unlocked lock (%s) %s @ %s:%d",
class->lc_name, lock->lo_name,
fixup_filename(file), line);
+ return;
+ }
if (witness_watch) {
if ((instance->li_flags & LI_EXCLUSIVE) != 0)
- panic("upgrade of exclusive lock (%s) %s @ %s:%d",
+ kassert_panic(
+ "upgrade of exclusive lock (%s) %s @ %s:%d",
class->lc_name, lock->lo_name,
fixup_filename(file), line);
if ((instance->li_flags & LI_RECURSEMASK) != 0)
- panic("upgrade of recursed lock (%s) %s r=%d @ %s:%d",
+ kassert_panic(
+ "upgrade of recursed lock (%s) %s r=%d @ %s:%d",
class->lc_name, lock->lo_name,
instance->li_flags & LI_RECURSEMASK,
fixup_filename(file), line);
@@ -1473,26 +1481,32 @@ witness_downgrade(struct lock_object *lock, int flags, const char *file,
class = LOCK_CLASS(lock);
if (witness_watch) {
if ((lock->lo_flags & LO_UPGRADABLE) == 0)
- panic("downgrade of non-upgradable lock (%s) %s @ %s:%d",
+ kassert_panic(
+ "downgrade of non-upgradable lock (%s) %s @ %s:%d",
class->lc_name, lock->lo_name,
fixup_filename(file), line);
if ((class->lc_flags & LC_SLEEPLOCK) == 0)
- panic("downgrade of non-sleep lock (%s) %s @ %s:%d",
+ kassert_panic(
+ "downgrade of non-sleep lock (%s) %s @ %s:%d",
class->lc_name, lock->lo_name,
fixup_filename(file), line);
}
instance = find_instance(curthread->td_sleeplocks, lock);
- if (instance == NULL)
- panic("downgrade of unlocked lock (%s) %s @ %s:%d",
+ if (instance == NULL) {
+ kassert_panic("downgrade of unlocked lock (%s) %s @ %s:%d",
class->lc_name, lock->lo_name,
fixup_filename(file), line);
+ return;
+ }
if (witness_watch) {
if ((instance->li_flags & LI_EXCLUSIVE) == 0)
- panic("downgrade of shared lock (%s) %s @ %s:%d",
+ kassert_panic(
+ "downgrade of shared lock (%s) %s @ %s:%d",
class->lc_name, lock->lo_name,
fixup_filename(file), line);
if ((instance->li_flags & LI_RECURSEMASK) != 0)
- panic("downgrade of recursed lock (%s) %s r=%d @ %s:%d",
+ kassert_panic(
+ "downgrade of recursed lock (%s) %s r=%d @ %s:%d",
class->lc_name, lock->lo_name,
instance->li_flags & LI_RECURSEMASK,
fixup_filename(file), line);
@@ -1534,11 +1548,13 @@ witness_unlock(struct lock_object *lock, int flags, const char *file, int line)
* We have to make sure we flush these queues, so just search for
* eventual register locks and remove them.
*/
- if (witness_watch > 0)
- panic("lock (%s) %s not locked @ %s:%d", class->lc_name,
+ if (witness_watch > 0) {
+ kassert_panic("lock (%s) %s not locked @ %s:%d", class->lc_name,
lock->lo_name, fixup_filename(file), line);
- else
return;
+ } else {
+ return;
+ }
found:
/* First, check for shared/exclusive mismatches. */
@@ -1548,7 +1564,7 @@ found:
lock->lo_name, fixup_filename(file), line);
printf("while exclusively locked from %s:%d\n",
fixup_filename(instance->li_file), instance->li_line);
- panic("excl->ushare");
+ kassert_panic("excl->ushare");
}
if ((instance->li_flags & LI_EXCLUSIVE) == 0 && witness_watch > 0 &&
(flags & LOP_EXCLUSIVE) != 0) {
@@ -1557,7 +1573,7 @@ found:
printf("while share locked from %s:%d\n",
fixup_filename(instance->li_file),
instance->li_line);
- panic("share->uexcl");
+ kassert_panic("share->uexcl");
}
/* If we are recursed, unrecurse. */
if ((instance->li_flags & LI_RECURSEMASK) > 0) {
@@ -1571,7 +1587,7 @@ found:
if ((instance->li_flags & LI_NORELEASE) != 0 && witness_watch > 0) {
printf("forbidden unlock of (%s) %s @ %s:%d\n", class->lc_name,
lock->lo_name, fixup_filename(file), line);
- panic("lock marked norelease");
+ kassert_panic("lock marked norelease");
}
/* Otherwise, remove this item from the list. */
@@ -1626,7 +1642,8 @@ witness_thread_exit(struct thread *td)
witness_list_lock(&lle->ll_children[i], printf);
}
- panic("Thread %p cannot exit while holding sleeplocks\n", td);
+ kassert_panic(
+ "Thread %p cannot exit while holding sleeplocks\n", td);
}
witness_lock_list_free(lle);
}
@@ -1707,7 +1724,7 @@ witness_warn(int flags, struct lock_object *lock, const char *fmt, ...)
} else
sched_unpin();
if (flags & WARN_PANIC && n)
- panic("%s", __func__);
+ kassert_panic("%s", __func__);
else
witness_debugger(n);
return (n);
@@ -1750,11 +1767,13 @@ enroll(const char *description, struct lock_class *lock_class)
return (NULL);
else
typelist = &w_spin;
- } else if ((lock_class->lc_flags & LC_SLEEPLOCK))
+ } else if ((lock_class->lc_flags & LC_SLEEPLOCK)) {
typelist = &w_sleep;
- else
- panic("lock class %s is not sleep or spin",
+ } else {
+ kassert_panic("lock class %s is not sleep or spin",
lock_class->lc_name);
+ return (NULL);
+ }
mtx_lock_spin(&w_mtx);
w = witness_hash_get(description);
@@ -1784,7 +1803,7 @@ found:
w->w_refcount++;
mtx_unlock_spin(&w_mtx);
if (lock_class != w->w_class)
- panic(
+ kassert_panic(
"lock (%s) %s does not match earlier (%s) lock",
description, lock_class->lc_name,
w->w_class->lc_name);
@@ -1910,18 +1929,26 @@ adopt(struct witness *parent, struct witness *child)
static void
itismychild(struct witness *parent, struct witness *child)
{
+ int unlocked;
MPASS(child != NULL && parent != NULL);
if (witness_cold == 0)
mtx_assert(&w_mtx, MA_OWNED);
if (!witness_lock_type_equal(parent, child)) {
- if (witness_cold == 0)
+ if (witness_cold == 0) {
+ unlocked = 1;
mtx_unlock_spin(&w_mtx);
- panic("%s: parent \"%s\" (%s) and child \"%s\" (%s) are not "
+ } else {
+ unlocked = 0;
+ }
+ kassert_panic(
+ "%s: parent \"%s\" (%s) and child \"%s\" (%s) are not "
"the same lock type", __func__, parent->w_name,
parent->w_class->lc_name, child->w_name,
child->w_class->lc_name);
+ if (unlocked)
+ mtx_lock_spin(&w_mtx);
}
adopt(parent, child);
}
@@ -2191,9 +2218,11 @@ witness_save(struct lock_object *lock, const char **filep, int *linep)
lock_list = PCPU_GET(spinlocks);
}
instance = find_instance(lock_list, lock);
- if (instance == NULL)
- panic("%s: lock (%s) %s not locked", __func__,
+ if (instance == NULL) {
+ kassert_panic("%s: lock (%s) %s not locked", __func__,
class->lc_name, lock->lo_name);
+ return;
+ }
*filep = instance->li_file;
*linep = instance->li_line;
}
@@ -2225,10 +2254,12 @@ witness_restore(struct lock_object *lock, const char *file, int line)
}
instance = find_instance(lock_list, lock);
if (instance == NULL)
- panic("%s: lock (%s) %s not locked", __func__,
+ kassert_panic("%s: lock (%s) %s not locked", __func__,
class->lc_name, lock->lo_name);
lock->lo_witness->w_file = file;
lock->lo_witness->w_line = line;
+ if (instance == NULL)
+ return;
instance->li_file = file;
instance->li_line = line;
}
@@ -2249,13 +2280,14 @@ witness_assert(const struct lock_object *lock, int flags, const char *file,
else if ((class->lc_flags & LC_SPINLOCK) != 0)
instance = find_instance(PCPU_GET(spinlocks), lock);
else {
- panic("Lock (%s) %s is not sleep or spin!",
+ kassert_panic("Lock (%s) %s is not sleep or spin!",
class->lc_name, lock->lo_name);
+ return;
}
switch (flags) {
case LA_UNLOCKED:
if (instance != NULL)
- panic("Lock (%s) %s locked @ %s:%d.",
+ kassert_panic("Lock (%s) %s locked @ %s:%d.",
class->lc_name, lock->lo_name,
fixup_filename(file), line);
break;
@@ -2269,34 +2301,36 @@ witness_assert(const struct lock_object *lock, int flags, const char *file,
case LA_XLOCKED | LA_RECURSED:
case LA_XLOCKED | LA_NOTRECURSED:
if (instance == NULL) {
- panic("Lock (%s) %s not locked @ %s:%d.",
+ kassert_panic("Lock (%s) %s not locked @ %s:%d.",
class->lc_name, lock->lo_name,
fixup_filename(file), line);
break;
}
if ((flags & LA_XLOCKED) != 0 &&
(instance->li_flags & LI_EXCLUSIVE) == 0)
- panic("Lock (%s) %s not exclusively locked @ %s:%d.",
+ kassert_panic(
+ "Lock (%s) %s not exclusively locked @ %s:%d.",
class->lc_name, lock->lo_name,
fixup_filename(file), line);
if ((flags & LA_SLOCKED) != 0 &&
(instance->li_flags & LI_EXCLUSIVE) != 0)
- panic("Lock (%s) %s exclusively locked @ %s:%d.",
+ kassert_panic(
+ "Lock (%s) %s exclusively locked @ %s:%d.",
class->lc_name, lock->lo_name,
fixup_filename(file), line);
if ((flags & LA_RECURSED) != 0 &&
(instance->li_flags & LI_RECURSEMASK) == 0)
- panic("Lock (%s) %s not recursed @ %s:%d.",
+ kassert_panic("Lock (%s) %s not recursed @ %s:%d.",
class->lc_name, lock->lo_name,
fixup_filename(file), line);
if ((flags & LA_NOTRECURSED) != 0 &&
(instance->li_flags & LI_RECURSEMASK) != 0)
- panic("Lock (%s) %s recursed @ %s:%d.",
+ kassert_panic("Lock (%s) %s recursed @ %s:%d.",
class->lc_name, lock->lo_name,
fixup_filename(file), line);
break;
default:
- panic("Invalid lock assertion at %s:%d.",
+ kassert_panic("Invalid lock assertion at %s:%d.",
fixup_filename(file), line);
}
@@ -2321,9 +2355,11 @@ witness_setflag(struct lock_object *lock, int flag, int set)
lock_list = PCPU_GET(spinlocks);
}
instance = find_instance(lock_list, lock);
- if (instance == NULL)
- panic("%s: lock (%s) %s not locked", __func__,
+ if (instance == NULL) {
+ kassert_panic("%s: lock (%s) %s not locked", __func__,
class->lc_name, lock->lo_name);
+ return;
+ }
if (set)
instance->li_flags |= flag;
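The recurring shape of these conversions: unlike panic(9), kassert_panic() can return when assertion failures are configured to be non-fatal (an assumption about the accompanying kern_shutdown.c change in this commit), so each call site must bail out before touching the state the check was guarding:

	instance = find_instance(lock_list, lock);
	if (instance == NULL) {
		kassert_panic("%s: lock (%s) %s not locked", __func__,
		    class->lc_name, lock->lo_name);
		return;		/* may be reached; instance is still NULL */
	}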
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index f47cb03..b97ff7f 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -536,7 +536,8 @@ dofilewrite(td, fd, fp, auio, offset, flags)
ktruio = cloneuio(auio);
#endif
cnt = auio->uio_resid;
- if (fp->f_type == DTYPE_VNODE)
+ if (fp->f_type == DTYPE_VNODE &&
+ (fp->f_vnread_flags & FDEVFS_VNODE) == 0)
bwillwrite();
if ((error = fo_write(fp, auio, td->td_ucred, flags, td))) {
if (auio->uio_resid != cnt && (error == ERESTART ||
diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c
index c133fcb..c146232 100644
--- a/sys/kern/uipc_domain.c
+++ b/sys/kern/uipc_domain.c
@@ -46,8 +46,6 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
-#include <vm/uma.h>
-
/*
* System initialization
*
@@ -270,21 +268,31 @@ domainfinalize(void *dummy)
callout_reset(&pfslow_callout, 1, pfslowtimo, NULL);
}
+struct domain *
+pffinddomain(int family)
+{
+ struct domain *dp;
+
+ for (dp = domains; dp != NULL; dp = dp->dom_next)
+ if (dp->dom_family == family)
+ return (dp);
+ return (NULL);
+}
+
struct protosw *
pffindtype(int family, int type)
{
struct domain *dp;
struct protosw *pr;
- for (dp = domains; dp; dp = dp->dom_next)
- if (dp->dom_family == family)
- goto found;
- return (0);
-found:
+ dp = pffinddomain(family);
+ if (dp == NULL)
+ return (NULL);
+
for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
if (pr->pr_type && pr->pr_type == type)
return (pr);
- return (0);
+ return (NULL);
}
struct protosw *
@@ -292,21 +300,22 @@ pffindproto(int family, int protocol, int type)
{
struct domain *dp;
struct protosw *pr;
- struct protosw *maybe = 0;
+ struct protosw *maybe;
+ maybe = NULL;
if (family == 0)
- return (0);
- for (dp = domains; dp; dp = dp->dom_next)
- if (dp->dom_family == family)
- goto found;
- return (0);
-found:
+ return (NULL);
+
+ dp = pffinddomain(family);
+ if (dp == NULL)
+ return (NULL);
+
for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
if ((pr->pr_protocol == protocol) && (pr->pr_type == type))
return (pr);
if (type == SOCK_RAW && pr->pr_type == SOCK_RAW &&
- pr->pr_protocol == 0 && maybe == (struct protosw *)0)
+ pr->pr_protocol == 0 && maybe == NULL)
maybe = pr;
}
return (maybe);
@@ -334,12 +343,10 @@ pf_proto_register(int family, struct protosw *npr)
return (ENXIO);
/* Try to find the specified domain based on the family. */
- for (dp = domains; dp; dp = dp->dom_next)
- if (dp->dom_family == family)
- goto found;
- return (EPFNOSUPPORT);
+ dp = pffinddomain(family);
+ if (dp == NULL)
+ return (EPFNOSUPPORT);
-found:
/* Initialize backpointer to struct domain. */
npr->pr_domain = dp;
fpr = NULL;
@@ -405,12 +412,10 @@ pf_proto_unregister(int family, int protocol, int type)
return (EPROTOTYPE);
/* Try to find the specified domain based on the family type. */
- for (dp = domains; dp; dp = dp->dom_next)
- if (dp->dom_family == family)
- goto found;
- return (EPFNOSUPPORT);
+ dp = pffinddomain(family);
+ if (dp == NULL)
+ return (EPFNOSUPPORT);
-found:
dpr = NULL;
/* Lock out everyone else while we are manipulating the protosw. */
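Every lookup in this file now funnels through the new pffinddomain() helper in place of the duplicated goto loops; callers share one shape:

	struct domain *dp;

	dp = pffinddomain(family);
	if (dp == NULL)
		return (EPFNOSUPPORT);	/* or NULL, in the protosw lookups */
	/* ... scan dp->dom_protosw as before ... */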
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
index 4130361..ab6163d 100644
--- a/sys/kern/uipc_mbuf.c
+++ b/sys/kern/uipc_mbuf.c
@@ -520,7 +520,7 @@ m_prepend(struct mbuf *m, int len, int how)
/*
* Make a copy of an mbuf chain starting "off0" bytes from the beginning,
* continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf.
- * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
+ * The wait parameter is a choice of M_WAITOK/M_NOWAIT from caller.
* Note that the copy is read-only, because clusters are not copied,
* only their reference counts are incremented.
*/
@@ -1028,7 +1028,7 @@ m_pullup(struct mbuf *n, int len)
} else {
if (len > MHLEN)
goto bad;
- MGET(m, M_DONTWAIT, n->m_type);
+ MGET(m, M_NOWAIT, n->m_type);
if (m == NULL)
goto bad;
m->m_len = 0;
@@ -1076,7 +1076,7 @@ m_copyup(struct mbuf *n, int len, int dstoff)
if (len > (MHLEN - dstoff))
goto bad;
- MGET(m, M_DONTWAIT, n->m_type);
+ MGET(m, M_NOWAIT, n->m_type);
if (m == NULL)
goto bad;
m->m_len = 0;
@@ -1195,10 +1195,10 @@ m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
while (totlen > 0) {
if (top == NULL) { /* First one, must be PKTHDR */
if (totlen + off >= MINCLSIZE) {
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
len = MCLBYTES;
} else {
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
len = MHLEN;
/* Place initial small packet/header at end of mbuf */
@@ -1213,10 +1213,10 @@ m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
m->m_pkthdr.len = totlen;
} else {
if (totlen + off >= MINCLSIZE) {
- m = m_getcl(M_DONTWAIT, MT_DATA, 0);
+ m = m_getcl(M_NOWAIT, MT_DATA, 0);
len = MCLBYTES;
} else {
- m = m_get(M_DONTWAIT, MT_DATA);
+ m = m_get(M_NOWAIT, MT_DATA);
len = MLEN;
}
if (m == NULL) {
@@ -1260,7 +1260,7 @@ m_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp)
off -= mlen;
totlen += mlen;
if (m->m_next == NULL) {
- n = m_get(M_DONTWAIT, m->m_type);
+ n = m_get(M_NOWAIT, m->m_type);
if (n == NULL)
goto out;
bzero(mtod(n, caddr_t), MLEN);
@@ -1284,7 +1284,7 @@ m_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp)
if (len == 0)
break;
if (m->m_next == NULL) {
- n = m_get(M_DONTWAIT, m->m_type);
+ n = m_get(M_NOWAIT, m->m_type);
if (n == NULL)
break;
n->m_len = min(MLEN, len);
@@ -1328,7 +1328,7 @@ m_append(struct mbuf *m0, int len, c_caddr_t cp)
* Allocate a new mbuf; could check space
* and allocate a cluster instead.
*/
- n = m_get(M_DONTWAIT, m->m_type);
+ n = m_get(M_NOWAIT, m->m_type);
if (n == NULL)
break;
n->m_len = min(MLEN, remainder);
diff --git a/sys/kern/uipc_mbuf2.c b/sys/kern/uipc_mbuf2.c
index 96be658..e32e2a1 100644
--- a/sys/kern/uipc_mbuf2.c
+++ b/sys/kern/uipc_mbuf2.c
@@ -171,7 +171,7 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp)
* chop the current mbuf into two pieces, set off to 0.
*/
if (len <= n->m_len - off) {
- o = m_dup1(n, off, n->m_len - off, M_DONTWAIT);
+ o = m_dup1(n, off, n->m_len - off, M_NOWAIT);
if (o == NULL) {
m_freem(m);
return NULL; /* ENOBUFS */
@@ -231,9 +231,9 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp)
* on both end.
*/
if (len > MLEN)
- o = m_getcl(M_DONTWAIT, m->m_type, 0);
+ o = m_getcl(M_NOWAIT, m->m_type, 0);
else
- o = m_get(M_DONTWAIT, m->m_type);
+ o = m_get(M_NOWAIT, m->m_type);
if (!o) {
m_freem(m);
return NULL; /* ENOBUFS */
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
index 272f939..6325840 100644
--- a/sys/kern/uipc_sockbuf.c
+++ b/sys/kern/uipc_sockbuf.c
@@ -188,7 +188,7 @@ sowakeup(struct socket *so, struct sockbuf *sb)
}
KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
if (sb->sb_upcall != NULL) {
- ret = sb->sb_upcall(so, sb->sb_upcallarg, M_DONTWAIT);
+ ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
if (ret == SU_ISCONNECTED) {
KASSERT(sb == &so->so_rcv,
("SO_SND upcall returned SU_ISCONNECTED"));
@@ -644,7 +644,7 @@ sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
if (asa->sa_len > MLEN)
return (0);
#endif
- MGET(m, M_DONTWAIT, MT_SONAME);
+ MGET(m, M_NOWAIT, MT_SONAME);
if (m == 0)
return (0);
m->m_len = asa->sa_len;
@@ -1002,9 +1002,9 @@ sbcreatecontrol(caddr_t p, int size, int type, int level)
if (CMSG_SPACE((u_int)size) > MCLBYTES)
return ((struct mbuf *) NULL);
if (CMSG_SPACE((u_int)size) > MLEN)
- m = m_getcl(M_DONTWAIT, MT_CONTROL, 0);
+ m = m_getcl(M_NOWAIT, MT_CONTROL, 0);
else
- m = m_get(M_DONTWAIT, MT_CONTROL);
+ m = m_get(M_NOWAIT, MT_CONTROL);
if (m == NULL)
return ((struct mbuf *) NULL);
cp = mtod(m, struct cmsghdr *);
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 4416eca..de8ae5a 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -92,7 +92,7 @@
* from a listen queue to a file descriptor, in order to prevent garbage
* collection of the socket at an untimely moment. For a number of reasons,
* these interfaces are not preferred, and should be avoided.
- *
+ *
* NOTE: With regard to VNETs the general rule is that callers do not set
* curvnet. Exceptions to this rule include soabort(), sodisconnect(),
* sofree() (and with that sorele(), sotryfree()), as well as sonewconn()
@@ -259,25 +259,26 @@ SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");
* Initialize the socket subsystem and set up the socket
* memory allocator.
*/
-uma_zone_t socket_zone;
+static uma_zone_t socket_zone;
int maxsockets;
static void
socket_zone_change(void *tag)
{
- uma_zone_set_max(socket_zone, maxsockets);
+ maxsockets = uma_zone_set_max(socket_zone, maxsockets);
}
static void
socket_init(void *tag)
{
- socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL,
- NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
- uma_zone_set_max(socket_zone, maxsockets);
- EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL,
- EVENTHANDLER_PRI_FIRST);
+ socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ maxsockets = uma_zone_set_max(socket_zone, maxsockets);
+ uma_zone_set_warning(socket_zone, "kern.ipc.maxsockets limit reached");
+ EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL,
+ EVENTHANDLER_PRI_FIRST);
}
SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL);
@@ -425,7 +426,16 @@ socreate(int dom, struct socket **aso, int type, int proto,
else
prp = pffindtype(dom, type);
- if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL ||
+ if (prp == NULL) {
+ /* No support for domain. */
+ if (pffinddomain(dom) == NULL)
+ return (EAFNOSUPPORT);
+ /* No support for socket type. */
+ if (proto == 0 && type != 0)
+ return (EPROTOTYPE);
+ return (EPROTONOSUPPORT);
+ }
+ if (prp->pr_usrreqs->pru_attach == NULL ||
prp->pr_usrreqs->pru_attach == pru_attach_notsupp)
return (EPROTONOSUPPORT);
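Seen from userland, the new checks split what used to be a blanket EPROTONOSUPPORT into three distinct errors; the family and type constants below are hypothetical, only the errno mapping follows from the code above:

	socket(AF_NOSUCHFAMILY, SOCK_STREAM, 0);  /* EAFNOSUPPORT: no such domain */
	socket(AF_INET, SOCK_NOSUCHTYPE, 0);      /* EPROTOTYPE: domain ok, type not */
	socket(AF_INET, SOCK_STREAM, 9999);       /* EPROTONOSUPPORT: no such proto */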
@@ -710,8 +720,10 @@ sofree(struct socket *so)
("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)",
so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP));
if (so->so_options & SO_ACCEPTCONN) {
- KASSERT((TAILQ_EMPTY(&so->so_comp)), ("sofree: so_comp populated"));
- KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_incomp populated"));
+ KASSERT((TAILQ_EMPTY(&so->so_comp)),
+ ("sofree: so_comp populated"));
+ KASSERT((TAILQ_EMPTY(&so->so_incomp)),
+ ("sofree: so_incomp populated"));
}
SOCK_UNLOCK(so);
ACCEPT_UNLOCK();
@@ -777,7 +789,8 @@ soclose(struct socket *so)
goto drop;
while (so->so_state & SS_ISCONNECTED) {
error = tsleep(&so->so_timeo,
- PSOCK | PCATCH, "soclos", so->so_linger * hz);
+ PSOCK | PCATCH, "soclos",
+ so->so_linger * hz);
if (error)
break;
}
@@ -947,7 +960,7 @@ struct so_zerocopy_stats so_zerocp_stats = {0,0,0};
/*
* sosend_copyin() is only used if zero copy sockets are enabled. Otherwise
* sosend_dgram() and sosend_generic() use m_uiotombuf().
- *
+ *
* sosend_copyin() accepts a uio and prepares an mbuf chain holding part or
* all of the data referenced by the uio. If desired, it uses zero-copy.
* *space will be updated to reflect data copied in.
@@ -998,7 +1011,7 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
}
} else {
if (top == NULL) {
- m = m_gethdr(M_WAIT, MT_DATA);
+ m = m_gethdr(M_WAITOK, MT_DATA);
m->m_pkthdr.len = 0;
m->m_pkthdr.rcvif = NULL;
@@ -1010,7 +1023,7 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
if (atomic && m && len < MHLEN)
MH_ALIGN(m, len);
} else {
- m = m_get(M_WAIT, MT_DATA);
+ m = m_get(M_WAITOK, MT_DATA);
len = min(min(MLEN, resid), *space);
}
}
@@ -1433,7 +1446,7 @@ soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
VNET_SO_ASSERT(so);
- m = m_get(M_WAIT, MT_DATA);
+ m = m_get(M_WAITOK, MT_DATA);
error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
if (error)
goto bad;
@@ -1449,8 +1462,7 @@ soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
disposable = 0;
error = uiomoveco(mtod(m, void *),
- min(uio->uio_resid, m->m_len),
- uio, disposable);
+ min(uio->uio_resid, m->m_len), uio, disposable);
} else
#endif /* SOCKET_RECV_PFLIP */
error = uiomove(mtod(m, void *),
@@ -1485,20 +1497,19 @@ sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
else
sb->sb_mb = nextrecord;
- /*
- * Now update any dependent socket buffer fields to reflect the new
- * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the
+ /*
+ * Now update any dependent socket buffer fields to reflect the new
+ * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the
* addition of a second clause that takes care of the case where
* sb_mb has been updated, but remains the last record.
- */
- if (sb->sb_mb == NULL) {
- sb->sb_mbtail = NULL;
- sb->sb_lastrecord = NULL;
- } else if (sb->sb_mb->m_nextpkt == NULL)
- sb->sb_lastrecord = sb->sb_mb;
+ */
+ if (sb->sb_mb == NULL) {
+ sb->sb_mbtail = NULL;
+ sb->sb_lastrecord = NULL;
+ } else if (sb->sb_mb->m_nextpkt == NULL)
+ sb->sb_lastrecord = sb->sb_mb;
}
-
/*
* Implement receive operations on a socket. We depend on the way that
* records are added to the sockbuf by sbappend. In particular, each record
@@ -1748,7 +1759,7 @@ dontblock:
/*
* If the type of mbuf has changed since the last mbuf
* examined ('type'), end the receive operation.
- */
+ */
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) {
if (type != m->m_type)
@@ -1787,8 +1798,7 @@ dontblock:
disposable = 0;
error = uiomoveco(mtod(m, char *) + moff,
- (int)len, uio,
- disposable);
+ (int)len, uio, disposable);
} else
#endif /* SOCKET_RECV_PFLIP */
error = uiomove(mtod(m, char *) + moff, (int)len, uio);
@@ -1841,26 +1851,26 @@ dontblock:
int copy_flag;
if (flags & MSG_DONTWAIT)
- copy_flag = M_DONTWAIT;
+ copy_flag = M_NOWAIT;
else
-				copy_flag = M_WAIT;
+				copy_flag = M_WAITOK;
- if (copy_flag == M_WAIT)
+ if (copy_flag == M_WAITOK)
SOCKBUF_UNLOCK(&so->so_rcv);
*mp = m_copym(m, 0, len, copy_flag);
- if (copy_flag == M_WAIT)
+ if (copy_flag == M_WAITOK)
SOCKBUF_LOCK(&so->so_rcv);
- if (*mp == NULL) {
- /*
- * m_copym() couldn't
+ if (*mp == NULL) {
+ /*
+ * m_copym() couldn't
* allocate an mbuf. Adjust
* uio_resid back (it was
* adjusted down by len
* bytes, which we didn't end
* up "copying" over).
- */
- uio->uio_resid += len;
- break;
- }
+ */
+ uio->uio_resid += len;
+ break;
+ }
}
m->m_data += len;
m->m_len -= len;
@@ -1893,7 +1903,8 @@ dontblock:
while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
!sosendallatonce(so) && nextrecord == NULL) {
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
- if (so->so_error || so->so_rcv.sb_state & SBS_CANTRCVMORE)
+ if (so->so_error ||
+ so->so_rcv.sb_state & SBS_CANTRCVMORE)
break;
/*
* Notify the protocol that some data has been
@@ -2114,7 +2125,7 @@ deliver:
KASSERT(sb->sb_mb != NULL,
("%s: len > 0 && sb->sb_mb empty", __func__));
- m = m_copym(sb->sb_mb, 0, len, M_DONTWAIT);
+ m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
if (m == NULL)
len = 0; /* Don't flush data from sockbuf. */
else
@@ -2382,9 +2393,8 @@ soshutdown(struct socket *so, int how)
return (EINVAL);
CURVNET_SET(so->so_vnet);
- if (pr->pr_usrreqs->pru_flush != NULL) {
- (*pr->pr_usrreqs->pru_flush)(so, how);
- }
+ if (pr->pr_usrreqs->pru_flush != NULL)
+ (*pr->pr_usrreqs->pru_flush)(so, how);
if (how != SHUT_WR)
sorflush(so);
if (how != SHUT_RD) {
@@ -2551,7 +2561,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
case SO_NO_DDP:
case SO_NO_OFFLOAD:
error = sooptcopyin(sopt, &optval, sizeof optval,
- sizeof optval);
+ sizeof optval);
if (error)
goto bad;
SOCK_LOCK(so);
@@ -2564,7 +2574,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
case SO_SETFIB:
error = sooptcopyin(sopt, &optval, sizeof optval,
- sizeof optval);
+ sizeof optval);
if (error)
goto bad;
@@ -2582,7 +2592,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
case SO_USER_COOKIE:
error = sooptcopyin(sopt, &val32, sizeof val32,
- sizeof val32);
+ sizeof val32);
if (error)
goto bad;
so->so_user_cookie = val32;
@@ -2593,7 +2603,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
case SO_SNDLOWAT:
case SO_RCVLOWAT:
error = sooptcopyin(sopt, &optval, sizeof optval,
- sizeof optval);
+ sizeof optval);
if (error)
goto bad;
@@ -2901,11 +2911,11 @@ soopt_getm(struct sockopt *sopt, struct mbuf **mp)
struct mbuf *m, *m_prev;
int sopt_size = sopt->sopt_valsize;
- MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
+ MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
if (m == NULL)
return ENOBUFS;
if (sopt_size > MLEN) {
- MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
+ MCLGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT);
if ((m->m_flags & M_EXT) == 0) {
m_free(m);
return ENOBUFS;
@@ -2919,14 +2929,14 @@ soopt_getm(struct sockopt *sopt, struct mbuf **mp)
m_prev = m;
while (sopt_size) {
- MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
+ MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
if (m == NULL) {
m_freem(*mp);
return ENOBUFS;
}
if (sopt_size > MLEN) {
- MCLGET(m, sopt->sopt_td != NULL ? M_WAIT :
- M_DONTWAIT);
+ MCLGET(m, sopt->sopt_td != NULL ? M_WAITOK :
+ M_NOWAIT);
if ((m->m_flags & M_EXT) == 0) {
m_freem(m);
m_freem(*mp);
@@ -2955,7 +2965,7 @@ soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
int error;
error = copyin(sopt->sopt_val, mtod(m, char *),
- m->m_len);
+ m->m_len);
if (error != 0) {
m_freem(m0);
return(error);
@@ -2984,17 +2994,17 @@ soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
int error;
error = copyout(mtod(m, char *), sopt->sopt_val,
- m->m_len);
+ m->m_len);
if (error != 0) {
m_freem(m0);
return(error);
}
} else
bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
- sopt->sopt_valsize -= m->m_len;
- sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
- valsize += m->m_len;
- m = m->m_next;
+ sopt->sopt_valsize -= m->m_len;
+ sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
+ valsize += m->m_len;
+ m = m->m_next;
}
if (m != NULL) {
/* enough soopt buffer should be given from user-land */
@@ -3322,7 +3332,7 @@ filt_solisten(struct knote *kn, long hint)
struct socket *so = kn->kn_fp->f_data;
kn->kn_data = so->so_qlen;
- return (! TAILQ_EMPTY(&so->so_comp));
+ return (!TAILQ_EMPTY(&so->so_comp));
}
int
@@ -3381,7 +3391,7 @@ soisconnecting(struct socket *so)
void
soisconnected(struct socket *so)
{
- struct socket *head;
+ struct socket *head;
int ret;
restart:
@@ -3409,7 +3419,7 @@ restart:
head->so_accf->so_accept_filter_arg);
so->so_options &= ~SO_ACCEPTFILTER;
ret = head->so_accf->so_accept_filter->accf_callback(so,
- head->so_accf->so_accept_filter_arg, M_DONTWAIT);
+ head->so_accf->so_accept_filter_arg, M_NOWAIT);
if (ret == SU_ISCONNECTED)
soupcall_clear(so, SO_RCV);
SOCK_UNLOCK(so);
@@ -3486,7 +3496,7 @@ soupcall_set(struct socket *so, int which,
int (*func)(struct socket *, void *, int), void *arg)
{
struct sockbuf *sb;
-
+
switch (which) {
case SO_RCV:
sb = &so->so_rcv;
@@ -3570,9 +3580,10 @@ sotoxsocket(struct socket *so, struct xsocket *xso)
*/
void
-so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg)
+so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *),
+ void *arg)
{
-
+
TAILQ_FOREACH(so, &so->so_comp, so_list)
func(so, arg);
}
@@ -3692,11 +3703,13 @@ so_sowwakeup_locked(struct socket *so)
void
so_lock(struct socket *so)
{
+
SOCK_LOCK(so);
}
void
so_unlock(struct socket *so)
{
+
SOCK_UNLOCK(so);
}
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 8ecaa02..b29c2c6 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -722,7 +722,7 @@ sendit(td, s, mp, flags)
if (mp->msg_flags == MSG_COMPAT) {
struct cmsghdr *cm;
- M_PREPEND(control, sizeof(*cm), M_WAIT);
+ M_PREPEND(control, sizeof(*cm), M_WAITOK);
cm = mtod(control, struct cmsghdr *);
cm->cmsg_len = control->m_len;
cm->cmsg_level = SOL_SOCKET;
@@ -1661,9 +1661,9 @@ sockargs(mp, buf, buflen, type)
if ((u_int)buflen > MCLBYTES)
return (EINVAL);
}
- m = m_get(M_WAIT, type);
+ m = m_get(M_WAITOK, type);
if ((u_int)buflen > MLEN)
- MCLGET(m, M_WAIT);
+ MCLGET(m, M_WAITOK);
m->m_len = buflen;
error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
if (error)
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index c60b177..c732c70 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1778,6 +1778,7 @@ unp_init(void)
if (unp_zone == NULL)
panic("unp_init");
uma_zone_set_max(unp_zone, maxsockets);
+ uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached");
EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
NULL, EVENTHANDLER_PRI_ANY);
LIST_INIT(&unp_dhead);
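Both the socket and unpcb zones now log a rate-limited warning when kern.ipc.maxsockets is reached instead of failing silently; the same diagnostic can be wired into any capped zone (the zone name, item type, and limit below are placeholders):

	uma_zone_t zone;

	zone = uma_zcreate("example", sizeof(struct example_item), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
	uma_zone_set_max(zone, 1000);
	uma_zone_set_warning(zone, "kern.example limit reached");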
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 0480bd4..96c8442 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -2107,15 +2107,16 @@ restart:
if (maxsize != bp->b_kvasize) {
vm_offset_t addr = 0;
+ int rv;
bfreekva(bp);
vm_map_lock(buffer_map);
if (vm_map_findspace(buffer_map,
- vm_map_min(buffer_map), maxsize, &addr)) {
+ vm_map_min(buffer_map), maxsize, &addr)) {
/*
- * Uh oh. Buffer map is to fragmented. We
- * must defragment the map.
+ * Buffer map is too fragmented.
+ * We must defragment the map.
*/
atomic_add_int(&bufdefragcnt, 1);
vm_map_unlock(buffer_map);
@@ -2124,22 +2125,21 @@ restart:
brelse(bp);
goto restart;
}
- if (addr) {
- vm_map_insert(buffer_map, NULL, 0,
- addr, addr + maxsize,
- VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
-
- bp->b_kvabase = (caddr_t) addr;
- bp->b_kvasize = maxsize;
- atomic_add_long(&bufspace, bp->b_kvasize);
- atomic_add_int(&bufreusecnt, 1);
- }
+ rv = vm_map_insert(buffer_map, NULL, 0, addr,
+ addr + maxsize, VM_PROT_ALL, VM_PROT_ALL,
+ MAP_NOFAULT);
+ KASSERT(rv == KERN_SUCCESS,
+ ("vm_map_insert(buffer_map) rv %d", rv));
vm_map_unlock(buffer_map);
+ bp->b_kvabase = (caddr_t)addr;
+ bp->b_kvasize = maxsize;
+ atomic_add_long(&bufspace, bp->b_kvasize);
+ atomic_add_int(&bufreusecnt, 1);
}
bp->b_saveaddr = bp->b_kvabase;
bp->b_data = bp->b_saveaddr;
}
- return(bp);
+ return (bp);
}
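The new KASSERT relies on an invariant rather than testing addr for zero: with buffer_map locked across both calls, a successful vm_map_findspace() guarantees that inserting the same range cannot fail. The invariant in isolation, as a sketch:

	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, &addr) == 0) {
		rv = vm_map_insert(map, NULL, 0, addr, addr + size,
		    VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
		KASSERT(rv == KERN_SUCCESS, ("vm_map_insert rv %d", rv));
	}
	vm_map_unlock(map);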
/*
@@ -2209,7 +2209,7 @@ buf_daemon()
while (numdirtybuffers > lodirtybuffers) {
if (buf_do_flush(NULL) == 0)
break;
- kern_yield(PRI_UNCHANGED);
+ kern_yield(PRI_USER);
}
lodirtybuffers = lodirtysave;
@@ -2605,8 +2605,6 @@ loop:
* If this check ever becomes a bottleneck it may be better to
* move it into the else, when gbincore() fails. At the moment
* it isn't a problem.
- *
- * XXX remove if 0 sections (clean this up after its proven)
*/
if (numfreebuffers == 0) {
if (TD_IS_IDLETHREAD(curthread))
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
index 4185211..114c23e 100644
--- a/sys/kern/vfs_export.c
+++ b/sys/kern/vfs_export.c
@@ -208,7 +208,7 @@ vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
np->netc_anon = crget();
np->netc_anon->cr_uid = argp->ex_anon.cr_uid;
crsetgroups(np->netc_anon, argp->ex_anon.cr_ngroups,
- np->netc_anon->cr_groups);
+ argp->ex_anon.cr_groups);
np->netc_anon->cr_prison = &prison0;
prison_hold(np->netc_anon->cr_prison);
np->netc_numsecflavors = argp->ex_numsecflavors;
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
index 25b79ae..5926e15 100644
--- a/sys/kern/vfs_mount.c
+++ b/sys/kern/vfs_mount.c
@@ -559,7 +559,7 @@ vfs_donmount(struct thread *td, uint64_t fsflags, struct uio *fsoptions)
if (error || fstype[fstypelen - 1] != '\0') {
error = EINVAL;
if (errmsg != NULL)
- strncpy(errmsg, "Invalid fstype", errmsg_len);
+ strlcpy(errmsg, "Invalid fstype", errmsg_len);
goto bail;
}
fspathlen = 0;
@@ -567,7 +567,7 @@ vfs_donmount(struct thread *td, uint64_t fsflags, struct uio *fsoptions)
if (error || fspath[fspathlen - 1] != '\0') {
error = EINVAL;
if (errmsg != NULL)
- strncpy(errmsg, "Invalid fspath", errmsg_len);
+ strlcpy(errmsg, "Invalid fspath", errmsg_len);
goto bail;
}
@@ -711,7 +711,7 @@ sys_mount(td, uap)
int error;
/*
- * Mount flags are now 64-bits. On 32-bit archtectures only
+ * Mount flags are now 64-bits. On 32-bit architectures only
* 32-bits are passed in, but from here on everything handles
* 64-bit flags correctly.
*/
@@ -1447,7 +1447,7 @@ vfs_filteropt(struct vfsoptlist *opts, const char **legal)
if (ret != 0) {
TAILQ_FOREACH(opt, opts, link) {
if (strcmp(opt->name, "errmsg") == 0) {
- strncpy((char *)opt->value, errmsg, opt->len);
+ strlcpy((char *)opt->value, errmsg, opt->len);
break;
}
}
@@ -1724,7 +1724,7 @@ __mnt_vnode_next(struct vnode **mvp, struct mount *mp)
KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
if (should_yield()) {
MNT_IUNLOCK(mp);
- kern_yield(PRI_UNCHANGED);
+ kern_yield(PRI_USER);
MNT_ILOCK(mp);
}
vp = TAILQ_NEXT(*mvp, v_nmntvnodes);
diff --git a/sys/kern/vfs_mountroot.c b/sys/kern/vfs_mountroot.c
index 83948f2..147926e 100644
--- a/sys/kern/vfs_mountroot.c
+++ b/sys/kern/vfs_mountroot.c
@@ -672,10 +672,11 @@ parse_mount_dev_present(const char *dev)
return (error != 0) ? 0 : 1;
}
+#define ERRMSGL 255
static int
parse_mount(char **conf)
{
- char errmsg[255];
+ char *errmsg;
struct mntarg *ma;
char *dev, *fs, *opts, *tok;
int delay, error, timeout;
@@ -707,7 +708,7 @@ parse_mount(char **conf)
printf("Trying to mount root from %s:%s [%s]...\n", fs, dev,
(opts != NULL) ? opts : "");
- bzero(errmsg, sizeof(errmsg));
+ errmsg = malloc(ERRMSGL, M_TEMP, M_WAITOK | M_ZERO);
if (vfs_byname(fs) == NULL) {
-		strlcpy(errmsg, "unknown file system", sizeof(errmsg));
+		strlcpy(errmsg, "unknown file system", ERRMSGL);
@@ -734,7 +735,7 @@ parse_mount(char **conf)
ma = mount_arg(ma, "fstype", fs, -1);
ma = mount_arg(ma, "fspath", "/", -1);
ma = mount_arg(ma, "from", dev, -1);
- ma = mount_arg(ma, "errmsg", errmsg, sizeof(errmsg));
+ ma = mount_arg(ma, "errmsg", errmsg, ERRMSGL);
ma = mount_arg(ma, "ro", NULL, 0);
ma = parse_mountroot_options(ma, opts);
error = kernel_mount(ma, MNT_ROOTFS);
@@ -748,11 +749,13 @@ parse_mount(char **conf)
printf(".\n");
}
free(fs, M_TEMP);
+ free(errmsg, M_TEMP);
if (opts != NULL)
free(opts, M_TEMP);
/* kernel_mount can return -1 on error. */
return ((error < 0) ? EDOOFUS : error);
}
+#undef ERRMSGL
static int
vfs_mountroot_parse(struct sbuf *sb, struct mount *mpdevfs)
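Moving errmsg to malloc(9) keeps a 255-byte buffer off the kernel stack in an already deep mount path; the general pattern for transient scratch buffers, with the size constant assumed:

	char *buf;

	buf = malloc(BUFSZ, M_TEMP, M_WAITOK | M_ZERO);	/* BUFSZ is a placeholder */
	/* ... fill and consume buf ... */
	free(buf, M_TEMP);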
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 2c470df..7c243b6 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$");
#include <sys/reboot.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
+#include <sys/smp.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
@@ -740,7 +741,7 @@ next_iter:
continue;
MNT_IUNLOCK(mp);
yield:
- kern_yield(PRI_UNCHANGED);
+ kern_yield(PRI_USER);
relock_mnt:
MNT_ILOCK(mp);
}
@@ -852,7 +853,7 @@ vnlru_proc(void)
vnlru_nowhere++;
tsleep(vnlruproc, PPAUSE, "vlrup", hz * 3);
} else
- kern_yield(PRI_UNCHANGED);
+ kern_yield(PRI_USER);
}
}
@@ -4634,7 +4635,7 @@ __mnt_vnode_next_all(struct vnode **mvp, struct mount *mp)
struct vnode *vp;
if (should_yield())
- kern_yield(PRI_UNCHANGED);
+ kern_yield(PRI_USER);
MNT_ILOCK(mp);
KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
vp = TAILQ_NEXT(*mvp, v_nmntvnodes);
@@ -4710,30 +4711,48 @@ __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp)
* These are helper functions for filesystems to traverse their
* active vnodes. See MNT_VNODE_FOREACH_ACTIVE() in sys/mount.h
*/
-struct vnode *
-__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
+static void
+mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp)
{
- struct vnode *vp, *nvp;
- if (should_yield())
- kern_yield(PRI_UNCHANGED);
+ KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
+
MNT_ILOCK(mp);
-restart:
- mtx_lock(&vnode_free_list_mtx);
+ MNT_REL(mp);
+ MNT_IUNLOCK(mp);
+ free(*mvp, M_VNODE_MARKER);
+ *mvp = NULL;
+}
+
+static struct vnode *
+mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
+{
+ struct vnode *vp, *nvp;
+
+ mtx_assert(&vnode_free_list_mtx, MA_OWNED);
KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
+restart:
vp = TAILQ_NEXT(*mvp, v_actfreelist);
+ TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
while (vp != NULL) {
if (vp->v_type == VMARKER) {
vp = TAILQ_NEXT(vp, v_actfreelist);
continue;
}
if (!VI_TRYLOCK(vp)) {
- mtx_unlock(&vnode_free_list_mtx);
- kern_yield(PRI_UNCHANGED);
- goto restart;
+ if (mp_ncpus == 1 || should_yield()) {
+ TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist);
+ mtx_unlock(&vnode_free_list_mtx);
+ kern_yield(PRI_USER);
+ mtx_lock(&vnode_free_list_mtx);
+ goto restart;
+ }
+ continue;
}
- if (vp->v_mount == mp && vp->v_type != VMARKER &&
- (vp->v_iflag & VI_DOOMED) == 0)
+ KASSERT(vp->v_type != VMARKER, ("locked marker %p", vp));
+ KASSERT(vp->v_mount == mp || vp->v_mount == NULL,
+ ("alien vnode on the active list %p %p", vp, mp));
+ if (vp->v_mount == mp && (vp->v_iflag & VI_DOOMED) == 0)
break;
nvp = TAILQ_NEXT(vp, v_actfreelist);
VI_UNLOCK(vp);
@@ -4743,86 +4762,58 @@ restart:
/* Check if we are done */
if (vp == NULL) {
mtx_unlock(&vnode_free_list_mtx);
- __mnt_vnode_markerfree_active(mvp, mp);
- /* MNT_IUNLOCK(mp); -- done in above function */
- mtx_assert(MNT_MTX(mp), MA_NOTOWNED);
+ mnt_vnode_markerfree_active(mvp, mp);
return (NULL);
}
- TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
mtx_unlock(&vnode_free_list_mtx);
- MNT_IUNLOCK(mp);
ASSERT_VI_LOCKED(vp, "active iter");
KASSERT((vp->v_iflag & VI_ACTIVE) != 0, ("Non-active vp %p", vp));
return (vp);
}
struct vnode *
+__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
+{
+
+ if (should_yield())
+ kern_yield(PRI_USER);
+ mtx_lock(&vnode_free_list_mtx);
+ return (mnt_vnode_next_active(mvp, mp));
+}
+
+struct vnode *
__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp)
{
- struct vnode *vp, *nvp;
+ struct vnode *vp;
*mvp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO);
MNT_ILOCK(mp);
MNT_REF(mp);
+ MNT_IUNLOCK(mp);
(*mvp)->v_type = VMARKER;
+ (*mvp)->v_mount = mp;
-restart:
mtx_lock(&vnode_free_list_mtx);
vp = TAILQ_FIRST(&mp->mnt_activevnodelist);
- while (vp != NULL) {
- if (vp->v_type == VMARKER) {
- vp = TAILQ_NEXT(vp, v_actfreelist);
- continue;
- }
- if (!VI_TRYLOCK(vp)) {
- mtx_unlock(&vnode_free_list_mtx);
- kern_yield(PRI_UNCHANGED);
- goto restart;
- }
- if (vp->v_mount == mp && vp->v_type != VMARKER &&
- (vp->v_iflag & VI_DOOMED) == 0)
- break;
- nvp = TAILQ_NEXT(vp, v_actfreelist);
- VI_UNLOCK(vp);
- vp = nvp;
- }
-
- /* Check if we are done */
if (vp == NULL) {
mtx_unlock(&vnode_free_list_mtx);
- MNT_REL(mp);
- MNT_IUNLOCK(mp);
- free(*mvp, M_VNODE_MARKER);
- *mvp = NULL;
+ mnt_vnode_markerfree_active(mvp, mp);
return (NULL);
}
- (*mvp)->v_mount = mp;
- TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
- mtx_unlock(&vnode_free_list_mtx);
- MNT_IUNLOCK(mp);
- ASSERT_VI_LOCKED(vp, "active iter first");
- KASSERT((vp->v_iflag & VI_ACTIVE) != 0, ("Non-active vp %p", vp));
- return (vp);
+ TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist);
+ return (mnt_vnode_next_active(mvp, mp));
}
void
__mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp)
{
- if (*mvp == NULL) {
- MNT_IUNLOCK(mp);
+ if (*mvp == NULL)
return;
- }
- mtx_assert(MNT_MTX(mp), MA_OWNED);
-
- KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
mtx_lock(&vnode_free_list_mtx);
TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
mtx_unlock(&vnode_free_list_mtx);
- MNT_REL(mp);
- MNT_IUNLOCK(mp);
- free(*mvp, M_VNODE_MARKER);
- *mvp = NULL;
+ mnt_vnode_markerfree_active(mvp, mp);
}
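Filesystems consume this iterator through MNT_VNODE_FOREACH_ACTIVE(); each vnode comes back with its interlock held and VI_ACTIVE set. The loop body below is an assumed consumer, not code from this change:

	struct vnode *vp, *mvp;

	MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) {
		/* vp's interlock is held here. */
		if (vp->v_type == VNON) {	/* placeholder filter */
			VI_UNLOCK(vp);
			continue;
		}
		/* ... examine vp, or take a reference with vget() ... */
		VI_UNLOCK(vp);
	}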
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 3f65b05..bbe837a 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -1434,6 +1434,40 @@ vn_closefile(fp, td)
* proceed. If a suspend request is in progress, we wait until the
* suspension is over, and then proceed.
*/
+static int
+vn_start_write_locked(struct mount *mp, int flags)
+{
+ int error;
+
+ mtx_assert(MNT_MTX(mp), MA_OWNED);
+ error = 0;
+
+ /*
+ * Check on status of suspension.
+ */
+ if ((curthread->td_pflags & TDP_IGNSUSP) == 0 ||
+ mp->mnt_susp_owner != curthread) {
+ while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
+ if (flags & V_NOWAIT) {
+ error = EWOULDBLOCK;
+ goto unlock;
+ }
+ error = msleep(&mp->mnt_flag, MNT_MTX(mp),
+ (PUSER - 1) | (flags & PCATCH), "suspfs", 0);
+ if (error)
+ goto unlock;
+ }
+ }
+ if (flags & V_XSLEEP)
+ goto unlock;
+ mp->mnt_writeopcount++;
+unlock:
+ if (error != 0 || (flags & V_XSLEEP) != 0)
+ MNT_REL(mp);
+ MNT_IUNLOCK(mp);
+ return (error);
+}
+
int
vn_start_write(vp, mpp, flags)
struct vnode *vp;
@@ -1470,30 +1504,7 @@ vn_start_write(vp, mpp, flags)
if (vp == NULL)
MNT_REF(mp);
- /*
- * Check on status of suspension.
- */
- if ((curthread->td_pflags & TDP_IGNSUSP) == 0 ||
- mp->mnt_susp_owner != curthread) {
- while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
- if (flags & V_NOWAIT) {
- error = EWOULDBLOCK;
- goto unlock;
- }
- error = msleep(&mp->mnt_flag, MNT_MTX(mp),
- (PUSER - 1) | (flags & PCATCH), "suspfs", 0);
- if (error)
- goto unlock;
- }
- }
- if (flags & V_XSLEEP)
- goto unlock;
- mp->mnt_writeopcount++;
-unlock:
- if (error != 0 || (flags & V_XSLEEP) != 0)
- MNT_REL(mp);
- MNT_IUNLOCK(mp);
- return (error);
+ return (vn_start_write_locked(mp, flags));
}
/*
@@ -1639,8 +1650,7 @@ vfs_write_suspend(mp)
* Request a filesystem to resume write operations.
*/
void
-vfs_write_resume(mp)
- struct mount *mp;
+vfs_write_resume_flags(struct mount *mp, int flags)
{
MNT_ILOCK(mp);
@@ -1652,10 +1662,26 @@ vfs_write_resume(mp)
wakeup(&mp->mnt_writeopcount);
wakeup(&mp->mnt_flag);
curthread->td_pflags &= ~TDP_IGNSUSP;
+ if ((flags & VR_START_WRITE) != 0) {
+ MNT_REF(mp);
+ mp->mnt_writeopcount++;
+ }
MNT_IUNLOCK(mp);
- VFS_SUSP_CLEAN(mp);
- } else
+ if ((flags & VR_NO_SUSPCLR) == 0)
+ VFS_SUSP_CLEAN(mp);
+ } else if ((flags & VR_START_WRITE) != 0) {
+ MNT_REF(mp);
+ vn_start_write_locked(mp, 0);
+ } else {
MNT_IUNLOCK(mp);
+ }
+}
+
+void
+vfs_write_resume(struct mount *mp)
+{
+
+ vfs_write_resume_flags(mp, 0);
}
/*