Diffstat (limited to 'sys/kern')
 sys/kern/kern_mbuf.c     | 51
 sys/kern/kern_priv.c     | 10
 sys/kern/kern_rmlock.c   |  4
 sys/kern/kern_synch.c    |  2
 sys/kern/kern_time.c     |  9
 sys/kern/subr_bus.c      |  8
 sys/kern/subr_witness.c  |  8
 sys/kern/uipc_mbuf.c     |  6
 sys/kern/uipc_syscalls.c | 26
 sys/kern/vfs_bio.c       | 17
 sys/kern/vfs_cache.c     |  4
 sys/kern/vfs_mount.c     |  4
 sys/kern/vfs_subr.c      |  2
 sys/kern/vfs_vnops.c     | 18
 14 files changed, 101 insertions, 68 deletions
diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c
index 1762b72..9e85806 100644
--- a/sys/kern/kern_mbuf.c
+++ b/sys/kern/kern_mbuf.c
@@ -102,7 +102,11 @@ int nmbclusters; /* limits number of mbuf clusters */
int nmbjumbop; /* limits number of page size jumbo clusters */
int nmbjumbo9; /* limits number of 9k jumbo clusters */
int nmbjumbo16; /* limits number of 16k jumbo clusters */
-struct mbstat mbstat;
+
+static quad_t maxmbufmem; /* overall real memory limit for all mbufs */
+
+SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN, &maxmbufmem, 0,
+ "Maximum real memory allocateable to various mbuf types");
/*
* tunable_mbinit() has to be run before any mbuf allocations are done.
@@ -110,7 +114,7 @@ struct mbstat mbstat;
static void
tunable_mbinit(void *dummy)
{
- quad_t realmem, maxmbufmem;
+ quad_t realmem;
/*
* The default limit for all mbuf related memory is 1/2 of all
@@ -120,7 +124,7 @@ tunable_mbinit(void *dummy)
realmem = qmin((quad_t)physmem * PAGE_SIZE,
vm_map_max(kmem_map) - vm_map_min(kmem_map));
maxmbufmem = realmem / 2;
- TUNABLE_QUAD_FETCH("kern.maxmbufmem", &maxmbufmem);
+ TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem);
if (maxmbufmem > realmem / 4 * 3)
maxmbufmem = realmem / 4 * 3;
@@ -162,8 +166,7 @@ sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
if (newnmbclusters > nmbclusters &&
nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
nmbclusters = newnmbclusters;
- uma_zone_set_max(zone_clust, nmbclusters);
- nmbclusters = uma_zone_get_max(zone_clust);
+ nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
EVENTHANDLER_INVOKE(nmbclusters_change);
} else
error = EINVAL;
@@ -185,8 +188,7 @@ sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
if (newnmbjumbop > nmbjumbop &&
nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
nmbjumbop = newnmbjumbop;
- uma_zone_set_max(zone_jumbop, nmbjumbop);
- nmbjumbop = uma_zone_get_max(zone_jumbop);
+ nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
} else
error = EINVAL;
}
@@ -204,11 +206,10 @@ sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
newnmbjumbo9 = nmbjumbo9;
error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
if (error == 0 && req->newptr) {
- if (newnmbjumbo9 > nmbjumbo9&&
+ if (newnmbjumbo9 > nmbjumbo9 &&
nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
nmbjumbo9 = newnmbjumbo9;
- uma_zone_set_max(zone_jumbo9, nmbjumbo9);
- nmbjumbo9 = uma_zone_get_max(zone_jumbo9);
+ nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
} else
error = EINVAL;
}
@@ -229,8 +230,7 @@ sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
if (newnmbjumbo16 > nmbjumbo16 &&
nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
nmbjumbo16 = newnmbjumbo16;
- uma_zone_set_max(zone_jumbo16, nmbjumbo16);
- nmbjumbo16 = uma_zone_get_max(zone_jumbo16);
+ nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
} else
error = EINVAL;
}
@@ -250,21 +250,17 @@ sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
if (error == 0 && req->newptr) {
if (newnmbufs > nmbufs) {
nmbufs = newnmbufs;
- uma_zone_set_max(zone_mbuf, nmbufs);
- nmbufs = uma_zone_get_max(zone_mbuf);
+ nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
EVENTHANDLER_INVOKE(nmbufs_change);
} else
error = EINVAL;
}
return (error);
}
-SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbuf, CTLTYPE_INT|CTLFLAG_RW,
+SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW,
&nmbufs, 0, sysctl_nmbufs, "IU",
"Maximum number of mbufs allowed");
-SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
- "Mbuf general information and statistics");
-
/*
* Zones from which we allocate.
*/
@@ -384,25 +380,6 @@ mbuf_init(void *dummy)
*/
EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
EVENTHANDLER_PRI_FIRST);
-
- /*
- * [Re]set counters and local statistics knobs.
- * XXX Some of these should go and be replaced, but UMA stat
- * gathering needs to be revised.
- */
- mbstat.m_mbufs = 0;
- mbstat.m_mclusts = 0;
- mbstat.m_drain = 0;
- mbstat.m_msize = MSIZE;
- mbstat.m_mclbytes = MCLBYTES;
- mbstat.m_minclsize = MINCLSIZE;
- mbstat.m_mlen = MLEN;
- mbstat.m_mhlen = MHLEN;
- mbstat.m_numtypes = MT_NTYPES;
-
- mbstat.m_mcfail = mbstat.m_mpfail = 0;
- mbstat.sf_iocnt = 0;
- mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
}
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
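
A note on the two recurring changes above: uma_zone_set_max(9) returns the limit it actually installed (UMA may round the request up to fill whole slabs), so the separate uma_zone_get_max() read-back is redundant; and the maxmbufmem tunable moves under kern.ipc to match its new sysctl. A minimal loader.conf(5) sketch for the relocated tunable; the value here is purely illustrative:

    # /boot/loader.conf
    kern.ipc.maxmbufmem="1073741824"    # cap mbuf memory at 1 GB
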
diff --git a/sys/kern/kern_priv.c b/sys/kern/kern_priv.c
index fcd5993..4d266ab 100644
--- a/sys/kern/kern_priv.c
+++ b/sys/kern/kern_priv.c
@@ -142,6 +142,16 @@ priv_check_cred(struct ucred *cred, int priv, int flags)
}
/*
+ * Writes to kernel/physical memory are a typical root-only operation,
+ * but non-root users are expected to be able to read it (provided they
+ * have permission to access /dev/[k]mem).
+ */
+ if (priv == PRIV_KMEM_READ) {
+ error = 0;
+ goto out;
+ }
+
+ /*
* Now check with MAC, if enabled, to see if a policy module grants
* privilege.
*/
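
With this short-circuit, a read check now succeeds for any credential while writes keep requiring privilege. A minimal caller-side sketch of how a /dev/kmem-style driver could split the two checks; PRIV_KMEM_WRITE is assumed here as the write-side counterpart of the PRIV_KMEM_READ case added above:

    static int
    kmem_priv_check(struct thread *td, int writing)
    {

            /* reads are open to any credential; writes still need privilege */
            return (priv_check(td, writing ? PRIV_KMEM_WRITE :
                PRIV_KMEM_READ));
    }
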
diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c
index fa22272..ff397eb 100644
--- a/sys/kern/kern_rmlock.c
+++ b/sys/kern/kern_rmlock.c
@@ -704,6 +704,10 @@ _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker,
#endif
#ifdef INVARIANT_SUPPORT
+#ifndef INVARIANTS
+#undef _rm_assert
+#endif
+
/*
* Note that this does not need to use witness_assert() for read lock
* assertions since an exact count of read locks held by this thread
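
The #undef deals with a header convention shared by the lock primitives: when INVARIANTS is off, the assertion entry point is defined away as a macro, which would otherwise mangle the real function definition that INVARIANT_SUPPORT kernels still need to emit (so modules built with INVARIANTS can link against it). A self-contained illustration with hypothetical names, not the actual rmlock header:

    /* foo.h: the assert entry point vanishes without INVARIANTS */
    #ifdef INVARIANTS
    void    _foo_assert(struct foo *f, int what);
    #define foo_assert(f, what)     _foo_assert((f), (what))
    #else
    #define _foo_assert(f, what)    (void)0
    #define foo_assert(f, what)     (void)0
    #endif

    /* foo.c, built with INVARIANT_SUPPORT: undo the no-op macro so
     * the real symbol is still emitted for INVARIANTS modules */
    #ifndef INVARIANTS
    #undef  _foo_assert
    #endif
    void
    _foo_assert(struct foo *f, int what)
    {
            /* ... validate lock state ... */
    }
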
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 84b1c61..fb9c9bf 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -581,7 +581,7 @@ int
should_yield(void)
{
- return (ticks - curthread->td_swvoltick >= hogticks);
+ return ((unsigned int)(ticks - curthread->td_swvoltick) >= hogticks);
}
void
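
The cast makes the comparison wrap-safe: ticks is a 32-bit counter that overflows after enough uptime, at which point the plain signed difference can go negative even though many real ticks have elapsed. A minimal userland sketch of the idiom; the values are hypothetical and the delta is computed in unsigned arithmetic, which is well defined:

    #include <limits.h>
    #include <stdio.h>

    int
    main(void)
    {
            int then = INT_MAX - 2;         /* td_swvoltick, pre-wrap */
            int now = INT_MIN + 7;          /* ticks, post-wrap */
            unsigned int elapsed = (unsigned int)now - (unsigned int)then;

            printf("elapsed ticks: %u\n", elapsed);     /* prints 10 */
            return (0);
    }
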
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c
index b68c949..9e0cc06 100644
--- a/sys/kern/kern_time.c
+++ b/sys/kern/kern_time.c
@@ -297,14 +297,9 @@ get_cputime(struct thread *td, clockid_t clock_id, struct timespec *ats)
PROC_UNLOCK(td2->td_proc);
} else {
pid = clock_id & CPUCLOCK_ID_MASK;
- p2 = pfind(pid);
- if (p2 == NULL)
- return (EINVAL);
- error = p_cansee(td, p2);
- if (error) {
- PROC_UNLOCK(p2);
+ error = pget(pid, PGET_CANSEE, &p2);
+ if (error != 0)
return (EINVAL);
- }
get_process_cputime(p2, ats);
PROC_UNLOCK(p2);
}
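
pget(9) folds the pid lookup, the p_cansee() visibility check, and the error-path unlock into one call; on success the process is returned PROC_LOCK()ed and the caller must unlock it, as the surviving PROC_UNLOCK(p2) shows. The general calling pattern, sketched:

    struct proc *p;
    int error;

    error = pget(pid, PGET_CANSEE, &p);     /* p locked iff error == 0 */
    if (error == 0) {
            /* ... inspect p ... */
            PROC_UNLOCK(p);
    }
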
diff --git a/sys/kern/subr_bus.c b/sys/kern/subr_bus.c
index f196c8b..717ded4 100644
--- a/sys/kern/subr_bus.c
+++ b/sys/kern/subr_bus.c
@@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <sys/interrupt.h>
+#include <net/vnet.h>
+
#include <machine/stdarg.h>
#include <vm/uma.h>
@@ -2735,7 +2737,11 @@ device_probe_and_attach(device_t dev)
return (0);
else if (error != 0)
return (error);
- return (device_attach(dev));
+
+ CURVNET_SET_QUIET(vnet0);
+ error = device_attach(dev);
+ CURVNET_RESTORE();
+ return (error);
}
/**
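
On VIMAGE kernels every network-facing path needs a vnet context, and setting it once around device_attach() in the hunk above spares individual drivers from doing so themselves; without VIMAGE the macros compile away entirely. A sketch of the same set/restore idiom elsewhere (the vnet and interface name are hypothetical):

    /* any code running outside a vnet context (e.g. a kthread)
     * must borrow one before touching network state */
    CURVNET_SET(some_vnet);
    ifp = ifunit("em0");            /* per-vnet interface lookup */
    CURVNET_RESTORE();
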
diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c
index bf28a88..3b4d7a2 100644
--- a/sys/kern/subr_witness.c
+++ b/sys/kern/subr_witness.c
@@ -1138,12 +1138,16 @@ witness_checkorder(struct lock_object *lock, int flags, const char *file,
iclass = LOCK_CLASS(interlock);
lock1 = find_instance(lock_list, interlock);
if (lock1 == NULL)
- kassert_panic("interlock (%s) %s not locked @ %s:%d",
+ kassert_panic(
+ "interlock (%s) %s not locked while locking"
+ " %s @ %s:%d",
iclass->lc_name, interlock->lo_name,
flags & LOP_EXCLUSIVE ? "exclusive" : "shared",
fixup_filename(file), line);
else if ((lock1->li_flags & LI_RECURSEMASK) != 0)
- kassert_panic("interlock (%s) %s recursed @ %s:%d",
+ kassert_panic(
+ "interlock (%s) %s recursed while locking %s"
+ " @ %s:%d",
iclass->lc_name, interlock->lo_name,
flags & LOP_EXCLUSIVE ? "exclusive" : "shared",
fixup_filename(file), line);
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
index c369797..f555adf 100644
--- a/sys/kern/uipc_mbuf.c
+++ b/sys/kern/uipc_mbuf.c
@@ -649,13 +649,10 @@ m_copym(struct mbuf *m, int off0, int len, int wait)
m = m->m_next;
np = &n->m_next;
}
- if (top == NULL)
- mbstat.m_mcfail++; /* XXX: No consistency. */
return (top);
nospace:
m_freem(top);
- mbstat.m_mcfail++; /* XXX: No consistency. */
return (NULL);
}
@@ -860,7 +857,6 @@ m_copypacket(struct mbuf *m, int how)
return top;
nospace:
m_freem(top);
- mbstat.m_mcfail++; /* XXX: No consistency. */
return (NULL);
}
@@ -964,7 +960,6 @@ m_dup(struct mbuf *m, int how)
nospace:
m_freem(top);
- mbstat.m_mcfail++; /* XXX: No consistency. */
return (NULL);
}
@@ -1124,7 +1119,6 @@ m_pullup(struct mbuf *n, int len)
return (m);
bad:
m_freem(n);
- mbstat.m_mpfail++; /* XXX: No consistency. */
return (NULL);
}
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index a477820..46ceef2 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -115,6 +115,7 @@ static int getsockname1(struct thread *td, struct getsockname_args *uap,
static int getpeername1(struct thread *td, struct getpeername_args *uap,
int compat);
+counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
/*
* NSFBUFS-related variables and associated sysctls
*/
@@ -129,6 +130,27 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
"Number of sendfile(2) sf_bufs in use");
+static void
+sfstat_init(const void *unused)
+{
+
+ COUNTER_ARRAY_ALLOC(sfstat, sizeof(struct sfstat) / sizeof(uint64_t),
+ M_WAITOK);
+}
+SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL);
+
+static int
+sfstat_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct sfstat s;
+
+ COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t));
+ if (req->newptr)
+ COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t));
+ return (SYSCTL_OUT(req, &s, sizeof(s)));
+}
+SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW,
+ NULL, 0, sfstat_sysctl, "I", "sendfile statistics");
/*
* Convert a user file descriptor to a kernel file entry and check if required
* capability rights are present.
@@ -2241,7 +2263,7 @@ retry_space:
vm_page_io_finish(pg);
if (!error)
VM_OBJECT_WUNLOCK(obj);
- mbstat.sf_iocnt++;
+ SFSTAT_INC(sf_iocnt);
}
if (error) {
vm_page_lock(pg);
@@ -2273,7 +2295,7 @@ retry_space:
sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT :
SFB_CATCH);
if (sf == NULL) {
- mbstat.sf_allocfail++;
+ SFSTAT_INC(sf_allocfail);
vm_page_lock(pg);
vm_page_unwire(pg, 0);
KASSERT(pg->object != NULL,
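
The sendfile statistics move from mbstat fields to counter(9) per-CPU counters: the sfstat[] array gives each uint64_t slot of struct sfstat its own counter, which is what lets the handler above copy the whole set with COUNTER_ARRAY_COPY() and zero it on write. A sketch of how the SFSTAT_INC() accessor used in the hunks above can be built on that slot layout, assuming the companion header defines it along these lines:

    #define SFSTAT_ADD(name, val)                                   \
            counter_u64_add(sfstat[offsetof(struct sfstat, name) /  \
                sizeof(uint64_t)], (val))
    #define SFSTAT_INC(name)        SFSTAT_ADD(name, 1)
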
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index f6d5a85..205e9b3 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -474,10 +474,12 @@ runningbufwakeup(struct buf *bp)
{
long space, bspace;
- if (bp->b_runningbufspace == 0)
- return;
- space = atomic_fetchadd_long(&runningbufspace, -bp->b_runningbufspace);
bspace = bp->b_runningbufspace;
+ if (bspace == 0)
+ return;
+ space = atomic_fetchadd_long(&runningbufspace, -bspace);
+ KASSERT(space >= bspace, ("runningbufspace underflow %ld %ld",
+ space, bspace));
bp->b_runningbufspace = 0;
/*
* Only acquire the lock and wakeup on the transition from exceeding
@@ -561,7 +563,7 @@ waitrunningbufspace(void)
mtx_lock(&rbreqlock);
while (runningbufspace > hirunningspace) {
- ++runningbufreq;
+ runningbufreq = 1;
msleep(&runningbufreq, &rbreqlock, PVM, "wdrain", 0);
}
mtx_unlock(&rbreqlock);
@@ -1692,7 +1694,8 @@ brelse(struct buf *bp)
KASSERT(presid >= 0, ("brelse: extra page"));
VM_OBJECT_WLOCK(obj);
- vm_page_set_invalid(m, poffset, presid);
+ if (pmap_page_wired_mappings(m) == 0)
+ vm_page_set_invalid(m, poffset, presid);
VM_OBJECT_WUNLOCK(obj);
if (had_bogus)
printf("avoided corruption bug in bogus_page/brelse code\n");
@@ -4485,8 +4488,8 @@ bdata2bio(struct buf *bp, struct bio *bip)
bip->bio_flags |= BIO_UNMAPPED;
KASSERT(round_page(bip->bio_ma_offset + bip->bio_length) /
PAGE_SIZE == bp->b_npages,
- ("Buffer %p too short: %d %d %d", bp, bip->bio_ma_offset,
- bip->bio_length, bip->bio_ma_n));
+ ("Buffer %p too short: %d %lld %d", bp, bip->bio_ma_offset,
+ (long long)bip->bio_length, bip->bio_ma_n));
} else {
bip->bio_data = bp->b_data;
bip->bio_ma = NULL;
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 9eecc09..31ed545 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -68,10 +68,10 @@ SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, done, "struct vnode *",
"char *");
SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, entry, "struct vnode *");
SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, hit, "struct vnode *",
- "struct char *", "struct vnode *");
+ "char *", "struct vnode *");
SDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, miss, "struct vnode *");
SDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, return, "int",
- "struct vnode *", "struct char *");
+ "struct vnode *", "char *");
SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, hit, "struct vnode *", "char *",
"struct vnode *");
SDT_PROBE_DEFINE2(vfs, namecache, lookup, hit_negative, hit-negative,
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
index 448fdbb..493bb98 100644
--- a/sys/kern/vfs_mount.c
+++ b/sys/kern/vfs_mount.c
@@ -861,8 +861,9 @@ vfs_domount_first(
vfs_event_signal(NULL, VQ_MOUNT, 0);
if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp))
panic("mount: lost mount");
- VOP_UNLOCK(newdp, 0);
VOP_UNLOCK(vp, 0);
+ EVENTHANDLER_INVOKE(vfs_mounted, mp, newdp, td);
+ VOP_UNLOCK(newdp, 0);
mountcheckdirs(vp, newdp);
vrele(newdp);
if ((mp->mnt_flag & MNT_RDONLY) == 0)
@@ -1355,6 +1356,7 @@ dounmount(mp, flags, td)
mtx_lock(&mountlist_mtx);
TAILQ_REMOVE(&mountlist, mp, mnt_list);
mtx_unlock(&mountlist_mtx);
+ EVENTHANDLER_INVOKE(vfs_unmounted, mp, td);
if (coveredvp != NULL) {
coveredvp->v_mountedhere = NULL;
vput(coveredvp);
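
The new vfs_mounted and vfs_unmounted events let other subsystems observe mount activity without polling the mount list. A minimal consumer sketch; the handler and SYSINIT names are hypothetical, and the argument lists follow the EVENTHANDLER_INVOKE() calls above:

    static void
    mymod_mounted(void *arg, struct mount *mp, struct vnode *dp,
        struct thread *td)
    {

            printf("mounted: %s\n", mp->mnt_stat.f_mntonname);
    }

    static void
    mymod_init(const void *unused)
    {

            EVENTHANDLER_REGISTER(vfs_mounted, mymod_mounted, NULL,
                EVENTHANDLER_PRI_ANY);
    }
    SYSINIT(mymod, SI_SUB_VFS, SI_ORDER_ANY, mymod_init, NULL);
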
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index e64f379..e2c2813 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -3455,6 +3455,8 @@ vfs_msync(struct mount *mp, int flags)
static void
destroy_vpollinfo(struct vpollinfo *vi)
{
+
+ knlist_clear(&vi->vpi_selinfo.si_note, 1);
seldrain(&vi->vpi_selinfo);
knlist_destroy(&vi->vpi_selinfo.si_note);
mtx_destroy(&vi->vpi_lock);
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 7eac0ef..06e59f9 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -1668,8 +1668,7 @@ vn_finished_secondary_write(mp)
* Request a filesystem to suspend write operations.
*/
int
-vfs_write_suspend(mp)
- struct mount *mp;
+vfs_write_suspend(struct mount *mp, int flags)
{
int error;
@@ -1680,6 +1679,21 @@ vfs_write_suspend(mp)
}
while (mp->mnt_kern_flag & MNTK_SUSPEND)
msleep(&mp->mnt_flag, MNT_MTX(mp), PUSER - 1, "wsuspfs", 0);
+
+ /*
+ * Unmount holds a write reference on the mount point. If we
+ * own busy reference and drain for writers, we deadlock with
+ * the reference draining in the unmount path. Callers of
+ * vfs_write_suspend() must specify VS_SKIP_UNMOUNT if
+ * vfs_busy() reference is owned and caller is not in the
+ * unmount context.
+ */
+ if ((flags & VS_SKIP_UNMOUNT) != 0 &&
+ (mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
+ MNT_IUNLOCK(mp);
+ return (EBUSY);
+ }
+
mp->mnt_kern_flag |= MNTK_SUSPEND;
mp->mnt_susp_owner = curthread;
if (mp->mnt_writeopcount > 0)