diff options
Diffstat (limited to 'sys/kern')
-rw-r--r-- | sys/kern/kern_mbuf.c | 51 | ||||
-rw-r--r-- | sys/kern/kern_priv.c | 10 | ||||
-rw-r--r-- | sys/kern/kern_rmlock.c | 4 | ||||
-rw-r--r-- | sys/kern/kern_synch.c | 2 | ||||
-rw-r--r-- | sys/kern/kern_time.c | 9 | ||||
-rw-r--r-- | sys/kern/subr_bus.c | 8 | ||||
-rw-r--r-- | sys/kern/subr_witness.c | 8 | ||||
-rw-r--r-- | sys/kern/uipc_mbuf.c | 6 | ||||
-rw-r--r-- | sys/kern/uipc_syscalls.c | 26 | ||||
-rw-r--r-- | sys/kern/vfs_bio.c | 17 | ||||
-rw-r--r-- | sys/kern/vfs_cache.c | 4 | ||||
-rw-r--r-- | sys/kern/vfs_mount.c | 4 | ||||
-rw-r--r-- | sys/kern/vfs_subr.c | 2 | ||||
-rw-r--r-- | sys/kern/vfs_vnops.c | 18 |
14 files changed, 101 insertions, 68 deletions
diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c index 1762b72..9e85806 100644 --- a/sys/kern/kern_mbuf.c +++ b/sys/kern/kern_mbuf.c @@ -102,7 +102,11 @@ int nmbclusters; /* limits number of mbuf clusters */ int nmbjumbop; /* limits number of page size jumbo clusters */ int nmbjumbo9; /* limits number of 9k jumbo clusters */ int nmbjumbo16; /* limits number of 16k jumbo clusters */ -struct mbstat mbstat; + +static quad_t maxmbufmem; /* overall real memory limit for all mbufs */ + +SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN, &maxmbufmem, 0, + "Maximum real memory allocateable to various mbuf types"); /* * tunable_mbinit() has to be run before any mbuf allocations are done. @@ -110,7 +114,7 @@ struct mbstat mbstat; static void tunable_mbinit(void *dummy) { - quad_t realmem, maxmbufmem; + quad_t realmem; /* * The default limit for all mbuf related memory is 1/2 of all @@ -120,7 +124,7 @@ tunable_mbinit(void *dummy) realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_map_max(kmem_map) - vm_map_min(kmem_map)); maxmbufmem = realmem / 2; - TUNABLE_QUAD_FETCH("kern.maxmbufmem", &maxmbufmem); + TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem); if (maxmbufmem > realmem / 4 * 3) maxmbufmem = realmem / 4 * 3; @@ -162,8 +166,7 @@ sysctl_nmbclusters(SYSCTL_HANDLER_ARGS) if (newnmbclusters > nmbclusters && nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbclusters = newnmbclusters; - uma_zone_set_max(zone_clust, nmbclusters); - nmbclusters = uma_zone_get_max(zone_clust); + nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); EVENTHANDLER_INVOKE(nmbclusters_change); } else error = EINVAL; @@ -185,8 +188,7 @@ sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS) if (newnmbjumbop > nmbjumbop && nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbjumbop = newnmbjumbop; - uma_zone_set_max(zone_jumbop, nmbjumbop); - nmbjumbop = uma_zone_get_max(zone_jumbop); + nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); } else error = EINVAL; } @@ -204,11 +206,10 @@ sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) newnmbjumbo9 = nmbjumbo9; error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); if (error == 0 && req->newptr) { - if (newnmbjumbo9 > nmbjumbo9&& + if (newnmbjumbo9 > nmbjumbo9 && nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbjumbo9 = newnmbjumbo9; - uma_zone_set_max(zone_jumbo9, nmbjumbo9); - nmbjumbo9 = uma_zone_get_max(zone_jumbo9); + nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); } else error = EINVAL; } @@ -229,8 +230,7 @@ sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS) if (newnmbjumbo16 > nmbjumbo16 && nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbjumbo16 = newnmbjumbo16; - uma_zone_set_max(zone_jumbo16, nmbjumbo16); - nmbjumbo16 = uma_zone_get_max(zone_jumbo16); + nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); } else error = EINVAL; } @@ -250,21 +250,17 @@ sysctl_nmbufs(SYSCTL_HANDLER_ARGS) if (error == 0 && req->newptr) { if (newnmbufs > nmbufs) { nmbufs = newnmbufs; - uma_zone_set_max(zone_mbuf, nmbufs); - nmbufs = uma_zone_get_max(zone_mbuf); + nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); EVENTHANDLER_INVOKE(nmbufs_change); } else error = EINVAL; } return (error); } -SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbuf, CTLTYPE_INT|CTLFLAG_RW, +SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW, &nmbufs, 0, sysctl_nmbufs, "IU", "Maximum number of mbufs allowed"); -SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat, - "Mbuf general information and statistics"); - /* * Zones from which we allocate. */ @@ -384,25 +380,6 @@ mbuf_init(void *dummy) */ EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, EVENTHANDLER_PRI_FIRST); - - /* - * [Re]set counters and local statistics knobs. - * XXX Some of these should go and be replaced, but UMA stat - * gathering needs to be revised. - */ - mbstat.m_mbufs = 0; - mbstat.m_mclusts = 0; - mbstat.m_drain = 0; - mbstat.m_msize = MSIZE; - mbstat.m_mclbytes = MCLBYTES; - mbstat.m_minclsize = MINCLSIZE; - mbstat.m_mlen = MLEN; - mbstat.m_mhlen = MHLEN; - mbstat.m_numtypes = MT_NTYPES; - - mbstat.m_mcfail = mbstat.m_mpfail = 0; - mbstat.sf_iocnt = 0; - mbstat.sf_allocwait = mbstat.sf_allocfail = 0; } SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); diff --git a/sys/kern/kern_priv.c b/sys/kern/kern_priv.c index fcd5993..4d266ab 100644 --- a/sys/kern/kern_priv.c +++ b/sys/kern/kern_priv.c @@ -142,6 +142,16 @@ priv_check_cred(struct ucred *cred, int priv, int flags) } /* + * Writes to kernel/physical memory are a typical root-only operation, + * but non-root users are expected to be able to read it (provided they + * have permission to access /dev/[k]mem). + */ + if (priv == PRIV_KMEM_READ) { + error = 0; + goto out; + } + + /* * Now check with MAC, if enabled, to see if a policy module grants * privilege. */ diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c index fa22272..ff397eb 100644 --- a/sys/kern/kern_rmlock.c +++ b/sys/kern/kern_rmlock.c @@ -704,6 +704,10 @@ _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, #endif #ifdef INVARIANT_SUPPORT +#ifndef INVARIANTS +#undef _rm_assert +#endif + /* * Note that this does not need to use witness_assert() for read lock * assertions since an exact count of read locks held by this thread diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 84b1c61..fb9c9bf 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -581,7 +581,7 @@ int should_yield(void) { - return (ticks - curthread->td_swvoltick >= hogticks); + return ((unsigned int)(ticks - curthread->td_swvoltick) >= hogticks); } void diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c index b68c949..9e0cc06 100644 --- a/sys/kern/kern_time.c +++ b/sys/kern/kern_time.c @@ -297,14 +297,9 @@ get_cputime(struct thread *td, clockid_t clock_id, struct timespec *ats) PROC_UNLOCK(td2->td_proc); } else { pid = clock_id & CPUCLOCK_ID_MASK; - p2 = pfind(pid); - if (p2 == NULL) - return (EINVAL); - error = p_cansee(td, p2); - if (error) { - PROC_UNLOCK(p2); + error = pget(pid, PGET_CANSEE, &p2); + if (error != 0) return (EINVAL); - } get_process_cputime(p2, ats); PROC_UNLOCK(p2); } diff --git a/sys/kern/subr_bus.c b/sys/kern/subr_bus.c index f196c8b..717ded4 100644 --- a/sys/kern/subr_bus.c +++ b/sys/kern/subr_bus.c @@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$"); #include <sys/bus.h> #include <sys/interrupt.h> +#include <net/vnet.h> + #include <machine/stdarg.h> #include <vm/uma.h> @@ -2735,7 +2737,11 @@ device_probe_and_attach(device_t dev) return (0); else if (error != 0) return (error); - return (device_attach(dev)); + + CURVNET_SET_QUIET(vnet0); + error = device_attach(dev); + CURVNET_RESTORE(); + return error; } /** diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c index bf28a88..3b4d7a2 100644 --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -1138,12 +1138,16 @@ witness_checkorder(struct lock_object *lock, int flags, const char *file, iclass = LOCK_CLASS(interlock); lock1 = find_instance(lock_list, interlock); if (lock1 == NULL) - kassert_panic("interlock (%s) %s not locked @ %s:%d", + kassert_panic( + "interlock (%s) %s not locked while locking" + " %s @ %s:%d", iclass->lc_name, interlock->lo_name, flags & LOP_EXCLUSIVE ? "exclusive" : "shared", fixup_filename(file), line); else if ((lock1->li_flags & LI_RECURSEMASK) != 0) - kassert_panic("interlock (%s) %s recursed @ %s:%d", + kassert_panic( + "interlock (%s) %s recursed while locking %s" + " @ %s:%d", iclass->lc_name, interlock->lo_name, flags & LOP_EXCLUSIVE ? "exclusive" : "shared", fixup_filename(file), line); diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c index c369797..f555adf 100644 --- a/sys/kern/uipc_mbuf.c +++ b/sys/kern/uipc_mbuf.c @@ -649,13 +649,10 @@ m_copym(struct mbuf *m, int off0, int len, int wait) m = m->m_next; np = &n->m_next; } - if (top == NULL) - mbstat.m_mcfail++; /* XXX: No consistency. */ return (top); nospace: m_freem(top); - mbstat.m_mcfail++; /* XXX: No consistency. */ return (NULL); } @@ -860,7 +857,6 @@ m_copypacket(struct mbuf *m, int how) return top; nospace: m_freem(top); - mbstat.m_mcfail++; /* XXX: No consistency. */ return (NULL); } @@ -964,7 +960,6 @@ m_dup(struct mbuf *m, int how) nospace: m_freem(top); - mbstat.m_mcfail++; /* XXX: No consistency. */ return (NULL); } @@ -1124,7 +1119,6 @@ m_pullup(struct mbuf *n, int len) return (m); bad: m_freem(n); - mbstat.m_mpfail++; /* XXX: No consistency. */ return (NULL); } diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index a477820..46ceef2 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -115,6 +115,7 @@ static int getsockname1(struct thread *td, struct getsockname_args *uap, static int getpeername1(struct thread *td, struct getpeername_args *uap, int compat); +counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; /* * NSFBUFS-related variables and associated sysctls */ @@ -129,6 +130,27 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, "Number of sendfile(2) sf_bufs in use"); +static void +sfstat_init(const void *unused) +{ + + COUNTER_ARRAY_ALLOC(sfstat, sizeof(struct sfstat) / sizeof(uint64_t), + M_WAITOK); +} +SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL); + +static int +sfstat_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct sfstat s; + + COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t)); + if (req->newptr) + COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t)); + return (SYSCTL_OUT(req, &s, sizeof(s))); +} +SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW, + NULL, 0, sfstat_sysctl, "I", "sendfile statistics"); /* * Convert a user file descriptor to a kernel file entry and check if required * capability rights are present. @@ -2241,7 +2263,7 @@ retry_space: vm_page_io_finish(pg); if (!error) VM_OBJECT_WUNLOCK(obj); - mbstat.sf_iocnt++; + SFSTAT_INC(sf_iocnt); } if (error) { vm_page_lock(pg); @@ -2273,7 +2295,7 @@ retry_space: sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT : SFB_CATCH); if (sf == NULL) { - mbstat.sf_allocfail++; + SFSTAT_INC(sf_allocfail); vm_page_lock(pg); vm_page_unwire(pg, 0); KASSERT(pg->object != NULL, diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index f6d5a85..205e9b3 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -474,10 +474,12 @@ runningbufwakeup(struct buf *bp) { long space, bspace; - if (bp->b_runningbufspace == 0) - return; - space = atomic_fetchadd_long(&runningbufspace, -bp->b_runningbufspace); bspace = bp->b_runningbufspace; + if (bspace == 0) + return; + space = atomic_fetchadd_long(&runningbufspace, -bspace); + KASSERT(space >= bspace, ("runningbufspace underflow %ld %ld", + space, bspace)); bp->b_runningbufspace = 0; /* * Only acquire the lock and wakeup on the transition from exceeding @@ -561,7 +563,7 @@ waitrunningbufspace(void) mtx_lock(&rbreqlock); while (runningbufspace > hirunningspace) { - ++runningbufreq; + runningbufreq = 1; msleep(&runningbufreq, &rbreqlock, PVM, "wdrain", 0); } mtx_unlock(&rbreqlock); @@ -1692,7 +1694,8 @@ brelse(struct buf *bp) KASSERT(presid >= 0, ("brelse: extra page")); VM_OBJECT_WLOCK(obj); - vm_page_set_invalid(m, poffset, presid); + if (pmap_page_wired_mappings(m) == 0) + vm_page_set_invalid(m, poffset, presid); VM_OBJECT_WUNLOCK(obj); if (had_bogus) printf("avoided corruption bug in bogus_page/brelse code\n"); @@ -4485,8 +4488,8 @@ bdata2bio(struct buf *bp, struct bio *bip) bip->bio_flags |= BIO_UNMAPPED; KASSERT(round_page(bip->bio_ma_offset + bip->bio_length) / PAGE_SIZE == bp->b_npages, - ("Buffer %p too short: %d %d %d", bp, bip->bio_ma_offset, - bip->bio_length, bip->bio_ma_n)); + ("Buffer %p too short: %d %lld %d", bp, bip->bio_ma_offset, + (long long)bip->bio_length, bip->bio_ma_n)); } else { bip->bio_data = bp->b_data; bip->bio_ma = NULL; diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 9eecc09..31ed545 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -68,10 +68,10 @@ SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, done, "struct vnode *", "char *"); SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, entry, "struct vnode *"); SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, hit, "struct vnode *", - "struct char *", "struct vnode *"); + "char *", "struct vnode *"); SDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, miss, "struct vnode *"); SDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, return, "int", - "struct vnode *", "struct char *"); + "struct vnode *", "char *"); SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, hit, "struct vnode *", "char *", "struct vnode *"); SDT_PROBE_DEFINE2(vfs, namecache, lookup, hit_negative, hit-negative, diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index 448fdbb..493bb98 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -861,8 +861,9 @@ vfs_domount_first( vfs_event_signal(NULL, VQ_MOUNT, 0); if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp)) panic("mount: lost mount"); - VOP_UNLOCK(newdp, 0); VOP_UNLOCK(vp, 0); + EVENTHANDLER_INVOKE(vfs_mounted, mp, newdp, td); + VOP_UNLOCK(newdp, 0); mountcheckdirs(vp, newdp); vrele(newdp); if ((mp->mnt_flag & MNT_RDONLY) == 0) @@ -1355,6 +1356,7 @@ dounmount(mp, flags, td) mtx_lock(&mountlist_mtx); TAILQ_REMOVE(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); + EVENTHANDLER_INVOKE(vfs_unmounted, mp, td); if (coveredvp != NULL) { coveredvp->v_mountedhere = NULL; vput(coveredvp); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index e64f379..e2c2813 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -3455,6 +3455,8 @@ vfs_msync(struct mount *mp, int flags) static void destroy_vpollinfo(struct vpollinfo *vi) { + + knlist_clear(&vi->vpi_selinfo.si_note, 1); seldrain(&vi->vpi_selinfo); knlist_destroy(&vi->vpi_selinfo.si_note); mtx_destroy(&vi->vpi_lock); diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 7eac0ef..06e59f9 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -1668,8 +1668,7 @@ vn_finished_secondary_write(mp) * Request a filesystem to suspend write operations. */ int -vfs_write_suspend(mp) - struct mount *mp; +vfs_write_suspend(struct mount *mp, int flags) { int error; @@ -1680,6 +1679,21 @@ vfs_write_suspend(mp) } while (mp->mnt_kern_flag & MNTK_SUSPEND) msleep(&mp->mnt_flag, MNT_MTX(mp), PUSER - 1, "wsuspfs", 0); + + /* + * Unmount holds a write reference on the mount point. If we + * own busy reference and drain for writers, we deadlock with + * the reference draining in the unmount path. Callers of + * vfs_write_suspend() must specify VS_SKIP_UNMOUNT if + * vfs_busy() reference is owned and caller is not in the + * unmount context. + */ + if ((flags & VS_SKIP_UNMOUNT) != 0 && + (mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) { + MNT_IUNLOCK(mp); + return (EBUSY); + } + mp->mnt_kern_flag |= MNTK_SUSPEND; mp->mnt_susp_owner = curthread; if (mp->mnt_writeopcount > 0) |