diff options
63 files changed, 948 insertions, 267 deletions
diff --git a/contrib/libcxxrt/libelftc_dem_gnu3.c b/contrib/libcxxrt/libelftc_dem_gnu3.c index 70ef3e8..2b38c73 100644 --- a/contrib/libcxxrt/libelftc_dem_gnu3.c +++ b/contrib/libcxxrt/libelftc_dem_gnu3.c @@ -2842,7 +2842,7 @@ again: case 'w': /* wchar_t */ - if (!cpp_demangle_push_str(ddata, "wchar_t", 6)) + if (!cpp_demangle_push_str(ddata, "wchar_t", 7)) goto clean; ++ddata->cur; goto rtn; diff --git a/etc/ntp/leap-seconds b/etc/ntp/leap-seconds index 4fab58f..c31d19a 100644 --- a/etc/ntp/leap-seconds +++ b/etc/ntp/leap-seconds @@ -130,7 +130,7 @@ # Washington, DC # jeffrey.prillaman@usno.navy.mil # -# Last Update of leap second values: 11 Jan 2016 +# Last Update of leap second values: 6 Jul 2016 # # The following line shows this last update date in NTP timestamp # format. This is the date on which the most recent change to @@ -138,7 +138,7 @@ # be identified by the unique pair of characters in the first two # columns as shown below. # -#$ 3661459200 +#$ 3676752000 # # The data in this file will be updated periodically as new leap # seconds are announced. In addition to being entered on the line @@ -170,10 +170,10 @@ # current -- the update time stamp, the data and the name of the file # will not change. # -# Updated through IERS Bulletin C 51 -# File expires on: 1 Dec 2016 +# Updated through IERS Bulletin C 52 +# File expires on: 1 Jun 2017 # -#@ 3689539200 +#@ 3705264000 # 2272060800 10 # 1 Jan 1972 2287785600 11 # 1 Jul 1972 @@ -202,6 +202,7 @@ 3439756800 34 # 1 Jan 2009 3550089600 35 # 1 Jul 2012 3644697600 36 # 1 Jul 2015 +3692217600 37 # 1 Jan 2017 # # the following special comment contains the # hash value of the data in this file computed @@ -217,5 +218,5 @@ # the hash line is also ignored in the # computation. 
# -#h 63b4df04 0907d94f 2dadb7a1 684f7767 2a372421 +#h 63f8fea8 587c099d abcf130a ad525eae 3e105052 # diff --git a/lib/libc/gen/getnetgrent.c b/lib/libc/gen/getnetgrent.c index 8aa75e2..a8cc4c9 100644 --- a/lib/libc/gen/getnetgrent.c +++ b/lib/libc/gen/getnetgrent.c @@ -559,6 +559,10 @@ read_for_group(const char *group) continue; } } + if (strlen(result) == 0) { + free(result); + return (NULL); + } snprintf(line, LINSIZ, "%s %s", group, result); free(result); } diff --git a/lib/libthr/thread/thr_init.c b/lib/libthr/thread/thr_init.c index 77c2cf7..b5d1261 100644 --- a/lib/libthr/thread/thr_init.c +++ b/lib/libthr/thread/thr_init.c @@ -438,7 +438,6 @@ init_private(void) _thr_urwlock_init(&_thr_atfork_lock); _thr_umutex_init(&_thr_event_lock); _thr_umutex_init(&_suspend_all_lock); - _thr_once_init(); _thr_spinlock_init(); _thr_list_init(); _thr_wake_addr_init(); diff --git a/lib/libthr/thread/thr_once.c b/lib/libthr/thread/thr_once.c index 4f70374..4492777 100644 --- a/lib/libthr/thread/thr_once.c +++ b/lib/libthr/thread/thr_once.c @@ -92,8 +92,3 @@ _pthread_once(pthread_once_t *once_control, void (*init_routine) (void)) _thr_umtx_wake(&once_control->state, INT_MAX, 0); return (0); } - -void -_thr_once_init() -{ -} diff --git a/lib/libthr/thread/thr_private.h b/lib/libthr/thread/thr_private.h index e2e8b0f..be626d9 100644 --- a/lib/libthr/thread/thr_private.h +++ b/lib/libthr/thread/thr_private.h @@ -773,7 +773,6 @@ void _thr_link(struct pthread *, struct pthread *) __hidden; void _thr_unlink(struct pthread *, struct pthread *) __hidden; void _thr_assert_lock_level(void) __hidden __dead2; void _thr_ast(struct pthread *) __hidden; -void _thr_once_init(void) __hidden; void _thr_report_creation(struct pthread *curthread, struct pthread *newthread) __hidden; void _thr_report_death(struct pthread *curthread) __hidden; diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile index 44d6a83..1d85ef5 100644 --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ 
-994,6 +994,8 @@ MLINKS+=mutex.9 mtx_assert.9 \ mutex.9 MTX_SYSINIT.9 \ mutex.9 mtx_trylock.9 \ mutex.9 mtx_trylock_flags.9 \ + mutex.9 mtx_trylock_spin.9 \ + mutex.9 mtx_trylock_spin_flags.9 \ mutex.9 mtx_unlock.9 \ mutex.9 mtx_unlock_flags.9 \ mutex.9 mtx_unlock_spin.9 \ diff --git a/share/man/man9/mutex.9 b/share/man/man9/mutex.9 index c7cc1ef..5865165 100644 --- a/share/man/man9/mutex.9 +++ b/share/man/man9/mutex.9 @@ -28,7 +28,7 @@ .\" from BSDI $Id: mutex.4,v 1.1.2.3 1998/04/27 22:53:13 ewv Exp $ .\" $FreeBSD$ .\" -.Dd November 16, 2011 +.Dd July 18, 2016 .Dt MUTEX 9 .Os .Sh NAME @@ -41,6 +41,8 @@ .Nm mtx_lock_spin_flags , .Nm mtx_trylock , .Nm mtx_trylock_flags , +.Nm mtx_trylock_spin , +.Nm mtx_trylock_spin_flags , .Nm mtx_unlock , .Nm mtx_unlock_spin , .Nm mtx_unlock_flags , @@ -73,6 +75,10 @@ .Ft int .Fn mtx_trylock_flags "struct mtx *mutex" "int flags" +.Ft int +.Fn mtx_trylock_spin "struct mtx *mutex" +.Ft int +.Fn mtx_trylock_spin_flags "struct mtx *mutex" "int flags" .Ft void .Fn mtx_unlock "struct mtx *mutex" .Ft void .Fn mtx_unlock_spin "struct mtx *mutex" @@ -245,26 +251,33 @@ argument, then the mutex can be acquired recursively. .Pp The .Fn mtx_trylock -attempts to acquire the +and +.Fn mtx_trylock_spin +functions attempt to acquire a .Dv MTX_DEF -mutex pointed to by +or +.Dv MTX_SPIN +mutex, respectively, pointed to by .Fa mutex . -If the mutex cannot be immediately acquired -.Fn mtx_trylock -will return 0, -otherwise the mutex will be acquired -and a non-zero value will be returned. +If the mutex cannot be immediately acquired, the functions will return 0, +otherwise the mutex will be acquired and a non-zero value will be returned. 
.Pp The .Fn mtx_trylock_flags -function has the same behavior as +and +.Fn mtx_trylock_spin_flags +functions have the same behavior as .Fn mtx_trylock -but should be used when the caller desires to pass in a +and +.Fn mtx_trylock_spin +respectively, but should be used when the caller desires to pass in a .Fa flags value. Presently, the only valid value in the .Fn mtx_trylock -case is +and +.Fn mtx_trylock_spin +cases is .Dv MTX_QUIET , and its effects are identical to those described for .Fn mtx_lock @@ -443,6 +456,13 @@ while any spin lock is held. .It Dv MTX_RECURSE Specifies that the initialized mutex is allowed to recurse. This bit must be present if the mutex is permitted to recurse. +.Pp +Note that neither +.Fn mtx_trylock +nor +.Fn mtx_trylock_spin +support recursion; +that is, attempting to acquire an already-owned mutex fails. .It Dv MTX_QUIET Do not log any mutex operations for this lock. .It Dv MTX_NOWITNESS @@ -528,3 +548,7 @@ functions appeared in .Bsx 4.1 and .Fx 5.0 . +The +.Fn mtx_trylock_spin +function was added in +.Fx 12.0 . 
diff --git a/sys/amd64/include/intr_machdep.h b/sys/amd64/include/intr_machdep.h index fb71b5a..7626c81 100644 --- a/sys/amd64/include/intr_machdep.h +++ b/sys/amd64/include/intr_machdep.h @@ -144,6 +144,8 @@ struct nmi_pcpu { extern struct mtx icu_lock; extern int elcr_found; +extern int msix_disable_migration; + #ifndef DEV_ATPIC void atpic_reset(void); #endif diff --git a/sys/conf/files b/sys/conf/files index 51d9fde..8add482 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1785,6 +1785,7 @@ dev/jme/if_jme.c optional jme pci dev/joy/joy.c optional joy dev/joy/joy_isa.c optional joy isa dev/joy/joy_pccard.c optional joy pccard +dev/kbd/kbd.c optional atkbd | pckbd | sc | ukbd | vt dev/kbdmux/kbdmux.c optional kbdmux dev/ksyms/ksyms.c optional ksyms dev/le/am7990.c optional le diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index e8d9c7d..e6b1e90 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -286,7 +286,6 @@ dev/hyperv/vmbus/vmbus.c optional hyperv dev/hyperv/vmbus/vmbus_et.c optional hyperv dev/hyperv/vmbus/amd64/hyperv_machdep.c optional hyperv dev/hyperv/vmbus/amd64/vmbus_vector.S optional hyperv -dev/kbd/kbd.c optional atkbd | sc | ukbd | vt dev/lindev/full.c optional lindev dev/lindev/lindev.c optional lindev dev/nfe/if_nfe.c optional nfe pci diff --git a/sys/conf/files.arm b/sys/conf/files.arm index ecb7f85..0d3000d 100644 --- a/sys/conf/files.arm +++ b/sys/conf/files.arm @@ -66,7 +66,6 @@ crypto/blowfish/bf_enc.c optional crypto | ipsec crypto/des/des_enc.c optional crypto | ipsec | netsmb dev/fb/fb.c optional sc dev/hwpmc/hwpmc_arm.c optional hwpmc -dev/kbd/kbd.c optional sc | vt dev/syscons/scgfbrndr.c optional sc dev/syscons/scterm-teken.c optional sc dev/syscons/scvtb.c optional sc diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 19b44ef..fe93e68 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -279,7 +279,6 @@ dev/ipmi/ipmi_smbios.c optional ipmi dev/ipmi/ipmi_ssif.c optional ipmi smbus 
dev/ipmi/ipmi_pci.c optional ipmi pci dev/ipmi/ipmi_linux.c optional ipmi compat_linux -dev/kbd/kbd.c optional atkbd | sc | ukbd | vt dev/le/if_le_isa.c optional le isa dev/lindev/full.c optional lindev dev/lindev/lindev.c optional lindev diff --git a/sys/conf/files.ia64 b/sys/conf/files.ia64 index 79caeaf..566cd9e 100644 --- a/sys/conf/files.ia64 +++ b/sys/conf/files.ia64 @@ -52,7 +52,6 @@ dev/fb/fb.c optional fb | vga dev/fb/vga.c optional vga dev/hwpmc/hwpmc_ia64.c optional hwpmc dev/io/iodev.c optional io -dev/kbd/kbd.c optional atkbd | sc | ukbd dev/syscons/scterm-teken.c optional sc dev/syscons/scvgarndr.c optional sc vga dev/syscons/scvtb.c optional sc diff --git a/sys/conf/files.mips b/sys/conf/files.mips index 6522bb2..148fcf9 100644 --- a/sys/conf/files.mips +++ b/sys/conf/files.mips @@ -68,7 +68,6 @@ dev/cfe/cfe_env.c optional cfe_env # syscons support dev/fb/fb.c optional sc -dev/kbd/kbd.c optional sc dev/syscons/scgfbrndr.c optional sc dev/syscons/scterm-teken.c optional sc dev/syscons/scvtb.c optional sc diff --git a/sys/conf/files.pc98 b/sys/conf/files.pc98 index 598743a..533c58e 100644 --- a/sys/conf/files.pc98 +++ b/sys/conf/files.pc98 @@ -126,7 +126,6 @@ dev/hwpmc/hwpmc_ppro.c optional hwpmc dev/hwpmc/hwpmc_tsc.c optional hwpmc dev/hwpmc/hwpmc_x86.c optional hwpmc dev/io/iodev.c optional io -dev/kbd/kbd.c optional pckbd | sc | ukbd dev/le/if_le_cbus.c optional le isa dev/lindev/full.c optional lindev dev/lindev/lindev.c optional lindev diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc index 771968a..cde5490 100644 --- a/sys/conf/files.powerpc +++ b/sys/conf/files.powerpc @@ -39,7 +39,6 @@ dev/iicbus/adt746x.c optional adt746x powermac dev/iicbus/ds1631.c optional ds1631 powermac dev/iicbus/ds1775.c optional ds1775 powermac dev/iicbus/max6690.c optional max6690 powermac -dev/kbd/kbd.c optional sc | vt dev/nand/nfc_fsl.c optional nand mpc85xx # ofw can be either aim or fdt: fdt case handled in files. aim only powerpc specific. 
dev/ofw/openfirm.c optional aim diff --git a/sys/conf/files.sparc64 b/sys/conf/files.sparc64 index d62a893..5099ff4 100644 --- a/sys/conf/files.sparc64 +++ b/sys/conf/files.sparc64 @@ -40,7 +40,6 @@ dev/fb/fb.c optional sc dev/fb/gallant12x22.c optional sc dev/fb/machfb.c optional machfb sc dev/hwpmc/hwpmc_sparc64.c optional hwpmc -dev/kbd/kbd.c optional atkbd | sc | ukbd | vt dev/le/if_le_lebuffer.c optional le sbus dev/le/if_le_ledma.c optional le sbus dev/le/lebuffer_sbus.c optional le sbus diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index 6711c06..46f3e48 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -4436,6 +4436,7 @@ em_setup_receive_ring(struct rx_ring *rxr) addr = PNMB(na, slot + si, &paddr); netmap_load_map(na, rxr->rxtag, rxbuf->map, addr); + rxbuf->paddr = paddr; em_setup_rxdesc(&rxr->rx_base[j], rxbuf); continue; } diff --git a/sys/dev/fb/vesa.c b/sys/dev/fb/vesa.c index 1db24bd..bd4b261 100644 --- a/sys/dev/fb/vesa.c +++ b/sys/dev/fb/vesa.c @@ -79,6 +79,7 @@ struct adp_state { typedef struct adp_state adp_state_t; static struct mtx vesa_lock; +MTX_SYSINIT(vesa_lock, &vesa_lock, "VESA lock", MTX_DEF); static int vesa_state; static void *vesa_state_buf; @@ -1915,8 +1916,6 @@ vesa_load(void) if (vesa_init_done) return (0); - mtx_init(&vesa_lock, "VESA lock", NULL, MTX_DEF); - /* locate a VGA adapter */ vesa_adp = NULL; error = vesa_configure(0); @@ -1955,7 +1954,6 @@ vesa_unload(void) } vesa_bios_uninit(); - mtx_destroy(&vesa_lock); return (error); } diff --git a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c index d9c29e3..936e4e1 100644 --- a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c +++ b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c @@ -2062,8 +2062,8 @@ storvsc_io_done(struct hv_storvsc_request *reqp) * For more information about INQUIRY, please refer to: * ftp://ftp.avc-pioneer.com/Mtfuji_7/Proposal/Jun09/INQUIRY.pdf */ - const struct scsi_inquiry_data 
*inq_data = - (const struct scsi_inquiry_data *)csio->data_ptr; + struct scsi_inquiry_data *inq_data = + (struct scsi_inquiry_data *)csio->data_ptr; uint8_t* resp_buf = (uint8_t*)csio->data_ptr; /* Get the buffer length reported by host */ int resp_xfer_len = vm_srb->transfer_len; @@ -2092,6 +2092,25 @@ storvsc_io_done(struct hv_storvsc_request *reqp) mtx_unlock(&sc->hs_lock); } } else { + char vendor[16]; + cam_strvis(vendor, inq_data->vendor, sizeof(inq_data->vendor), + sizeof(vendor)); + /** + * XXX: upgrade SPC2 to SPC3 if host is WIN8 or WIN2012 R2 + * in order to support UNMAP feature + */ + if (!strncmp(vendor,"Msft",4) && + SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 && + (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 || + vmstor_proto_version== VMSTOR_PROTOCOL_VERSION_WIN8)) { + inq_data->version = SCSI_REV_SPC3; + if (bootverbose) { + mtx_lock(&sc->hs_lock); + xpt_print(ccb->ccb_h.path, + "storvsc upgrades SPC2 to SPC3\n"); + mtx_unlock(&sc->hs_lock); + } + } ccb->ccb_h.status |= CAM_REQ_CMP; if (bootverbose) { mtx_lock(&sc->hs_lock); diff --git a/sys/dev/pty/pty.c b/sys/dev/pty/pty.c index 5036cb2..ad34e11 100644 --- a/sys/dev/pty/pty.c +++ b/sys/dev/pty/pty.c @@ -52,10 +52,10 @@ __FBSDID("$FreeBSD$"); * binary emulation. */ -static unsigned int pty_warningcnt = 1; +static unsigned pty_warningcnt = 1; SYSCTL_UINT(_kern, OID_AUTO, tty_pty_warningcnt, CTLFLAG_RW, - &pty_warningcnt, 0, - "Warnings that will be triggered upon legacy PTY allocation"); + &pty_warningcnt, 0, + "Warnings that will be triggered upon legacy PTY allocation"); static int ptydev_fdopen(struct cdev *dev, int fflags, struct thread *td, struct file *fp) @@ -77,12 +77,7 @@ ptydev_fdopen(struct cdev *dev, int fflags, struct thread *td, struct file *fp) } /* Raise a warning when a legacy PTY has been allocated. */ - if (pty_warningcnt > 0) { - pty_warningcnt--; - log(LOG_INFO, "pid %d (%s) is using legacy pty devices%s\n", - td->td_proc->p_pid, td->td_name, - pty_warningcnt ? 
"" : " - not logging anymore"); - } + counted_warning(&pty_warningcnt, "is using legacy pty devices"); return (0); } diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c index cc85e1c..62b59ba 100644 --- a/sys/dev/vt/vt_core.c +++ b/sys/dev/vt/vt_core.c @@ -2183,9 +2183,11 @@ skip_thunk: return (EINVAL); if (vw == vd->vd_curwindow) { + mtx_lock(&Giant); kbd = kbd_get_keyboard(vd->vd_keyboard); if (kbd != NULL) vt_save_kbd_state(vw, kbd); + mtx_unlock(&Giant); } vi->m_num = vd->vd_curwindow->vw_number + 1; diff --git a/sys/i386/include/intr_machdep.h b/sys/i386/include/intr_machdep.h index 8fb61a5..b71bbb6 100644 --- a/sys/i386/include/intr_machdep.h +++ b/sys/i386/include/intr_machdep.h @@ -141,6 +141,8 @@ struct trapframe; extern struct mtx icu_lock; extern int elcr_found; +extern int msix_disable_migration; + #ifndef DEV_ATPIC void atpic_reset(void); #endif diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index eeb8369..8357ab8 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -751,6 +751,8 @@ interpret: if (p->p_flag & P_PPWAIT) { p->p_flag &= ~(P_PPWAIT | P_PPTRACE); cv_broadcast(&p->p_pwait); + /* STOPs are no longer ignored, arrange for AST */ + signotify(td); } /* diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index d50db75..a5595c9 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -1044,9 +1044,9 @@ fork_exit(void (*callout)(void *, struct trapframe *), void *arg, /* * Simplified back end of syscall(), used when returning from fork() - * directly into user mode. Giant is not held on entry, and must not - * be held on return. This function is passed in to fork_exit() as the - * first parameter and is called when returning to a new userland process. + * directly into user mode. This function is passed in to fork_exit() + * as the first parameter and is called when returning to a new + * userland process. 
*/ void fork_return(struct thread *td, struct trapframe *frame) diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c index 908f99a..c847afc 100644 --- a/sys/kern/kern_mutex.c +++ b/sys/kern/kern_mutex.c @@ -285,6 +285,34 @@ __mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file, WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); } +int +__mtx_trylock_spin_flags(volatile uintptr_t *c, int opts, const char *file, + int line) +{ + struct mtx *m; + + if (SCHEDULER_STOPPED()) + return (1); + + m = mtxlock2mtx(c); + + KASSERT(m->mtx_lock != MTX_DESTROYED, + ("mtx_trylock_spin() of destroyed mutex @ %s:%d", file, line)); + KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, + ("mtx_trylock_spin() of sleep mutex %s @ %s:%d", + m->lock_object.lo_name, file, line)); + KASSERT((opts & MTX_RECURSE) == 0, + ("mtx_trylock_spin: unsupp. opt MTX_RECURSE on mutex %s @ %s:%d\n", + m->lock_object.lo_name, file, line)); + if (__mtx_trylock_spin(m, curthread, opts, file, line)) { + LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 1, file, line); + WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); + return (1); + } + LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 0, file, line); + return (0); +} + void __mtx_unlock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c index 5698bd4..d110fbf 100644 --- a/sys/kern/subr_prf.c +++ b/sys/kern/subr_prf.c @@ -1185,3 +1185,24 @@ sbuf_hexdump(struct sbuf *sb, const void *ptr, int length, const char *hdr, } } +#ifdef _KERNEL +void +counted_warning(unsigned *counter, const char *msg) +{ + struct thread *td; + unsigned c; + + for (;;) { + c = *counter; + if (c == 0) + break; + if (atomic_cmpset_int(counter, c, c - 1)) { + td = curthread; + log(LOG_INFO, "pid %d (%s) %s%s\n", + td->td_proc->p_pid, td->td_name, msg, + c > 1 ? 
"" : " - not logging anymore"); + break; + } + } +} +#endif diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index 0b5d380b..bc2e294 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -108,17 +108,29 @@ userret(struct thread *td, struct trapframe *frame) td->td_name); KASSERT((p->p_flag & P_WEXIT) == 0, ("Exiting process returns to usermode")); -#if 0 #ifdef DIAGNOSTIC - /* Check that we called signotify() enough. */ - PROC_LOCK(p); - thread_lock(td); - if (SIGPENDING(td) && ((td->td_flags & TDF_NEEDSIGCHK) == 0 || - (td->td_flags & TDF_ASTPENDING) == 0)) - printf("failed to set signal flags properly for ast()\n"); - thread_unlock(td); - PROC_UNLOCK(p); -#endif + /* + * Check that we called signotify() enough. For + * multi-threaded processes, where signal distribution might + * change due to other threads changing sigmask, the check is + * racy and cannot be performed reliably. + * If current process is vfork child, indicated by P_PPWAIT, then + * issignal() ignores stops, so we block the check to avoid + * classifying pending signals. + */ + if (p->p_numthreads == 1) { + PROC_LOCK(p); + thread_lock(td); + if ((p->p_flag & P_PPWAIT) == 0) { + KASSERT(!SIGPENDING(td) || (td->td_flags & + (TDF_NEEDSIGCHK | TDF_ASTPENDING)) == + (TDF_NEEDSIGCHK | TDF_ASTPENDING), + ("failed to set signal flags for ast p %p " + "td %p fl %x", p, td, td->td_flags)); + } + thread_unlock(td); + PROC_UNLOCK(p); + } #endif #ifdef KTRACE KTRUSERRET(td); @@ -268,6 +280,29 @@ ast(struct trapframe *framep) #endif } +#ifdef DIAGNOSTIC + if (p->p_numthreads == 1 && (flags & TDF_NEEDSIGCHK) == 0) { + PROC_LOCK(p); + thread_lock(td); + /* + * Note that TDF_NEEDSIGCHK should be re-read from + * td_flags, since signal might have been delivered + * after we cleared td_flags above. This is one of + * the reasons for looping check for AST condition. + * See comment in userret() about P_PPWAIT. 
+ */ + if ((p->p_flag & P_PPWAIT) == 0) { + KASSERT(!SIGPENDING(td) || (td->td_flags & + (TDF_NEEDSIGCHK | TDF_ASTPENDING)) == + (TDF_NEEDSIGCHK | TDF_ASTPENDING), + ("failed2 to set signal flags for ast p %p td %p " + "fl %x %x", p, td, flags, td->td_flags)); + } + thread_unlock(td); + PROC_UNLOCK(p); + } +#endif + /* * Check for signals. Unlocked reads of p_pendingcnt or * p_siglist might cause process-directed signal to be handled diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index f648cc9..3e7240b 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -751,12 +751,12 @@ tcp_input(struct mbuf *m, int off0) /* * Locate pcb for segment; if we're likely to add or remove a - * connection then first acquire pcbinfo lock. There are two cases + * connection then first acquire pcbinfo lock. There are three cases * where we might discover later we need a write lock despite the - * flags: ACKs moving a connection out of the syncache, and ACKs for - * a connection in TIMEWAIT. + * flags: ACKs moving a connection out of the syncache, ACKs for a + * connection in TIMEWAIT and SYNs not targeting a listening socket. */ - if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) { + if ((thflags & (TH_FIN | TH_RST)) != 0) { INP_INFO_WLOCK(&V_tcbinfo); ti_locked = TI_WLOCKED; } else @@ -981,10 +981,12 @@ relocked: * now be in TIMEWAIT. 
*/ #ifdef INVARIANTS - if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) + if ((thflags & (TH_FIN | TH_RST)) != 0) INP_INFO_WLOCK_ASSERT(&V_tcbinfo); #endif - if (tp->t_state != TCPS_ESTABLISHED) { + if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) || + (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) && + !(tp->t_flags & TF_FASTOPEN)))) { if (ti_locked == TI_UNLOCKED) { if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) { in_pcbref(inp); @@ -1025,17 +1027,13 @@ relocked: /* * When the socket is accepting connections (the INPCB is in LISTEN * state) we look into the SYN cache if this is a new connection - * attempt or the completion of a previous one. Because listen - * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be - * held in this case. + * attempt or the completion of a previous one. */ if (so->so_options & SO_ACCEPTCONN) { struct in_conninfo inc; KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but " "tp not listening", __func__)); - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); - bzero(&inc, sizeof(inc)); #ifdef INET6 if (isipv6) { @@ -1058,6 +1056,8 @@ relocked: * socket appended to the listen queue in SYN_RECEIVED state. */ if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) { + + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* * Parse the TCP options here because * syncookies need access to the reflected @@ -1346,8 +1346,12 @@ new_tfo_socket: #endif /* * Entry added to syncache and mbuf consumed. - * Everything already unlocked by syncache_add(). + * Only the listen socket is unlocked by syncache_add(). 
*/ + if (ti_locked == TI_WLOCKED) { + INP_INFO_WUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + } INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); return; } else if (tp->t_state == TCPS_LISTEN) { diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 03e8c72..f71d028 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -1107,7 +1107,7 @@ syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m, * Global TCP locks are held because we manipulate the PCB lists * and create a new socket. */ - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending; *lsop = syncache_socket(sc, *lsop, m); @@ -1175,7 +1175,6 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, int tfo_response_cookie_valid = 0; #endif - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); /* listen socket */ KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN, ("%s: unexpected tcp flags", __func__)); @@ -1229,21 +1228,15 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, #ifdef MAC if (mac_syncache_init(&maclabel) != 0) { INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); goto done; } else mac_syncache_create(maclabel, inp); #endif #ifdef TCP_RFC7413 - if (!tfo_cookie_valid) { - INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); - } -#else - INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); + if (!tfo_cookie_valid) #endif - + INP_WUNLOCK(inp); + /* * Remember the IP options, if any. 
*/ @@ -1272,10 +1265,8 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, SCH_LOCK_ASSERT(sch); if (sc != NULL) { #ifdef TCP_RFC7413 - if (tfo_cookie_valid) { + if (tfo_cookie_valid) INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); - } #endif TCPSTAT_INC(tcps_sc_dupsyn); if (ipopts) { diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index d721039..776ff4e 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -292,21 +292,29 @@ tcp_timer_2msl(void *xtp) /* * 2 MSL timeout in shutdown went off. If we're closed but * still waiting for peer to close and connection has been idle - * too long, or if 2MSL time is up from TIME_WAIT, delete connection - * control block. Otherwise, check again in a bit. + * too long delete connection control block. Otherwise, check + * again in a bit. + * + * If in TIME_WAIT state just ignore as this timeout is handled in + * tcp_tw_2msl_scan(). * * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. * Ignore fact that there were recent incoming segments. 
*/ + if ((inp->inp_flags & INP_TIMEWAIT) != 0) { + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); + return; + } if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && tp->t_inpcb && tp->t_inpcb->inp_socket && (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { TCPSTAT_INC(tcps_finwait2_drops); tp = tcp_close(tp); } else { - if (tp->t_state != TCPS_TIME_WAIT && - ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) { + if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) { if (!callout_reset(&tp->t_timers->tt_2msl, TP_KEEPINTVL(tp), tcp_timer_2msl, tp)) { tp->t_timers->tt_flags &= ~TT_2MSL_RST; diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 8d7e9a0..9955468 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -625,7 +625,9 @@ tcp_usr_disconnect(struct socket *so) inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); INP_WLOCK(inp); - if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + if (inp->inp_flags & INP_TIMEWAIT) + goto out; + if (inp->inp_flags & INP_DROPPED) { error = ECONNRESET; goto out; } diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c index 53d4778..1adaf4d 100644 --- a/sys/netinet/toecore.c +++ b/sys/netinet/toecore.c @@ -328,7 +328,6 @@ toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, { struct socket *lso = inp->inp_socket; - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx); diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index 85e29db..a5981e5 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -679,7 +679,9 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) * XXX: this combination of flags is pointless, * but should we keep this for compatibility? 
*/ - if ((V_icmp6_nodeinfo & 5) != 5) + if ((V_icmp6_nodeinfo & (ICMP6_NODEINFO_FQDNOK | + ICMP6_NODEINFO_TMPADDROK)) != + (ICMP6_NODEINFO_FQDNOK | ICMP6_NODEINFO_TMPADDROK)) break; if (code != 0) diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index e3c3229..0b49787 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -732,14 +732,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, (*carp_detach_p)(&ia->ia_ifa); goto out; } - if (pr == NULL) { - if (carp_attached) - (*carp_detach_p)(&ia->ia_ifa); - log(LOG_ERR, "nd6_prelist_add succeeded but " - "no prefix\n"); - error = EINVAL; - goto out; - } } /* relate the address to the prefix */ diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index 0726675..222b984 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -114,6 +114,7 @@ VNET_DEFINE(int, nd6_debug) = 0; VNET_DEFINE(struct nd_drhead, nd_defrouter); VNET_DEFINE(struct nd_prhead, nd_prefix); +VNET_DEFINE(struct rwlock, nd6_lock); VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL; #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) @@ -125,7 +126,7 @@ static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *, static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *); static void nd6_slowtimo(void *); static int regen_tmpaddr(struct in6_ifaddr *); -static struct llentry *nd6_free(struct llentry *, int); +static struct llentry *nd6_free(struct llentry **, int); static void nd6_llinfo_timer(void *); static void clear_llinfo_pqueue(struct llentry *); static int nd6_output_lle(struct ifnet *, struct ifnet *, struct mbuf *, @@ -142,6 +143,8 @@ void nd6_init(void) { + rw_init(&V_nd6_lock, "nd6"); + LIST_INIT(&V_nd_prefix); /* initialization of the default router list */ @@ -162,6 +165,7 @@ nd6_destroy() callout_drain(&V_nd6_slowtimo_ch); callout_drain(&V_nd6_timer_ch); + rw_destroy(&V_nd6_lock); } #endif @@ -466,10 +470,14 @@ nd6_llinfo_timer(void *arg) struct llentry *ln; struct in6_addr 
*dst; struct ifnet *ifp; - struct nd_ifinfo *ndi = NULL; + struct nd_ifinfo *ndi; KASSERT(arg != NULL, ("%s: arg NULL", __func__)); ln = (struct llentry *)arg; + ifp = ln->lle_tbl->llt_ifp; + CURVNET_SET(ifp->if_vnet); + + ND6_RLOCK(); LLE_WLOCK(ln); if (callout_pending(&ln->la_timer)) { /* @@ -489,10 +497,10 @@ nd6_llinfo_timer(void *arg) * would have been 1. */ LLE_WUNLOCK(ln); + ND6_RUNLOCK(); + CURVNET_RESTORE(); return; } - ifp = ln->lle_tbl->llt_ifp; - CURVNET_SET(ifp->if_vnet); if (ln->ln_ntick > 0) { if (ln->ln_ntick > INT_MAX) { @@ -512,8 +520,7 @@ nd6_llinfo_timer(void *arg) } if (ln->la_flags & LLE_DELETED) { - (void)nd6_free(ln, 0); - ln = NULL; + (void)nd6_free(&ln, 0); goto done; } @@ -539,9 +546,7 @@ nd6_llinfo_timer(void *arg) ln->la_hold = m0; clear_llinfo_pqueue(ln); } - EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_TIMEDOUT); - (void)nd6_free(ln, 0); - ln = NULL; + (void)nd6_free(&ln, 0); if (m != NULL) icmp6_error2(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0, ifp); @@ -558,7 +563,7 @@ nd6_llinfo_timer(void *arg) /* Garbage Collection(RFC 2461 5.3) */ if (!ND6_LLINFO_PERMANENT(ln)) { EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); - (void)nd6_free(ln, 1); + (void)nd6_free(&ln, 1); ln = NULL; } break; @@ -585,9 +590,7 @@ nd6_llinfo_timer(void *arg) nd6_ns_output(ifp, dst, dst, ln, NULL); LLE_WLOCK(ln); } else { - EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); - (void)nd6_free(ln, 0); - ln = NULL; + (void)nd6_free(&ln, 0); } break; default: @@ -595,8 +598,10 @@ nd6_llinfo_timer(void *arg) __func__, ln->ln_state); } done: - if (ln != NULL) + if (ln != NULL) { + ND6_RUNLOCK(); LLE_FREE_LOCKED(ln); + } CURVNET_RESTORE(); } @@ -608,6 +613,7 @@ void nd6_timer(void *arg) { CURVNET_SET((struct vnet *) arg); + struct nd_drhead drq; struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; struct in6_ifaddr *ia6, *nia6; @@ -615,10 +621,18 @@ nd6_timer(void *arg) callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz, nd6_timer, curvnet); + 
TAILQ_INIT(&drq); + /* expire default router list */ - TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { + ND6_WLOCK(); + TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) if (dr->expire && dr->expire < time_uptime) - defrtrlist_del(dr); + defrouter_unlink(dr, &drq); + ND6_WUNLOCK(); + + while ((dr = TAILQ_FIRST(&drq)) != NULL) { + TAILQ_REMOVE(&drq, dr, dr_entry); + defrouter_del(dr); } /* @@ -813,29 +827,37 @@ regen_tmpaddr(struct in6_ifaddr *ia6) void nd6_purge(struct ifnet *ifp) { + struct nd_drhead drq; struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; + TAILQ_INIT(&drq); + /* * Nuke default router list entries toward ifp. * We defer removal of default router list entries that is installed * in the routing table, in order to keep additional side effects as * small as possible. */ + ND6_WLOCK(); TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (dr->installed) continue; - if (dr->ifp == ifp) - defrtrlist_del(dr); + defrouter_unlink(dr, &drq); } TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (!dr->installed) continue; - if (dr->ifp == ifp) - defrtrlist_del(dr); + defrouter_unlink(dr, &drq); + } + ND6_WUNLOCK(); + + while ((dr = TAILQ_FIRST(&drq)) != NULL) { + TAILQ_REMOVE(&drq, dr, dr_entry); + defrouter_del(dr); } /* Nuke prefix list entries toward ifp */ @@ -1046,13 +1068,27 @@ nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) * that the change is safe. 
*/ static struct llentry * -nd6_free(struct llentry *ln, int gc) +nd6_free(struct llentry **lnp, int gc) { - struct llentry *next; - struct nd_defrouter *dr; struct ifnet *ifp; + struct llentry *ln, *next; + struct nd_defrouter *dr; + + ln = *lnp; + *lnp = NULL; LLE_WLOCK_ASSERT(ln); + ND6_RLOCK_ASSERT(); + + ifp = ln->lle_tbl->llt_ifp; + if ((ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) != 0) + dr = defrouter_lookup_locked(&L3_ADDR_SIN6(ln)->sin6_addr, ifp); + else + dr = NULL; + ND6_RUNLOCK(); + + if ((ln->la_flags & LLE_DELETED) == 0) + EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); /* * we used to have pfctlinput(PRC_HOSTDEAD) here. @@ -1062,11 +1098,7 @@ nd6_free(struct llentry *ln, int gc) /* cancel timer */ nd6_llinfo_settimer_locked(ln, -1); - ifp = ln->lle_tbl->llt_ifp; - if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { - dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp); - if (dr != NULL && dr->expire && ln->ln_state == ND6_LLINFO_STALE && gc) { /* @@ -1091,6 +1123,7 @@ nd6_free(struct llentry *ln, int gc) next = LIST_NEXT(ln, lle_next); LLE_REMREF(ln); LLE_WUNLOCK(ln); + defrouter_rele(dr); return (next); } @@ -1173,6 +1206,8 @@ nd6_free(struct llentry *ln, int gc) IF_AFDATA_UNLOCK(ifp); + if (dr != NULL) + defrouter_rele(dr); return (next); } @@ -1251,12 +1286,13 @@ nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) /* * check for default route */ - if (IN6_ARE_ADDR_EQUAL(&in6addr_any, - &SIN6(rt_key(rt))->sin6_addr)) { - + if (IN6_ARE_ADDR_EQUAL(&in6addr_any, + &SIN6(rt_key(rt))->sin6_addr)) { dr = defrouter_lookup(&gateway->sin6_addr, ifp); - if (dr != NULL) + if (dr != NULL) { dr->installed = 0; + defrouter_rele(dr); + } } break; } @@ -1444,12 +1480,22 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) case SIOCSRTRFLUSH_IN6: { /* flush all the default routers */ - struct nd_defrouter *dr, *next; + struct nd_drhead drq; + struct nd_defrouter *dr; + + TAILQ_INIT(&drq); defrouter_reset(); - 
TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, next) { - defrtrlist_del(dr); + + ND6_WLOCK(); + while ((dr = TAILQ_FIRST(&V_nd_defrouter)) != NULL) + defrouter_unlink(dr, &drq); + ND6_WUNLOCK(); + while ((dr = TAILQ_FIRST(&drq)) != NULL) { + TAILQ_REMOVE(&drq, dr, dr_entry); + defrouter_del(dr); } + defrouter_select(); break; } @@ -2200,30 +2246,33 @@ nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS) struct nd_defrouter *dr; int error; - if (req->newptr) + if (req->newptr != NULL) return (EPERM); + error = sysctl_wire_old_buffer(req, 0); + if (error != 0) + return (error); + bzero(&d, sizeof(d)); d.rtaddr.sin6_family = AF_INET6; d.rtaddr.sin6_len = sizeof(d.rtaddr); - /* - * XXX locking - */ + ND6_RLOCK(); TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { d.rtaddr.sin6_addr = dr->rtaddr; error = sa6_recoverscope(&d.rtaddr); if (error != 0) - return (error); + break; d.flags = dr->raflags; d.rtlifetime = dr->rtlifetime; d.expire = dr->expire + (time_second - time_uptime); d.if_index = dr->ifp->if_index; error = SYSCTL_OUT(req, &d, sizeof(d)); if (error != 0) - return (error); + break; } - return (0); + ND6_RUNLOCK(); + return (error); } static int diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h index 510d208..841530a 100644 --- a/sys/netinet6/nd6.h +++ b/sys/netinet6/nd6.h @@ -243,6 +243,7 @@ struct nd_defrouter { u_long expire; struct ifnet *ifp; int installed; /* is installed into kernel routing table */ + u_int refcnt; }; struct nd_prefixctl { @@ -342,6 +343,19 @@ VNET_DECLARE(int, nd6_onlink_ns_rfc4861); #define V_nd6_debug VNET(nd6_debug) #define V_nd6_onlink_ns_rfc4861 VNET(nd6_onlink_ns_rfc4861) +/* Lock for the prefix and default router lists. 
*/ +VNET_DECLARE(struct rwlock, nd6_lock); +#define V_nd6_lock VNET(nd6_lock) + +#define ND6_RLOCK() rw_rlock(&V_nd6_lock) +#define ND6_RUNLOCK() rw_runlock(&V_nd6_lock) +#define ND6_WLOCK() rw_wlock(&V_nd6_lock) +#define ND6_WUNLOCK() rw_wunlock(&V_nd6_lock) +#define ND6_WLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_WLOCKED) +#define ND6_RLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_RLOCKED) +#define ND6_LOCK_ASSERT() rw_assert(&V_nd6_lock, RA_LOCKED) +#define ND6_UNLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_UNLOCKED) + #define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0) VNET_DECLARE(struct callout, nd6_timer_ch); @@ -447,12 +461,17 @@ void nd6_rs_input(struct mbuf *, int, int); void nd6_ra_input(struct mbuf *, int, int); void defrouter_reset(void); void defrouter_select(void); -void defrtrlist_del(struct nd_defrouter *); +void defrouter_ref(struct nd_defrouter *); +void defrouter_rele(struct nd_defrouter *); +bool defrouter_remove(struct in6_addr *, struct ifnet *); +void defrouter_unlink(struct nd_defrouter *, struct nd_drhead *); +void defrouter_del(struct nd_defrouter *); void prelist_remove(struct nd_prefix *); int nd6_prelist_add(struct nd_prefixctl *, struct nd_defrouter *, struct nd_prefix **); void pfxlist_onlink_check(void); struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *); +struct nd_defrouter *defrouter_lookup_locked(struct in6_addr *, struct ifnet *); struct nd_prefix *nd6_prefix_lookup(struct nd_prefixctl *); void rt6_flush(struct in6_addr *, struct ifnet *); int nd6_setdefaultiface(int); diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index 7e58cc5..9073c2b 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -895,25 +895,17 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * Remove the sender from the Default Router List and * update the Destination Cache entries. 
*/ - struct nd_defrouter *dr; - struct in6_addr *in6; - - in6 = &L3_ADDR_SIN6(ln)->sin6_addr; - - dr = defrouter_lookup(in6, ln->lle_tbl->llt_ifp); - if (dr) - defrtrlist_del(dr); - else if (ND_IFINFO(ln->lle_tbl->llt_ifp)->flags & - ND6_IFF_ACCEPT_RTADV) { + if (!defrouter_remove(&L3_ADDR_SIN6(ln)->sin6_addr, + ln->lle_tbl->llt_ifp) && + (ND_IFINFO(ln->lle_tbl->llt_ifp)->flags & + ND6_IFF_ACCEPT_RTADV) != 0) /* * Even if the neighbor is not in the default - * router list, the neighbor may be used - * as a next hop for some destinations - * (e.g. redirect case). So we must - * call rt6_flush explicitly. + * router list, the neighbor may be used as a + * next hop for some destinations (e.g. redirect + * case). So we must call rt6_flush explicitly. */ rt6_flush(&ip6->ip6_src, ifp); - } } ln->ln_router = is_router; } diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c index 4342f0a..4a3e8aa 100644 --- a/sys/netinet6/nd6_rtr.c +++ b/sys/netinet6/nd6_rtr.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/malloc.h> #include <sys/mbuf.h> +#include <sys/refcount.h> #include <sys/socket.h> #include <sys/sockio.h> #include <sys/time.h> @@ -218,6 +219,8 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len) struct nd_defrouter *dr; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; + dr = NULL; + /* * We only accept RAs only when the per-interface flag * ND6_IFF_ACCEPT_RTADV is on the receiving interface. @@ -367,6 +370,10 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len) (void)prelist_update(&pr, dr, m, mcast); } } + if (dr != NULL) { + defrouter_rele(dr); + dr = NULL; + } /* * MTU @@ -444,10 +451,6 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len) m_freem(m); } -/* - * default router list proccessing sub routines - */ - /* tell the change to user processes watching the routing socket. 
*/ static void nd6_rtmsg(int cmd, struct rtentry *rt) @@ -476,6 +479,10 @@ nd6_rtmsg(int cmd, struct rtentry *rt) ifa_free(ifa); } +/* + * default router list proccessing sub routines + */ + static void defrouter_addreq(struct nd_defrouter *new) { @@ -504,16 +511,43 @@ defrouter_addreq(struct nd_defrouter *new) } struct nd_defrouter * -defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp) +defrouter_lookup_locked(struct in6_addr *addr, struct ifnet *ifp) { struct nd_defrouter *dr; - TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { - if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) + ND6_LOCK_ASSERT(); + TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) + if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) { + defrouter_ref(dr); return (dr); - } + } + return (NULL); +} + +struct nd_defrouter * +defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp) +{ + struct nd_defrouter *dr; - return (NULL); /* search failed */ + ND6_RLOCK(); + dr = defrouter_lookup_locked(addr, ifp); + ND6_RUNLOCK(); + return (dr); +} + +void +defrouter_ref(struct nd_defrouter *dr) +{ + + refcount_acquire(&dr->refcnt); +} + +void +defrouter_rele(struct nd_defrouter *dr) +{ + + if (refcount_release(&dr->refcnt)) + free(dr, M_IP6NDP); } /* @@ -548,15 +582,41 @@ defrouter_delreq(struct nd_defrouter *dr) } /* - * remove all default routes from default router list + * Remove all default routes from default router list. */ void defrouter_reset(void) { - struct nd_defrouter *dr; + struct nd_defrouter *dr, **dra; + int count, i; + + count = i = 0; + /* + * We can't delete routes with the ND lock held, so make a copy of the + * current default router list and use that when deleting routes. 
+ */ + ND6_RLOCK(); TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) - defrouter_delreq(dr); + count++; + ND6_RUNLOCK(); + + dra = malloc(count * sizeof(*dra), M_TEMP, M_WAITOK | M_ZERO); + + ND6_RLOCK(); + TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { + if (i == count) + break; + defrouter_ref(dr); + dra[i++] = dr; + } + ND6_RUNLOCK(); + + for (i = 0; i < count && dra[i] != NULL; i++) { + defrouter_delreq(dra[i]); + defrouter_rele(dra[i]); + } + free(dra, M_TEMP); /* * XXX should we also nuke any default routers in the kernel, by @@ -564,12 +624,53 @@ defrouter_reset(void) */ } +/* + * Look up a matching default router list entry and remove it. Returns true if a + * matching entry was found, false otherwise. + */ +bool +defrouter_remove(struct in6_addr *addr, struct ifnet *ifp) +{ + struct nd_defrouter *dr; + + ND6_WLOCK(); + dr = defrouter_lookup_locked(addr, ifp); + if (dr == NULL) { + ND6_WUNLOCK(); + return (false); + } + + defrouter_unlink(dr, NULL); + ND6_WUNLOCK(); + defrouter_del(dr); + defrouter_rele(dr); + return (true); +} + +/* + * Remove a router from the global list and optionally stash it in a + * caller-supplied queue. + * + * The ND lock must be held. + */ void -defrtrlist_del(struct nd_defrouter *dr) +defrouter_unlink(struct nd_defrouter *dr, struct nd_drhead *drq) +{ + + ND6_WLOCK_ASSERT(); + TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); + if (drq != NULL) + TAILQ_INSERT_TAIL(drq, dr, dr_entry); +} + +void +defrouter_del(struct nd_defrouter *dr) { struct nd_defrouter *deldr = NULL; struct nd_prefix *pr; + ND6_UNLOCK_ASSERT(); + /* * Flush all the routing table entries that use the router * as a next hop. @@ -581,7 +682,6 @@ defrtrlist_del(struct nd_defrouter *dr) deldr = dr; defrouter_delreq(dr); } - TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); /* * Also delete all the pointers to the router in each prefix lists. 
@@ -601,7 +701,10 @@ defrtrlist_del(struct nd_defrouter *dr) if (deldr) defrouter_select(); - free(dr, M_IP6NDP); + /* + * Release the list reference. + */ + defrouter_rele(dr); } /* @@ -628,27 +731,32 @@ defrtrlist_del(struct nd_defrouter *dr) void defrouter_select(void) { - struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL; + struct nd_defrouter *dr, *selected_dr, *installed_dr; struct llentry *ln = NULL; + ND6_RLOCK(); /* * Let's handle easy case (3) first: * If default router list is empty, there's nothing to be done. */ - if (TAILQ_EMPTY(&V_nd_defrouter)) + if (TAILQ_EMPTY(&V_nd_defrouter)) { + ND6_RUNLOCK(); return; + } /* * Search for a (probably) reachable router from the list. * We just pick up the first reachable one (if any), assuming that * the ordering rule of the list described in defrtrlist_update(). */ + selected_dr = installed_dr = NULL; TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { IF_AFDATA_RLOCK(dr->ifp); if (selected_dr == NULL && (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) && ND6_IS_LLINFO_PROBREACH(ln)) { selected_dr = dr; + defrouter_ref(selected_dr); } IF_AFDATA_RUNLOCK(dr->ifp); if (ln != NULL) { @@ -656,12 +764,15 @@ defrouter_select(void) ln = NULL; } - if (dr->installed && installed_dr == NULL) - installed_dr = dr; - else if (dr->installed && installed_dr) { - /* this should not happen. warn for diagnosis. */ - log(LOG_ERR, "defrouter_select: more than one router" - " is installed\n"); + if (dr->installed) { + if (installed_dr == NULL) { + installed_dr = dr; + defrouter_ref(installed_dr); + } else { + /* this should not happen. warn for diagnosis. */ + log(LOG_ERR, + "defrouter_select: more than one router is installed\n"); + } } } /* @@ -673,21 +784,25 @@ defrouter_select(void) * or when the new one has a really higher preference value. 
*/ if (selected_dr == NULL) { - if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry)) + if (installed_dr == NULL || + TAILQ_NEXT(installed_dr, dr_entry) == NULL) selected_dr = TAILQ_FIRST(&V_nd_defrouter); else selected_dr = TAILQ_NEXT(installed_dr, dr_entry); - } else if (installed_dr) { + defrouter_ref(selected_dr); + } else if (installed_dr != NULL) { IF_AFDATA_RLOCK(installed_dr->ifp); if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) && ND6_IS_LLINFO_PROBREACH(ln) && rtpref(selected_dr) <= rtpref(installed_dr)) { + defrouter_rele(selected_dr); selected_dr = installed_dr; } IF_AFDATA_RUNLOCK(installed_dr->ifp); if (ln != NULL) LLE_RUNLOCK(ln); } + ND6_RUNLOCK(); /* * If the selected router is different than the installed one, @@ -695,10 +810,13 @@ defrouter_select(void) * Note that the selected router is never NULL here. */ if (installed_dr != selected_dr) { - if (installed_dr) + if (installed_dr != NULL) { defrouter_delreq(installed_dr); + defrouter_rele(installed_dr); + } defrouter_addreq(selected_dr); } + defrouter_rele(selected_dr); } /* @@ -734,13 +852,14 @@ defrtrlist_update(struct nd_defrouter *new) struct nd_defrouter *dr, *n; int oldpref; - if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) { - /* entry exists */ - if (new->rtlifetime == 0) { - defrtrlist_del(dr); - return (NULL); - } + if (new->rtlifetime == 0) { + defrouter_remove(&new->rtaddr, new->ifp); + return (NULL); + } + ND6_WLOCK(); + dr = defrouter_lookup_locked(&new->rtaddr, new->ifp); + if (dr != NULL) { oldpref = rtpref(dr); /* override */ @@ -753,8 +872,10 @@ defrtrlist_update(struct nd_defrouter *new) * to sort the entries. Also make sure the selected * router is still installed in the kernel. 
*/ - if (dr->installed && rtpref(new) == oldpref) + if (dr->installed && rtpref(new) == oldpref) { + ND6_WUNLOCK(); return (dr); + } /* * The preferred router may have changed, so relocate this @@ -762,19 +883,17 @@ defrtrlist_update(struct nd_defrouter *new) */ TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); n = dr; - goto insert; + } else { + n = malloc(sizeof(*n), M_IP6NDP, M_NOWAIT | M_ZERO); + if (n == NULL) { + ND6_WUNLOCK(); + return (NULL); + } + memcpy(n, new, sizeof(*n)); + /* Initialize with an extra reference for the caller. */ + refcount_init(&n->refcnt, 2); } - /* entry does not exist */ - if (new->rtlifetime == 0) - return (NULL); - - n = malloc(sizeof(*n), M_IP6NDP, M_NOWAIT | M_ZERO); - if (n == NULL) - return (NULL); - memcpy(n, new, sizeof(*n)); - -insert: /* * Insert the new router in the Default Router List; * The Default Router List should be in the descending order @@ -787,10 +906,11 @@ insert: if (rtpref(n) > rtpref(dr)) break; } - if (dr) + if (dr != NULL) TAILQ_INSERT_BEFORE(dr, n, dr_entry); else TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry); + ND6_WUNLOCK(); defrouter_select(); @@ -819,6 +939,7 @@ pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr) if (new == NULL) return; new->router = dr; + defrouter_ref(dr); LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry); @@ -828,7 +949,9 @@ pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr) static void pfxrtr_del(struct nd_pfxrouter *pfr) { + LIST_REMOVE(pfr, pfr_entry); + defrouter_rele(pfr->router); free(pfr, M_IP6NDP); } @@ -869,11 +992,9 @@ nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr, new->ndpr_flags = pr->ndpr_flags; if ((error = in6_init_prefix_ltimes(new)) != 0) { free(new, M_IP6NDP); - return(error); + return (error); } new->ndpr_lastupdate = time_uptime; - if (newp != NULL) - *newp = new; /* initialization */ LIST_INIT(&new->ndpr_advrtrs); @@ -899,10 +1020,11 @@ nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr, } } - if (dr) + if (dr != 
NULL) pfxrtr_add(new, dr); - - return 0; + if (newp != NULL) + *newp = new; + return (0); } void @@ -959,7 +1081,6 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, struct ifnet *ifp = new->ndpr_ifp; struct nd_prefix *pr; int error = 0; - int newprefix = 0; int auth; struct in6_addrlifetime lt6_tmp; char ip6buf[INET6_ADDRSTRLEN]; @@ -1017,23 +1138,17 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, if (dr && pfxrtr_lookup(pr, dr) == NULL) pfxrtr_add(pr, dr); } else { - struct nd_prefix *newpr = NULL; - - newprefix = 1; - if (new->ndpr_vltime == 0) goto end; if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0) goto end; - error = nd6_prelist_add(new, dr, &newpr); - if (error != 0 || newpr == NULL) { + error = nd6_prelist_add(new, dr, &pr); + if (error != 0) { nd6log((LOG_NOTICE, "prelist_update: " - "nd6_prelist_add failed for %s/%d on %s " - "errno=%d, returnpr=%p\n", + "nd6_prelist_add failed for %s/%d on %s errno=%d\n", ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr), - new->ndpr_plen, if_name(new->ndpr_ifp), - error, newpr)); + new->ndpr_plen, if_name(new->ndpr_ifp), error)); goto end; /* we should just give up in this case. */ } @@ -1044,13 +1159,11 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, * addresses. Thus, we explicitly make sure that the prefix * itself expires now. */ - if (newpr->ndpr_raf_onlink == 0) { - newpr->ndpr_vltime = 0; - newpr->ndpr_pltime = 0; - in6_init_prefix_ltimes(newpr); + if (pr->ndpr_raf_onlink == 0) { + pr->ndpr_vltime = 0; + pr->ndpr_pltime = 0; + in6_init_prefix_ltimes(pr); } - - pr = newpr; } /* @@ -1346,6 +1459,7 @@ pfxlist_onlink_check() * that does not advertise any prefixes. 
*/ if (pr == NULL) { + ND6_RLOCK(); TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { struct nd_prefix *pr0; @@ -1356,6 +1470,7 @@ pfxlist_onlink_check() if (pfxrtr != NULL) break; } + ND6_RUNLOCK(); } if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) { /* diff --git a/sys/rpc/clnt_dg.c b/sys/rpc/clnt_dg.c index 36e63c6..068ca90 100644 --- a/sys/rpc/clnt_dg.c +++ b/sys/rpc/clnt_dg.c @@ -313,11 +313,9 @@ recheck_socket: cl->cl_netid = NULL; return (cl); err2: - if (cl) { - mem_free(cl, sizeof (CLIENT)); - if (cu) - mem_free(cu, sizeof (*cu)); - } + mem_free(cl, sizeof (CLIENT)); + mem_free(cu, sizeof (*cu)); + return (NULL); } diff --git a/sys/rpc/clnt_vc.c b/sys/rpc/clnt_vc.c index 67ad58f..484e19e 100644 --- a/sys/rpc/clnt_vc.c +++ b/sys/rpc/clnt_vc.c @@ -270,12 +270,10 @@ clnt_vc_create( return (cl); err: - if (ct) { - mtx_destroy(&ct->ct_lock); - mem_free(ct, sizeof (struct ct_data)); - } - if (cl) - mem_free(cl, sizeof (CLIENT)); + mtx_destroy(&ct->ct_lock); + mem_free(ct, sizeof (struct ct_data)); + mem_free(cl, sizeof (CLIENT)); + return ((CLIENT *)NULL); } diff --git a/sys/rpc/svc.c b/sys/rpc/svc.c index a4cc484..ff25ee4 100644 --- a/sys/rpc/svc.c +++ b/sys/rpc/svc.c @@ -841,7 +841,7 @@ svcerr_progvers(struct svc_req *rqstp, rpcvers_t low_vers, rpcvers_t high_vers) * parameters. */ SVCXPRT * -svc_xprt_alloc() +svc_xprt_alloc(void) { SVCXPRT *xprt; SVCXPRT_EXT *ext; @@ -858,8 +858,7 @@ svc_xprt_alloc() * Free a server transport structure. 
*/ void -svc_xprt_free(xprt) - SVCXPRT *xprt; +svc_xprt_free(SVCXPRT *xprt) { mem_free(xprt->xp_p3, sizeof(SVCXPRT_EXT)); diff --git a/sys/rpc/svc_dg.c b/sys/rpc/svc_dg.c index 1c530bd..121d151 100644 --- a/sys/rpc/svc_dg.c +++ b/sys/rpc/svc_dg.c @@ -142,9 +142,8 @@ svc_dg_create(SVCPOOL *pool, struct socket *so, size_t sendsize, return (xprt); freedata: (void) printf(svc_dg_str, __no_mem_str); - if (xprt) { - svc_xprt_free(xprt); - } + svc_xprt_free(xprt); + return (NULL); } diff --git a/sys/sys/mutex.h b/sys/sys/mutex.h index 1c9d5d5..ddaee4f 100644 --- a/sys/sys/mutex.h +++ b/sys/sys/mutex.h @@ -111,6 +111,8 @@ void __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line); void __mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line); +int __mtx_trylock_spin_flags(volatile uintptr_t *c, int opts, + const char *file, int line); void __mtx_unlock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line); #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) @@ -151,6 +153,8 @@ void thread_lock_flags_(struct thread *, int, const char *, int); __mtx_unlock_flags(&(m)->mtx_lock, o, f, l) #define _mtx_lock_spin_flags(m, o, f, l) \ __mtx_lock_spin_flags(&(m)->mtx_lock, o, f, l) +#define _mtx_trylock_spin_flags(m, o, f, l) \ + __mtx_trylock_spin_flags(&(m)->mtx_lock, o, f, l) #define _mtx_unlock_spin_flags(m, o, f, l) \ __mtx_unlock_spin_flags(&(m)->mtx_lock, o, f, l) #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) @@ -211,6 +215,21 @@ void thread_lock_flags_(struct thread *, int, const char *, int); LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE, \ mp, 0, 0, (file), (line)); \ } while (0) +#define __mtx_trylock_spin(mp, tid, opts, file, line) __extension__ ({ \ + uintptr_t _tid = (uintptr_t)(tid); \ + int _ret; \ + \ + spinlock_enter(); \ + if (((mp)->mtx_lock != MTX_UNOWNED || !_mtx_obtain_lock((mp), _tid))) {\ + spinlock_exit(); \ + _ret = 0; \ + } else { \ + 
LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE, \ + mp, 0, 0, file, line); \ + _ret = 1; \ + } \ + _ret; \ +}) #else /* SMP */ #define __mtx_lock_spin(mp, tid, opts, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ @@ -223,6 +242,20 @@ void thread_lock_flags_(struct thread *, int, const char *, int); (mp)->mtx_lock = _tid; \ } \ } while (0) +#define __mtx_trylock_spin(mp, tid, opts, file, line) __extension__ ({ \ + uintptr_t _tid = (uintptr_t)(tid); \ + int _ret; \ + \ + spinlock_enter(); \ + if ((mp)->mtx_lock != MTX_UNOWNED) { \ + spinlock_exit(); \ + _ret = 0; \ + } else { \ + (mp)->mtx_lock = _tid; \ + _ret = 1; \ + } \ + _ret; \ +}) #endif /* SMP */ /* Unlock a normal mutex. */ @@ -292,6 +325,10 @@ void thread_lock_flags_(struct thread *, int, const char *, int); * mtx_trylock_flags(m, opts) is used the same way as mtx_trylock() but accepts * relevant option flags `opts.' * + * mtx_trylock_spin(m) attempts to acquire MTX_SPIN mutex `m' but doesn't + * spin if it cannot. Rather, it returns 0 on failure and non-zero on + * success. It always returns failure for recursed lock attempts. + * * mtx_initialized(m) returns non-zero if the lock `m' has been initialized. 
* * mtx_owned(m) returns non-zero if the current thread owns the lock `m' @@ -301,6 +338,7 @@ void thread_lock_flags_(struct thread *, int, const char *, int); #define mtx_lock(m) mtx_lock_flags((m), 0) #define mtx_lock_spin(m) mtx_lock_spin_flags((m), 0) #define mtx_trylock(m) mtx_trylock_flags((m), 0) +#define mtx_trylock_spin(m) mtx_trylock_spin_flags((m), 0) #define mtx_unlock(m) mtx_unlock_flags((m), 0) #define mtx_unlock_spin(m) mtx_unlock_spin_flags((m), 0) @@ -338,6 +376,8 @@ extern struct mtx_pool *mtxpool_sleep; _mtx_unlock_flags((m), (opts), (file), (line)) #define mtx_lock_spin_flags_(m, opts, file, line) \ _mtx_lock_spin_flags((m), (opts), (file), (line)) +#define mtx_trylock_spin_flags_(m, opts, file, line) \ + _mtx_trylock_spin_flags((m), (opts), (file), (line)) #define mtx_unlock_spin_flags_(m, opts, file, line) \ _mtx_unlock_spin_flags((m), (opts), (file), (line)) #else /* LOCK_DEBUG == 0 && !MUTEX_NOINLINE */ @@ -347,6 +387,8 @@ extern struct mtx_pool *mtxpool_sleep; __mtx_unlock((m), curthread, (opts), (file), (line)) #define mtx_lock_spin_flags_(m, opts, file, line) \ __mtx_lock_spin((m), curthread, (opts), (file), (line)) +#define mtx_trylock_spin_flags_(m, opts, file, line) \ + __mtx_trylock_spin((m), curthread, (opts), (file), (line)) #define mtx_unlock_spin_flags_(m, opts, file, line) \ __mtx_unlock_spin((m)) #endif /* LOCK_DEBUG > 0 || MUTEX_NOINLINE */ @@ -372,6 +414,8 @@ extern struct mtx_pool *mtxpool_sleep; mtx_unlock_spin_flags_((m), (opts), LOCK_FILE, LOCK_LINE) #define mtx_trylock_flags(m, opts) \ mtx_trylock_flags_((m), (opts), LOCK_FILE, LOCK_LINE) +#define mtx_trylock_spin_flags(m, opts) \ + mtx_trylock_spin_flags_((m), (opts), LOCK_FILE, LOCK_LINE) #define mtx_assert(m, what) \ mtx_assert_((m), (what), __FILE__, __LINE__) diff --git a/sys/sys/proc.h b/sys/sys/proc.h index ef60b89..b2de02d 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -701,7 +701,7 @@ struct proc { #define SW_TYPE_MASK 0xff /* First 8 bits are switch type */ 
#define SWT_NONE 0 /* Unspecified switch. */ #define SWT_PREEMPT 1 /* Switching due to preemption. */ -#define SWT_OWEPREEMPT 2 /* Switching due to opepreempt. */ +#define SWT_OWEPREEMPT 2 /* Switching due to owepreempt. */ #define SWT_TURNSTILE 3 /* Turnstile contention. */ #define SWT_SLEEPQ 4 /* Sleepq wait. */ #define SWT_SLEEPQTIMO 5 /* Sleepq timeout wait. */ diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h index 747b8ff..523f3c9 100644 --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -173,13 +173,21 @@ struct syscall_module_data { struct sysent old_sysent; /* old sysent */ }; -#define MAKE_SYSENT(syscallname) \ -static struct sysent syscallname##_sysent = { \ - (sizeof(struct syscallname ## _args ) \ +/* separate initialization vector so it can be used in a substructure */ +#define SYSENT_INIT_VALS(_syscallname) { \ + .sy_narg = (sizeof(struct _syscallname ## _args ) \ / sizeof(register_t)), \ - (sy_call_t *)& sys_##syscallname, \ - SYS_AUE_##syscallname \ -} + .sy_call = (sy_call_t *)&sys_##_syscallname, \ + .sy_auevent = SYS_AUE_##_syscallname, \ + .sy_systrace_args_func = NULL, \ + .sy_entry = 0, \ + .sy_return = 0, \ + .sy_flags = 0, \ + .sy_thrcnt = 0 \ +} + +#define MAKE_SYSENT(syscallname) \ +static struct sysent syscallname##_sysent = SYSENT_INIT_VALS(syscallname); #define MAKE_SYSENT_COMPAT(syscallname) \ static struct sysent syscallname##_sysent = { \ diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 83e4f12..e03a38b 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -435,4 +435,6 @@ void intr_prof_stack_use(struct thread *td, struct trapframe *frame); extern void (*softdep_ast_cleanup)(void); +void counted_warning(unsigned *counter, const char *msg); + #endif /* !_SYS_SYSTM_H_ */ diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c index c563300..ac6d446 100644 --- a/sys/ufs/ufs/ufs_lookup.c +++ b/sys/ufs/ufs/ufs_lookup.c @@ -881,6 +881,7 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp, isrename) struct buf *bp; u_int dsize; struct 
direct *ep, *nep; + u_int64_t old_isize; int error, ret, blkoff, loc, spacefree, flags, namlen; char *dirbuf; @@ -909,16 +910,18 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp, isrename) return (error); } #endif + old_isize = dp->i_size; + vnode_pager_setsize(dvp, (u_long)dp->i_offset + DIRBLKSIZ); if ((error = UFS_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, cr, flags, &bp)) != 0) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) bdwrite(newdirbp); + vnode_pager_setsize(dvp, (u_long)old_isize); return (error); } dp->i_size = dp->i_offset + DIRBLKSIZ; DIP_SET(dp, i_size, dp->i_size); dp->i_flag |= IN_CHANGE | IN_UPDATE; - vnode_pager_setsize(dvp, (u_long)dp->i_size); dirp->d_reclen = DIRBLKSIZ; blkoff = dp->i_offset & (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 3d91a71..4edd4ad 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1914,13 +1914,13 @@ ufs_mkdir(ap) dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; + vnode_pager_setsize(tvp, DIRBLKSIZ); if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, BA_CLRBUF, &bp)) != 0) goto bad; ip->i_size = DIRBLKSIZ; DIP_SET(ip, i_size, DIRBLKSIZ); ip->i_flag |= IN_CHANGE | IN_UPDATE; - vnode_pager_setsize(tvp, (u_long)ip->i_size); bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); if (DOINGSOFTDEP(tvp)) { /* diff --git a/sys/x86/x86/msi.c b/sys/x86/x86/msi.c index 381f097..37f0e16 100644 --- a/sys/x86/x86/msi.c +++ b/sys/x86/x86/msi.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <sys/mutex.h> #include <sys/sx.h> +#include <sys/sysctl.h> #include <sys/systm.h> #include <x86/apicreg.h> #include <machine/cputypes.h> @@ -134,6 +135,20 @@ struct pic msi_pic = { msi_enable_source, msi_disable_source, msi_eoi_source, msi_source_pending, NULL, NULL, msi_config_intr, msi_assign_cpu }; +/** + * Xen hypervisors prior to 4.6.0 do not 
properly handle updates to + * enabled MSI-X table entries. Allow migration of MSI-X interrupts + * to be disabled via a tunable. Values have the following meaning: + * + * -1: automatic detection by FreeBSD + * 0: enable migration + * 1: disable migration + */ +int msix_disable_migration = -1; +SYSCTL_INT(_machdep, OID_AUTO, disable_msix_migration, CTLFLAG_RDTUN, + &msix_disable_migration, 0, + "Disable migration of MSI-X interrupts between CPUs"); + static int msi_enabled; static int msi_last_irq; static struct mtx msi_lock; @@ -212,6 +227,9 @@ msi_assign_cpu(struct intsrc *isrc, u_int apic_id) if (msi->msi_first != msi) return (EINVAL); + if (msix_disable_migration && msi->msi_msix) + return (EINVAL); + /* Store information to free existing irq. */ old_vector = msi->msi_vector; old_id = msi->msi_cpu; @@ -284,6 +302,11 @@ msi_init(void) return; } + if (msix_disable_migration == -1) { + /* The default is to allow migration of MSI-X interrupts. */ + msix_disable_migration = 0; + } + msi_enabled = 1; intr_register_pic(&msi_pic); mtx_init(&msi_lock, "msi", NULL, MTX_DEF); diff --git a/sys/x86/xen/hvm.c b/sys/x86/xen/hvm.c index 6c6f153..68f7fde 100644 --- a/sys/x86/xen/hvm.c +++ b/sys/x86/xen/hvm.c @@ -385,9 +385,29 @@ xen_hvm_init_hypercall_stubs(void) return (ENXIO); if (hypercall_stubs == NULL) { + int major, minor; + do_cpuid(base + 1, regs); - printf("XEN: Hypervisor version %d.%d detected.\n", - regs[0] >> 16, regs[0] & 0xffff); + + major = regs[0] >> 16; + minor = regs[0] & 0xffff; + printf("XEN: Hypervisor version %d.%d detected.\n", major, + minor); + + if (((major < 4) || (major == 4 && minor <= 5)) && + msix_disable_migration == -1) { + /* + * Xen hypervisors prior to 4.6.0 do not properly + * handle updates to enabled MSI-X table entries, + * so disable MSI-X interrupt migration in that + * case. 
+ */ + if (bootverbose) + printf( +"Disabling MSI-X interrupt migration due to Xen hypervisor bug.\n" +"Set machdep.msix_disable_migration=0 to forcefully enable it.\n"); + msix_disable_migration = 1; + } } /* diff --git a/tools/build/mk/OptionalObsoleteFiles.inc b/tools/build/mk/OptionalObsoleteFiles.inc index df1d171..e962404 100644 --- a/tools/build/mk/OptionalObsoleteFiles.inc +++ b/tools/build/mk/OptionalObsoleteFiles.inc @@ -873,6 +873,20 @@ OLD_FILES+=usr/sbin/fmtree OLD_FILES+=usr/share/man/man8/fmtree.8.gz .endif +.if ${MK_FTP} == no +OLD_FILES+=etc/ftpusers +OLD_FILES+=etc/rc.d/ftpd +OLD_FILES+=usr/bin/ftp +OLD_FILES+=usr/bin/gate-ftp +OLD_FILES+=usr/bin/pftp +OLD_FILES+=usr/libexec/ftpd +OLD_FILES+=usr/share/man/man1/ftp.1.gz +OLD_FILES+=usr/share/man/man1/gate-ftp.1.gz +OLD_FILES+=usr/share/man/man1/pftp.1.gz +OLD_FILES+=usr/share/man/man5/ftpchroot.5.gz +OLD_FILES+=usr/share/man/man8/ftpd.8.gz +.endif + .if ${MK_GNUCXX} == no OLD_FILES+=usr/bin/g++ OLD_FILES+=usr/include/c++/4.2/algorithm diff --git a/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.military b/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.military index 5188fc0..f5c0a71 100644 --- a/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.military +++ b/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.military @@ -20,7 +20,7 @@ LANG=ru_RU.KOI8-R 8 сен День Бородинского сражения русской армии под командованием М.И. Кутузова с французской армией (1812 год) 11 сен День победы русской эскадры под командованием Ф.Ф. 
Ушакова над турецкой эскадрой у мыса Тендра (1790 год) 21 сен День победы русских полков во главе с великим князем Дмитрием Донским над монголо-татарскими войсками в Куликовской битве (1380 год) - 7 ноя День освобождения Москвы силами народного ополчения под руководством Кузьмы Минина и Дмитрия Пожарского от польских интервентов (1612 год) + 4 ноя День освобождения Москвы силами народного ополчения под руководством Кузьмы Минина и Дмитрия Пожарского от польских интервентов (1612 год) 1 дек День победы русской эскадры под командованием П.С. Нахимова над турецкой эскадрой у мыса Синоп (1853 год) 5 дек День начала контрнаступления советских войск против немецко-фашистских войск в битве под Москвой (1941 год) 24 дек День взятия турецкой крепости Измаил русскими войсками под командованием А.В. Суворова (1790 год) diff --git a/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.military b/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.military index 48f257c..5efec09 100644 --- a/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.military +++ b/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.military @@ -20,7 +20,7 @@ LANG=ru_RU.UTF-8 8 сен День Бородинского сражения русской армии под командованием М.И. Кутузова с французской армией (1812 год) 11 сен День победы русской эскадры под командованием Ф.Ф. 
Ушакова над турецкой эскадрой у мыса Тендра (1790 год) 21 сен День победы русских полков во главе с великим князем Дмитрием Донским над монголо-татарскими войсками в Куликовской битве (1380 год) - 7 ноя День освобождения Москвы силами народного ополчения под руководством Кузьмы Минина и Дмитрия Пожарского от польских интервентов (1612 год) + 4 ноя День освобождения Москвы силами народного ополчения под руководством Кузьмы Минина и Дмитрия Пожарского от польских интервентов (1612 год) 1 дек День победы русской эскадры под командованием П.С. Нахимова над турецкой эскадрой у мыса Синоп (1853 год) 5 дек День начала контрнаступления советских войск против немецко-фашистских войск в битве под Москвой (1941 год) 24 дек День взятия турецкой крепости Измаил русскими войсками под командованием А.В. 
Суворова (1790 год) diff --git a/usr.bin/systat/Makefile b/usr.bin/systat/Makefile index 817a0bc..a05f79a 100644 --- a/usr.bin/systat/Makefile +++ b/usr.bin/systat/Makefile @@ -6,7 +6,7 @@ PROG= systat SRCS= cmds.c cmdtab.c devs.c fetch.c iostat.c keyboard.c main.c \ netcmds.c netstat.c pigs.c swap.c icmp.c \ - mode.c ip.c sctp.c tcp.c \ + mode.c ip.c sctp.c tcp.c zarc.c \ vmstat.c convtbl.c ifcmds.c ifstat.c .if ${MK_INET6_SUPPORT} != "no" diff --git a/usr.bin/systat/cmdtab.c b/usr.bin/systat/cmdtab.c index 63a69f6..cb021e0 100644 --- a/usr.bin/systat/cmdtab.c +++ b/usr.bin/systat/cmdtab.c @@ -78,6 +78,9 @@ struct cmdtab cmdtab[] = { { "ifstat", showifstat, fetchifstat, labelifstat, initifstat, openifstat, closeifstat, cmdifstat, 0, CF_LOADAV }, + { "zarc", showzarc, fetchzarc, labelzarc, + initzarc, openzarc, closezarc, 0, + resetzarc, CF_ZFSARC }, { NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0 } }; struct cmdtab *curcmd = &cmdtab[0]; diff --git a/usr.bin/systat/extern.h b/usr.bin/systat/extern.h index 461ef8d..272eead 100644 --- a/usr.bin/systat/extern.h +++ b/usr.bin/systat/extern.h @@ -173,4 +173,5 @@ char *sysctl_dynread(const char *, size_t *); void reset ## name(void); \ void show ## name(void) +SYSTAT_CMD( zarc ); SYSTAT_CMD ( sctp ); diff --git a/usr.bin/systat/main.c b/usr.bin/systat/main.c index 8417811..b20b2b8 100644 --- a/usr.bin/systat/main.c +++ b/usr.bin/systat/main.c @@ -188,6 +188,11 @@ labels(void) "/0 /1 /2 /3 /4 /5 /6 /7 /8 /9 /10"); mvaddstr(1, 5, "Load Average"); } + if (curcmd->c_flags & CF_ZFSARC) { + mvaddstr(0, 20, + " Total MFU MRU Anon Hdr L2Hdr Other"); + mvaddstr(1, 5, "ZFS ARC "); + } (*curcmd->c_label)(); #ifdef notdef mvprintw(21, 25, "CPU usage on %s", hostname); @@ -221,8 +226,33 @@ display(void) if (j > 50) wprintw(wload, " %4.1f", avenrun[0]); } + if (curcmd->c_flags & CF_ZFSARC) { + uint64_t arc[7] = {}; + size_t size = sizeof(arc[0]); + if (sysctlbyname("kstat.zfs.misc.arcstats.size", + &arc[0], &size, NULL, 
0) == 0 ) {
+			GETSYSCTL("vfs.zfs.mfu_size", arc[1]);
+			GETSYSCTL("vfs.zfs.mru_size", arc[2]);
+			GETSYSCTL("vfs.zfs.anon_size", arc[3]);
+			GETSYSCTL("kstat.zfs.misc.arcstats.hdr_size", arc[4]);
+			GETSYSCTL("kstat.zfs.misc.arcstats.l2_hdr_size", arc[5]);
+			GETSYSCTL("kstat.zfs.misc.arcstats.other_size", arc[6]);
+			wmove(wload, 0, 0); wclrtoeol(wload);
+			for (i = 0 ; i < sizeof(arc) / sizeof(arc[0]) ; i++) {
+				if (arc[i] > 10llu * 1024 * 1024 * 1024 ) {
+					wprintw(wload, "%7lluG", (unsigned long long)(arc[i] >> 30));
+				}
+				else if (arc[i] > 10 * 1024 * 1024 ) {
+					wprintw(wload, "%7lluM", (unsigned long long)(arc[i] >> 20));
+				}
+				else {
+					wprintw(wload, "%7lluK", (unsigned long long)(arc[i] >> 10));
+				}
+			}
+		}
+	}
 	(*curcmd->c_refresh)();
-	if (curcmd->c_flags & CF_LOADAV)
+	if (curcmd->c_flags & (CF_LOADAV | CF_ZFSARC))
 		wrefresh(wload);
 	wrefresh(wnd);
 	move(CMDLINE, col);
diff --git a/usr.bin/systat/systat.1 b/usr.bin/systat/systat.1
index 46c2cb9..4ac66a4 100644
--- a/usr.bin/systat/systat.1
+++ b/usr.bin/systat/systat.1
@@ -98,8 +98,9 @@ to be one of:
 .Ic sctp ,
 .Ic swap ,
 .Ic tcp ,
+.Ic vmstat ,
 or
-.Ic vmstat .
+.Ic zarc .
 These displays can also be requested interactively (without the
 .Dq Fl )
 and are described in
@@ -430,6 +431,8 @@ Display statistics averaged over the refresh interval (the default).
 .It Cm zero
 Reset running statistics to zero.
 .El
+.It Ic zarc
+Display ZFS ARC cache usage and hit/miss statistics.
 .It Ic netstat
 Display, in the lower window, network connections.
By default,
diff --git a/usr.bin/systat/systat.h b/usr.bin/systat/systat.h
index 1b4322c..ebd409e 100644
--- a/usr.bin/systat/systat.h
+++ b/usr.bin/systat/systat.h
@@ -54,6 +54,7 @@ extern int use_kvm;
 
 #define CF_INIT		0x1	/* been initialized */
 #define CF_LOADAV	0x2	/* display w/ load average */
+#define CF_ZFSARC	0x4	/* display w/ ZFS cache usage */
 
 #define TCP	0x1
 #define UDP	0x2
diff --git a/usr.bin/systat/zarc.c b/usr.bin/systat/zarc.c
new file mode 100644
index 0000000..2a6606f
--- /dev/null
+++ b/usr.bin/systat/zarc.c
@@ -0,0 +1,245 @@
+/*-
+ * Copyright (c) 2014
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "systat.h"
+#include "extern.h"
+
+/* One counter per row of the zarc display, in display order. */
+struct zfield {
+	uint64_t arcstats;
+	uint64_t arcstats_demand_data;
+	uint64_t arcstats_demand_metadata;
+	uint64_t arcstats_prefetch_data;
+	uint64_t arcstats_prefetch_metadata;
+	uint64_t zfetchstats;
+	uint64_t arcstats_l2;
+	uint64_t vdev_cache_stats;
+};
+
+/*
+ * One sample: a hit counter and a miss counter for every row.  curstat is
+ * the latest sample, oldstat the one before it, and initstat the baseline
+ * taken by initzarc().
+ */
+static struct zarcstats {
+	struct zfield hits;
+	struct zfield misses;
+} curstat, initstat, oldstat;
+
+static void
+getinfo(struct zarcstats *ls);
+
+/* Create the display window as a sub-window of stdscr. */
+WINDOW *
+openzarc(void)
+{
+	return (subwin(stdscr, LINES-3-1, 0, MAINWIN_ROW, 0));
+}
+
+/* Clear and destroy the display window; a NULL window is ignored. */
+void
+closezarc(WINDOW *w)
+{
+	if (w == NULL)
+		return;
+	wclear(w);
+	wrefresh(w);
+	delwin(w);
+}
+
+/* Draw the column headings and one label per statistics row. */
+void
+labelzarc(void)
+{
+	wmove(wnd, 0, 0); wclrtoeol(wnd);
+	mvwprintw(wnd, 0, 31+1, "%4.4s %7.7s %7.7s %12.12s %12.12s",
+	    "rate", "hits", "misses", "total hits", "total misses");
+#define L(row, str) mvwprintw(wnd, row, 5, str); \
+	mvwprintw(wnd, row, 31, ":"); \
+	mvwprintw(wnd, row, 31+4, "%%")
+	L(1, "arcstats");
+	L(2, "arcstats.demand_data");
+	L(3, "arcstats.demand_metadata");
+	L(4, "arcstats.prefetch_data");
+	L(5, "arcstats.prefetch_metadata");
+	L(6, "zfetchstats");
+	L(7, "arcstats.l2");
+	L(8, "vdev_cache_stats");
+#undef L
+}
+
+/*
+ * Return hits as an integer percentage of hits + misses.  Zero hits
+ * yields 0, which also keeps the 0/0 case away from the division.
+ */
+static int
+calc(uint64_t hits, uint64_t misses)
+{
+	if (hits == 0)
+		return (0);
+	return (100 * hits / (hits + misses));
+}
+
+/*
+ * Fill *delta with the change between oldstat and curstat and
+ * rate->hits with the hit percentage of that change.  rate->misses is
+ * not written.
+ */
+static void
+domode(struct zarcstats *delta, struct zarcstats *rate)
+{
+#define DO(stat) \
+	delta->hits.stat = (curstat.hits.stat - oldstat.hits.stat); \
+	delta->misses.stat = (curstat.misses.stat - oldstat.misses.stat); \
+	rate->hits.stat = calc(delta->hits.stat, delta->misses.stat)
+	DO(arcstats);
+	DO(arcstats_demand_data);
+	DO(arcstats_demand_metadata);
+	DO(arcstats_prefetch_data);
+	DO(arcstats_prefetch_metadata);
+	DO(zfetchstats);
+	DO(arcstats_l2);
+	DO(vdev_cache_stats);
+#undef DO
+}
+
+/*
+ * Print, for every row, the hit percentage and hit/miss deltas since the
+ * previous sample followed by the hit/miss counters of the latest sample.
+ */
+void
+showzarc(void)
+{
+	struct zarcstats delta, rate;
+
+	memset(&delta, 0, sizeof delta);
+	memset(&rate, 0, sizeof rate);
+
+	domode(&delta, &rate);
+
+	/* The counters are uint64_t, so print them with PRIu64, not %lu. */
+#define DO(stat, row, col, fmt) \
+	mvwprintw(wnd, row, col, fmt, stat)
+#define R(row, stat) DO(rate.hits.stat, row, 31+1, "%3" PRIu64)
+#define H(row, stat) DO(delta.hits.stat, row, 31+1+5, "%7" PRIu64); \
+	DO(curstat.hits.stat, row, 31+1+5+8+8, "%12" PRIu64)
+#define M(row, stat) DO(delta.misses.stat, row, 31+1+5+8, "%7" PRIu64); \
+	DO(curstat.misses.stat, row, 31+1+5+8+8+13, "%12" PRIu64)
+#define E(row, stat) R(row, stat); H(row, stat); M(row, stat)
+	E(1, arcstats);
+	E(2, arcstats_demand_data);
+	E(3, arcstats_demand_metadata);
+	E(4, arcstats_prefetch_data);
+	E(5, arcstats_prefetch_metadata);
+	E(6, zfetchstats);
+	E(7, arcstats_l2);
+	E(8, vdev_cache_stats);
+#undef DO
+#undef E
+#undef M
+#undef H
+#undef R
+}
+
+/* Take the baseline sample and seed curstat/oldstat with it. */
+int
+initzarc(void)
+{
+	getinfo(&initstat);
+	curstat = oldstat = initstat;
+	return 1;
+}
+
+/* Restart from a fresh baseline sample. */
+void
+resetzarc(void)
+{
+	initzarc();
+}
+
+/*
+ * Read the hit and miss counters into *ls.  The first counter is read
+ * directly with sysctlbyname(); if that fails *ls is left as it was and
+ * nothing else is read.
+ */
+static void
+getinfo(struct zarcstats *ls)
+{
+	size_t size = sizeof( ls->hits.arcstats );
+	if ( sysctlbyname("kstat.zfs.misc.arcstats.hits",
+		&ls->hits.arcstats, &size, NULL, 0 ) != 0 )
+		return;
+	GETSYSCTL("kstat.zfs.misc.arcstats.misses",
+		ls->misses.arcstats);
+	GETSYSCTL("kstat.zfs.misc.arcstats.demand_data_hits",
+		ls->hits.arcstats_demand_data);
+	GETSYSCTL("kstat.zfs.misc.arcstats.demand_data_misses",
+		ls->misses.arcstats_demand_data);
+	GETSYSCTL("kstat.zfs.misc.arcstats.demand_metadata_hits",
+		ls->hits.arcstats_demand_metadata);
+	GETSYSCTL("kstat.zfs.misc.arcstats.demand_metadata_misses",
+		ls->misses.arcstats_demand_metadata);
+	GETSYSCTL("kstat.zfs.misc.arcstats.prefetch_data_hits",
+		ls->hits.arcstats_prefetch_data);
+	GETSYSCTL("kstat.zfs.misc.arcstats.prefetch_data_misses",
+		ls->misses.arcstats_prefetch_data);
+	GETSYSCTL("kstat.zfs.misc.arcstats.prefetch_metadata_hits",
+		ls->hits.arcstats_prefetch_metadata);
+	GETSYSCTL("kstat.zfs.misc.arcstats.prefetch_metadata_misses",
+		ls->misses.arcstats_prefetch_metadata);
+	GETSYSCTL("kstat.zfs.misc.zfetchstats.hits",
+		ls->hits.zfetchstats);
+	GETSYSCTL("kstat.zfs.misc.zfetchstats.misses",
+		ls->misses.zfetchstats);
+	GETSYSCTL("kstat.zfs.misc.arcstats.l2_hits",
+		ls->hits.arcstats_l2);
+	GETSYSCTL("kstat.zfs.misc.arcstats.l2_misses",
+		ls->misses.arcstats_l2);
+	GETSYSCTL("kstat.zfs.misc.vdev_cache_stats.hits",
+		ls->hits.vdev_cache_stats);
+	GETSYSCTL("kstat.zfs.misc.vdev_cache_stats.misses",
+		ls->misses.vdev_cache_stats);
+}
+
+/* Keep the previous sample in oldstat and read a new one into curstat. */
+void
+fetchzarc(void)
+{
+	oldstat = curstat;
+	getinfo(&curstat);
+}
diff --git a/usr.sbin/crashinfo/crashinfo.sh b/usr.sbin/crashinfo/crashinfo.sh
index 557d810..2a8419d 100755
--- a/usr.sbin/crashinfo/crashinfo.sh
+++ b/usr.sbin/crashinfo/crashinfo.sh
@@ -35,6 +35,22 @@ usage()
 	exit 1
 }
 
+# Run a single gdb command against a kernel file in batch mode.
+# The kernel file is specified as the first argument and the command
+# is given in the remaining arguments.
+gdb_command()
+{
+	local k
+
+	k=$1 ; shift
+
+	if [ -x /usr/local/bin/gdb ]; then
+		/usr/local/bin/gdb -batch -ex "$*" "$k"
+	else
+		echo -e "$*" | /usr/bin/gdb -x /dev/stdin -batch "$k"
+	fi
+}
+
 find_kernel()
 {
 	local ivers k kvers
@@ -55,8 +71,8 @@ find_kernel()
 
 	# Look for a matching kernel version.
 	for k in `sysctl -n kern.bootfile` $(ls -t /boot/*/kernel); do
-		kvers=$(echo 'printf " Version String: %s", version' | \
-		    gdb -x /dev/stdin -batch $k 2>/dev/null)
+		kvers=$(gdb_command $k 'printf " Version String: %s", version' \
+		    2>/dev/null)
 		if [ "$ivers" = "$kvers" ]; then
 			KERNEL=$k
 			break
@@ -151,11 +167,10 @@ echo "Writing crash summary to $FILE."
 umask 077
 
 # Simulate uname
-ostype=$(echo -e printf '"%s", ostype' | gdb -x /dev/stdin -batch $KERNEL)
-osrelease=$(echo -e printf '"%s", osrelease' | gdb -x /dev/stdin -batch $KERNEL)
-version=$(echo -e printf '"%s", version' | gdb -x /dev/stdin -batch $KERNEL | \
-    tr '\t\n' ' ')
-machine=$(echo -e printf '"%s", machine' | gdb -x /dev/stdin -batch $KERNEL)
+ostype=$(gdb_command $KERNEL 'printf "%s", ostype')
+osrelease=$(gdb_command $KERNEL 'printf "%s", osrelease')
+version=$(gdb_command $KERNEL 'printf "%s", version' | tr '\t\n' ' ')
+machine=$(gdb_command $KERNEL 'printf "%s", machine')
 
 exec > $FILE 2>&1
 
@@ -174,7 +189,11 @@ file=`mktemp /tmp/crashinfo.XXXXXX`
 if [ $? -eq 0 ]; then
 	echo "bt" >> $file
 	echo "quit" >> $file
-	kgdb $KERNEL $VMCORE < $file
+	if [ -x /usr/local/bin/kgdb ]; then
+		/usr/local/bin/kgdb $KERNEL $VMCORE < $file
+	else
+		kgdb $KERNEL $VMCORE < $file
+	fi
 	rm -f $file
 	echo
 fi
|