author | grehan <grehan@FreeBSD.org> | 2011-06-28 06:26:03 +0000 |
---|---|---|
committer | grehan <grehan@FreeBSD.org> | 2011-06-28 06:26:03 +0000 |
commit | 2c6741be0f59191f2283eb268e4f7690399d578a | |
tree | b139c8c6dcca4fa284815daade405b75886ee360 /sys/kern | |
parent | 3c35264f695e0a1f8a04dbcca1c93bb5159b2274 | |
parent | 19ae02bba572390c7299166228d31e54003e094a | |
IFC @ r222830
Diffstat (limited to 'sys/kern')
-rw-r--r-- | sys/kern/imgact_aout.c | 4 |
-rw-r--r-- | sys/kern/kern_conf.c | 62 |
-rw-r--r-- | sys/kern/kern_cpuset.c | 88 |
-rw-r--r-- | sys/kern/kern_exit.c | 11 |
-rw-r--r-- | sys/kern/kern_idle.c | 2 |
-rw-r--r-- | sys/kern/kern_ktr.c | 61 |
-rw-r--r-- | sys/kern/kern_pmc.c | 6 |
-rw-r--r-- | sys/kern/kern_racct.c | 5 |
-rw-r--r-- | sys/kern/kern_rctl.c | 2 |
-rw-r--r-- | sys/kern/kern_rmlock.c | 15 |
-rw-r--r-- | sys/kern/kern_shutdown.c | 33 |
-rw-r--r-- | sys/kern/kern_sig.c | 6 |
-rw-r--r-- | sys/kern/ksched.c | 2 |
-rw-r--r-- | sys/kern/link_elf.c | 2 |
-rw-r--r-- | sys/kern/sched_4bsd.c | 70 |
-rw-r--r-- | sys/kern/sched_ule.c | 9 |
-rw-r--r-- | sys/kern/subr_devstat.c | 14 |
-rw-r--r-- | sys/kern/subr_kdb.c | 36 |
-rw-r--r-- | sys/kern/subr_msgbuf.c | 191 |
-rw-r--r-- | sys/kern/subr_pcpu.c | 8 |
-rw-r--r-- | sys/kern/subr_prf.c | 160 |
-rw-r--r-- | sys/kern/subr_rman.c | 1 |
-rw-r--r-- | sys/kern/subr_smp.c | 95 |
-rw-r--r-- | sys/kern/sys_process.c | 17 |
-rw-r--r-- | sys/kern/uipc_socket.c | 14 |
-rw-r--r-- | sys/kern/uipc_syscalls.c | 4 |
-rw-r--r-- | sys/kern/vfs_bio.c | 5 |
27 files changed, 715 insertions, 208 deletions
diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c
index 2f889ca..3908da7 100644
--- a/sys/kern/imgact_aout.c
+++ b/sys/kern/imgact_aout.c
@@ -103,7 +103,7 @@ struct sysentvec aout_sysvec = {
 #elif defined(__amd64__)
-#define AOUT32_USRSTACK 0xbfc0000
+#define AOUT32_USRSTACK 0xbfc00000
 #define AOUT32_PS_STRINGS \
     (AOUT32_USRSTACK - sizeof(struct freebsd32_ps_strings))
@@ -152,7 +152,7 @@ aout_fixup(register_t **stack_base, struct image_params *imgp)
 {
     *(char **)stack_base -= sizeof(uint32_t);
-    return (suword(*stack_base, imgp->args->argc));
+    return (suword32(*stack_base, imgp->args->argc));
 }

 static int
diff --git a/sys/kern/kern_conf.c b/sys/kern/kern_conf.c
index 59b876c..a4d90c7 100644
--- a/sys/kern/kern_conf.c
+++ b/sys/kern/kern_conf.c
@@ -963,6 +963,68 @@ make_dev_alias_p(int flags, struct cdev **cdev, struct cdev *pdev,
     return (res);
 }

+int
+make_dev_physpath_alias(int flags, struct cdev **cdev, struct cdev *pdev,
+    struct cdev *old_alias, const char *physpath)
+{
+    char *devfspath;
+    int physpath_len;
+    int max_parentpath_len;
+    int parentpath_len;
+    int devfspathbuf_len;
+    int mflags;
+    int ret;
+
+    *cdev = NULL;
+    devfspath = NULL;
+    physpath_len = strlen(physpath);
+    ret = EINVAL;
+    if (physpath_len == 0)
+        goto out;
+
+    if (strncmp("id1,", physpath, 4) == 0) {
+        physpath += 4;
+        physpath_len -= 4;
+        if (physpath_len == 0)
+            goto out;
+    }
+
+    max_parentpath_len = SPECNAMELEN - physpath_len - /*/*/1;
+    parentpath_len = strlen(pdev->si_name);
+    if (max_parentpath_len < parentpath_len) {
+        printf("make_dev_physpath_alias: WARNING - Unable to alias %s "
+            "to %s/%s - path too long\n",
+            pdev->si_name, physpath, pdev->si_name);
+        ret = ENAMETOOLONG;
+        goto out;
+    }
+
+    mflags = (flags & MAKEDEV_NOWAIT) ? M_NOWAIT : M_WAITOK;
+    devfspathbuf_len = physpath_len + /*/*/1 + parentpath_len + /*NUL*/1;
+    devfspath = malloc(devfspathbuf_len, M_DEVBUF, mflags);
+    if (devfspath == NULL) {
+        ret = ENOMEM;
+        goto out;
+    }
+
+    sprintf(devfspath, "%s/%s", physpath, pdev->si_name);
+    if (old_alias != NULL
+     && strcmp(old_alias->si_name, devfspath) == 0) {
+        /* Retain the existing alias. */
+        *cdev = old_alias;
+        old_alias = NULL;
+        ret = 0;
+    } else {
+        ret = make_dev_alias_p(flags, cdev, pdev, devfspath);
+    }
+out:
+    if (old_alias != NULL)
+        destroy_dev(old_alias);
+    if (devfspath != NULL)
+        free(devfspath, M_DEVBUF);
+    return (ret);
+}
+
 static void
 destroy_devl(struct cdev *dev)
 {
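The new make_dev_physpath_alias() KPI destroys the alias passed in as old_alias unless it can be reused, so a caller only has to track a single cdev pointer. A minimal sketch of the intended calling pattern; the driver, softc layout, and field names are invented for illustration:

```c
/*
 * Hypothetical driver snippet: republish /dev/<physpath>/<devname>
 * whenever the physical path changes. Handing the stored alias back in
 * lets the KPI either reuse it (same name) or destroy it and create a
 * fresh one. old_alias is passed by value, so reusing the same pointer
 * as the output parameter is safe.
 */
static void
xdisk_update_physpath(struct xdisk_softc *sc, const char *physpath)
{

	(void)make_dev_physpath_alias(MAKEDEV_WAITOK,
	    &sc->xd_physpath_alias,	/* receives the new alias (or NULL) */
	    sc->xd_cdev,		/* parent device node */
	    sc->xd_physpath_alias,	/* previous alias, may be NULL */
	    physpath);
}
```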
diff --git a/sys/kern/kern_cpuset.c b/sys/kern/kern_cpuset.c
index 6489ffb..e1f2801 100644
--- a/sys/kern/kern_cpuset.c
+++ b/sys/kern/kern_cpuset.c
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/cpuset.h>
 #include <sys/sx.h>
 #include <sys/queue.h>
+#include <sys/libkern.h>
 #include <sys/limits.h>
 #include <sys/bus.h>
 #include <sys/interrupt.h>
@@ -617,6 +618,86 @@ out:
 }

 /*
+ * Calculate the ffs() of the cpuset.
+ */
+int
+cpusetobj_ffs(const cpuset_t *set)
+{
+    size_t i;
+    int cbit;
+
+    cbit = 0;
+    for (i = 0; i < _NCPUWORDS; i++) {
+        if (set->__bits[i] != 0) {
+            cbit = ffsl(set->__bits[i]);
+            cbit += i * _NCPUBITS;
+            break;
+        }
+    }
+    return (cbit);
+}
+
+/*
+ * Return a string representing a valid layout for a cpuset_t object.
+ * It expects an incoming buffer at least sized as CPUSETBUFSIZ.
+ */
+char *
+cpusetobj_strprint(char *buf, const cpuset_t *set)
+{
+    char *tbuf;
+    size_t i, bytesp, bufsiz;
+
+    tbuf = buf;
+    bytesp = 0;
+    bufsiz = CPUSETBUFSIZ;
+
+    for (i = _NCPUWORDS - 1; i > 0; i--) {
+        bytesp = snprintf(tbuf, bufsiz, "%lx, ", set->__bits[i]);
+        bufsiz -= bytesp;
+        tbuf += bytesp;
+    }
+    snprintf(tbuf, bufsiz, "%lx", set->__bits[0]);
+    return (buf);
+}
+
+/*
+ * Build a valid cpuset_t object from a string representation.
+ * It expects an incoming buffer at least sized as CPUSETBUFSIZ.
+ */
+int
+cpusetobj_strscan(cpuset_t *set, const char *buf)
+{
+    u_int nwords;
+    int i, ret;
+
+    if (strlen(buf) > CPUSETBUFSIZ - 1)
+        return (-1);
+
+    /* Allow to pass a shorter version of the mask when necessary. */
+    nwords = 1;
+    for (i = 0; buf[i] != '\0'; i++)
+        if (buf[i] == ',')
+            nwords++;
+    if (nwords > _NCPUWORDS)
+        return (-1);
+
+    CPU_ZERO(set);
+    for (i = nwords - 1; i > 0; i--) {
+        ret = sscanf(buf, "%lx, ", &set->__bits[i]);
+        if (ret == 0 || ret == -1)
+            return (-1);
+        buf = strstr(buf, " ");
+        if (buf == NULL)
+            return (-1);
+        buf++;
+    }
+    ret = sscanf(buf, "%lx", &set->__bits[0]);
+    if (ret == 0 || ret == -1)
+        return (-1);
+    return (0);
+}
+
+/*
 * Apply an anonymous mask to a single thread.
 */
 int
@@ -754,12 +835,7 @@ cpuset_init(void *arg)
 {
     cpuset_t mask;

-    CPU_ZERO(&mask);
-#ifdef SMP
-    mask.__bits[0] = all_cpus;
-#else
-    mask.__bits[0] = 1;
-#endif
+    mask = all_cpus;
     if (cpuset_modify(cpuset_zero, &mask))
         panic("Can't set initial cpuset mask.\n");
     cpuset_zero->cs_flags |= CPU_SET_RDONLY;
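A small kernel-context sketch of how the three new helpers compose; the function name is invented, and the exact string produced depends on _NCPUWORDS (one hex word per mask word, most significant first):

```c
#include <sys/param.h>
#include <sys/cpuset.h>
#include <sys/systm.h>

static void
cpuset_helpers_demo(void)
{
	char buf[CPUSETBUFSIZ];
	cpuset_t set, copy;

	CPU_ZERO(&set);
	CPU_SET(0, &set);
	CPU_SET(3, &set);	/* CPUs 0 and 3 -> low word 0x9 */

	/* Like ffs(3), the result is 1-based: CPU 0 yields 1. */
	KASSERT(cpusetobj_ffs(&set) == 1, ("unexpected first bit"));

	/* e.g. "0, 9" on a two-word cpuset_t; plain "9" scans back too. */
	cpusetobj_strprint(buf, &set);
	if (cpusetobj_strscan(&copy, buf) == 0)
		KASSERT(!CPU_CMP(&set, &copy), ("round trip changed mask"));
}
```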
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 6510e13..bb25d17 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -701,8 +701,9 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options,
      */
     if (p->p_oppid && (t = pfind(p->p_oppid)) != NULL) {
         PROC_LOCK(p);
-        p->p_oppid = 0;
         proc_reparent(p, t);
+        p->p_pptr->p_dbg_child--;
+        p->p_oppid = 0;
         PROC_UNLOCK(p);
         pksignal(t, SIGCHLD, p->p_ksi);
         wakeup(t);
@@ -794,7 +795,8 @@ kern_wait(struct thread *td, pid_t pid, int *status, int options,
         pid = -q->p_pgid;
         PROC_UNLOCK(q);
     }
-    if (options &~ (WUNTRACED|WNOHANG|WCONTINUED|WNOWAIT|WLINUXCLONE))
+    /* If we don't know the option, just return. */
+    if (options & ~(WUNTRACED|WNOHANG|WCONTINUED|WNOWAIT|WLINUXCLONE))
         return (EINVAL);
 loop:
     if (q->p_flag & P_STATCHILD) {
@@ -873,7 +875,10 @@ loop:
     }
     if (nfound == 0) {
         sx_xunlock(&proctree_lock);
-        return (ECHILD);
+        if (td->td_proc->p_dbg_child)
+            return (0);
+        else
+            return (ECHILD);
     }
     if (options & WNOHANG) {
         sx_xunlock(&proctree_lock);
diff --git a/sys/kern/kern_idle.c b/sys/kern/kern_idle.c
index af12d7d..f412d17 100644
--- a/sys/kern/kern_idle.c
+++ b/sys/kern/kern_idle.c
@@ -60,7 +60,7 @@ idle_setup(void *dummy)

     p = NULL; /* start with no idle process */
 #ifdef SMP
-    SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
+    STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
 #endif
 #ifdef SMP
         error = kproc_kthread_add(sched_idletd, NULL, &p, &td,
diff --git a/sys/kern/kern_ktr.c b/sys/kern/kern_ktr.c
index 2e5e06f..eff3d5b 100644
--- a/sys/kern/kern_ktr.c
+++ b/sys/kern/kern_ktr.c
@@ -40,8 +40,10 @@ __FBSDID("$FreeBSD$");
 #include "opt_alq.h"

 #include <sys/param.h>
+#include <sys/queue.h>
 #include <sys/alq.h>
 #include <sys/cons.h>
+#include <sys/cpuset.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/libkern.h>
@@ -68,10 +70,6 @@ __FBSDID("$FreeBSD$");
 #define KTR_MASK	(0)
 #endif

-#ifndef KTR_CPUMASK
-#define KTR_CPUMASK	(~0)
-#endif
-
 #ifndef KTR_TIME
 #define KTR_TIME	get_cyclecount()
 #endif
@@ -84,11 +82,6 @@ FEATURE(ktr, "Kernel support for KTR kernel tracing facility");

 SYSCTL_NODE(_debug, OID_AUTO, ktr, CTLFLAG_RD, 0, "KTR options");

-int	ktr_cpumask = KTR_CPUMASK;
-TUNABLE_INT("debug.ktr.cpumask", &ktr_cpumask);
-SYSCTL_INT(_debug_ktr, OID_AUTO, cpumask, CTLFLAG_RW,
-    &ktr_cpumask, 0, "Bitmask of CPUs on which KTR logging is enabled");
-
 int	ktr_mask = KTR_MASK;
 TUNABLE_INT("debug.ktr.mask", &ktr_mask);
 SYSCTL_INT(_debug_ktr, OID_AUTO, mask, CTLFLAG_RW,
@@ -106,6 +99,54 @@ int	ktr_version = KTR_VERSION;
 SYSCTL_INT(_debug_ktr, OID_AUTO, version, CTLFLAG_RD,
     &ktr_version, 0, "Version of the KTR interface");

+cpuset_t ktr_cpumask;
+static char ktr_cpumask_str[CPUSETBUFSIZ];
+TUNABLE_STR("debug.ktr.cpumask", ktr_cpumask_str, sizeof(ktr_cpumask_str));
+
+static void
+ktr_cpumask_initializer(void *dummy __unused)
+{
+
+    CPU_FILL(&ktr_cpumask);
+#ifdef KTR_CPUMASK
+    if (cpusetobj_strscan(&ktr_cpumask, KTR_CPUMASK) == -1)
+        CPU_FILL(&ktr_cpumask);
+#endif
+
+    /*
+     * TUNABLE_STR() runs with SI_ORDER_MIDDLE priority, thus it must be
+     * already set, if necessary.
+     */
+    if (ktr_cpumask_str[0] != '\0' &&
+        cpusetobj_strscan(&ktr_cpumask, ktr_cpumask_str) == -1)
+        CPU_FILL(&ktr_cpumask);
+}
+SYSINIT(ktr_cpumask_initializer, SI_SUB_TUNABLES, SI_ORDER_ANY,
+    ktr_cpumask_initializer, NULL);
+
+static int
+sysctl_debug_ktr_cpumask(SYSCTL_HANDLER_ARGS)
+{
+    char lktr_cpumask_str[CPUSETBUFSIZ];
+    cpuset_t imask;
+    int error;
+
+    cpusetobj_strprint(lktr_cpumask_str, &ktr_cpumask);
+    error = sysctl_handle_string(oidp, lktr_cpumask_str,
+        sizeof(lktr_cpumask_str), req);
+    if (error != 0 || req->newptr == NULL)
+        return (error);
+    if (cpusetobj_strscan(&imask, lktr_cpumask_str) == -1)
+        return (EINVAL);
+    CPU_COPY(&imask, &ktr_cpumask);
+
+    return (error);
+}
+SYSCTL_PROC(_debug_ktr, OID_AUTO, cpumask,
+    CTLFLAG_RW | CTLFLAG_MPSAFE | CTLTYPE_STRING, NULL, 0,
+    sysctl_debug_ktr_cpumask, "S",
+    "Bitmask of CPUs on which KTR logging is enabled");
+
 volatile int	ktr_idx = 0;
 struct	ktr_entry ktr_buf[KTR_ENTRIES];

@@ -213,7 +254,7 @@ ktr_tracepoint(u_int mask, const char *file, int line, const char *format,
     if ((ktr_mask & mask) == 0)
         return;
     cpu = KTR_CPU;
-    if (((1 << cpu) & ktr_cpumask) == 0)
+    if (!CPU_ISSET(cpu, &ktr_cpumask))
         return;
 #if defined(KTR_VERBOSE) || defined(KTR_ALQ)
     td = curthread;
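Since debug.ktr.cpumask is now a string-valued sysctl, a userland consumer reads and writes hex-word text rather than an int. A sketch of that, assuming a kernel built with options KTR (error handling trimmed):

```c
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	char mask[256];
	size_t len = sizeof(mask);

	if (sysctlbyname("debug.ktr.cpumask", mask, &len, NULL, 0) == 0)
		printf("tracing CPUs: %s\n", mask);

	/* Trace only CPUs 0 and 2: low word 0x5; high words may be omitted. */
	strlcpy(mask, "5", sizeof(mask));
	return (sysctlbyname("debug.ktr.cpumask", NULL, NULL,
	    mask, strlen(mask) + 1) == 0 ? 0 : 1);
}
```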
diff --git a/sys/kern/kern_pmc.c b/sys/kern/kern_pmc.c
index 7532378..8d9c7c0 100644
--- a/sys/kern/kern_pmc.c
+++ b/sys/kern/kern_pmc.c
@@ -55,7 +55,7 @@ int (*pmc_hook)(struct thread *td, int function, void *arg) = NULL;
 int (*pmc_intr)(int cpu, struct trapframe *tf) = NULL;

 /* Bitmask of CPUs requiring servicing at hardclock time */
-volatile cpumask_t pmc_cpumask;
+volatile cpuset_t pmc_cpumask;

 /*
  * A global count of SS mode PMCs. When non-zero, this means that
@@ -112,7 +112,7 @@ pmc_cpu_is_active(int cpu)
 {
 #ifdef SMP
     return (pmc_cpu_is_present(cpu) &&
-        (hlt_cpus_mask & (1 << cpu)) == 0);
+        !CPU_ISSET(cpu, &hlt_cpus_mask));
 #else
     return (1);
 #endif
@@ -139,7 +139,7 @@ int
 pmc_cpu_is_primary(int cpu)
 {
 #ifdef SMP
-    return ((logical_cpus_mask & (1 << cpu)) == 0);
+    return (!CPU_ISSET(cpu, &logical_cpus_mask));
 #else
     return (1);
 #endif
diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c
index 98bd9c5..01f7777 100644
--- a/sys/kern/kern_racct.c
+++ b/sys/kern/kern_racct.c
@@ -104,8 +104,6 @@ SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
 int racct_types[] = {
     [RACCT_CPU] =
         RACCT_IN_THOUSANDS,
-    [RACCT_FSIZE] =
-        RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
     [RACCT_DATA] =
         RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
     [RACCT_STACK] =
@@ -120,8 +118,6 @@ int racct_types[] = {
         RACCT_RECLAIMABLE | RACCT_DENIABLE,
     [RACCT_NOFILE] =
         RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
-    [RACCT_SBSIZE] =
-        RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
     [RACCT_VMEM] =
         RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
     [RACCT_NPTS] =
@@ -627,7 +623,6 @@ racct_proc_exit(struct proc *p)
     /*
      * XXX: Free this some other way.
      */
-    racct_set(p, RACCT_FSIZE, 0);
     racct_set(p, RACCT_NPTS, 0);
     racct_set(p, RACCT_NTHR, 0);
     racct_set(p, RACCT_RSS, 0);
diff --git a/sys/kern/kern_rctl.c b/sys/kern/kern_rctl.c
index 2d43bdc..3d0a478 100644
--- a/sys/kern/kern_rctl.c
+++ b/sys/kern/kern_rctl.c
@@ -100,7 +100,6 @@ static struct dict subjectnames[] = {
 static struct dict resourcenames[] = {
     { "cpu", RACCT_CPU },
-    { "fsize", RACCT_FSIZE },
     { "data", RACCT_DATA },
     { "stack", RACCT_STACK },
     { "core", RACCT_CORE },
@@ -108,7 +107,6 @@ static struct dict resourcenames[] = {
     { "memlock", RACCT_MEMLOCK },
     { "nproc", RACCT_NPROC },
     { "nofile", RACCT_NOFILE },
-    { "sbsize", RACCT_SBSIZE },
     { "vmem", RACCT_VMEM },
     { "npts", RACCT_NPTS },
     { "swap", RACCT_SWAP },
diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c
index 7f2b4e7..3214e1b 100644
--- a/sys/kern/kern_rmlock.c
+++ b/sys/kern/kern_rmlock.c
@@ -263,7 +263,7 @@ _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
     pc = pcpu_find(curcpu);

     /* Check if we just need to do a proper critical_exit. */
-    if (!(pc->pc_cpumask & rm->rm_writecpus)) {
+    if (!CPU_OVERLAP(&pc->pc_cpumask, &rm->rm_writecpus)) {
         critical_exit();
         return (1);
     }
@@ -325,7 +325,7 @@ _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
     critical_enter();
     pc = pcpu_find(curcpu);
-    rm->rm_writecpus &= ~pc->pc_cpumask;
+    CPU_NAND(&rm->rm_writecpus, &pc->pc_cpumask);
     rm_tracker_add(pc, tracker);
     sched_pin();
     critical_exit();
@@ -366,7 +366,8 @@ _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
      * Fast path to combine two common conditions into a single
      * conditional jump.
      */
-    if (0 == (td->td_owepreempt | (rm->rm_writecpus & pc->pc_cpumask)))
+    if (0 == (td->td_owepreempt |
+        CPU_OVERLAP(&rm->rm_writecpus, &pc->pc_cpumask)))
         return (1);

     /* We do not have a read token and need to acquire one. */
@@ -429,17 +430,17 @@ _rm_wlock(struct rmlock *rm)
 {
     struct rm_priotracker *prio;
     struct turnstile *ts;
-    cpumask_t readcpus;
+    cpuset_t readcpus;

     if (rm->lock_object.lo_flags & RM_SLEEPABLE)
         sx_xlock(&rm->rm_lock_sx);
     else
         mtx_lock(&rm->rm_lock_mtx);

-    if (rm->rm_writecpus != all_cpus) {
+    if (CPU_CMP(&rm->rm_writecpus, &all_cpus)) {
         /* Get all read tokens back */
-
-        readcpus = all_cpus & (all_cpus & ~rm->rm_writecpus);
+        readcpus = all_cpus;
+        CPU_NAND(&readcpus, &rm->rm_writecpus);
         rm->rm_writecpus = all_cpus;

         /*
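The kern_rmlock.c hunks above are representative of the whole merge: every integer bit-twiddle on a cpumask_t has a CPU_*() macro equivalent that still works when the set spans several words. A side-by-side crib of the macros this commit actually uses, with the old single-word idiom in comments (the function and parameter names are placeholders):

```c
#include <sys/param.h>
#include <sys/cpuset.h>

static void
cpuset_idioms(cpuset_t *dst, const cpuset_t *src, int cpu)
{

	CPU_OR(dst, src);		/* was: *dst |= *src        */
	CPU_AND(dst, src);		/* was: *dst &= *src        */
	CPU_NAND(dst, src);		/* was: *dst &= ~*src       */
	CPU_SETOF(cpu, dst);		/* was: *dst = 1 << cpu     */
	if (CPU_OVERLAP(dst, src))	/* was: (*dst & *src) != 0  */
		;
	if (CPU_ISSET(cpu, dst))	/* was: *dst & (1 << cpu)   */
		;
	if (CPU_CMP(dst, src))		/* was: *dst != *src        */
		;
	if (CPU_EMPTY(dst))		/* was: *dst == 0           */
		;
}
```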
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index 001da3d..60e854f 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -233,30 +233,32 @@ print_uptime(void)
     printf("%lds\n", (long)ts.tv_sec);
 }

-static void
-doadump(void)
+int
+doadump(boolean_t textdump)
 {
+    boolean_t coredump;

-    /*
-     * Sometimes people have to call this from the kernel debugger.
-     * (if 'panic' can not dump)
-     * Give them a clue as to why they can't dump.
-     */
-    if (dumper.dumper == NULL) {
-        printf("Cannot dump. Device not defined or unavailable.\n");
-        return;
-    }
+    if (dumping)
+        return (EBUSY);
+    if (dumper.dumper == NULL)
+        return (ENXIO);

     savectx(&dumppcb);
     dumptid = curthread->td_tid;
     dumping++;
+
+    coredump = TRUE;
 #ifdef DDB
-    if (textdump_pending)
+    if (textdump && textdump_pending) {
+        coredump = FALSE;
         textdump_dumpsys(&dumper);
-    else
+    }
 #endif
+    if (coredump)
         dumpsys(&dumper);
+
     dumping--;
+    return (0);
 }

 static int
@@ -425,7 +427,7 @@ kern_reboot(int howto)
         EVENTHANDLER_INVOKE(shutdown_post_sync, howto);

     if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping)
-        doadump();
+        doadump(TRUE);

     /* Now that we're going to really halt the system... */
     EVENTHANDLER_INVOKE(shutdown_final, howto);
@@ -553,11 +555,12 @@ panic(const char *fmt, ...)
         ;            /* nothing */
 #endif

-    bootopt = RB_AUTOBOOT | RB_DUMP;
+    bootopt = RB_AUTOBOOT;
     newpanic = 0;
     if (panicstr)
         bootopt |= RB_NOSYNC;
     else {
+        bootopt |= RB_DUMP;
         panicstr = fmt;
         newpanic = 1;
     }
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index d5b49da..e1861eb 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/posix4.h>
 #include <sys/pioctl.h>
+#include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/sdt.h>
 #include <sys/sbuf.h>
@@ -3173,14 +3174,15 @@ coredump(struct thread *td)
      * if it is larger than the limit.
      */
     limit = (off_t)lim_cur(p, RLIMIT_CORE);
-    PROC_UNLOCK(p);
-    if (limit == 0) {
+    if (limit == 0 || racct_get_available(p, RACCT_CORE) == 0) {
+        PROC_UNLOCK(p);
 #ifdef AUDIT
         audit_proc_coredump(td, name, EFBIG);
 #endif
         free(name, M_TEMP);
         return (EFBIG);
     }
+    PROC_UNLOCK(p);

 restart:
     NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, name, td);
diff --git a/sys/kern/ksched.c b/sys/kern/ksched.c
index 7ee56d5..799b60d 100644
--- a/sys/kern/ksched.c
+++ b/sys/kern/ksched.c
@@ -206,7 +206,7 @@ ksched_setscheduler(struct ksched *ksched,
         if (param->sched_priority >= 0 &&
             param->sched_priority <= (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE)) {
             rtp.type = RTP_PRIO_NORMAL;
-            rtp.prio = p4prio_to_rtpprio(param->sched_priority);
+            rtp.prio = p4prio_to_tsprio(param->sched_priority);
             rtp_to_pri(&rtp, td);
         } else
             e = EINVAL;
diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c
index 38bf37f..2f9a1f6 100644
--- a/sys/kern/link_elf.c
+++ b/sys/kern/link_elf.c
@@ -950,11 +950,11 @@ link_elf_load_file(linker_class_t cls, const char* filename,
     ef->ddbstrcnt = strcnt;
     ef->ddbstrtab = ef->strbase;

+nosyms:
     error = link_elf_link_common_finish(lf);
     if (error != 0)
         goto out;

-nosyms:
     *result = lf;

 out:
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index fef9e25..592bb80 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -156,7 +156,7 @@ static struct runq runq;
 static struct runq runq_pcpu[MAXCPU];
 long runq_length[MAXCPU];

-static cpumask_t idle_cpus_mask;
+static cpuset_t idle_cpus_mask;
 #endif

 struct pcpuidlestat {
@@ -951,7 +951,8 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
     if (td->td_flags & TDF_IDLETD) {
         TD_SET_CAN_RUN(td);
 #ifdef SMP
-        idle_cpus_mask &= ~PCPU_GET(cpumask);
+        /* Spinlock held here, assume no migration. */
+        CPU_NAND(&idle_cpus_mask, PCPU_PTR(cpumask));
 #endif
     } else {
         if (TD_IS_RUNNING(td)) {
@@ -1025,7 +1026,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)

 #ifdef SMP
     if (td->td_flags & TDF_IDLETD)
-        idle_cpus_mask |= PCPU_GET(cpumask);
+        CPU_OR(&idle_cpus_mask, PCPU_PTR(cpumask));
 #endif
     sched_lock.mtx_lock = (uintptr_t)td;
     td->td_oncpu = PCPU_GET(cpuid);
@@ -1054,7 +1055,8 @@ static int
 forward_wakeup(int cpunum)
 {
     struct pcpu *pc;
-    cpumask_t dontuse, id, map, map2, me;
+    cpuset_t dontuse, id, map, map2, me;
+    int iscpuset;

     mtx_assert(&sched_lock, MA_OWNED);

@@ -1071,32 +1073,38 @@ forward_wakeup(int cpunum)
     /*
      * Check the idle mask we received against what we calculated
      * before in the old version.
+     *
+     * Also note that sched_lock is held now, thus no migration is
+     * expected.
      */
     me = PCPU_GET(cpumask);

     /* Don't bother if we should be doing it ourself. */
-    if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum)))
+    if (CPU_OVERLAP(&me, &idle_cpus_mask) &&
+        (cpunum == NOCPU || CPU_ISSET(cpunum, &me)))
         return (0);

-    dontuse = me | stopped_cpus | hlt_cpus_mask;
-    map2 = 0;
+    dontuse = me;
+    CPU_OR(&dontuse, &stopped_cpus);
+    CPU_OR(&dontuse, &hlt_cpus_mask);
+    CPU_ZERO(&map2);
     if (forward_wakeup_use_loop) {
-        SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
+        STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
             id = pc->pc_cpumask;
-            if ((id & dontuse) == 0 &&
+            if (!CPU_OVERLAP(&id, &dontuse) &&
                 pc->pc_curthread == pc->pc_idlethread) {
-                map2 |= id;
+                CPU_OR(&map2, &id);
             }
         }
     }

     if (forward_wakeup_use_mask) {
-        map = 0;
-        map = idle_cpus_mask & ~dontuse;
+        map = idle_cpus_mask;
+        CPU_NAND(&map, &dontuse);

         /* If they are both on, compare and use loop if different. */
         if (forward_wakeup_use_loop) {
-            if (map != map2) {
+            if (CPU_CMP(&map, &map2)) {
                 printf("map != map2, loop method preferred\n");
                 map = map2;
             }
@@ -1108,18 +1116,22 @@ forward_wakeup(int cpunum)
     /* If we only allow a specific CPU, then mask off all the others. */
     if (cpunum != NOCPU) {
         KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum."));
-        map &= (1 << cpunum);
+        iscpuset = CPU_ISSET(cpunum, &map);
+        if (iscpuset == 0)
+            CPU_ZERO(&map);
+        else
+            CPU_SETOF(cpunum, &map);
     }
-    if (map) {
+    if (!CPU_EMPTY(&map)) {
         forward_wakeups_delivered++;
-        SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
+        STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
             id = pc->pc_cpumask;
-            if ((map & id) == 0)
+            if (!CPU_OVERLAP(&map, &id))
                 continue;
             if (cpu_idle_wakeup(pc->pc_cpuid))
-                map &= ~id;
+                CPU_NAND(&map, &id);
         }
-        if (map)
+        if (!CPU_EMPTY(&map))
             ipi_selected(map, IPI_AST);
         return (1);
     }
@@ -1135,7 +1147,7 @@ kick_other_cpu(int pri, int cpuid)
     int cpri;

     pcpu = pcpu_find(cpuid);
-    if (idle_cpus_mask & pcpu->pc_cpumask) {
+    if (CPU_OVERLAP(&idle_cpus_mask, &pcpu->pc_cpumask)) {
         forward_wakeups_delivered++;
         if (!cpu_idle_wakeup(cpuid))
             ipi_cpu(cpuid, IPI_AST);
@@ -1193,6 +1205,7 @@ void
 sched_add(struct thread *td, int flags)
 #ifdef SMP
 {
+    cpuset_t idle, me, tidlemsk;
     struct td_sched *ts;
     int forwarded = 0;
     int cpu;
@@ -1262,11 +1275,20 @@ sched_add(struct thread *td, int flags)
         kick_other_cpu(td->td_priority, cpu);
     } else {
         if (!single_cpu) {
-            cpumask_t me = PCPU_GET(cpumask);
-            cpumask_t idle = idle_cpus_mask & me;
-
-            if (!idle && ((flags & SRQ_INTR) == 0) &&
-                (idle_cpus_mask & ~(hlt_cpus_mask | me)))
+            /*
+             * Thread spinlock is held here, assume no
+             * migration is possible.
+             */
+            me = PCPU_GET(cpumask);
+            idle = idle_cpus_mask;
+            tidlemsk = idle;
+            CPU_AND(&idle, &me);
+            CPU_OR(&me, &hlt_cpus_mask);
+            CPU_NAND(&tidlemsk, &me);
+
+            if (CPU_EMPTY(&idle) && ((flags & SRQ_INTR) == 0) &&
+                !CPU_EMPTY(&tidlemsk))
                 forwarded = forward_wakeup(cpu);
         }
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index ac18e77..05267f3 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -564,7 +564,7 @@ struct cpu_search {

 #define CPUSET_FOREACH(cpu, mask)				\
     for ((cpu) = 0; (cpu) <= mp_maxid; (cpu)++)		\
-        if ((mask) & 1 << (cpu))
+        if (CPU_ISSET(cpu, &mask))

 static __inline int cpu_search(struct cpu_group *cg, struct cpu_search *low,
     struct cpu_search *high, const int match);
@@ -2650,15 +2650,16 @@ static int
 sysctl_kern_sched_topology_spec_internal(struct sbuf *sb, struct cpu_group *cg,
     int indent)
 {
+    char cpusetbuf[CPUSETBUFSIZ];
     int i, first;

     sbuf_printf(sb, "%*s<group level=\"%d\" cache-level=\"%d\">\n", indent,
         "", 1 + indent / 2, cg->cg_level);
-    sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"0x%x\">", indent, "",
-        cg->cg_count, cg->cg_mask);
+    sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"%s\">", indent, "",
+        cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask));
     first = TRUE;
     for (i = 0; i < MAXCPU; i++) {
-        if ((cg->cg_mask & (1 << i)) != 0) {
+        if (CPU_ISSET(i, &cg->cg_mask)) {
             if (!first)
                 sbuf_printf(sb, ", ");
             else
diff --git a/sys/kern/subr_devstat.c b/sys/kern/subr_devstat.c
index 24963d5..eaf6427 100644
--- a/sys/kern/subr_devstat.c
+++ b/sys/kern/subr_devstat.c
@@ -49,8 +49,9 @@ static long devstat_generation = 1;
 static int devstat_version = DEVSTAT_VERSION;
 static int devstat_current_devnumber;
 static struct mtx devstat_mutex;
+MTX_SYSINIT(devstat_mutex, &devstat_mutex, "devstat", MTX_DEF);

-static struct devstatlist device_statq;
+static struct devstatlist device_statq = STAILQ_HEAD_INITIALIZER(device_statq);
 static struct devstat *devstat_alloc(void);
 static void devstat_free(struct devstat *);
 static void devstat_add_entry(struct devstat *ds, const void *dev_name,
@@ -70,13 +71,7 @@ devstat_new_entry(const void *dev_name,
                   devstat_priority priority)
 {
     struct devstat *ds;
-    static int once;

-    if (!once) {
-        STAILQ_INIT(&device_statq);
-        mtx_init(&devstat_mutex, "devstat", NULL, MTX_DEF);
-        once = 1;
-    }
     mtx_assert(&devstat_mutex, MA_NOTOWNED);

     ds = devstat_alloc();
@@ -476,8 +471,9 @@ devstat_alloc(void)

     mtx_assert(&devstat_mutex, MA_NOTOWNED);
     if (!once) {
-        make_dev_credf(MAKEDEV_ETERNAL, &devstat_cdevsw, 0, NULL,
-            UID_ROOT, GID_WHEEL, 0400, DEVSTAT_DEVICE_NAME);
+        make_dev_credf(MAKEDEV_ETERNAL | MAKEDEV_CHECKNAME,
+            &devstat_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0400,
+            DEVSTAT_DEVICE_NAME);
         once = 1;
     }
     spp2 = NULL;
diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c
index 342c5ca..c2f6e99 100644
--- a/sys/kern/subr_kdb.c
+++ b/sys/kern/subr_kdb.c
@@ -244,29 +244,44 @@ kdb_reboot(void)
 #define KEY_CRTLP	16	/* ^P */
 #define KEY_CRTLR	18	/* ^R */

+/* States of the KDB "alternate break sequence" detecting state machine. */
+enum {
+    KDB_ALT_BREAK_SEEN_NONE,
+    KDB_ALT_BREAK_SEEN_CR,
+    KDB_ALT_BREAK_SEEN_CR_TILDE,
+};
+
 int
 kdb_alt_break(int key, int *state)
 {
     int brk;

+    /* All states transition to KDB_ALT_BREAK_SEEN_CR on a CR. */
+    if (key == KEY_CR) {
+        *state = KDB_ALT_BREAK_SEEN_CR;
+        return (0);
+    }
+
     brk = 0;
     switch (*state) {
-    case 0:
-        if (key == KEY_CR)
-            *state = 1;
-        break;
-    case 1:
+    case KDB_ALT_BREAK_SEEN_CR:
+        *state = KDB_ALT_BREAK_SEEN_NONE;
         if (key == KEY_TILDE)
-            *state = 2;
+            *state = KDB_ALT_BREAK_SEEN_CR_TILDE;
         break;
-    case 2:
+    case KDB_ALT_BREAK_SEEN_CR_TILDE:
+        *state = KDB_ALT_BREAK_SEEN_NONE;
         if (key == KEY_CRTLB)
             brk = KDB_REQ_DEBUGGER;
         else if (key == KEY_CRTLP)
             brk = KDB_REQ_PANIC;
         else if (key == KEY_CRTLR)
             brk = KDB_REQ_REBOOT;
-        *state = 0;
+        break;
+    case KDB_ALT_BREAK_SEEN_NONE:
+    default:
+        *state = KDB_ALT_BREAK_SEEN_NONE;
+        break;
     }
     return (brk);
 }
@@ -412,8 +427,9 @@ kdb_thr_ctx(struct thread *thr)
         return (&kdb_pcb);

 #if defined(SMP) && defined(KDB_STOPPEDPCB)
-    SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
-        if (pc->pc_curthread == thr && (stopped_cpus & pc->pc_cpumask))
+    STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
+        if (pc->pc_curthread == thr &&
+            CPU_OVERLAP(&stopped_cpus, &pc->pc_cpumask))
             return (KDB_STOPPEDPCB(pc));
     }
 #endif
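For context, this is how a console driver is expected to drive the reworked state machine (compare the uart(4) input path): keep one int of state per port, initialized to zero, and feed every received character through kdb_alt_break(). A CR now re-arms the machine from any state, so "\r\r~^B" still breaks. The softc field and dispatch below are a sketch, not actual driver code:

```c
static void
xcons_rx_char(struct xcons_softc *sc, int c)
{

	switch (kdb_alt_break(c, &sc->sc_altbrk)) {
	case KDB_REQ_DEBUGGER:		/* CR ~ ^B */
		kdb_enter(KDB_WHY_BREAK, "Break to debugger");
		break;
	case KDB_REQ_PANIC:		/* CR ~ ^P */
		panic("Break to panic");
		break;
	case KDB_REQ_REBOOT:		/* CR ~ ^R */
		kdb_reboot();
		break;
	}
	/* Otherwise: not (yet) a break sequence; process c normally. */
}
```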
diff --git a/sys/kern/subr_msgbuf.c b/sys/kern/subr_msgbuf.c
index 14cd39d..cd9c551 100644
--- a/sys/kern/subr_msgbuf.c
+++ b/sys/kern/subr_msgbuf.c
@@ -31,8 +31,16 @@

 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
 #include <sys/msgbuf.h>

+/*
+ * Maximum number conversion buffer length: uintmax_t in base 2, plus <>
+ * around the priority, and a terminating NUL.
+ */
+#define MAXPRIBUF	(sizeof(intmax_t) * NBBY + 3)
+
 /* Read/write sequence numbers are modulo a multiple of the buffer size. */
 #define SEQMOD(size) ((size) * 16)

@@ -51,6 +59,10 @@ msgbuf_init(struct msgbuf *mbp, void *ptr, int size)
     mbp->msg_seqmod = SEQMOD(size);
     msgbuf_clear(mbp);
     mbp->msg_magic = MSG_MAGIC;
+    mbp->msg_lastpri = -1;
+    mbp->msg_needsnl = 0;
+    bzero(&mbp->msg_lock, sizeof(mbp->msg_lock));
+    mtx_init(&mbp->msg_lock, "msgbuf", NULL, MTX_SPIN);
 }

 /*
@@ -80,6 +92,12 @@ msgbuf_reinit(struct msgbuf *mbp, void *ptr, int size)
         }
         msgbuf_clear(mbp);
     }
+
+    mbp->msg_lastpri = -1;
+    /* Assume that the old message buffer didn't end in a newline. */
+    mbp->msg_needsnl = 1;
+    bzero(&mbp->msg_lock, sizeof(mbp->msg_lock));
+    mtx_init(&mbp->msg_lock, "msgbuf", NULL, MTX_SPIN);
 }

 /*
@@ -110,25 +128,140 @@ msgbuf_getcount(struct msgbuf *mbp)
 }

 /*
- * Append a character to a message buffer. This function can be
- * considered fully reentrant so long as the number of concurrent
- * callers is less than the number of characters in the buffer.
- * However, the message buffer is only guaranteed to be consistent
- * for reading when there are no callers in this function.
+ * Add a character into the message buffer, and update the checksum and
+ * sequence number.
+ *
+ * The caller should hold the message buffer spinlock.
+ */
+static inline void
+msgbuf_do_addchar(struct msgbuf *mbp, u_int *seq, int c)
+{
+    u_int pos;
+
+    /* Make sure we properly wrap the sequence number. */
+    pos = MSGBUF_SEQ_TO_POS(mbp, *seq);
+
+    mbp->msg_cksum += (u_int)c -
+        (u_int)(u_char)mbp->msg_ptr[pos];
+
+    mbp->msg_ptr[pos] = c;
+
+    *seq = MSGBUF_SEQNORM(mbp, *seq + 1);
+}
+
+/*
+ * Append a character to a message buffer.
 */
 void
 msgbuf_addchar(struct msgbuf *mbp, int c)
 {
-    u_int new_seq, pos, seq;
-
-    do {
-        seq = mbp->msg_wseq;
-        new_seq = MSGBUF_SEQNORM(mbp, seq + 1);
-    } while (atomic_cmpset_rel_int(&mbp->msg_wseq, seq, new_seq) == 0);
-    pos = MSGBUF_SEQ_TO_POS(mbp, seq);
-    atomic_add_int(&mbp->msg_cksum, (u_int)(u_char)c -
-        (u_int)(u_char)mbp->msg_ptr[pos]);
-    mbp->msg_ptr[pos] = c;
+    mtx_lock_spin(&mbp->msg_lock);
+
+    msgbuf_do_addchar(mbp, &mbp->msg_wseq, c);
+
+    mtx_unlock_spin(&mbp->msg_lock);
+}
+
+/*
+ * Append a NUL-terminated string with a priority to a message buffer.
+ * Filter carriage returns if the caller requests it.
+ *
+ * XXX The carriage return filtering behavior is present in the
+ * msglogchar() API, however testing has shown that we don't seem to send
+ * carriage returns down this path. So do we still need it?
+ */
+void
+msgbuf_addstr(struct msgbuf *mbp, int pri, char *str, int filter_cr)
+{
+    u_int seq;
+    size_t len, prefix_len;
+    char prefix[MAXPRIBUF];
+    int nl, i;
+
+    len = strlen(str);
+    prefix_len = 0;
+    nl = 0;
+
+    /* If we have a zero-length string, no need to do anything. */
+    if (len == 0)
+        return;
+
+    mtx_lock_spin(&mbp->msg_lock);
+
+    /*
+     * If this is true, we may need to insert a new priority sequence,
+     * so prepare the prefix.
+     */
+    if (pri != -1)
+        prefix_len = sprintf(prefix, "<%d>", pri);
+
+    /*
+     * Starting write sequence number.
+     */
+    seq = mbp->msg_wseq;
+
+    /*
+     * Whenever there is a change in priority, we have to insert a
+     * newline, and a priority prefix if the priority is not -1. Here
+     * we detect whether there was a priority change, and whether we
+     * did not end with a newline. If that is the case, we need to
+     * insert a newline before this string.
+     */
+    if (mbp->msg_lastpri != pri && mbp->msg_needsnl != 0) {
+
+        msgbuf_do_addchar(mbp, &seq, '\n');
+        mbp->msg_needsnl = 0;
+    }
+
+    for (i = 0; i < len; i++) {
+        /*
+         * If we just had a newline, and the priority is not -1
+         * (and therefore prefix_len != 0), then we need a priority
+         * prefix for this line.
+         */
+        if (mbp->msg_needsnl == 0 && prefix_len != 0) {
+            int j;
+
+            for (j = 0; j < prefix_len; j++)
+                msgbuf_do_addchar(mbp, &seq, prefix[j]);
+        }
+
+        /*
+         * Don't copy carriage returns if the caller requested
+         * filtering.
+         *
+         * XXX This matches the behavior of msglogchar(), but is it
+         * necessary? Testing has shown that we don't seem to get
+         * carriage returns here.
+         */
+        if ((filter_cr != 0) && (str[i] == '\r'))
+            continue;
+
+        /*
+         * Clear this flag if we see a newline. This affects whether
+         * we need to insert a new prefix or insert a newline later.
+         */
+        if (str[i] == '\n')
+            mbp->msg_needsnl = 0;
+        else
+            mbp->msg_needsnl = 1;
+
+        msgbuf_do_addchar(mbp, &seq, str[i]);
+    }
+    /*
+     * Update the write sequence number for the actual number of
+     * characters we put in the message buffer. (Depends on whether
+     * carriage returns are filtered.)
+     */
+    mbp->msg_wseq = seq;
+
+    /*
+     * Set the last priority.
+     */
+    mbp->msg_lastpri = pri;
+
+    mtx_unlock_spin(&mbp->msg_lock);
+}

 /*
@@ -141,14 +274,21 @@ msgbuf_getchar(struct msgbuf *mbp)
     u_int len, wseq;
     int c;

+    mtx_lock_spin(&mbp->msg_lock);
+
     wseq = mbp->msg_wseq;
     len = MSGBUF_SEQSUB(mbp, wseq, mbp->msg_rseq);
-    if (len == 0)
+    if (len == 0) {
+        mtx_unlock_spin(&mbp->msg_lock);
         return (-1);
+    }
     if (len > mbp->msg_size)
         mbp->msg_rseq = MSGBUF_SEQNORM(mbp, wseq - mbp->msg_size);
     c = (u_char)mbp->msg_ptr[MSGBUF_SEQ_TO_POS(mbp, mbp->msg_rseq)];
     mbp->msg_rseq = MSGBUF_SEQNORM(mbp, mbp->msg_rseq + 1);
+
+    mtx_unlock_spin(&mbp->msg_lock);
+
     return (c);
 }

@@ -161,10 +301,14 @@ msgbuf_getbytes(struct msgbuf *mbp, char *buf, int buflen)
 {
     u_int len, pos, wseq;

+    mtx_lock_spin(&mbp->msg_lock);
+
     wseq = mbp->msg_wseq;
     len = MSGBUF_SEQSUB(mbp, wseq, mbp->msg_rseq);
-    if (len == 0)
+    if (len == 0) {
+        mtx_unlock_spin(&mbp->msg_lock);
         return (0);
+    }
     if (len > mbp->msg_size) {
         mbp->msg_rseq = MSGBUF_SEQNORM(mbp, wseq - mbp->msg_size);
         len = mbp->msg_size;
@@ -175,6 +319,9 @@ msgbuf_getbytes(struct msgbuf *mbp, char *buf, int buflen)
     bcopy(&mbp->msg_ptr[pos], buf, len);
     mbp->msg_rseq = MSGBUF_SEQNORM(mbp, mbp->msg_rseq + len);
+
+    mtx_unlock_spin(&mbp->msg_lock);
+
     return (len);
 }

@@ -193,16 +340,21 @@ msgbuf_peekbytes(struct msgbuf *mbp, char *buf, int buflen, u_int *seqp)
 {
     u_int len, pos, wseq;

+    mtx_lock_spin(&mbp->msg_lock);
+
     if (buf == NULL) {
         /* Just initialise *seqp. */
         *seqp = MSGBUF_SEQNORM(mbp, mbp->msg_wseq - mbp->msg_size);
+        mtx_unlock_spin(&mbp->msg_lock);
         return (0);
     }

     wseq = mbp->msg_wseq;
     len = MSGBUF_SEQSUB(mbp, wseq, *seqp);
-    if (len == 0)
+    if (len == 0) {
+        mtx_unlock_spin(&mbp->msg_lock);
         return (0);
+    }
     if (len > mbp->msg_size) {
         *seqp = MSGBUF_SEQNORM(mbp, wseq - mbp->msg_size);
         len = mbp->msg_size;
@@ -212,6 +364,9 @@ msgbuf_peekbytes(struct msgbuf *mbp, char *buf, int buflen, u_int *seqp)
     len = min(len, (u_int)buflen);
     bcopy(&mbp->msg_ptr[MSGBUF_SEQ_TO_POS(mbp, *seqp)], buf, len);
     *seqp = MSGBUF_SEQNORM(mbp, *seqp + len);
+
+    mtx_unlock_spin(&mbp->msg_lock);
+
     return (len);
 }
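The interesting invariant in msgbuf_do_addchar() is the incremental checksum: overwriting a ring slot adds the new byte and subtracts the displaced one, so msg_cksum always equals the sum of the bytes currently live in the ring. A standalone toy model (plain C, no kernel headers) demonstrating just that property:

```c
/*
 * Toy model of the msgbuf incremental checksum. Unsigned arithmetic
 * wraps harmlessly, exactly as in the kernel version.
 */
#include <assert.h>

int
main(void)
{
	unsigned char ring[4] = { 0 };
	unsigned int cksum = 0, seq = 0;
	const char *msg = "hello!";

	for (const char *p = msg; *p != '\0'; p++) {
		unsigned int pos = seq % sizeof(ring);

		cksum += (unsigned int)*p - ring[pos];	/* new - displaced */
		ring[pos] = (unsigned char)*p;
		seq++;
	}
	/* "o!" overwrote "he": the live bytes are "llo!". */
	assert(cksum == (unsigned int)('l' + 'l' + 'o' + '!'));
	return (0);
}
```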
diff --git a/sys/kern/subr_pcpu.c b/sys/kern/subr_pcpu.c
index de5cafc..a6b3ae0 100644
--- a/sys/kern/subr_pcpu.c
+++ b/sys/kern/subr_pcpu.c
@@ -74,7 +74,7 @@ static TAILQ_HEAD(, dpcpu_free) dpcpu_head = TAILQ_HEAD_INITIALIZER(dpcpu_head);
 static struct sx dpcpu_lock;
 uintptr_t dpcpu_off[MAXCPU];
 struct pcpu *cpuid_to_pcpu[MAXCPU];
-struct cpuhead cpuhead = SLIST_HEAD_INITIALIZER(cpuhead);
+struct cpuhead cpuhead = STAILQ_HEAD_INITIALIZER(cpuhead);

 /*
  * Initialize the MI portions of a struct pcpu.
 */
@@ -87,9 +87,9 @@ pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
     KASSERT(cpuid >= 0 && cpuid < MAXCPU,
         ("pcpu_init: invalid cpuid %d", cpuid));
     pcpu->pc_cpuid = cpuid;
-    pcpu->pc_cpumask = 1 << cpuid;
+    CPU_SETOF(cpuid, &pcpu->pc_cpumask);
     cpuid_to_pcpu[cpuid] = pcpu;
-    SLIST_INSERT_HEAD(&cpuhead, pcpu, pc_allcpu);
+    STAILQ_INSERT_TAIL(&cpuhead, pcpu, pc_allcpu);
     cpu_pcpu_init(pcpu, cpuid, size);
     pcpu->pc_rm_queue.rmq_next = &pcpu->pc_rm_queue;
     pcpu->pc_rm_queue.rmq_prev = &pcpu->pc_rm_queue;
@@ -245,7 +245,7 @@ void
 pcpu_destroy(struct pcpu *pcpu)
 {

-    SLIST_REMOVE(&cpuhead, pcpu, pcpu, pc_allcpu);
+    STAILQ_REMOVE(&cpuhead, pcpu, pcpu, pc_allcpu);
     cpuid_to_pcpu[pcpu->pc_cpuid] = NULL;
     dpcpu_off[pcpu->pc_cpuid] = 0;
 }
diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c
index d0d2ad7..48f2dd9 100644
--- a/sys/kern/subr_prf.c
+++ b/sys/kern/subr_prf.c
@@ -94,6 +94,7 @@ struct snprintf_arg {
 extern int log_open;

 static void msglogchar(int c, int pri);
+static void msglogstr(char *str, int pri, int filter_cr);
 static void putchar(int ch, void *arg);
 static char *ksprintn(char *nbuf, uintmax_t num, int base, int *len, int upper);
 static void snprintf_func(int ch, void *arg);
@@ -106,6 +107,14 @@ TUNABLE_INT("kern.log_console_output", &log_console_output);
 SYSCTL_INT(_kern, OID_AUTO, log_console_output, CTLFLAG_RW,
     &log_console_output, 0, "Duplicate console output to the syslog.");

+/*
+ * See the comment in log_console() below for more explanation of this.
+ */
+static int log_console_add_linefeed = 0;
+TUNABLE_INT("kern.log_console_add_linefeed", &log_console_add_linefeed);
+SYSCTL_INT(_kern, OID_AUTO, log_console_add_linefeed, CTLFLAG_RW,
+    &log_console_add_linefeed, 0, "log_console() adds extra newlines.");
+
 static int always_console_output = 0;
 TUNABLE_INT("kern.always_console_output", &always_console_output);
 SYSCTL_INT(_kern, OID_AUTO, always_console_output, CTLFLAG_RW,
@@ -154,6 +163,7 @@ uprintf(const char *fmt, ...)
         goto out;
     }
     pca.flags = TOTTY;
+    pca.p_bufr = NULL;
     va_start(ap, fmt);
     tty_lock(pca.tty);
     retval = kvprintf(fmt, putchar, &pca, 10, ap);
@@ -197,6 +207,7 @@ tprintf(struct proc *p, int pri, const char *fmt, ...)
     pca.pri = pri;
     pca.tty = tp;
     pca.flags = flags;
+    pca.p_bufr = NULL;
     va_start(ap, fmt);
     if (pca.tty != NULL)
         tty_lock(pca.tty);
@@ -225,6 +236,7 @@ ttyprintf(struct tty *tp, const char *fmt, ...)
     va_start(ap, fmt);
     pca.tty = tp;
     pca.flags = TOTTY;
+    pca.p_bufr = NULL;
     retval = kvprintf(fmt, putchar, &pca, 10, ap);
     va_end(ap);
     return (retval);
@@ -240,16 +252,37 @@ log(int level, const char *fmt, ...)
 {
     va_list ap;
     struct putchar_arg pca;
+#ifdef PRINTF_BUFR_SIZE
+    char bufr[PRINTF_BUFR_SIZE];
+#endif

     pca.tty = NULL;
     pca.pri = level;
     pca.flags = log_open ? TOLOG : TOCONS;
+#ifdef PRINTF_BUFR_SIZE
+    pca.p_bufr = bufr;
+    pca.p_next = pca.p_bufr;
+    pca.n_bufr = sizeof(bufr);
+    pca.remain = sizeof(bufr);
+    *pca.p_next = '\0';
+#else
     pca.p_bufr = NULL;
+#endif

     va_start(ap, fmt);
     kvprintf(fmt, putchar, &pca, 10, ap);
     va_end(ap);

+#ifdef PRINTF_BUFR_SIZE
+    /* Write any buffered console/log output: */
+    if (*pca.p_bufr != '\0') {
+        if (pca.flags & TOLOG)
+            msglogstr(pca.p_bufr, level, /*filter_cr*/1);
+
+        if (pca.flags & TOCONS)
+            cnputs(pca.p_bufr);
+    }
+#endif
     msgbuftrigger = 1;
 }

@@ -258,7 +291,7 @@ log(int level, const char *fmt, ...)
 void
 log_console(struct uio *uio)
 {
-    int c, i, error, nl;
+    int c, error, nl;
     char *consbuffer;
     int pri;

@@ -271,20 +304,48 @@ log_console(struct uio *uio)
     nl = 0;
     while (uio->uio_resid > 0) {
-        c = imin(uio->uio_resid, CONSCHUNK);
+        c = imin(uio->uio_resid, CONSCHUNK - 1);
         error = uiomove(consbuffer, c, uio);
         if (error != 0)
             break;
-        for (i = 0; i < c; i++) {
-            msglogchar(consbuffer[i], pri);
-            if (consbuffer[i] == '\n')
-                nl = 1;
-            else
-                nl = 0;
-        }
+        /* Make sure we're NUL-terminated */
+        consbuffer[c] = '\0';
+        if (consbuffer[c - 1] == '\n')
+            nl = 1;
+        else
+            nl = 0;
+        msglogstr(consbuffer, pri, /*filter_cr*/ 1);
+    }
+    /*
+     * The previous behavior in log_console() is preserved when
+     * log_console_add_linefeed is non-zero. For that behavior, if an
+     * individual console write came in that was not terminated with a
+     * line feed, it would add a line feed.
+     *
+     * This results in different data in the message buffer than
+     * appears on the system console (which doesn't add extra line feed
+     * characters).
+     *
+     * A number of programs and rc scripts write a line feed, or a period
+     * and a line feed when they have completed their operation. On
+     * the console, this looks seamless, but when displayed with
+     * 'dmesg -a', you wind up with output that looks like this:
+     *
+     * Updating motd:
+     * .
+     *
+     * On the console, it looks like this:
+     * Updating motd:.
+     *
+     * We could add logic to detect that situation, or just not insert
+     * the extra newlines. Set the kern.log_console_add_linefeed
+     * sysctl/tunable variable to get the old behavior.
+     */
+    if (!nl && log_console_add_linefeed) {
+        consbuffer[0] = '\n';
+        consbuffer[1] = '\0';
+        msglogstr(consbuffer, pri, /*filter_cr*/ 1);
     }
-    if (!nl)
-        msglogchar('\n', pri);
     msgbuftrigger = 1;
     free(uio, M_IOV);
     free(consbuffer, M_TEMP);
@@ -330,9 +391,11 @@ vprintf(const char *fmt, va_list ap)
     retval = kvprintf(fmt, putchar, &pca, 10, ap);

 #ifdef PRINTF_BUFR_SIZE
-    /* Write any buffered console output: */
-    if (*pca.p_bufr != '\0')
+    /* Write any buffered console/log output: */
+    if (*pca.p_bufr != '\0') {
         cnputs(pca.p_bufr);
+        msglogstr(pca.p_bufr, pca.pri, /*filter_cr*/ 1);
+    }
 #endif

     if (!panicstr)
@@ -342,18 +405,18 @@ vprintf(const char *fmt, va_list ap)
 }

 static void
-putcons(int c, struct putchar_arg *ap)
+putbuf(int c, struct putchar_arg *ap)
 {
     /* Check if no console output buffer was provided. */
-    if (ap->p_bufr == NULL)
+    if (ap->p_bufr == NULL) {
         /* Output direct to the console. */
-        cnputc(c);
-    else {
+        if (ap->flags & TOCONS)
+            cnputc(c);
+
+        if (ap->flags & TOLOG)
+            msglogchar(c, ap->pri);
+    } else {
         /* Buffer the character: */
-        if (c == '\n') {
-            *ap->p_next++ = '\r';
-            ap->remain--;
-        }
         *ap->p_next++ = c;
         ap->remain--;
@@ -361,12 +424,35 @@
         *ap->p_next = '\0';

         /* Check if the buffer needs to be flushed. */
-        if (ap->remain < 3 || c == '\n') {
-            cnputs(ap->p_bufr);
+        if (ap->remain == 2 || c == '\n') {
+
+            if (ap->flags & TOLOG)
+                msglogstr(ap->p_bufr, ap->pri, /*filter_cr*/1);
+
+            if (ap->flags & TOCONS) {
+                if ((panicstr == NULL) && (constty != NULL))
+                    msgbuf_addstr(&consmsgbuf, -1,
+                        ap->p_bufr, /*filter_cr*/ 0);
+
+                if ((constty == NULL) ||(always_console_output))
+                    cnputs(ap->p_bufr);
+            }
+
             ap->p_next = ap->p_bufr;
             ap->remain = ap->n_bufr;
             *ap->p_next = '\0';
         }
+
+        /*
+         * Since we fill the buffer up one character at a time,
+         * this should not happen. We should always catch it when
+         * ap->remain == 2 (if not sooner due to a newline), flush
+         * the buffer and move on. One way this could happen is
+         * if someone sets PRINTF_BUFR_SIZE to 1 or something
+         * similarly silly.
+         */
+        KASSERT(ap->remain > 2, ("Bad buffer logic, remain = %zd",
+            ap->remain));
     }
 }
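putbuf() flushes on a newline or when remain drops to 2, which reserves room for one more character plus the terminating NUL that is rewritten after every insertion. A userland toy model of just that flush rule, with cnputs()/msglogstr() replaced by fputs():

```c
#include <stdio.h>

#define BUFSZ 8		/* stand-in for PRINTF_BUFR_SIZE */

static char buf[BUFSZ], *next = buf;
static size_t remain = BUFSZ;

static void
model_putbuf(int c)
{
	*next++ = (char)c;
	remain--;
	*next = '\0';	/* buffer stays NUL-terminated at all times */
	if (remain == 2 || c == '\n') {
		fputs(buf, stdout);	/* flush: cnputs()/msglogstr() */
		next = buf;
		remain = BUFSZ;
		*next = '\0';
	}
}

int
main(void)
{
	const char *s = "line one\nlong chunk without newline";

	while (*s != '\0')
		model_putbuf(*s++);
	/* A final partial chunk is flushed by the caller, as in log(). */
	fputs(buf, stdout);
	return (0);
}
```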
@@ -381,26 +467,25 @@ putchar(int c, void *arg)
     struct putchar_arg *ap = (struct putchar_arg*) arg;
     struct tty *tp = ap->tty;
     int flags = ap->flags;
+    int putbuf_done = 0;

     /* Don't use the tty code after a panic or while in ddb. */
     if (kdb_active) {
         if (c != '\0')
             cnputc(c);
-    } else if (panicstr || ((flags & TOCONS) && constty == NULL)) {
-        if (c != '\0')
-            putcons(c, ap);
     } else {
-        if ((flags & TOTTY) && tp != NULL)
+        if ((panicstr == NULL) && (flags & TOTTY) && (tp != NULL))
             tty_putchar(tp, c);
+
         if (flags & TOCONS) {
-            if (constty != NULL)
-                msgbuf_addchar(&consmsgbuf, c);
-            if (always_console_output && c != '\0')
-                putcons(c, ap);
+            putbuf(c, ap);
+            putbuf_done = 1;
         }
     }
-    if ((flags & TOLOG))
-        msglogchar(c, ap->pri);
+    if ((flags & TOLOG) && (putbuf_done == 0)) {
+        if (c != '\0')
+            putbuf(c, ap);
+    }
 }

 /*
@@ -890,6 +975,15 @@ msglogchar(int c, int pri)
     }
 }

+static void
+msglogstr(char *str, int pri, int filter_cr)
+{
+    if (!msgbufmapped)
+        return;
+
+    msgbuf_addstr(msgbufp, pri, str, filter_cr);
+}
+
 void
 msgbufinit(void *ptr, int size)
 {
diff --git a/sys/kern/subr_rman.c b/sys/kern/subr_rman.c
index 3014b19..abd72c0 100644
--- a/sys/kern/subr_rman.c
+++ b/sys/kern/subr_rman.c
@@ -839,6 +839,7 @@ int_rman_release_resource(struct rman *rm, struct resource_i *r)
      * without freeing anything.
      */
     r->r_flags &= ~RF_ALLOCATED;
+    r->r_dev = NULL;
     return 0;
 }
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index 67774d8..c38177b 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -53,15 +53,15 @@ __FBSDID("$FreeBSD$");
 #include "opt_sched.h"

 #ifdef SMP
-volatile cpumask_t stopped_cpus;
-volatile cpumask_t started_cpus;
-cpumask_t hlt_cpus_mask;
-cpumask_t logical_cpus_mask;
+volatile cpuset_t stopped_cpus;
+volatile cpuset_t started_cpus;
+cpuset_t hlt_cpus_mask;
+cpuset_t logical_cpus_mask;

 void (*cpustop_restartfunc)(void);
 #endif

 /* This is used in modules that need to work in both SMP and UP. */
-cpumask_t all_cpus;
+cpuset_t all_cpus;

 int mp_ncpus;
 /* export this for libkvm consumers. */
@@ -200,8 +200,11 @@ forward_signal(struct thread *td)
  *
  */
 static int
-generic_stop_cpus(cpumask_t map, u_int type)
+generic_stop_cpus(cpuset_t map, u_int type)
 {
+#ifdef KTR
+    char cpusetbuf[CPUSETBUFSIZ];
+#endif
     static volatile u_int stopping_cpu = NOCPU;
     int i;

@@ -216,7 +219,8 @@ generic_stop_cpus(cpumask_t map, u_int type)
     if (!smp_started)
         return (0);

-    CTR2(KTR_SMP, "stop_cpus(%x) with %u type", map, type);
+    CTR2(KTR_SMP, "stop_cpus(%s) with %u type",
+        cpusetobj_strprint(cpusetbuf, &map), type);

     if (stopping_cpu != PCPU_GET(cpuid))
         while (atomic_cmpset_int(&stopping_cpu, NOCPU,
@@ -228,7 +232,7 @@ generic_stop_cpus(cpumask_t map, u_int type)
     ipi_selected(map, type);

     i = 0;
-    while ((stopped_cpus & map) != map) {
+    while (!CPU_SUBSET(&stopped_cpus, &map)) {
         /* spin */
         cpu_spinwait();
         i++;
@@ -245,14 +249,14 @@ generic_stop_cpus(cpumask_t map, u_int type)
 }

 int
-stop_cpus(cpumask_t map)
+stop_cpus(cpuset_t map)
 {

     return (generic_stop_cpus(map, IPI_STOP));
 }

 int
-stop_cpus_hard(cpumask_t map)
+stop_cpus_hard(cpuset_t map)
 {

     return (generic_stop_cpus(map, IPI_STOP_HARD));
@@ -260,7 +264,7 @@ stop_cpus_hard(cpumask_t map)

 #if defined(__amd64__)
 int
-suspend_cpus(cpumask_t map)
+suspend_cpus(cpuset_t map)
 {

     return (generic_stop_cpus(map, IPI_SUSPEND));
@@ -281,19 +285,22 @@ suspend_cpus(cpumask_t map)
  *  1: ok
 */
 int
-restart_cpus(cpumask_t map)
+restart_cpus(cpuset_t map)
 {
+#ifdef KTR
+    char cpusetbuf[CPUSETBUFSIZ];
+#endif

     if (!smp_started)
         return 0;

-    CTR1(KTR_SMP, "restart_cpus(%x)", map);
+    CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));

     /* signal other cpus to restart */
-    atomic_store_rel_int(&started_cpus, map);
+    CPU_COPY_STORE_REL(&map, &started_cpus);

     /* wait for each to clear its bit */
-    while ((stopped_cpus & map) != 0)
+    while (CPU_OVERLAP(&stopped_cpus, &map))
         cpu_spinwait();

     return 1;
@@ -348,11 +355,11 @@ smp_rendezvous_action(void)
      * cannot use a regular critical section however as having
      * critical_exit() preempt from this routine would also be
      * problematic (the preemption must not occur before the IPI
-     * has been acknowleged via an EOI). Instead, we
+     * has been acknowledged via an EOI). Instead, we
      * intentionally ignore td_owepreempt when leaving the
-     * critical setion. This should be harmless because we do not
-     * permit rendezvous action routines to schedule threads, and
-     * thus td_owepreempt should never transition from 0 to 1
+     * critical section. This should be harmless because we do
+     * not permit rendezvous action routines to schedule threads,
+     * and thus td_owepreempt should never transition from 0 to 1
      * during this routine.
      */
     td = curthread;
@@ -409,13 +416,13 @@ smp_rendezvous_action(void)
 }

 void
-smp_rendezvous_cpus(cpumask_t map,
+smp_rendezvous_cpus(cpuset_t map,
     void (* setup_func)(void *),
     void (* action_func)(void *),
     void (* teardown_func)(void *),
     void *arg)
 {
-    int i, ncpus = 0;
+    int curcpumap, i, ncpus = 0;

     if (!smp_started) {
         if (setup_func != NULL)
@@ -428,11 +435,11 @@ smp_rendezvous_cpus(cpumask_t map,
     }

     CPU_FOREACH(i) {
-        if (((1 << i) & map) != 0)
+        if (CPU_ISSET(i, &map))
             ncpus++;
     }
     if (ncpus == 0)
-        panic("ncpus is 0 with map=0x%x", map);
+        panic("ncpus is 0 with non-zero map");

     mtx_lock_spin(&smp_ipi_mtx);

@@ -452,10 +459,12 @@ smp_rendezvous_cpus(cpumask_t map,
     /*
      * Signal other processors, which will enter the IPI with
      * interrupts off.
     */
-    ipi_selected(map & ~(1 << curcpu), IPI_RENDEZVOUS);
+    curcpumap = CPU_ISSET(curcpu, &map);
+    CPU_CLR(curcpu, &map);
+    ipi_selected(map, IPI_RENDEZVOUS);

     /* Check if the current CPU is in the map */
-    if ((map & (1 << curcpu)) != 0)
+    if (curcpumap != 0)
         smp_rendezvous_action();

     /*
@@ -484,6 +493,7 @@ static struct cpu_group group[MAXCPU];
 struct cpu_group *
 smp_topo(void)
 {
+    char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
     struct cpu_group *top;

     /*
@@ -530,9 +540,10 @@ smp_topo(void)
     if (top->cg_count != mp_ncpus)
         panic("Built bad topology at %p. CPU count %d != %d",
             top, top->cg_count, mp_ncpus);
-    if (top->cg_mask != all_cpus)
-        panic("Built bad topology at %p. CPU mask 0x%X != 0x%X",
-            top, top->cg_mask, all_cpus);
+    if (CPU_CMP(&top->cg_mask, &all_cpus))
+        panic("Built bad topology at %p. CPU mask (%s) != (%s)",
+            top, cpusetobj_strprint(cpusetbuf, &top->cg_mask),
+            cpusetobj_strprint(cpusetbuf2, &all_cpus));
     return (top);
 }

@@ -557,11 +568,13 @@ static int
 smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share,
     int count, int flags, int start)
 {
-    cpumask_t mask;
+    char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
+    cpuset_t mask;
     int i;

-    for (mask = 0, i = 0; i < count; i++, start++)
-        mask |= (1 << start);
+    CPU_ZERO(&mask);
+    for (i = 0; i < count; i++, start++)
+        CPU_SET(start, &mask);
     child->cg_parent = parent;
     child->cg_child = NULL;
     child->cg_children = 0;
@@ -571,10 +584,12 @@ smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share,
     child->cg_mask = mask;
     parent->cg_children++;
     for (; parent != NULL; parent = parent->cg_parent) {
-        if ((parent->cg_mask & child->cg_mask) != 0)
-            panic("Duplicate children in %p. mask 0x%X child 0x%X",
-                parent, parent->cg_mask, child->cg_mask);
-        parent->cg_mask |= child->cg_mask;
+        if (CPU_OVERLAP(&parent->cg_mask, &child->cg_mask))
+            panic("Duplicate children in %p. mask (%s) child (%s)",
+                parent,
+                cpusetobj_strprint(cpusetbuf, &parent->cg_mask),
+                cpusetobj_strprint(cpusetbuf2, &child->cg_mask));
+        CPU_OR(&parent->cg_mask, &child->cg_mask);
         parent->cg_count += child->cg_count;
     }

@@ -634,20 +649,20 @@ struct cpu_group *
 smp_topo_find(struct cpu_group *top, int cpu)
 {
     struct cpu_group *cg;
-    cpumask_t mask;
+    cpuset_t mask;
     int children;
     int i;

-    mask = (1 << cpu);
+    CPU_SETOF(cpu, &mask);
     cg = top;
     for (;;) {
-        if ((cg->cg_mask & mask) == 0)
+        if (!CPU_OVERLAP(&cg->cg_mask, &mask))
             return (NULL);
         if (cg->cg_children == 0)
             return (cg);
         children = cg->cg_children;
         for (i = 0, cg = cg->cg_child; i < children; cg++, i++)
-            if ((cg->cg_mask & mask) != 0)
+            if (CPU_OVERLAP(&cg->cg_mask, &mask))
                 break;
     }
     return (NULL);
@@ -655,7 +670,7 @@ smp_topo_find(struct cpu_group *top, int cpu)
 #else /* !SMP */

 void
-smp_rendezvous_cpus(cpumask_t map,
+smp_rendezvous_cpus(cpuset_t map,
     void (*setup_func)(void *),
     void (*action_func)(void *),
     void (*teardown_func)(void *),
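A kernel-context sketch of calling the retyped smp_rendezvous_cpus(): the map is now a cpuset_t built with CPU_*() macros and passed by value. do_invl() is an invented action routine, and NULL setup/teardown functions are assumed to be tolerated, as the !smp_started path above suggests:

```c
#include <sys/param.h>
#include <sys/cpuset.h>
#include <sys/smp.h>

static void
do_invl(void *arg __unused)
{
	/* Per-CPU work; runs with interrupts disabled on each target. */
}

static void
invl_on(int cpu_a, int cpu_b)
{
	cpuset_t map;

	CPU_ZERO(&map);
	CPU_SET(cpu_a, &map);
	CPU_SET(cpu_b, &map);
	smp_rendezvous_cpus(map, NULL, do_invl, NULL, NULL);
}
```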
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index a7f280a..ee36b35 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -829,10 +829,22 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)

     case PT_ATTACH:
         /* security check done above */
+        /*
+         * It would be nice if the tracing relationship was separate
+         * from the parent relationship but that would require
+         * another set of links in the proc struct or for "wait"
+         * to scan the entire proc table. To make life easier,
+         * we just re-parent the process we're trying to trace.
+         * The old parent is remembered so we can put things back
+         * on a "detach".
+         */
         p->p_flag |= P_TRACED;
         p->p_oppid = p->p_pptr->p_pid;
-        if (p->p_pptr != td->td_proc)
+        if (p->p_pptr != td->td_proc) {
+            /* Remember that a child is being debugged(traced). */
+            p->p_pptr->p_dbg_child++;
             proc_reparent(p, td->td_proc);
+        }
         data = SIGSTOP;
         goto sendsig;	/* in PT_CONTINUE below */

@@ -919,11 +931,12 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
             PROC_UNLOCK(pp);
             PROC_LOCK(p);
             proc_reparent(p, pp);
+            p->p_pptr->p_dbg_child--;
             if (pp == initproc)
                 p->p_sigparent = SIGCHLD;
         }
-        p->p_flag &= ~(P_TRACED | P_WAITED | P_FOLLOWFORK);
         p->p_oppid = 0;
+        p->p_flag &= ~(P_TRACED | P_WAITED | P_FOLLOWFORK);

         /* should we send SIGCHLD? */
         /* childproc_continued(p); */
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 54a050f..3334fc2 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -1845,10 +1845,16 @@ dontblock:
         }
         SBLASTRECORDCHK(&so->so_rcv);
         SBLASTMBUFCHK(&so->so_rcv);
-        error = sbwait(&so->so_rcv);
-        if (error) {
-            SOCKBUF_UNLOCK(&so->so_rcv);
-            goto release;
+        /*
+         * We could receive some data while we were notifying
+         * the protocol. Skip blocking in this case.
+         */
+        if (so->so_rcv.sb_mb == NULL) {
+            error = sbwait(&so->so_rcv);
+            if (error) {
+                SOCKBUF_UNLOCK(&so->so_rcv);
+                goto release;
+            }
         }
         m = so->so_rcv.sb_mb;
         if (m != NULL)
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index a4bbdba..19aaee0 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -747,6 +747,10 @@ kern_sendit(td, s, mp, flags, control, segflg)
         return (error);
     so = (struct socket *)fp->f_data;

+#ifdef KTRACE
+    if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
+        ktrsockaddr(mp->msg_name);
+#endif
 #ifdef MAC
     if (mp->msg_name != NULL) {
         error = mac_socket_check_connect(td->td_ucred, so,
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 00681ca..2743089 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -3999,10 +3999,11 @@ DB_SHOW_COMMAND(buffer, db_show_buffer)
     db_printf("b_flags = 0x%b\n", (u_int)bp->b_flags, PRINT_BUF_FLAGS);
     db_printf(
         "b_error = %d, b_bufsize = %ld, b_bcount = %ld, b_resid = %ld\n"
-        "b_bufobj = (%p), b_data = %p, b_blkno = %jd, b_dep = %p\n",
+        "b_bufobj = (%p), b_data = %p, b_blkno = %jd, b_lblkno = %jd, "
+        "b_dep = %p\n",
         bp->b_error, bp->b_bufsize, bp->b_bcount, bp->b_resid,
         bp->b_bufobj, bp->b_data, (intmax_t)bp->b_blkno,
-        bp->b_dep.lh_first);
+        (intmax_t)bp->b_lblkno, bp->b_dep.lh_first);
     if (bp->b_npages) {
         int i;
         db_printf("b_npages = %d, pages(OBJ, IDX, PA): ", bp->b_npages);