diff options
author | gjb <gjb@FreeBSD.org> | 2015-05-01 18:07:18 +0000 |
---|---|---|
committer | gjb <gjb@FreeBSD.org> | 2015-05-01 18:07:18 +0000 |
commit | 55e5b36d38ff95ded004ed97fe9ed303abb36184 (patch) | |
tree | ca54c9ba1af0ca2eb29e395b069252993a4e1ada /sys/kern | |
parent | dfb432f9b57e5f768243247ed35fbe38bb9c40f1 (diff) | |
parent | fecee76e5a26a3b0a0cac30b34c660389e57579e (diff) | |
download | FreeBSD-src-55e5b36d38ff95ded004ed97fe9ed303abb36184.zip FreeBSD-src-55e5b36d38ff95ded004ed97fe9ed303abb36184.tar.gz |
MFH: r281855-r282312
Sponsored by: The FreeBSD Foundation
Diffstat (limited to 'sys/kern')
-rw-r--r-- | sys/kern/imgact_elf.c | 14 | ||||
-rw-r--r-- | sys/kern/kern_descrip.c | 38 | ||||
-rw-r--r-- | sys/kern/kern_exec.c | 2 | ||||
-rw-r--r-- | sys/kern/kern_exit.c | 8 | ||||
-rw-r--r-- | sys/kern/kern_gzio.c | 3 | ||||
-rw-r--r-- | sys/kern/kern_intr.c | 5 | ||||
-rw-r--r-- | sys/kern/kern_jail.c | 22 | ||||
-rw-r--r-- | sys/kern/kern_proc.c | 2 | ||||
-rw-r--r-- | sys/kern/kern_racct.c | 98 | ||||
-rw-r--r-- | sys/kern/kern_rctl.c | 69 | ||||
-rw-r--r-- | sys/kern/kern_shutdown.c | 3 | ||||
-rw-r--r-- | sys/kern/kern_synch.c | 9 | ||||
-rw-r--r-- | sys/kern/kern_thr.c | 26 | ||||
-rw-r--r-- | sys/kern/link_elf.c | 2 | ||||
-rw-r--r-- | sys/kern/link_elf_obj.c | 2 | ||||
-rw-r--r-- | sys/kern/sched_4bsd.c | 2 | ||||
-rw-r--r-- | sys/kern/subr_dnvlist.c | 166 | ||||
-rw-r--r-- | sys/kern/subr_nvlist.c | 807 | ||||
-rw-r--r-- | sys/kern/subr_nvpair.c | 379 | ||||
-rw-r--r-- | sys/kern/subr_param.c | 4 | ||||
-rw-r--r-- | sys/kern/subr_trap.c | 22 | ||||
-rw-r--r-- | sys/kern/sysv_msg.c | 42 | ||||
-rw-r--r-- | sys/kern/sysv_sem.c | 25 | ||||
-rw-r--r-- | sys/kern/sysv_shm.c | 80 | ||||
-rw-r--r-- | sys/kern/uipc_shm.c | 2 | ||||
-rw-r--r-- | sys/kern/vfs_aio.c | 206 | ||||
-rw-r--r-- | sys/kern/vfs_bio.c | 107 |
27 files changed, 722 insertions, 1423 deletions
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 39e4df3..3ff3440 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -1238,12 +1238,14 @@ __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) coresize = round_page(hdrsize + notesz) + seginfo.size; #ifdef RACCT - PROC_LOCK(td->td_proc); - error = racct_add(td->td_proc, RACCT_CORE, coresize); - PROC_UNLOCK(td->td_proc); - if (error != 0) { - error = EFAULT; - goto done; + if (racct_enable) { + PROC_LOCK(td->td_proc); + error = racct_add(td->td_proc, RACCT_CORE, coresize); + PROC_UNLOCK(td->td_proc); + if (error != 0) { + error = EFAULT; + goto done; + } } #endif if (coresize >= limit) { diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 5290957..329f418 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -857,13 +857,15 @@ do_dup(struct thread *td, int flags, int old, int new) * the limit on the size of the file descriptor table. */ #ifdef RACCT - PROC_LOCK(p); - error = racct_set(p, RACCT_NOFILE, new + 1); - PROC_UNLOCK(p); - if (error != 0) { - FILEDESC_XUNLOCK(fdp); - fdrop(fp, td); - return (EMFILE); + if (racct_enable) { + PROC_LOCK(p); + error = racct_set(p, RACCT_NOFILE, new + 1); + PROC_UNLOCK(p); + if (error != 0) { + FILEDESC_XUNLOCK(fdp); + fdrop(fp, td); + return (EMFILE); + } } #endif fdgrowtable_exp(fdp, new + 1); @@ -1609,7 +1611,7 @@ fdalloc(struct thread *td, int minfd, int *result) { struct proc *p = td->td_proc; struct filedesc *fdp = p->p_fd; - int fd = -1, maxfd, allocfd; + int fd, maxfd, allocfd; #ifdef RACCT int error; #endif @@ -1631,11 +1633,13 @@ fdalloc(struct thread *td, int minfd, int *result) if (fd >= fdp->fd_nfiles) { allocfd = min(fd * 2, maxfd); #ifdef RACCT - PROC_LOCK(p); - error = racct_set(p, RACCT_NOFILE, allocfd); - PROC_UNLOCK(p); - if (error != 0) - return (EMFILE); + if (racct_enable) { + PROC_LOCK(p); + error = racct_set(p, RACCT_NOFILE, allocfd); + PROC_UNLOCK(p); + if (error != 0) + 
return (EMFILE); + } #endif /* * fd is already equal to first free descriptor >= minfd, so @@ -2042,9 +2046,11 @@ fdescfree(struct thread *td) MPASS(fdp != NULL); #ifdef RACCT - PROC_LOCK(td->td_proc); - racct_set(td->td_proc, RACCT_NOFILE, 0); - PROC_UNLOCK(td->td_proc); + if (racct_enable) { + PROC_LOCK(td->td_proc); + racct_set(td->td_proc, RACCT_NOFILE, 0); + PROC_UNLOCK(td->td_proc); + } #endif if (td->td_proc->p_fdtol != NULL) diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index ecc2651..9d893f8 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1060,7 +1060,7 @@ exec_new_vmspace(imgp, sv) /* Allocate a new stack */ if (imgp->stack_sz != 0) { - ssiz = imgp->stack_sz; + ssiz = trunc_page(imgp->stack_sz); PROC_LOCK(p); lim_rlimit(p, RLIMIT_STACK, &rlim_stack); PROC_UNLOCK(p); diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 4d23fc0..0a601a1 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -907,9 +907,11 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options) * Destroy resource accounting information associated with the process. */ #ifdef RACCT - PROC_LOCK(p); - racct_sub(p, RACCT_NPROC, 1); - PROC_UNLOCK(p); + if (racct_enable) { + PROC_LOCK(p); + racct_sub(p, RACCT_NPROC, 1); + PROC_UNLOCK(p); + } #endif racct_proc_exit(p); diff --git a/sys/kern/kern_gzio.c b/sys/kern/kern_gzio.c index a4974a7..cee21f0 100644 --- a/sys/kern/kern_gzio.c +++ b/sys/kern/kern_gzio.c @@ -32,8 +32,7 @@ __FBSDID("$FreeBSD$"); #include <sys/gzio.h> #include <sys/kernel.h> #include <sys/malloc.h> - -#include <net/zutil.h> +#include <sys/zutil.h> #define KERN_GZ_HDRLEN 10 /* gzip header length */ #define KERN_GZ_TRAILERLEN 8 /* gzip trailer length */ diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c index 7a5d936..a84019a 100644 --- a/sys/kern/kern_intr.c +++ b/sys/kern/kern_intr.c @@ -1455,12 +1455,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame) /* Schedule the ithread if needed. 
*/ if (thread) { error = intr_event_schedule_thread(ie); -#ifndef XEN KASSERT(error == 0, ("bad stray interrupt")); -#else - if (error != 0) - log(LOG_WARNING, "bad stray interrupt"); -#endif } critical_exit(); td->td_intr_nesting_level--; diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index 5b23129..c87c4e2 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -1778,7 +1778,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) mtx_unlock(&pr->pr_mtx); #ifdef RACCT - if (created) + if (racct_enable && created) prison_racct_attach(pr); #endif @@ -1862,7 +1862,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) } #ifdef RACCT - if (!created) { + if (racct_enable && !created) { if (!(flags & JAIL_ATTACH)) sx_sunlock(&allprison_lock); prison_racct_modify(pr); @@ -2652,7 +2652,8 @@ prison_deref(struct prison *pr, int flags) cpuset_rel(pr->pr_cpuset); osd_jail_exit(pr); #ifdef RACCT - prison_racct_detach(pr); + if (racct_enable) + prison_racct_detach(pr); #endif free(pr, M_PRISON); @@ -4460,12 +4461,15 @@ SYSCTL_JAIL_PARAM(_allow_mount, tmpfs, CTLTYPE_INT | CTLFLAG_RW, SYSCTL_JAIL_PARAM(_allow_mount, zfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the zfs file system"); +#ifdef RACCT void prison_racct_foreach(void (*callback)(struct racct *racct, void *arg2, void *arg3), void *arg2, void *arg3) { struct prison_racct *prr; + ASSERT_RACCT_ENABLED(); + sx_slock(&allprison_lock); LIST_FOREACH(prr, &allprison_racct, prr_next) (callback)(prr->prr_racct, arg2, arg3); @@ -4477,6 +4481,7 @@ prison_racct_find_locked(const char *name) { struct prison_racct *prr; + ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_XLOCKED); if (name[0] == '\0' || strlen(name) >= MAXHOSTNAMELEN) @@ -4507,6 +4512,8 @@ prison_racct_find(const char *name) { struct prison_racct *prr; + ASSERT_RACCT_ENABLED(); + sx_xlock(&allprison_lock); prr = prison_racct_find_locked(name); sx_xunlock(&allprison_lock); @@ -4517,6 +4524,8 @@ void 
prison_racct_hold(struct prison_racct *prr) { + ASSERT_RACCT_ENABLED(); + refcount_acquire(&prr->prr_refcount); } @@ -4524,6 +4533,7 @@ static void prison_racct_free_locked(struct prison_racct *prr) { + ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_XLOCKED); if (refcount_release(&prr->prr_refcount)) { @@ -4538,6 +4548,7 @@ prison_racct_free(struct prison_racct *prr) { int old; + ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_UNLOCKED); old = prr->prr_refcount; @@ -4549,12 +4560,12 @@ prison_racct_free(struct prison_racct *prr) sx_xunlock(&allprison_lock); } -#ifdef RACCT static void prison_racct_attach(struct prison *pr) { struct prison_racct *prr; + ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_XLOCKED); prr = prison_racct_find_locked(pr->pr_name); @@ -4574,6 +4585,8 @@ prison_racct_modify(struct prison *pr) struct ucred *cred; struct prison_racct *oldprr; + ASSERT_RACCT_ENABLED(); + sx_slock(&allproc_lock); sx_xlock(&allprison_lock); @@ -4613,6 +4626,7 @@ static void prison_racct_detach(struct prison *pr) { + ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_UNLOCKED); if (pr->pr_prison_racct == NULL) diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 505521d..6618c08 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -2822,7 +2822,7 @@ static SYSCTL_NODE(_kern_proc, KERN_PROC_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE sysctl_kern_proc, "Return process table, no threads"); static SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args, - CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, + CTLFLAG_RW | CTLFLAG_CAPWR | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_args, "Process argument list"); static SYSCTL_NODE(_kern_proc, KERN_PROC_ENV, env, CTLFLAG_RD | CTLFLAG_MPSAFE, diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c index 05becea..5cee140 100644 --- a/sys/kern/kern_racct.c +++ b/sys/kern/kern_racct.c @@ -70,8 +70,15 @@ FEATURE(racct, "Resource Accounting"); * Do not block processes that have their %cpu usage 
<= pcpu_threshold. */ static int pcpu_threshold = 1; +#ifdef RACCT_DISABLED +int racct_enable = 0; +#else +int racct_enable = 1; +#endif SYSCTL_NODE(_kern, OID_AUTO, racct, CTLFLAG_RW, 0, "Resource Accounting"); +SYSCTL_UINT(_kern_racct, OID_AUTO, enable, CTLFLAG_RDTUN, &racct_enable, + 0, "Enable RACCT/RCTL"); SYSCTL_UINT(_kern_racct, OID_AUTO, pcpu_threshold, CTLFLAG_RW, &pcpu_threshold, 0, "Processes with higher %cpu usage than this value can be throttled."); @@ -313,6 +320,8 @@ racct_getpcpu(struct proc *p, u_int pcpu) fixpt_t p_pctcpu; struct thread *td; + ASSERT_RACCT_ENABLED(); + /* * If the process is swapped out, we count its %cpu usage as zero. * This behaviour is consistent with the userland ps(1) tool. @@ -377,6 +386,7 @@ racct_add_racct(struct racct *dest, const struct racct *src) { int i; + ASSERT_RACCT_ENABLED(); mtx_assert(&racct_lock, MA_OWNED); /* @@ -398,6 +408,7 @@ racct_sub_racct(struct racct *dest, const struct racct *src) { int i; + ASSERT_RACCT_ENABLED(); mtx_assert(&racct_lock, MA_OWNED); /* @@ -431,6 +442,9 @@ void racct_create(struct racct **racctp) { + if (!racct_enable) + return; + SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0); KASSERT(*racctp == NULL, ("racct already allocated")); @@ -444,6 +458,8 @@ racct_destroy_locked(struct racct **racctp) int i; struct racct *racct; + ASSERT_RACCT_ENABLED(); + SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0); mtx_assert(&racct_lock, MA_OWNED); @@ -470,6 +486,9 @@ void racct_destroy(struct racct **racct) { + if (!racct_enable) + return; + mtx_lock(&racct_lock); racct_destroy_locked(racct); mtx_unlock(&racct_lock); @@ -485,6 +504,7 @@ racct_alloc_resource(struct racct *racct, int resource, uint64_t amount) { + ASSERT_RACCT_ENABLED(); mtx_assert(&racct_lock, MA_OWNED); KASSERT(racct != NULL, ("NULL racct")); @@ -516,6 +536,8 @@ racct_add_locked(struct proc *p, int resource, uint64_t amount) int error; #endif + ASSERT_RACCT_ENABLED(); + SDT_PROBE(racct, kernel, rusage, add, 
p, resource, amount, 0, 0); /* @@ -546,6 +568,9 @@ racct_add(struct proc *p, int resource, uint64_t amount) { int error; + if (!racct_enable) + return (0); + mtx_lock(&racct_lock); error = racct_add_locked(p, resource, amount); mtx_unlock(&racct_lock); @@ -557,6 +582,8 @@ racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount) { struct prison *pr; + ASSERT_RACCT_ENABLED(); + SDT_PROBE(racct, kernel, rusage, add__cred, cred, resource, amount, 0, 0); @@ -577,6 +604,9 @@ void racct_add_cred(struct ucred *cred, int resource, uint64_t amount) { + if (!racct_enable) + return; + mtx_lock(&racct_lock); racct_add_cred_locked(cred, resource, amount); mtx_unlock(&racct_lock); @@ -590,6 +620,9 @@ void racct_add_force(struct proc *p, int resource, uint64_t amount) { + if (!racct_enable) + return; + SDT_PROBE(racct, kernel, rusage, add__force, p, resource, amount, 0, 0); /* @@ -612,6 +645,8 @@ racct_set_locked(struct proc *p, int resource, uint64_t amount) int error; #endif + ASSERT_RACCT_ENABLED(); + SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); /* @@ -671,6 +706,9 @@ racct_set(struct proc *p, int resource, uint64_t amount) { int error; + if (!racct_enable) + return (0); + mtx_lock(&racct_lock); error = racct_set_locked(p, resource, amount); mtx_unlock(&racct_lock); @@ -683,6 +721,8 @@ racct_set_force_locked(struct proc *p, int resource, uint64_t amount) int64_t old_amount, decayed_amount; int64_t diff_proc, diff_cred; + ASSERT_RACCT_ENABLED(); + SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0); /* @@ -717,6 +757,10 @@ racct_set_force_locked(struct proc *p, int resource, uint64_t amount) void racct_set_force(struct proc *p, int resource, uint64_t amount) { + + if (!racct_enable) + return; + mtx_lock(&racct_lock); racct_set_force_locked(p, resource, amount); mtx_unlock(&racct_lock); @@ -732,6 +776,9 @@ uint64_t racct_get_limit(struct proc *p, int resource) { + if (!racct_enable) + return (UINT64_MAX); + #ifdef RCTL return 
(rctl_get_limit(p, resource)); #else @@ -749,6 +796,9 @@ uint64_t racct_get_available(struct proc *p, int resource) { + if (!racct_enable) + return (UINT64_MAX); + #ifdef RCTL return (rctl_get_available(p, resource)); #else @@ -765,6 +815,8 @@ static int64_t racct_pcpu_available(struct proc *p) { + ASSERT_RACCT_ENABLED(); + #ifdef RCTL return (rctl_pcpu_available(p)); #else @@ -779,6 +831,9 @@ void racct_sub(struct proc *p, int resource, uint64_t amount) { + if (!racct_enable) + return; + SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0); /* @@ -804,6 +859,8 @@ racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount) { struct prison *pr; + ASSERT_RACCT_ENABLED(); + SDT_PROBE(racct, kernel, rusage, sub__cred, cred, resource, amount, 0, 0); @@ -827,6 +884,9 @@ void racct_sub_cred(struct ucred *cred, int resource, uint64_t amount) { + if (!racct_enable) + return; + mtx_lock(&racct_lock); racct_sub_cred_locked(cred, resource, amount); mtx_unlock(&racct_lock); @@ -840,6 +900,9 @@ racct_proc_fork(struct proc *parent, struct proc *child) { int i, error = 0; + if (!racct_enable) + return (0); + /* * Create racct for the child process. */ @@ -896,6 +959,9 @@ racct_proc_fork_done(struct proc *child) { #ifdef RCTL + if (!racct_enable) + return; + PROC_LOCK(child); mtx_lock(&racct_lock); rctl_enforce(child, RACCT_NPROC, 0); @@ -913,6 +979,9 @@ racct_proc_exit(struct proc *p) struct timeval wallclock; uint64_t pct_estimate, pct; + if (!racct_enable) + return; + PROC_LOCK(p); /* * We don't need to calculate rux, proc_reap() has already done this. 
@@ -967,6 +1036,9 @@ racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred, struct loginclass *oldlc, *newlc; struct prison *oldpr, *newpr, *pr; + if (!racct_enable) + return; + PROC_LOCK_ASSERT(p, MA_NOTOWNED); newuip = newcred->cr_ruidinfo; @@ -1004,6 +1076,8 @@ void racct_move(struct racct *dest, struct racct *src) { + ASSERT_RACCT_ENABLED(); + mtx_lock(&racct_lock); racct_add_racct(dest, src); @@ -1020,6 +1094,7 @@ racct_proc_throttle(struct proc *p) int cpuid; #endif + ASSERT_RACCT_ENABLED(); PROC_LOCK_ASSERT(p, MA_OWNED); /* @@ -1065,6 +1140,9 @@ racct_proc_throttle(struct proc *p) static void racct_proc_wakeup(struct proc *p) { + + ASSERT_RACCT_ENABLED(); + PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_throttled) { @@ -1079,6 +1157,8 @@ racct_decay_resource(struct racct *racct, void * res, void* dummy) int resource; int64_t r_old, r_new; + ASSERT_RACCT_ENABLED(); + resource = *(int *)res; r_old = racct->r_resources[resource]; @@ -1095,6 +1175,9 @@ racct_decay_resource(struct racct *racct, void * res, void* dummy) static void racct_decay(int resource) { + + ASSERT_RACCT_ENABLED(); + ui_racct_foreach(racct_decay_resource, &resource, NULL); loginclass_racct_foreach(racct_decay_resource, &resource, NULL); prison_racct_foreach(racct_decay_resource, &resource, NULL); @@ -1109,6 +1192,8 @@ racctd(void) uint64_t runtime; uint64_t pct, pct_estimate; + ASSERT_RACCT_ENABLED(); + for (;;) { racct_decay(RACCT_PCTCPU); @@ -1188,11 +1273,22 @@ static struct kproc_desc racctd_kp = { racctd, NULL }; -SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp); + +static void +racctd_init(void) +{ + if (!racct_enable) + return; + + kproc_start(&racctd_kp); +} +SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, racctd_init, NULL); static void racct_init(void) { + if (!racct_enable) + return; racct_zone = uma_zcreate("racct", sizeof(struct racct), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); diff --git a/sys/kern/kern_rctl.c b/sys/kern/kern_rctl.c index 
934327a..c43b83d 100644 --- a/sys/kern/kern_rctl.c +++ b/sys/kern/kern_rctl.c @@ -225,6 +225,7 @@ rctl_available_resource(const struct proc *p, const struct rctl_rule *rule) int64_t available = INT64_MAX; struct ucred *cred = p->p_ucred; + ASSERT_RACCT_ENABLED(); rw_assert(&rctl_lock, RA_LOCKED); resource = rule->rr_resource; @@ -264,6 +265,8 @@ rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule, { int64_t available; + ASSERT_RACCT_ENABLED(); + rw_assert(&rctl_lock, RA_LOCKED); available = rctl_available_resource(p, rule); @@ -283,6 +286,8 @@ rctl_pcpu_available(const struct proc *p) { struct rctl_rule_link *link; int64_t available, minavailable, limit; + ASSERT_RACCT_ENABLED(); + minavailable = INT64_MAX; limit = 0; @@ -334,6 +339,8 @@ rctl_enforce(struct proc *p, int resource, uint64_t amount) static int curtime = 0; static struct timeval lasttime; + ASSERT_RACCT_ENABLED(); + rw_rlock(&rctl_lock); /* @@ -457,6 +464,8 @@ rctl_get_limit(struct proc *p, int resource) struct rctl_rule_link *link; uint64_t amount = UINT64_MAX; + ASSERT_RACCT_ENABLED(); + rw_rlock(&rctl_lock); /* @@ -487,6 +496,8 @@ rctl_get_available(struct proc *p, int resource) minavailable = INT64_MAX; + ASSERT_RACCT_ENABLED(); + rw_rlock(&rctl_lock); /* @@ -521,6 +532,8 @@ static int rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter) { + ASSERT_RACCT_ENABLED(); + if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) { if (rule->rr_subject_type != filter->rr_subject_type) return (0); @@ -635,6 +648,7 @@ rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule) { struct rctl_rule_link *link; + ASSERT_RACCT_ENABLED(); KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); rctl_rule_acquire(rule); @@ -652,6 +666,7 @@ rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule) { struct rctl_rule_link *link; + ASSERT_RACCT_ENABLED(); KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); 
rw_assert(&rctl_lock, RA_WLOCKED); @@ -678,6 +693,7 @@ rctl_racct_remove_rules(struct racct *racct, int removed = 0; struct rctl_rule_link *link, *linktmp; + ASSERT_RACCT_ENABLED(); rw_assert(&rctl_lock, RA_WLOCKED); LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) { @@ -696,6 +712,8 @@ static void rctl_rule_acquire_subject(struct rctl_rule *rule) { + ASSERT_RACCT_ENABLED(); + switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_UNDEFINED: case RCTL_SUBJECT_TYPE_PROCESS: @@ -722,6 +740,8 @@ static void rctl_rule_release_subject(struct rctl_rule *rule) { + ASSERT_RACCT_ENABLED(); + switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_UNDEFINED: case RCTL_SUBJECT_TYPE_PROCESS: @@ -749,6 +769,8 @@ rctl_rule_alloc(int flags) { struct rctl_rule *rule; + ASSERT_RACCT_ENABLED(); + rule = uma_zalloc(rctl_rule_zone, flags); if (rule == NULL) return (NULL); @@ -771,6 +793,8 @@ rctl_rule_duplicate(const struct rctl_rule *rule, int flags) { struct rctl_rule *copy; + ASSERT_RACCT_ENABLED(); + copy = uma_zalloc(rctl_rule_zone, flags); if (copy == NULL) return (NULL); @@ -793,6 +817,7 @@ void rctl_rule_acquire(struct rctl_rule *rule) { + ASSERT_RACCT_ENABLED(); KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); refcount_acquire(&rule->rr_refcount); @@ -805,6 +830,7 @@ rctl_rule_free(void *context, int pending) rule = (struct rctl_rule *)context; + ASSERT_RACCT_ENABLED(); KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0")); /* @@ -819,6 +845,7 @@ void rctl_rule_release(struct rctl_rule *rule) { + ASSERT_RACCT_ENABLED(); KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); if (refcount_release(&rule->rr_refcount)) { @@ -838,6 +865,8 @@ static int rctl_rule_fully_specified(const struct rctl_rule *rule) { + ASSERT_RACCT_ENABLED(); + switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_UNDEFINED: return (0); @@ -882,6 +911,8 @@ rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep) struct rctl_rule *rule; id_t id; + 
ASSERT_RACCT_ENABLED(); + rule = rctl_rule_alloc(M_WAITOK); subjectstr = strsep(&rulestr, ":"); @@ -1008,6 +1039,7 @@ rctl_rule_add(struct rctl_rule *rule) struct rctl_rule *rule2; int match; + ASSERT_RACCT_ENABLED(); KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); /* @@ -1118,6 +1150,8 @@ rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3) struct rctl_rule *filter = (struct rctl_rule *)arg2; int found = 0; + ASSERT_RACCT_ENABLED(); + rw_wlock(&rctl_lock); found += rctl_racct_remove_rules(racct, filter); rw_wunlock(&rctl_lock); @@ -1134,6 +1168,8 @@ rctl_rule_remove(struct rctl_rule *filter) int found = 0; struct proc *p; + ASSERT_RACCT_ENABLED(); + if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS && filter->rr_subject.rs_proc != NULL) { p = filter->rr_subject.rs_proc; @@ -1172,6 +1208,8 @@ rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule) { int64_t amount; + ASSERT_RACCT_ENABLED(); + sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type)); switch (rule->rr_subject_type) { @@ -1231,6 +1269,8 @@ rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen) int error; char *str; + ASSERT_RACCT_ENABLED(); + if (inbuflen <= 0) return (EINVAL); if (inbuflen > RCTL_MAX_INBUFLEN) @@ -1256,6 +1296,8 @@ rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen) { int error; + ASSERT_RACCT_ENABLED(); + if (outputsbuf == NULL) return (0); @@ -1277,6 +1319,8 @@ rctl_racct_to_sbuf(struct racct *racct, int sloppy) int64_t amount; struct sbuf *sb; + ASSERT_RACCT_ENABLED(); + sb = sbuf_new_auto(); for (i = 0; i <= RACCT_MAX; i++) { if (sloppy == 0 && RACCT_IS_SLOPPY(i)) @@ -1302,6 +1346,9 @@ sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) struct loginclass *lc; struct prison_racct *prr; + if (!racct_enable) + return (ENOSYS); + error = priv_check(td, PRIV_RCTL_GET_RACCT); if (error != 0) return (error); @@ -1372,6 +1419,8 @@ rctl_get_rules_callback(struct 
racct *racct, void *arg2, void *arg3) struct rctl_rule_link *link; struct sbuf *sb = (struct sbuf *)arg3; + ASSERT_RACCT_ENABLED(); + rw_rlock(&rctl_lock); LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { if (!rctl_rule_matches(link->rrl_rule, filter)) @@ -1393,6 +1442,9 @@ sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) struct rctl_rule_link *link; struct proc *p; + if (!racct_enable) + return (ENOSYS); + error = priv_check(td, PRIV_RCTL_GET_RULES); if (error != 0) return (error); @@ -1467,6 +1519,9 @@ sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) struct rctl_rule *filter; struct rctl_rule_link *link; + if (!racct_enable) + return (ENOSYS); + error = priv_check(td, PRIV_RCTL_GET_LIMITS); if (error != 0) return (error); @@ -1538,6 +1593,9 @@ sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) struct rctl_rule *rule; char *inputstr; + if (!racct_enable) + return (ENOSYS); + error = priv_check(td, PRIV_RCTL_ADD_RULE); if (error != 0) return (error); @@ -1580,6 +1638,9 @@ sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) struct rctl_rule *filter; char *inputstr; + if (!racct_enable) + return (ENOSYS); + error = priv_check(td, PRIV_RCTL_REMOVE_RULE); if (error != 0) return (error); @@ -1616,6 +1677,8 @@ rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred) struct prison_racct *newprr; LIST_HEAD(, rctl_rule_link) newrules; + ASSERT_RACCT_ENABLED(); + newuip = newcred->cr_ruidinfo; newlc = newcred->cr_loginclass; newprr = newcred->cr_prison->pr_prison_racct; @@ -1756,6 +1819,7 @@ rctl_proc_fork(struct proc *parent, struct proc *child) LIST_INIT(&child->p_racct->r_rule_links); + ASSERT_RACCT_ENABLED(); KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent)); rw_wlock(&rctl_lock); @@ -1809,6 +1873,8 @@ rctl_racct_release(struct racct *racct) { struct rctl_rule_link *link; + ASSERT_RACCT_ENABLED(); + rw_wlock(&rctl_lock); while 
(!LIST_EMPTY(&racct->r_rule_links)) { link = LIST_FIRST(&racct->r_rule_links); @@ -1823,6 +1889,9 @@ static void rctl_init(void) { + if (!racct_enable) + return; + rctl_rule_link_zone = uma_zcreate("rctl_rule_link", sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index e547c5f..998ee15 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -154,7 +154,6 @@ static void poweroff_wait(void *, int); static void shutdown_halt(void *junk, int howto); static void shutdown_panic(void *junk, int howto); static void shutdown_reset(void *junk, int howto); -static void vpanic(const char *fmt, va_list ap) __dead2; /* register various local shutdown events */ static void @@ -676,7 +675,7 @@ panic(const char *fmt, ...) vpanic(fmt, ap); } -static void +void vpanic(const char *fmt, va_list ap) { #ifdef SMP diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index ad10dc7..a238a09 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -66,12 +66,6 @@ __FBSDID("$FreeBSD$"); #include <machine/cpu.h> -#ifdef XEN -#include <vm/vm.h> -#include <vm/vm_param.h> -#include <vm/pmap.h> -#endif - #define KTDSTATE(td) \ (((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \ ((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? 
"suspended" : \ @@ -475,9 +469,6 @@ mi_switch(int flags, struct thread *newtd) "lockname:\"%s\"", td->td_lockname); #endif SDT_PROBE0(sched, , , preempt); -#ifdef XEN - PT_UPDATES_FLUSH(); -#endif sched_switch(td, newtd, flags); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running", "prio:%d", td->td_priority); diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c index 280bc0b..6911bb97 100644 --- a/sys/kern/kern_thr.c +++ b/sys/kern/kern_thr.c @@ -187,11 +187,13 @@ create_thread(struct thread *td, mcontext_t *ctx, } #ifdef RACCT - PROC_LOCK(td->td_proc); - error = racct_add(p, RACCT_NTHR, 1); - PROC_UNLOCK(td->td_proc); - if (error != 0) - return (EPROCLIM); + if (racct_enable) { + PROC_LOCK(p); + error = racct_add(p, RACCT_NTHR, 1); + PROC_UNLOCK(p); + if (error != 0) + return (EPROCLIM); + } #endif /* Initialize our td */ @@ -250,9 +252,9 @@ create_thread(struct thread *td, mcontext_t *ctx, } } - PROC_LOCK(td->td_proc); - td->td_proc->p_flag |= P_HADTHREADS; - thread_link(newtd, p); + PROC_LOCK(p); + p->p_flag |= P_HADTHREADS; + thread_link(newtd, p); bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name)); thread_lock(td); /* let the scheduler know about these things. 
*/ @@ -280,9 +282,11 @@ create_thread(struct thread *td, mcontext_t *ctx, fail: #ifdef RACCT - PROC_LOCK(p); - racct_sub(p, RACCT_NTHR, 1); - PROC_UNLOCK(p); + if (racct_enable) { + PROC_LOCK(p); + racct_sub(p, RACCT_NTHR, 1); + PROC_UNLOCK(p); + } #endif return (error); } diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c index 65c8276..26be035 100644 --- a/sys/kern/link_elf.c +++ b/sys/kern/link_elf.c @@ -66,7 +66,7 @@ __FBSDID("$FreeBSD$"); #include <sys/link_elf.h> #ifdef DDB_CTF -#include <net/zlib.h> +#include <sys/zlib.h> #endif #include "linker_if.h" diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c index 5c107fe..021381d 100644 --- a/sys/kern/link_elf_obj.c +++ b/sys/kern/link_elf_obj.c @@ -60,7 +60,7 @@ __FBSDID("$FreeBSD$"); #include <sys/link_elf.h> #ifdef DDB_CTF -#include <net/zlib.h> +#include <sys/zlib.h> #endif #include "linker_if.h" diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c index 3e39d55..59bd387 100644 --- a/sys/kern/sched_4bsd.c +++ b/sys/kern/sched_4bsd.c @@ -1585,7 +1585,7 @@ sched_pctcpu(struct thread *td) return (ts->ts_pctcpu); } -#ifdef RACCT +#ifdef RACCT /* * Calculates the contribution to the thread cpu usage for the latest * (unfinished) second. diff --git a/sys/kern/subr_dnvlist.c b/sys/kern/subr_dnvlist.c index fe17684..9058520 100644 --- a/sys/kern/subr_dnvlist.c +++ b/sys/kern/subr_dnvlist.c @@ -89,89 +89,6 @@ dnvlist_get_binary(const nvlist_t *nvl, const char *name, size_t *sizep, return (value); } -#ifndef _KERNEL -#define DNVLIST_GETF(ftype, type) \ -ftype \ -dnvlist_getf_##type(const nvlist_t *nvl, ftype defval, \ - const char *namefmt, ...) 
\ -{ \ - va_list nameap; \ - ftype value; \ - \ - va_start(nameap, namefmt); \ - value = dnvlist_getv_##type(nvl, defval, namefmt, nameap); \ - va_end(nameap); \ - \ - return (value); \ -} - -DNVLIST_GETF(bool, bool) -DNVLIST_GETF(uint64_t, number) -DNVLIST_GETF(const char *, string) -DNVLIST_GETF(const nvlist_t *, nvlist) -DNVLIST_GETF(int, descriptor) - -#undef DNVLIST_GETF - -const void * -dnvlist_getf_binary(const nvlist_t *nvl, size_t *sizep, const void *defval, - size_t defsize, const char *namefmt, ...) -{ - va_list nameap; - const void *value; - - va_start(nameap, namefmt); - value = dnvlist_getv_binary(nvl, sizep, defval, defsize, namefmt, - nameap); - va_end(nameap); - - return (value); -} - -#define DNVLIST_GETV(ftype, type) \ -ftype \ -dnvlist_getv_##type(const nvlist_t *nvl, ftype defval, \ - const char *namefmt, va_list nameap) \ -{ \ - char *name; \ - ftype value; \ - \ - vasprintf(&name, namefmt, nameap); \ - if (name == NULL) \ - return (defval); \ - value = dnvlist_get_##type(nvl, name, defval); \ - free(name); \ - return (value); \ -} - -DNVLIST_GETV(bool, bool) -DNVLIST_GETV(uint64_t, number) -DNVLIST_GETV(const char *, string) -DNVLIST_GETV(const nvlist_t *, nvlist) -DNVLIST_GETV(int, descriptor) - -#undef DNVLIST_GETV - -const void * -dnvlist_getv_binary(const nvlist_t *nvl, size_t *sizep, const void *defval, - size_t defsize, const char *namefmt, va_list nameap) -{ - char *name; - const void *value; - - nv_vasprintf(&name, namefmt, nameap); - if (name != NULL) { - value = dnvlist_get_binary(nvl, name, sizep, defval, defsize); - nv_free(name); - } else { - if (sizep != NULL) - *sizep = defsize; - value = defval; - } - return (value); -} -#endif - #define DNVLIST_TAKE(ftype, type) \ ftype \ dnvlist_take_##type(nvlist_t *nvl, const char *name, ftype defval) \ @@ -209,86 +126,3 @@ dnvlist_take_binary(nvlist_t *nvl, const char *name, size_t *sizep, return (value); } -#ifndef _KERNEL -#define DNVLIST_TAKEF(ftype, type) \ -ftype \ 
-dnvlist_takef_##type(nvlist_t *nvl, ftype defval, \ - const char *namefmt, ...) \ -{ \ - va_list nameap; \ - ftype value; \ - \ - va_start(nameap, namefmt); \ - value = dnvlist_takev_##type(nvl, defval, namefmt, nameap); \ - va_end(nameap); \ - \ - return (value); \ -} - -DNVLIST_TAKEF(bool, bool) -DNVLIST_TAKEF(uint64_t, number) -DNVLIST_TAKEF(char *, string) -DNVLIST_TAKEF(nvlist_t *, nvlist) -DNVLIST_TAKEF(int, descriptor) - -#undef DNVLIST_TAKEF - -void * -dnvlist_takef_binary(nvlist_t *nvl, size_t *sizep, void *defval, - size_t defsize, const char *namefmt, ...) -{ - va_list nameap; - void *value; - - va_start(nameap, namefmt); - value = dnvlist_takev_binary(nvl, sizep, defval, defsize, namefmt, - nameap); - va_end(nameap); - - return (value); -} - -#define DNVLIST_TAKEV(ftype, type) \ -ftype \ -dnvlist_takev_##type(nvlist_t *nvl, ftype defval, const char *namefmt, \ - va_list nameap) \ -{ \ - char *name; \ - ftype value; \ - \ - vasprintf(&name, namefmt, nameap); \ - if (name == NULL) \ - return (defval); \ - value = dnvlist_take_##type(nvl, name, defval); \ - free(name); \ - return (value); \ -} - -DNVLIST_TAKEV(bool, bool) -DNVLIST_TAKEV(uint64_t, number) -DNVLIST_TAKEV(char *, string) -DNVLIST_TAKEV(nvlist_t *, nvlist) -DNVLIST_TAKEV(int, descriptor) - -#undef DNVLIST_TAKEV - -void * -dnvlist_takev_binary(nvlist_t *nvl, size_t *sizep, void *defval, - size_t defsize, const char *namefmt, va_list nameap) -{ - char *name; - void *value; - - nv_vasprintf(&name, namefmt, nameap); - if (name != NULL) { - value = dnvlist_take_binary(nvl, name, sizep, defval, defsize); - nv_free(name); - } else { - if (sizep != NULL) - *sizep = defsize; - value = defval; - } - - return (value); -} -#endif diff --git a/sys/kern/subr_nvlist.c b/sys/kern/subr_nvlist.c index f352c62..f96b890 100644 --- a/sys/kern/subr_nvlist.c +++ b/sys/kern/subr_nvlist.c @@ -142,12 +142,11 @@ void nvlist_destroy(nvlist_t *nvl) { nvpair_t *nvp; - int serrno; if (nvl == NULL) return; - 
SAVE_ERRNO(serrno); + ERRNO_SAVE(); NVLIST_ASSERT(nvl); @@ -158,7 +157,7 @@ nvlist_destroy(nvlist_t *nvl) nvl->nvl_magic = 0; nv_free(nvl); - RESTORE_ERRNO(serrno); + ERRNO_RESTORE(); } void @@ -231,6 +230,17 @@ nvlist_empty(const nvlist_t *nvl) return (nvlist_first_nvpair(nvl) == NULL); } +int +nvlist_flags(const nvlist_t *nvl) +{ + + NVLIST_ASSERT(nvl); + PJDLOG_ASSERT(nvl->nvl_error == 0); + PJDLOG_ASSERT((nvl->nvl_flags & ~(NV_FLAG_PUBLIC_MASK)) == 0); + + return (nvl->nvl_flags); +} + static void nvlist_report_missing(int type, const char *name) { @@ -264,7 +274,7 @@ nvlist_find(const nvlist_t *nvl, int type, const char *name) } if (nvp == NULL) - RESTORE_ERRNO(ENOENT); + ERRNO_SET(ENOENT); return (nvp); } @@ -281,37 +291,6 @@ nvlist_exists_type(const nvlist_t *nvl, const char *name, int type) return (nvlist_find(nvl, type, name) != NULL); } -#ifndef _KERNEL -bool -nvlist_existsf_type(const nvlist_t *nvl, int type, const char *namefmt, ...) -{ - va_list nameap; - bool ret; - - va_start(nameap, namefmt); - ret = nvlist_existsv_type(nvl, type, namefmt, nameap); - va_end(nameap); - - return (ret); -} - -bool -nvlist_existsv_type(const nvlist_t *nvl, int type, const char *namefmt, - va_list nameap) -{ - char *name; - bool exists; - - nv_vasprintf(&name, namefmt, nameap); - if (name == NULL) - return (false); - - exists = nvlist_exists_type(nvl, name, type); - nv_free(name); - return (exists); -} -#endif - void nvlist_free_type(nvlist_t *nvl, const char *name, int type) { @@ -329,30 +308,6 @@ nvlist_free_type(nvlist_t *nvl, const char *name, int type) nvlist_report_missing(type, name); } -#ifndef _KERNEL -void -nvlist_freef_type(nvlist_t *nvl, int type, const char *namefmt, ...) 
-{ - va_list nameap; - - va_start(nameap, namefmt); - nvlist_freev_type(nvl, type, namefmt, nameap); - va_end(nameap); -} - -void -nvlist_freev_type(nvlist_t *nvl, int type, const char *namefmt, va_list nameap) -{ - char *name; - - nv_vasprintf(&name, namefmt, nameap); - if (name == NULL) - nvlist_report_missing(type, "<unknown>"); - nvlist_free_type(nvl, name, type); - nv_free(name); -} -#endif - nvlist_t * nvlist_clone(const nvlist_t *nvl) { @@ -362,7 +317,7 @@ nvlist_clone(const nvlist_t *nvl) NVLIST_ASSERT(nvl); if (nvl->nvl_error != 0) { - RESTORE_ERRNO(nvl->nvl_error); + ERRNO_SET(nvl->nvl_error); return (NULL); } @@ -533,27 +488,30 @@ out: #ifndef _KERNEL static int * -nvlist_xdescriptors(const nvlist_t *nvl, int *descs, int level) +nvlist_xdescriptors(const nvlist_t *nvl, int *descs) { - const nvpair_t *nvp; + nvpair_t *nvp; + const char *name; + int type; NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); - PJDLOG_ASSERT(level < 3); - for (nvp = nvlist_first_nvpair(nvl); nvp != NULL; - nvp = nvlist_next_nvpair(nvl, nvp)) { - switch (nvpair_type(nvp)) { - case NV_TYPE_DESCRIPTOR: - *descs = nvpair_get_descriptor(nvp); - descs++; - break; - case NV_TYPE_NVLIST: - descs = nvlist_xdescriptors(nvpair_get_nvlist(nvp), - descs, level + 1); - break; + nvp = NULL; + do { + while ((name = nvlist_next(nvl, &type, (void**)&nvp)) != NULL) { + switch (type) { + case NV_TYPE_DESCRIPTOR: + *descs = nvpair_get_descriptor(nvp); + descs++; + break; + case NV_TYPE_NVLIST: + nvl = nvpair_get_nvlist(nvp); + nvp = NULL; + break; + } } - } + } while ((nvl = nvlist_get_parent(nvl, (void**)&nvp)) != NULL); return (descs); } @@ -571,7 +529,7 @@ nvlist_descriptors(const nvlist_t *nvl, size_t *nitemsp) if (fds == NULL) return (NULL); if (nitems > 0) - nvlist_xdescriptors(nvl, fds, 0); + nvlist_xdescriptors(nvl, fds); fds[nitems] = -1; if (nitemsp != NULL) *nitemsp = nitems; @@ -579,30 +537,33 @@ nvlist_descriptors(const nvlist_t *nvl, size_t *nitemsp) } #endif -static size_t 
-nvlist_xndescriptors(const nvlist_t *nvl, int level) +size_t +nvlist_ndescriptors(const nvlist_t *nvl) { #ifndef _KERNEL - const nvpair_t *nvp; + nvpair_t *nvp; + const char *name; size_t ndescs; + int type; NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); - PJDLOG_ASSERT(level < 3); ndescs = 0; - for (nvp = nvlist_first_nvpair(nvl); nvp != NULL; - nvp = nvlist_next_nvpair(nvl, nvp)) { - switch (nvpair_type(nvp)) { - case NV_TYPE_DESCRIPTOR: - ndescs++; - break; - case NV_TYPE_NVLIST: - ndescs += nvlist_xndescriptors(nvpair_get_nvlist(nvp), - level + 1); - break; + nvp = NULL; + do { + while ((name = nvlist_next(nvl, &type, (void**)&nvp)) != NULL) { + switch (type) { + case NV_TYPE_DESCRIPTOR: + ndescs++; + break; + case NV_TYPE_NVLIST: + nvl = nvpair_get_nvlist(nvp); + nvp = NULL; + break; + } } - } + } while ((nvl = nvlist_get_parent(nvl, (void**)&nvp)) != NULL); return (ndescs); #else @@ -610,13 +571,6 @@ nvlist_xndescriptors(const nvlist_t *nvl, int level) #endif } -size_t -nvlist_ndescriptors(const nvlist_t *nvl) -{ - - return (nvlist_xndescriptors(nvl, 0)); -} - static unsigned char * nvlist_pack_header(const nvlist_t *nvl, unsigned char *ptr, size_t *leftp) { @@ -640,7 +594,7 @@ nvlist_pack_header(const nvlist_t *nvl, unsigned char *ptr, size_t *leftp) return (ptr); } -void * +static void * nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep) { unsigned char *buf, *ptr; @@ -652,7 +606,7 @@ nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep) NVLIST_ASSERT(nvl); if (nvl->nvl_error != 0) { - RESTORE_ERRNO(nvl->nvl_error); + ERRNO_SET(nvl->nvl_error); return (NULL); } @@ -742,12 +696,12 @@ nvlist_pack(const nvlist_t *nvl, size_t *sizep) NVLIST_ASSERT(nvl); if (nvl->nvl_error != 0) { - RESTORE_ERRNO(nvl->nvl_error); + ERRNO_SET(nvl->nvl_error); return (NULL); } if (nvlist_ndescriptors(nvl) > 0) { - RESTORE_ERRNO(EOPNOTSUPP); + ERRNO_SET(EOPNOTSUPP); return (NULL); } @@ -759,11 +713,11 @@ nvlist_check_header(struct nvlist_header 
*nvlhdrp) { if (nvlhdrp->nvlh_magic != NVLIST_HEADER_MAGIC) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (false); } if ((nvlhdrp->nvlh_flags & ~NV_FLAG_ALL_MASK) != 0) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (false); } #if BYTE_ORDER == BIG_ENDIAN @@ -815,11 +769,11 @@ nvlist_unpack_header(nvlist_t *nvl, const unsigned char *ptr, size_t nfds, return (ptr); failed: - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } -nvlist_t * +static nvlist_t * nvlist_xunpack(const void *buf, size_t size, const int *fds, size_t nfds) { const unsigned char *ptr; @@ -909,10 +863,10 @@ nvlist_send(int sock, const nvlist_t *nvl) int *fds; void *data; int64_t fdidx; - int serrno, ret; + int ret; if (nvlist_error(nvl) != 0) { - errno = nvlist_error(nvl); + ERRNO_SET(nvlist_error(nvl)); return (-1); } @@ -938,10 +892,10 @@ nvlist_send(int sock, const nvlist_t *nvl) ret = 0; out: - serrno = errno; + ERRNO_SAVE(); free(fds); free(data); - errno = serrno; + ERRNO_RESTORE(); return (ret); } @@ -952,7 +906,7 @@ nvlist_recv(int sock) nvlist_t *nvl, *ret; unsigned char *buf; size_t nfds, size, i; - int serrno, *fds; + int *fds; if (buf_recv(sock, &nvlhdr, sizeof(nvlhdr)) == -1) return (NULL); @@ -963,7 +917,7 @@ nvlist_recv(int sock) nfds = (size_t)nvlhdr.nvlh_descriptors; size = sizeof(nvlhdr) + (size_t)nvlhdr.nvlh_size; - buf = malloc(size); + buf = nv_malloc(size); if (buf == NULL) return (NULL); @@ -976,7 +930,7 @@ nvlist_recv(int sock) goto out; if (nfds > 0) { - fds = malloc(nfds * sizeof(fds[0])); + fds = nv_malloc(nfds * sizeof(fds[0])); if (fds == NULL) goto out; if (fd_recv(sock, fds, nfds) == -1) @@ -985,17 +939,19 @@ nvlist_recv(int sock) nvl = nvlist_xunpack(buf, size, fds, nfds); if (nvl == NULL) { + ERRNO_SAVE(); for (i = 0; i < nfds; i++) close(fds[i]); + ERRNO_RESTORE(); goto out; } ret = nvl; out: - serrno = errno; + ERRNO_SAVE(); free(buf); free(fds); - errno = serrno; + ERRNO_RESTORE(); return (ret); } @@ -1100,86 +1056,6 @@ 
NVLIST_EXISTS(binary, BINARY) #undef NVLIST_EXISTS -#ifndef _KERNEL -bool -nvlist_existsf(const nvlist_t *nvl, const char *namefmt, ...) -{ - va_list nameap; - bool ret; - - va_start(nameap, namefmt); - ret = nvlist_existsv(nvl, namefmt, nameap); - va_end(nameap); - return (ret); -} - -#define NVLIST_EXISTSF(type) \ -bool \ -nvlist_existsf_##type(const nvlist_t *nvl, const char *namefmt, ...) \ -{ \ - va_list nameap; \ - bool ret; \ - \ - va_start(nameap, namefmt); \ - ret = nvlist_existsv_##type(nvl, namefmt, nameap); \ - va_end(nameap); \ - return (ret); \ -} - -NVLIST_EXISTSF(null) -NVLIST_EXISTSF(bool) -NVLIST_EXISTSF(number) -NVLIST_EXISTSF(string) -NVLIST_EXISTSF(nvlist) -#ifndef _KERNEL -NVLIST_EXISTSF(descriptor) -#endif -NVLIST_EXISTSF(binary) - -#undef NVLIST_EXISTSF - -bool -nvlist_existsv(const nvlist_t *nvl, const char *namefmt, va_list nameap) -{ - char *name; - bool exists; - - nv_vasprintf(&name, namefmt, nameap); - if (name == NULL) - return (false); - - exists = nvlist_exists(nvl, name); - nv_free(name); - return (exists); -} - -#define NVLIST_EXISTSV(type) \ -bool \ -nvlist_existsv_##type(const nvlist_t *nvl, const char *namefmt, \ - va_list nameap) \ -{ \ - char *name; \ - bool exists; \ - \ - vasprintf(&name, namefmt, nameap); \ - if (name == NULL) \ - return (false); \ - exists = nvlist_exists_##type(nvl, name); \ - free(name); \ - return (exists); \ -} - -NVLIST_EXISTSV(null) -NVLIST_EXISTSV(bool) -NVLIST_EXISTSV(number) -NVLIST_EXISTSV(string) -NVLIST_EXISTSV(nvlist) -NVLIST_EXISTSV(descriptor) -NVLIST_EXISTSV(binary) - -#undef NVLIST_EXISTSV -#endif - void nvlist_add_nvpair(nvlist_t *nvl, const nvpair_t *nvp) { @@ -1188,19 +1064,19 @@ nvlist_add_nvpair(nvlist_t *nvl, const nvpair_t *nvp) NVPAIR_ASSERT(nvp); if (nvlist_error(nvl) != 0) { - RESTORE_ERRNO(nvlist_error(nvl)); + ERRNO_SET(nvlist_error(nvl)); return; } if (nvlist_exists(nvl, nvpair_name(nvp))) { nvl->nvl_error = EEXIST; - RESTORE_ERRNO(nvlist_error(nvl)); + 
ERRNO_SET(nvlist_error(nvl)); return; } newnvp = nvpair_clone(nvp); if (newnvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); - RESTORE_ERRNO(nvlist_error(nvl)); + ERRNO_SET(nvlist_error(nvl)); return; } @@ -1208,34 +1084,6 @@ nvlist_add_nvpair(nvlist_t *nvl, const nvpair_t *nvp) } void -nvlist_add_null(nvlist_t *nvl, const char *name) -{ - - nvlist_addf_null(nvl, "%s", name); -} - -void -nvlist_add_bool(nvlist_t *nvl, const char *name, bool value) -{ - - nvlist_addf_bool(nvl, value, "%s", name); -} - -void -nvlist_add_number(nvlist_t *nvl, const char *name, uint64_t value) -{ - - nvlist_addf_number(nvl, value, "%s", name); -} - -void -nvlist_add_string(nvlist_t *nvl, const char *name, const char *value) -{ - - nvlist_addf_string(nvl, value, "%s", name); -} - -void nvlist_add_stringf(nvlist_t *nvl, const char *name, const char *valuefmt, ...) { va_list valueap; @@ -1252,213 +1100,117 @@ nvlist_add_stringv(nvlist_t *nvl, const char *name, const char *valuefmt, nvpair_t *nvp; if (nvlist_error(nvl) != 0) { - RESTORE_ERRNO(nvlist_error(nvl)); + ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_create_stringv(name, valuefmt, valueap); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); - RESTORE_ERRNO(nvl->nvl_error); - } else + ERRNO_SET(nvl->nvl_error); + } else { nvlist_move_nvpair(nvl, nvp); + } } void -nvlist_add_nvlist(nvlist_t *nvl, const char *name, const nvlist_t *value) -{ - - nvlist_addf_nvlist(nvl, value, "%s", name); -} - -#ifndef _KERNEL -void -nvlist_add_descriptor(nvlist_t *nvl, const char *name, int value) -{ - - nvlist_addf_descriptor(nvl, value, "%s", name); -} -#endif - -void -nvlist_add_binary(nvlist_t *nvl, const char *name, const void *value, - size_t size) -{ - - nvlist_addf_binary(nvl, value, size, "%s", name); -} - -void -nvlist_addf_null(nvlist_t *nvl, const char *namefmt, ...) 
-{ - va_list nameap; - - va_start(nameap, namefmt); - nvlist_addv_null(nvl, namefmt, nameap); - va_end(nameap); -} - -void -nvlist_addf_bool(nvlist_t *nvl, bool value, const char *namefmt, ...) -{ - va_list nameap; - - va_start(nameap, namefmt); - nvlist_addv_bool(nvl, value, namefmt, nameap); - va_end(nameap); -} - -void -nvlist_addf_number(nvlist_t *nvl, uint64_t value, const char *namefmt, ...) -{ - va_list nameap; - - va_start(nameap, namefmt); - nvlist_addv_number(nvl, value, namefmt, nameap); - va_end(nameap); -} - -void -nvlist_addf_string(nvlist_t *nvl, const char *value, const char *namefmt, ...) -{ - va_list nameap; - - va_start(nameap, namefmt); - nvlist_addv_string(nvl, value, namefmt, nameap); - va_end(nameap); -} - -void -nvlist_addf_nvlist(nvlist_t *nvl, const nvlist_t *value, const char *namefmt, - ...) -{ - va_list nameap; - - va_start(nameap, namefmt); - nvlist_addv_nvlist(nvl, value, namefmt, nameap); - va_end(nameap); -} - -#ifndef _KERNEL -void -nvlist_addf_descriptor(nvlist_t *nvl, int value, const char *namefmt, ...) -{ - va_list nameap; - - va_start(nameap, namefmt); - nvlist_addv_descriptor(nvl, value, namefmt, nameap); - va_end(nameap); -} -#endif - -void -nvlist_addf_binary(nvlist_t *nvl, const void *value, size_t size, - const char *namefmt, ...) 
-{ - va_list nameap; - - va_start(nameap, namefmt); - nvlist_addv_binary(nvl, value, size, namefmt, nameap); - va_end(nameap); -} - -void -nvlist_addv_null(nvlist_t *nvl, const char *namefmt, va_list nameap) +nvlist_add_null(nvlist_t *nvl, const char *name) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { - RESTORE_ERRNO(nvlist_error(nvl)); + ERRNO_SET(nvlist_error(nvl)); return; } - nvp = nvpair_createv_null(namefmt, nameap); + nvp = nvpair_create_null(name); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); - RESTORE_ERRNO(nvl->nvl_error); - } else + ERRNO_SET(nvl->nvl_error); + } else { nvlist_move_nvpair(nvl, nvp); + } } void -nvlist_addv_bool(nvlist_t *nvl, bool value, const char *namefmt, va_list nameap) +nvlist_add_bool(nvlist_t *nvl, const char *name, bool value) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { - RESTORE_ERRNO(nvlist_error(nvl)); + ERRNO_SET(nvlist_error(nvl)); return; } - nvp = nvpair_createv_bool(value, namefmt, nameap); + nvp = nvpair_create_bool(name, value); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); - RESTORE_ERRNO(nvl->nvl_error); - } else + ERRNO_SET(nvl->nvl_error); + } else { nvlist_move_nvpair(nvl, nvp); + } } void -nvlist_addv_number(nvlist_t *nvl, uint64_t value, const char *namefmt, - va_list nameap) +nvlist_add_number(nvlist_t *nvl, const char *name, uint64_t value) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { - RESTORE_ERRNO(nvlist_error(nvl)); + ERRNO_SET(nvlist_error(nvl)); return; } - nvp = nvpair_createv_number(value, namefmt, nameap); + nvp = nvpair_create_number(name, value); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); - RESTORE_ERRNO(nvl->nvl_error); - } else + ERRNO_SET(nvl->nvl_error); + } else { nvlist_move_nvpair(nvl, nvp); + } } void -nvlist_addv_string(nvlist_t *nvl, const char *value, const char *namefmt, - va_list nameap) +nvlist_add_string(nvlist_t *nvl, const char *name, const char *value) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { - 
RESTORE_ERRNO(nvlist_error(nvl)); + ERRNO_SET(nvlist_error(nvl)); return; } - nvp = nvpair_createv_string(value, namefmt, nameap); + nvp = nvpair_create_string(name, value); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); - RESTORE_ERRNO(nvl->nvl_error); - } else + ERRNO_SET(nvl->nvl_error); + } else { nvlist_move_nvpair(nvl, nvp); + } } void -nvlist_addv_nvlist(nvlist_t *nvl, const nvlist_t *value, const char *namefmt, - va_list nameap) +nvlist_add_nvlist(nvlist_t *nvl, const char *name, const nvlist_t *value) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { - RESTORE_ERRNO(nvlist_error(nvl)); + ERRNO_SET(nvlist_error(nvl)); return; } - nvp = nvpair_createv_nvlist(value, namefmt, nameap); + nvp = nvpair_create_nvlist(name, value); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); - RESTORE_ERRNO(nvl->nvl_error); - } else + ERRNO_SET(nvl->nvl_error); + } else { nvlist_move_nvpair(nvl, nvp); + } } #ifndef _KERNEL void -nvlist_addv_descriptor(nvlist_t *nvl, int value, const char *namefmt, - va_list nameap) +nvlist_add_descriptor(nvlist_t *nvl, const char *name, int value) { nvpair_t *nvp; @@ -1467,7 +1219,7 @@ nvlist_addv_descriptor(nvlist_t *nvl, int value, const char *namefmt, return; } - nvp = nvpair_createv_descriptor(value, namefmt, nameap); + nvp = nvpair_create_descriptor(name, value); if (nvp == NULL) nvl->nvl_error = errno = (errno != 0 ? 
errno : ENOMEM); else @@ -1476,22 +1228,23 @@ nvlist_addv_descriptor(nvlist_t *nvl, int value, const char *namefmt, #endif void -nvlist_addv_binary(nvlist_t *nvl, const void *value, size_t size, - const char *namefmt, va_list nameap) +nvlist_add_binary(nvlist_t *nvl, const char *name, const void *value, + size_t size) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { - RESTORE_ERRNO(nvlist_error(nvl)); + ERRNO_SET(nvlist_error(nvl)); return; } - nvp = nvpair_createv_binary(value, size, namefmt, nameap); + nvp = nvpair_create_binary(name, value, size); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); - RESTORE_ERRNO(nvl->nvl_error); - } else + ERRNO_SET(nvl->nvl_error); + } else { nvlist_move_nvpair(nvl, nvp); + } } void @@ -1503,153 +1256,100 @@ nvlist_move_nvpair(nvlist_t *nvl, nvpair_t *nvp) if (nvlist_error(nvl) != 0) { nvpair_free(nvp); - RESTORE_ERRNO(nvlist_error(nvl)); + ERRNO_SET(nvlist_error(nvl)); return; } if (nvlist_exists(nvl, nvpair_name(nvp))) { nvpair_free(nvp); nvl->nvl_error = EEXIST; - RESTORE_ERRNO(nvl->nvl_error); + ERRNO_SET(nvl->nvl_error); return; } nvpair_insert(&nvl->nvl_head, nvp, nvl); } -#define NVLIST_MOVE(vtype, type) \ -void \ -nvlist_move_##type(nvlist_t *nvl, const char *name, vtype value) \ -{ \ - \ - nvlist_movef_##type(nvl, value, "%s", name); \ -} - -NVLIST_MOVE(char *, string) -NVLIST_MOVE(nvlist_t *, nvlist) -#ifndef _KERNEL -NVLIST_MOVE(int, descriptor) -#endif - -#undef NVLIST_MOVE - -void -nvlist_move_binary(nvlist_t *nvl, const char *name, void *value, size_t size) -{ - - nvlist_movef_binary(nvl, value, size, "%s", name); -} - -#define NVLIST_MOVEF(vtype, type) \ -void \ -nvlist_movef_##type(nvlist_t *nvl, vtype value, const char *namefmt, \ - ...) 
\ -{ \ - va_list nameap; \ - \ - va_start(nameap, namefmt); \ - nvlist_movev_##type(nvl, value, namefmt, nameap); \ - va_end(nameap); \ -} - -NVLIST_MOVEF(char *, string) -NVLIST_MOVEF(nvlist_t *, nvlist) -#ifndef _KERNEL -NVLIST_MOVEF(int, descriptor) -#endif - -#undef NVLIST_MOVEF - -void -nvlist_movef_binary(nvlist_t *nvl, void *value, size_t size, - const char *namefmt, ...) -{ - va_list nameap; - - va_start(nameap, namefmt); - nvlist_movev_binary(nvl, value, size, namefmt, nameap); - va_end(nameap); -} - void -nvlist_movev_string(nvlist_t *nvl, char *value, const char *namefmt, - va_list nameap) +nvlist_move_string(nvlist_t *nvl, const char *name, char *value) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { nv_free(value); - RESTORE_ERRNO(nvlist_error(nvl)); + ERRNO_SET(nvlist_error(nvl)); return; } - nvp = nvpair_movev_string(value, namefmt, nameap); + nvp = nvpair_move_string(name, value); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); - RESTORE_ERRNO(nvl->nvl_error); - } else + ERRNO_SET(nvl->nvl_error); + } else { nvlist_move_nvpair(nvl, nvp); + } } void -nvlist_movev_nvlist(nvlist_t *nvl, nvlist_t *value, const char *namefmt, - va_list nameap) +nvlist_move_nvlist(nvlist_t *nvl, const char *name, nvlist_t *value) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { if (value != NULL && nvlist_get_nvpair_parent(value) != NULL) nvlist_destroy(value); - RESTORE_ERRNO(nvlist_error(nvl)); + ERRNO_SET(nvlist_error(nvl)); return; } - nvp = nvpair_movev_nvlist(value, namefmt, nameap); + nvp = nvpair_move_nvlist(name, value); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); - RESTORE_ERRNO(nvl->nvl_error); - } else + ERRNO_SET(nvl->nvl_error); + } else { nvlist_move_nvpair(nvl, nvp); + } } #ifndef _KERNEL void -nvlist_movev_descriptor(nvlist_t *nvl, int value, const char *namefmt, - va_list nameap) +nvlist_move_descriptor(nvlist_t *nvl, const char *name, int value) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { close(value); - errno = 
nvlist_error(nvl); + ERRNO_SET(nvlist_error(nvl)); return; } - nvp = nvpair_movev_descriptor(value, namefmt, nameap); - if (nvp == NULL) - nvl->nvl_error = errno = (errno != 0 ? errno : ENOMEM); - else + nvp = nvpair_move_descriptor(name, value); + if (nvp == NULL) { + nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); + ERRNO_SET(nvl->nvl_error); + } else { nvlist_move_nvpair(nvl, nvp); + } } #endif void -nvlist_movev_binary(nvlist_t *nvl, void *value, size_t size, - const char *namefmt, va_list nameap) +nvlist_move_binary(nvlist_t *nvl, const char *name, void *value, size_t size) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { nv_free(value); - RESTORE_ERRNO(nvlist_error(nvl)); + ERRNO_SET(nvlist_error(nvl)); return; } - nvp = nvpair_movev_binary(value, size, namefmt, nameap); + nvp = nvpair_move_binary(name, value, size); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); - RESTORE_ERRNO(nvl->nvl_error); - } else + ERRNO_SET(nvl->nvl_error); + } else { nvlist_move_nvpair(nvl, nvp); + } } const nvpair_t * @@ -1693,84 +1393,6 @@ nvlist_get_binary(const nvlist_t *nvl, const char *name, size_t *sizep) return (nvpair_get_binary(nvp, sizep)); } -#define NVLIST_GETF(ftype, type) \ -ftype \ -nvlist_getf_##type(const nvlist_t *nvl, const char *namefmt, ...) \ -{ \ - va_list nameap; \ - ftype value; \ - \ - va_start(nameap, namefmt); \ - value = nvlist_getv_##type(nvl, namefmt, nameap); \ - va_end(nameap); \ - \ - return (value); \ -} - -#ifndef _KERNEL -NVLIST_GETF(bool, bool) -NVLIST_GETF(uint64_t, number) -NVLIST_GETF(const char *, string) -NVLIST_GETF(const nvlist_t *, nvlist) -NVLIST_GETF(int, descriptor) - -#undef NVLIST_GETF - -const void * -nvlist_getf_binary(const nvlist_t *nvl, size_t *sizep, const char *namefmt, ...) 
-{ - va_list nameap; - const void *value; - - va_start(nameap, namefmt); - value = nvlist_getv_binary(nvl, sizep, namefmt, nameap); - va_end(nameap); - - return (value); -} - -#define NVLIST_GETV(ftype, type, TYPE) \ -ftype \ -nvlist_getv_##type(const nvlist_t *nvl, const char *namefmt, \ - va_list nameap) \ -{ \ - char *name; \ - ftype value; \ - \ - vasprintf(&name, namefmt, nameap); \ - if (name == NULL) \ - nvlist_report_missing(NV_TYPE_##TYPE, "<unknown>"); \ - value = nvlist_get_##type(nvl, name); \ - free(name); \ - \ - return (value); \ -} - -NVLIST_GETV(bool, bool, BOOL) -NVLIST_GETV(uint64_t, number, NUMBER) -NVLIST_GETV(const char *, string, STRING) -NVLIST_GETV(const nvlist_t *, nvlist, NVLIST) -NVLIST_GETV(int, descriptor, DESCRIPTOR) - -#undef NVLIST_GETV - -const void * -nvlist_getv_binary(const nvlist_t *nvl, size_t *sizep, const char *namefmt, - va_list nameap) -{ - char *name; - const void *binary; - - nv_vasprintf(&name, namefmt, nameap); - if (name == NULL) - nvlist_report_missing(NV_TYPE_BINARY, "<unknown>"); - - binary = nvlist_get_binary(nvl, name, sizep); - nv_free(name); - return (binary); -} -#endif - #define NVLIST_TAKE(ftype, type, TYPE) \ ftype \ nvlist_take_##type(nvlist_t *nvl, const char *name) \ @@ -1813,82 +1435,6 @@ nvlist_take_binary(nvlist_t *nvl, const char *name, size_t *sizep) return (value); } -#define NVLIST_TAKEF(ftype, type) \ -ftype \ -nvlist_takef_##type(nvlist_t *nvl, const char *namefmt, ...) \ -{ \ - va_list nameap; \ - ftype value; \ - \ - va_start(nameap, namefmt); \ - value = nvlist_takev_##type(nvl, namefmt, nameap); \ - va_end(nameap); \ - \ - return (value); \ -} - -#ifndef _KERNEL -NVLIST_TAKEF(bool, bool) -NVLIST_TAKEF(uint64_t, number) -NVLIST_TAKEF(char *, string) -NVLIST_TAKEF(nvlist_t *, nvlist) -NVLIST_TAKEF(int, descriptor) - -#undef NVLIST_TAKEF - -void * -nvlist_takef_binary(nvlist_t *nvl, size_t *sizep, const char *namefmt, ...) 
-{ - va_list nameap; - void *value; - - va_start(nameap, namefmt); - value = nvlist_takev_binary(nvl, sizep, namefmt, nameap); - va_end(nameap); - - return (value); -} - -#define NVLIST_TAKEV(ftype, type, TYPE) \ -ftype \ -nvlist_takev_##type(nvlist_t *nvl, const char *namefmt, va_list nameap) \ -{ \ - char *name; \ - ftype value; \ - \ - vasprintf(&name, namefmt, nameap); \ - if (name == NULL) \ - nvlist_report_missing(NV_TYPE_##TYPE, "<unknown>"); \ - value = nvlist_take_##type(nvl, name); \ - free(name); \ - return (value); \ -} - -NVLIST_TAKEV(bool, bool, BOOL) -NVLIST_TAKEV(uint64_t, number, NUMBER) -NVLIST_TAKEV(char *, string, STRING) -NVLIST_TAKEV(nvlist_t *, nvlist, NVLIST) -NVLIST_TAKEV(int, descriptor, DESCRIPTOR) - -#undef NVLIST_TAKEV - -void * -nvlist_takev_binary(nvlist_t *nvl, size_t *sizep, const char *namefmt, - va_list nameap) -{ - char *name; - void *binary; - - nv_vasprintf(&name, namefmt, nameap); - if (name == NULL) - nvlist_report_missing(NV_TYPE_BINARY, "<unknown>"); - - binary = nvlist_take_binary(nvl, name, sizep); - nv_free(name); - return (binary); -} -#endif - void nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp) { @@ -1927,68 +1473,6 @@ NVLIST_FREE(binary, BINARY) #undef NVLIST_FREE -#ifndef _KERNEL -void -nvlist_freef(nvlist_t *nvl, const char *namefmt, ...) -{ - va_list nameap; - - va_start(nameap, namefmt); - nvlist_freev(nvl, namefmt, nameap); - va_end(nameap); -} - -#define NVLIST_FREEF(type) \ -void \ -nvlist_freef_##type(nvlist_t *nvl, const char *namefmt, ...) 
\ -{ \ - va_list nameap; \ - \ - va_start(nameap, namefmt); \ - nvlist_freev_##type(nvl, namefmt, nameap); \ - va_end(nameap); \ -} - -NVLIST_FREEF(null) -NVLIST_FREEF(bool) -NVLIST_FREEF(number) -NVLIST_FREEF(string) -NVLIST_FREEF(nvlist) -NVLIST_FREEF(descriptor) -NVLIST_FREEF(binary) - -#undef NVLIST_FREEF - -void -nvlist_freev(nvlist_t *nvl, const char *namefmt, va_list nameap) -{ - - nvlist_freev_type(nvl, NV_TYPE_NONE, namefmt, nameap); -} - -#define NVLIST_FREEV(type, TYPE) \ -void \ -nvlist_freev_##type(nvlist_t *nvl, const char *namefmt, va_list nameap) \ -{ \ - char *name; \ - \ - vasprintf(&name, namefmt, nameap); \ - if (name == NULL) \ - nvlist_report_missing(NV_TYPE_##TYPE, "<unknown>"); \ - nvlist_free_##type(nvl, name); \ - free(name); \ -} - -NVLIST_FREEV(null, NULL) -NVLIST_FREEV(bool, BOOL) -NVLIST_FREEV(number, NUMBER) -NVLIST_FREEV(string, STRING) -NVLIST_FREEV(nvlist, NVLIST) -NVLIST_FREEV(descriptor, DESCRIPTOR) -NVLIST_FREEV(binary, BINARY) -#undef NVLIST_FREEV -#endif - void nvlist_free_nvpair(nvlist_t *nvl, nvpair_t *nvp) { @@ -2000,3 +1484,4 @@ nvlist_free_nvpair(nvlist_t *nvl, nvpair_t *nvp) nvlist_remove_nvpair(nvl, nvp); nvpair_free(nvp); } + diff --git a/sys/kern/subr_nvpair.c b/sys/kern/subr_nvpair.c index 7b88e42..b0fc322 100644 --- a/sys/kern/subr_nvpair.c +++ b/sys/kern/subr_nvpair.c @@ -468,7 +468,7 @@ nvpair_unpack_header(bool isbe, nvpair_t *nvp, const unsigned char *ptr, return (ptr); failed: - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } @@ -480,7 +480,7 @@ nvpair_unpack_null(bool isbe __unused, nvpair_t *nvp, const unsigned char *ptr, PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NULL); if (nvp->nvp_datasize != 0) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } @@ -496,11 +496,11 @@ nvpair_unpack_bool(bool isbe __unused, nvpair_t *nvp, const unsigned char *ptr, PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_BOOL); if (nvp->nvp_datasize != sizeof(value)) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return 
(NULL); } if (*leftp < sizeof(value)) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } @@ -509,7 +509,7 @@ nvpair_unpack_bool(bool isbe __unused, nvpair_t *nvp, const unsigned char *ptr, *leftp -= sizeof(value); if (value != 0 && value != 1) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } @@ -526,11 +526,11 @@ nvpair_unpack_number(bool isbe, nvpair_t *nvp, const unsigned char *ptr, PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NUMBER); if (nvp->nvp_datasize != sizeof(uint64_t)) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } if (*leftp < sizeof(uint64_t)) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } @@ -552,13 +552,13 @@ nvpair_unpack_string(bool isbe __unused, nvpair_t *nvp, PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_STRING); if (*leftp < nvp->nvp_datasize || nvp->nvp_datasize == 0) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } if (strnlen((const char *)ptr, nvp->nvp_datasize) != nvp->nvp_datasize - 1) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } @@ -581,7 +581,7 @@ nvpair_unpack_nvlist(bool isbe __unused, nvpair_t *nvp, PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NVLIST); if (*leftp < nvp->nvp_datasize || nvp->nvp_datasize == 0) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } @@ -609,11 +609,11 @@ nvpair_unpack_descriptor(bool isbe, nvpair_t *nvp, const unsigned char *ptr, PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_DESCRIPTOR); if (nvp->nvp_datasize != sizeof(idx)) { - errno = EINVAL; + ERRNO_SET(EINVAL); return (NULL); } if (*leftp < sizeof(idx)) { - errno = EINVAL; + ERRNO_SET(EINVAL); return (NULL); } @@ -623,12 +623,12 @@ nvpair_unpack_descriptor(bool isbe, nvpair_t *nvp, const unsigned char *ptr, idx = le64dec(ptr); if (idx < 0) { - errno = EINVAL; + ERRNO_SET(EINVAL); return (NULL); } if ((size_t)idx >= nfds) { - errno = EINVAL; + ERRNO_SET(EINVAL); return (NULL); } @@ -650,7 +650,7 @@ nvpair_unpack_binary(bool isbe __unused, nvpair_t *nvp, 
PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_BINARY); if (*leftp < nvp->nvp_datasize || nvp->nvp_datasize == 0) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } @@ -716,69 +716,34 @@ nvpair_name(const nvpair_t *nvp) } static nvpair_t * -nvpair_allocv(int type, uint64_t data, size_t datasize, const char *namefmt, - va_list nameap) +nvpair_allocv(const char *name, int type, uint64_t data, size_t datasize) { nvpair_t *nvp; - char *name; - int namelen; + size_t namelen; PJDLOG_ASSERT(type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST); - namelen = nv_vasprintf(&name, namefmt, nameap); - if (namelen < 0) - return (NULL); - - PJDLOG_ASSERT(namelen > 0); + namelen = strlen(name); if (namelen >= NV_NAME_MAX) { - nv_free(name); - RESTORE_ERRNO(ENAMETOOLONG); + ERRNO_SET(ENAMETOOLONG); return (NULL); } nvp = nv_calloc(1, sizeof(*nvp) + namelen + 1); if (nvp != NULL) { nvp->nvp_name = (char *)(nvp + 1); - memcpy(nvp->nvp_name, name, namelen + 1); + memcpy(nvp->nvp_name, name, namelen); + nvp->nvp_name[namelen + 1] = '\0'; nvp->nvp_type = type; nvp->nvp_data = data; nvp->nvp_datasize = datasize; nvp->nvp_magic = NVPAIR_MAGIC; } - nv_free(name); return (nvp); }; nvpair_t * -nvpair_create_null(const char *name) -{ - - return (nvpair_createf_null("%s", name)); -} - -nvpair_t * -nvpair_create_bool(const char *name, bool value) -{ - - return (nvpair_createf_bool(value, "%s", name)); -} - -nvpair_t * -nvpair_create_number(const char *name, uint64_t value) -{ - - return (nvpair_createf_number(value, "%s", name)); -} - -nvpair_t * -nvpair_create_string(const char *name, const char *value) -{ - - return (nvpair_createf_string(value, "%s", name)); -} - -nvpair_t * nvpair_create_stringf(const char *name, const char *valuefmt, ...) 
{ va_list valueap; @@ -808,153 +773,36 @@ nvpair_create_stringv(const char *name, const char *valuefmt, va_list valueap) } nvpair_t * -nvpair_create_nvlist(const char *name, const nvlist_t *value) -{ - - return (nvpair_createf_nvlist(value, "%s", name)); -} - -#ifndef _KERNEL -nvpair_t * -nvpair_create_descriptor(const char *name, int value) -{ - - return (nvpair_createf_descriptor(value, "%s", name)); -} -#endif - -nvpair_t * -nvpair_create_binary(const char *name, const void *value, size_t size) -{ - - return (nvpair_createf_binary(value, size, "%s", name)); -} - -nvpair_t * -nvpair_createf_null(const char *namefmt, ...) -{ - va_list nameap; - nvpair_t *nvp; - - va_start(nameap, namefmt); - nvp = nvpair_createv_null(namefmt, nameap); - va_end(nameap); - - return (nvp); -} - -nvpair_t * -nvpair_createf_bool(bool value, const char *namefmt, ...) -{ - va_list nameap; - nvpair_t *nvp; - - va_start(nameap, namefmt); - nvp = nvpair_createv_bool(value, namefmt, nameap); - va_end(nameap); - - return (nvp); -} - -nvpair_t * -nvpair_createf_number(uint64_t value, const char *namefmt, ...) -{ - va_list nameap; - nvpair_t *nvp; - - va_start(nameap, namefmt); - nvp = nvpair_createv_number(value, namefmt, nameap); - va_end(nameap); - - return (nvp); -} - -nvpair_t * -nvpair_createf_string(const char *value, const char *namefmt, ...) -{ - va_list nameap; - nvpair_t *nvp; - - va_start(nameap, namefmt); - nvp = nvpair_createv_string(value, namefmt, nameap); - va_end(nameap); - - return (nvp); -} - -nvpair_t * -nvpair_createf_nvlist(const nvlist_t *value, const char *namefmt, ...) -{ - va_list nameap; - nvpair_t *nvp; - - va_start(nameap, namefmt); - nvp = nvpair_createv_nvlist(value, namefmt, nameap); - va_end(nameap); - - return (nvp); -} - -#ifndef _KERNEL -nvpair_t * -nvpair_createf_descriptor(int value, const char *namefmt, ...) 
-{ - va_list nameap; - nvpair_t *nvp; - - va_start(nameap, namefmt); - nvp = nvpair_createv_descriptor(value, namefmt, nameap); - va_end(nameap); - - return (nvp); -} -#endif - -nvpair_t * -nvpair_createf_binary(const void *value, size_t size, const char *namefmt, ...) -{ - va_list nameap; - nvpair_t *nvp; - - va_start(nameap, namefmt); - nvp = nvpair_createv_binary(value, size, namefmt, nameap); - va_end(nameap); - - return (nvp); -} - -nvpair_t * -nvpair_createv_null(const char *namefmt, va_list nameap) +nvpair_create_null(const char *name) { - return (nvpair_allocv(NV_TYPE_NULL, 0, 0, namefmt, nameap)); + return (nvpair_allocv(name, NV_TYPE_NULL, 0, 0)); } nvpair_t * -nvpair_createv_bool(bool value, const char *namefmt, va_list nameap) +nvpair_create_bool(const char *name, bool value) { - return (nvpair_allocv(NV_TYPE_BOOL, value ? 1 : 0, sizeof(uint8_t), - namefmt, nameap)); + return (nvpair_allocv(name, NV_TYPE_BOOL, value ? 1 : 0, + sizeof(uint8_t))); } nvpair_t * -nvpair_createv_number(uint64_t value, const char *namefmt, va_list nameap) +nvpair_create_number(const char *name, uint64_t value) { - return (nvpair_allocv(NV_TYPE_NUMBER, value, sizeof(value), namefmt, - nameap)); + return (nvpair_allocv(name, NV_TYPE_NUMBER, value, sizeof(value))); } nvpair_t * -nvpair_createv_string(const char *value, const char *namefmt, va_list nameap) +nvpair_create_string(const char *name, const char *value) { nvpair_t *nvp; size_t size; char *data; if (value == NULL) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } @@ -963,8 +811,8 @@ nvpair_createv_string(const char *value, const char *namefmt, va_list nameap) return (NULL); size = strlen(value) + 1; - nvp = nvpair_allocv(NV_TYPE_STRING, (uint64_t)(uintptr_t)data, size, - namefmt, nameap); + nvp = nvpair_allocv(name, NV_TYPE_STRING, (uint64_t)(uintptr_t)data, + size); if (nvp == NULL) nv_free(data); @@ -972,14 +820,13 @@ nvpair_createv_string(const char *value, const char *namefmt, va_list nameap) } 
nvpair_t * -nvpair_createv_nvlist(const nvlist_t *value, const char *namefmt, - va_list nameap) +nvpair_create_nvlist(const char *name, const nvlist_t *value) { nvlist_t *nvl; nvpair_t *nvp; if (value == NULL) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } @@ -987,8 +834,7 @@ nvpair_createv_nvlist(const nvlist_t *value, const char *namefmt, if (nvl == NULL) return (NULL); - nvp = nvpair_allocv(NV_TYPE_NVLIST, (uint64_t)(uintptr_t)nvl, 0, - namefmt, nameap); + nvp = nvpair_allocv(name, NV_TYPE_NVLIST, (uint64_t)(uintptr_t)nvl, 0); if (nvp == NULL) nvlist_destroy(nvl); else @@ -999,12 +845,12 @@ nvpair_createv_nvlist(const nvlist_t *value, const char *namefmt, #ifndef _KERNEL nvpair_t * -nvpair_createv_descriptor(int value, const char *namefmt, va_list nameap) +nvpair_create_descriptor(const char *name, int value) { nvpair_t *nvp; if (value < 0 || !fd_is_valid(value)) { - errno = EBADF; + ERRNO_SET(EBADF); return (NULL); } @@ -1012,24 +858,26 @@ nvpair_createv_descriptor(int value, const char *namefmt, va_list nameap) if (value < 0) return (NULL); - nvp = nvpair_allocv(NV_TYPE_DESCRIPTOR, (uint64_t)value, - sizeof(int64_t), namefmt, nameap); - if (nvp == NULL) + nvp = nvpair_allocv(name, NV_TYPE_DESCRIPTOR, (uint64_t)value, + sizeof(int64_t)); + if (nvp == NULL) { + ERRNO_SAVE(); close(value); + ERRNO_RESTORE(); + } return (nvp); } #endif nvpair_t * -nvpair_createv_binary(const void *value, size_t size, const char *namefmt, - va_list nameap) +nvpair_create_binary(const char *name, const void *value, size_t size) { nvpair_t *nvp; void *data; if (value == NULL || size == 0) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } @@ -1038,8 +886,8 @@ nvpair_createv_binary(const void *value, size_t size, const char *namefmt, return (NULL); memcpy(data, value, size); - nvp = nvpair_allocv(NV_TYPE_BINARY, (uint64_t)(uintptr_t)data, size, - namefmt, nameap); + nvp = nvpair_allocv(name, NV_TYPE_BINARY, (uint64_t)(uintptr_t)data, + size); if (nvp == 
NULL) nv_free(data); @@ -1049,127 +897,42 @@ nvpair_createv_binary(const void *value, size_t size, const char *namefmt, nvpair_t * nvpair_move_string(const char *name, char *value) { - - return (nvpair_movef_string(value, "%s", name)); -} - -nvpair_t * -nvpair_move_nvlist(const char *name, nvlist_t *value) -{ - - return (nvpair_movef_nvlist(value, "%s", name)); -} - -#ifndef _KERNEL -nvpair_t * -nvpair_move_descriptor(const char *name, int value) -{ - - return (nvpair_movef_descriptor(value, "%s", name)); -} -#endif - -nvpair_t * -nvpair_move_binary(const char *name, void *value, size_t size) -{ - - return (nvpair_movef_binary(value, size, "%s", name)); -} - -nvpair_t * -nvpair_movef_string(char *value, const char *namefmt, ...) -{ - va_list nameap; - nvpair_t *nvp; - - va_start(nameap, namefmt); - nvp = nvpair_movev_string(value, namefmt, nameap); - va_end(nameap); - - return (nvp); -} - -nvpair_t * -nvpair_movef_nvlist(nvlist_t *value, const char *namefmt, ...) -{ - va_list nameap; - nvpair_t *nvp; - - va_start(nameap, namefmt); - nvp = nvpair_movev_nvlist(value, namefmt, nameap); - va_end(nameap); - - return (nvp); -} - -#ifndef _KERNEL -nvpair_t * -nvpair_movef_descriptor(int value, const char *namefmt, ...) -{ - va_list nameap; - nvpair_t *nvp; - - va_start(nameap, namefmt); - nvp = nvpair_movev_descriptor(value, namefmt, nameap); - va_end(nameap); - - return (nvp); -} -#endif - -nvpair_t * -nvpair_movef_binary(void *value, size_t size, const char *namefmt, ...) 
-{ - va_list nameap; - nvpair_t *nvp; - - va_start(nameap, namefmt); - nvp = nvpair_movev_binary(value, size, namefmt, nameap); - va_end(nameap); - - return (nvp); -} - -nvpair_t * -nvpair_movev_string(char *value, const char *namefmt, va_list nameap) -{ nvpair_t *nvp; - int serrno; if (value == NULL) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } - nvp = nvpair_allocv(NV_TYPE_STRING, (uint64_t)(uintptr_t)value, - strlen(value) + 1, namefmt, nameap); + nvp = nvpair_allocv(name, NV_TYPE_STRING, (uint64_t)(uintptr_t)value, + strlen(value) + 1); if (nvp == NULL) { - SAVE_ERRNO(serrno); + ERRNO_SAVE(); nv_free(value); - RESTORE_ERRNO(serrno); + ERRNO_RESTORE(); } return (nvp); } nvpair_t * -nvpair_movev_nvlist(nvlist_t *value, const char *namefmt, va_list nameap) +nvpair_move_nvlist(const char *name, nvlist_t *value) { nvpair_t *nvp; if (value == NULL || nvlist_get_nvpair_parent(value) != NULL) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } if (nvlist_error(value) != 0) { - RESTORE_ERRNO(nvlist_error(value)); + ERRNO_SET(nvlist_error(value)); nvlist_destroy(value); return (NULL); } - nvp = nvpair_allocv(NV_TYPE_NVLIST, (uint64_t)(uintptr_t)value, 0, - namefmt, nameap); + nvp = nvpair_allocv(name, NV_TYPE_NVLIST, (uint64_t)(uintptr_t)value, + 0); if (nvp == NULL) nvlist_destroy(value); else @@ -1180,22 +943,21 @@ nvpair_movev_nvlist(nvlist_t *value, const char *namefmt, va_list nameap) #ifndef _KERNEL nvpair_t * -nvpair_movev_descriptor(int value, const char *namefmt, va_list nameap) +nvpair_move_descriptor(const char *name, int value) { nvpair_t *nvp; - int serrno; if (value < 0 || !fd_is_valid(value)) { - errno = EBADF; + ERRNO_SET(EBADF); return (NULL); } - nvp = nvpair_allocv(NV_TYPE_DESCRIPTOR, (uint64_t)value, - sizeof(int64_t), namefmt, nameap); + nvp = nvpair_allocv(name, NV_TYPE_DESCRIPTOR, (uint64_t)value, + sizeof(int64_t)); if (nvp == NULL) { - serrno = errno; + ERRNO_SAVE(); close(value); - errno = serrno; + 
ERRNO_RESTORE(); } return (nvp); @@ -1203,23 +965,21 @@ nvpair_movev_descriptor(int value, const char *namefmt, va_list nameap) #endif nvpair_t * -nvpair_movev_binary(void *value, size_t size, const char *namefmt, - va_list nameap) +nvpair_move_binary(const char *name, void *value, size_t size) { nvpair_t *nvp; - int serrno; if (value == NULL || size == 0) { - RESTORE_ERRNO(EINVAL); + ERRNO_SET(EINVAL); return (NULL); } - nvp = nvpair_allocv(NV_TYPE_BINARY, (uint64_t)(uintptr_t)value, size, - namefmt, nameap); + nvp = nvpair_allocv(name, NV_TYPE_BINARY, (uint64_t)(uintptr_t)value, + size); if (nvp == NULL) { - SAVE_ERRNO(serrno); + ERRNO_SAVE(); nv_free(value); - RESTORE_ERRNO(serrno); + ERRNO_RESTORE(); } return (nvp); @@ -1348,3 +1108,4 @@ nvpair_type_string(int type) return ("<UNKNOWN>"); } } + diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c index f662ec2..cba656a 100644 --- a/sys/kern/subr_param.c +++ b/sys/kern/subr_param.c @@ -99,11 +99,7 @@ pid_t pid_max = PID_MAX; long maxswzone; /* max swmeta KVA storage */ long maxbcache; /* max buffer cache KVA storage */ long maxpipekva; /* Limit on pipe KVA */ -#ifdef XEN -int vm_guest = VM_GUEST_XEN; -#else int vm_guest = VM_GUEST_NO; /* Running as virtual machine guest? 
*/ -#endif u_long maxtsiz; /* max text size */ u_long dfldsiz; /* initial data size limit */ u_long maxdsiz; /* max data size */ diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index cfc3ed7..93f7557 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include <sys/ktr.h> #include <sys/pioctl.h> #include <sys/ptrace.h> +#include <sys/racct.h> #include <sys/resourcevar.h> #include <sys/sched.h> #include <sys/signalvar.h> @@ -79,12 +80,6 @@ __FBSDID("$FreeBSD$"); #include <net/vnet.h> #endif -#ifdef XEN -#include <vm/vm.h> -#include <vm/vm_param.h> -#include <vm/pmap.h> -#endif - #ifdef HWPMC_HOOKS #include <sys/pmckern.h> #endif @@ -135,9 +130,6 @@ userret(struct thread *td, struct trapframe *frame) * Let the scheduler adjust our priority etc. */ sched_userret(td); -#ifdef XEN - PT_UPDATES_FLUSH(); -#endif /* * Check for misbehavior. @@ -172,11 +164,13 @@ userret(struct thread *td, struct trapframe *frame) __func__, td, p->p_pid, td->td_name, curvnet, (td->td_vnet_lpush != NULL) ? 
td->td_vnet_lpush : "N/A")); #endif -#ifdef RACCT - PROC_LOCK(p); - while (p->p_throttled == 1) - msleep(p->p_racct, &p->p_mtx, 0, "racct", 0); - PROC_UNLOCK(p); +#ifdef RACCT + if (racct_enable) { + PROC_LOCK(p); + while (p->p_throttled == 1) + msleep(p->p_racct, &p->p_mtx, 0, "racct", 0); + PROC_UNLOCK(p); + } #endif } diff --git a/sys/kern/sysv_msg.c b/sys/kern/sysv_msg.c index acc44710..59357c4 100644 --- a/sys/kern/sysv_msg.c +++ b/sys/kern/sysv_msg.c @@ -617,12 +617,14 @@ sys_msgget(td, uap) goto done2; } #ifdef RACCT - PROC_LOCK(td->td_proc); - error = racct_add(td->td_proc, RACCT_NMSGQ, 1); - PROC_UNLOCK(td->td_proc); - if (error != 0) { - error = ENOSPC; - goto done2; + if (racct_enable) { + PROC_LOCK(td->td_proc); + error = racct_add(td->td_proc, RACCT_NMSGQ, 1); + PROC_UNLOCK(td->td_proc); + if (error != 0) { + error = ENOSPC; + goto done2; + } } #endif DPRINTF(("msqid %d is available\n", msqid)); @@ -724,20 +726,22 @@ kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype) #endif #ifdef RACCT - PROC_LOCK(td->td_proc); - if (racct_add(td->td_proc, RACCT_MSGQQUEUED, 1)) { - PROC_UNLOCK(td->td_proc); - error = EAGAIN; - goto done2; - } - saved_msgsz = msgsz; - if (racct_add(td->td_proc, RACCT_MSGQSIZE, msgsz)) { - racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1); + if (racct_enable) { + PROC_LOCK(td->td_proc); + if (racct_add(td->td_proc, RACCT_MSGQQUEUED, 1)) { + PROC_UNLOCK(td->td_proc); + error = EAGAIN; + goto done2; + } + saved_msgsz = msgsz; + if (racct_add(td->td_proc, RACCT_MSGQSIZE, msgsz)) { + racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1); + PROC_UNLOCK(td->td_proc); + error = EAGAIN; + goto done2; + } PROC_UNLOCK(td->td_proc); - error = EAGAIN; - goto done2; } - PROC_UNLOCK(td->td_proc); #endif segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz; @@ -994,7 +998,7 @@ kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype) td->td_retval[0] = 0; done3: #ifdef RACCT - if (error != 0) { + if (racct_enable && error != 0) { PROC_LOCK(td->td_proc); 
racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1); racct_sub(td->td_proc, RACCT_MSGQSIZE, saved_msgsz); diff --git a/sys/kern/sysv_sem.c b/sys/kern/sysv_sem.c index 6ff5789..d9324f4 100644 --- a/sys/kern/sysv_sem.c +++ b/sys/kern/sysv_sem.c @@ -915,12 +915,14 @@ sys_semget(struct thread *td, struct semget_args *uap) goto done2; } #ifdef RACCT - PROC_LOCK(td->td_proc); - error = racct_add(td->td_proc, RACCT_NSEM, nsems); - PROC_UNLOCK(td->td_proc); - if (error != 0) { - error = ENOSPC; - goto done2; + if (racct_enable) { + PROC_LOCK(td->td_proc); + error = racct_add(td->td_proc, RACCT_NSEM, nsems); + PROC_UNLOCK(td->td_proc); + if (error != 0) { + error = ENOSPC; + goto done2; + } } #endif DPRINTF(("semid %d is available\n", semid)); @@ -1009,12 +1011,15 @@ sys_semop(struct thread *td, struct semop_args *uap) return (E2BIG); } else { #ifdef RACCT - PROC_LOCK(td->td_proc); - if (nsops > racct_get_available(td->td_proc, RACCT_NSEMOP)) { + if (racct_enable) { + PROC_LOCK(td->td_proc); + if (nsops > + racct_get_available(td->td_proc, RACCT_NSEMOP)) { + PROC_UNLOCK(td->td_proc); + return (E2BIG); + } PROC_UNLOCK(td->td_proc); - return (E2BIG); } - PROC_UNLOCK(td->td_proc); #endif sops = malloc(nsops * sizeof(*sops), M_TEMP, M_WAITOK); diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c index 274deda..3240a5f 100644 --- a/sys/kern/sysv_shm.c +++ b/sys/kern/sysv_shm.c @@ -651,17 +651,19 @@ shmget_allocate_segment(struct thread *td, struct shmget_args *uap, int mode) ("segnum %d shmalloced %d", segnum, shmalloced)); shmseg = &shmsegs[segnum]; #ifdef RACCT - PROC_LOCK(td->td_proc); - if (racct_add(td->td_proc, RACCT_NSHM, 1)) { - PROC_UNLOCK(td->td_proc); - return (ENOSPC); - } - if (racct_add(td->td_proc, RACCT_SHMSIZE, size)) { - racct_sub(td->td_proc, RACCT_NSHM, 1); + if (racct_enable) { + PROC_LOCK(td->td_proc); + if (racct_add(td->td_proc, RACCT_NSHM, 1)) { + PROC_UNLOCK(td->td_proc); + return (ENOSPC); + } + if (racct_add(td->td_proc, RACCT_SHMSIZE, size)) { + 
racct_sub(td->td_proc, RACCT_NSHM, 1); + PROC_UNLOCK(td->td_proc); + return (ENOMEM); + } PROC_UNLOCK(td->td_proc); - return (ENOMEM); } - PROC_UNLOCK(td->td_proc); #endif /* @@ -672,10 +674,12 @@ shmget_allocate_segment(struct thread *td, struct shmget_args *uap, int mode) 0, size, VM_PROT_DEFAULT, 0, cred); if (shm_object == NULL) { #ifdef RACCT - PROC_LOCK(td->td_proc); - racct_sub(td->td_proc, RACCT_NSHM, 1); - racct_sub(td->td_proc, RACCT_SHMSIZE, size); - PROC_UNLOCK(td->td_proc); + if (racct_enable) { + PROC_LOCK(td->td_proc); + racct_sub(td->td_proc, RACCT_NSHM, 1); + racct_sub(td->td_proc, RACCT_SHMSIZE, size); + PROC_UNLOCK(td->td_proc); + } #endif return (ENOMEM); } @@ -961,39 +965,39 @@ oshmctl(struct thread *td, struct oshmctl_args *uap) if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) return (ENOSYS); + if (uap->cmd != IPC_STAT) { + return (freebsd7_shmctl(td, + (struct freebsd7_shmctl_args *)uap)); + } SYSVSHM_LOCK(); shmseg = shm_find_segment(uap->shmid, true); if (shmseg == NULL) { SYSVSHM_UNLOCK(); return (EINVAL); } - switch (uap->cmd) { - case IPC_STAT: - error = ipcperm(td, &shmseg->u.shm_perm, IPC_R); - if (error != 0) - break; + error = ipcperm(td, &shmseg->u.shm_perm, IPC_R); + if (error != 0) { + SYSVSHM_UNLOCK(); + return (error); + } #ifdef MAC - error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, - uap->cmd); - if (error != 0) - break; -#endif - ipcperm_new2old(&shmseg->u.shm_perm, &outbuf.shm_perm); - outbuf.shm_segsz = shmseg->u.shm_segsz; - outbuf.shm_cpid = shmseg->u.shm_cpid; - outbuf.shm_lpid = shmseg->u.shm_lpid; - outbuf.shm_nattch = shmseg->u.shm_nattch; - outbuf.shm_atime = shmseg->u.shm_atime; - outbuf.shm_dtime = shmseg->u.shm_dtime; - outbuf.shm_ctime = shmseg->u.shm_ctime; - outbuf.shm_handle = shmseg->object; - error = copyout(&outbuf, uap->ubuf, sizeof(outbuf)); - break; - default: - error = freebsd7_shmctl(td, (struct freebsd7_shmctl_args *)uap); - break; + error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, 
uap->cmd); + if (error != 0) { + SYSVSHM_UNLOCK(); + return (error); } +#endif + ipcperm_new2old(&shmseg->u.shm_perm, &outbuf.shm_perm); + outbuf.shm_segsz = shmseg->u.shm_segsz; + outbuf.shm_cpid = shmseg->u.shm_cpid; + outbuf.shm_lpid = shmseg->u.shm_lpid; + outbuf.shm_nattch = shmseg->u.shm_nattch; + outbuf.shm_atime = shmseg->u.shm_atime; + outbuf.shm_dtime = shmseg->u.shm_dtime; + outbuf.shm_ctime = shmseg->u.shm_ctime; + outbuf.shm_handle = shmseg->object; SYSVSHM_UNLOCK(); + error = copyout(&outbuf, uap->ubuf, sizeof(outbuf)); return (error); #else return (EINVAL); @@ -1025,9 +1029,7 @@ sys_shmsys(struct thread *td, struct shmsys_args *uap) return (ENOSYS); if (uap->which < 0 || uap->which >= nitems(shmcalls)) return (EINVAL); - SYSVSHM_LOCK(); error = (*shmcalls[uap->which])(td, &uap->a2); - SYSVSHM_UNLOCK(); return (error); } diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c index 8410ed9..93c7ed1 100644 --- a/sys/kern/uipc_shm.c +++ b/sys/kern/uipc_shm.c @@ -170,7 +170,7 @@ uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio) if (uio->uio_rw == UIO_READ && vm_page_lookup(obj, idx) == NULL && !vm_pager_has_page(obj, idx, NULL, NULL)) { VM_OBJECT_WUNLOCK(obj); - return (uiomove(__DECONST(void *, zero_region), len, uio)); + return (uiomove(__DECONST(void *, zero_region), tlen, uio)); } /* diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index c7e602e..0bfcf2d 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -59,10 +59,12 @@ __FBSDID("$FreeBSD$"); #include <sys/conf.h> #include <sys/event.h> #include <sys/mount.h> +#include <geom/geom.h> #include <machine/atomic.h> #include <vm/vm.h> +#include <vm/vm_page.h> #include <vm/vm_extern.h> #include <vm/pmap.h> #include <vm/vm_map.h> @@ -232,9 +234,10 @@ struct aiocblist { int jobstate; /* (b) job state */ int inputcharge; /* (*) input blockes */ int outputcharge; /* (*) output blockes */ - struct buf *bp; /* (*) private to BIO backend, - * buffer pointer - */ + struct bio *bp; /* 
(*) BIO backend BIO pointer */ + struct buf *pbuf; /* (*) BIO backend buffer pointer */ + struct vm_page *pages[btoc(MAXPHYS)+1]; /* BIO backend pages */ + int npages; /* BIO backend number of pages */ struct proc *userproc; /* (*) user process */ struct ucred *cred; /* (*) active credential when created */ struct file *fd_file; /* (*) pointer to file structure */ @@ -243,7 +246,6 @@ struct aiocblist { struct knlist klist; /* (a) list of knotes */ struct aiocb uaiocb; /* (*) kernel I/O control block */ ksiginfo_t ksi; /* (a) realtime signal info */ - struct task biotask; /* (*) private to BIO backend */ uint64_t seqno; /* (*) job number */ int pending; /* (a) number of pending I/O, aio_fsync only */ }; @@ -344,11 +346,10 @@ static void aio_process_mlock(struct aiocblist *aiocbe); static int aio_newproc(int *); int aio_aqueue(struct thread *td, struct aiocb *job, struct aioliojob *lio, int type, struct aiocb_ops *ops); -static void aio_physwakeup(struct buf *bp); +static void aio_physwakeup(struct bio *bp); static void aio_proc_rundown(void *arg, struct proc *p); static void aio_proc_rundown_exec(void *arg, struct proc *p, struct image_params *imgp); static int aio_qphysio(struct proc *p, struct aiocblist *iocb); -static void biohelper(void *, int); static void aio_daemon(void *param); static void aio_swake_cb(struct socket *, struct sockbuf *); static int aio_unload(void); @@ -1294,13 +1295,15 @@ aio_qphysio(struct proc *p, struct aiocblist *aiocbe) { struct aiocb *cb; struct file *fp; - struct buf *bp; + struct bio *bp; + struct buf *pbuf; struct vnode *vp; struct cdevsw *csw; struct cdev *dev; struct kaioinfo *ki; struct aioliojob *lj; - int error, ref; + int error, ref, unmap, poff; + vm_prot_t prot; cb = &aiocbe->uaiocb; fp = aiocbe->fd_file; @@ -1309,107 +1312,121 @@ aio_qphysio(struct proc *p, struct aiocblist *aiocbe) return (-1); vp = fp->f_vnode; - - /* - * If its not a disk, we don't want to return a positive error. 
- * It causes the aio code to not fall through to try the thread - * way when you're talking to a regular file. - */ - if (!vn_isdisk(vp, &error)) { - if (error == ENOTBLK) - return (-1); - else - return (error); - } - - if (vp->v_bufobj.bo_bsize == 0) - return (-1); - - if (cb->aio_nbytes % vp->v_bufobj.bo_bsize) + if (vp->v_type != VCHR) return (-1); - - if (cb->aio_nbytes > - MAXPHYS - (((vm_offset_t) cb->aio_buf) & PAGE_MASK)) + if (vp->v_bufobj.bo_bsize == 0) return (-1); - - ki = p->p_aioinfo; - if (ki->kaio_buffer_count >= ki->kaio_ballowed_count) + if (cb->aio_nbytes % vp->v_bufobj.bo_bsize) return (-1); ref = 0; csw = devvn_refthread(vp, &dev, &ref); if (csw == NULL) return (ENXIO); + + if ((csw->d_flags & D_DISK) == 0) { + error = -1; + goto unref; + } if (cb->aio_nbytes > dev->si_iosize_max) { error = -1; goto unref; } - /* Create and build a buffer header for a transfer. */ - bp = (struct buf *)getpbuf(NULL); - BUF_KERNPROC(bp); + ki = p->p_aioinfo; + poff = (vm_offset_t)cb->aio_buf & PAGE_MASK; + unmap = ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed); + if (unmap) { + if (cb->aio_nbytes > MAXPHYS) { + error = -1; + goto unref; + } + } else { + if (cb->aio_nbytes > MAXPHYS - poff) { + error = -1; + goto unref; + } + if (ki->kaio_buffer_count >= ki->kaio_ballowed_count) { + error = -1; + goto unref; + } + } + aiocbe->bp = bp = g_alloc_bio(); + if (!unmap) { + aiocbe->pbuf = pbuf = (struct buf *)getpbuf(NULL); + BUF_KERNPROC(pbuf); + } AIO_LOCK(ki); ki->kaio_count++; - ki->kaio_buffer_count++; + if (!unmap) + ki->kaio_buffer_count++; lj = aiocbe->lio; if (lj) lj->lioj_count++; - AIO_UNLOCK(ki); - - /* - * Get a copy of the kva from the physical buffer. 
- */ - error = 0; - - bp->b_bcount = cb->aio_nbytes; - bp->b_bufsize = cb->aio_nbytes; - bp->b_iodone = aio_physwakeup; - bp->b_saveaddr = bp->b_data; - bp->b_data = (void *)(uintptr_t)cb->aio_buf; - bp->b_offset = cb->aio_offset; - bp->b_iooffset = cb->aio_offset; - bp->b_blkno = btodb(cb->aio_offset); - bp->b_iocmd = cb->aio_lio_opcode == LIO_WRITE ? BIO_WRITE : BIO_READ; - - /* - * Bring buffer into kernel space. - */ - if (vmapbuf(bp, (dev->si_flags & SI_UNMAPPED) == 0) < 0) { - error = EFAULT; - goto doerror; - } - - AIO_LOCK(ki); - aiocbe->bp = bp; - bp->b_caller1 = (void *)aiocbe; TAILQ_INSERT_TAIL(&ki->kaio_bufqueue, aiocbe, plist); TAILQ_INSERT_TAIL(&ki->kaio_all, aiocbe, allist); aiocbe->jobstate = JOBST_JOBQBUF; cb->_aiocb_private.status = cb->aio_nbytes; AIO_UNLOCK(ki); - atomic_add_int(&num_queue_count, 1); - atomic_add_int(&num_buf_aio, 1); - - bp->b_error = 0; + bp->bio_length = cb->aio_nbytes; + bp->bio_bcount = cb->aio_nbytes; + bp->bio_done = aio_physwakeup; + bp->bio_data = (void *)(uintptr_t)cb->aio_buf; + bp->bio_offset = cb->aio_offset; + bp->bio_cmd = cb->aio_lio_opcode == LIO_WRITE ? 
BIO_WRITE : BIO_READ; + bp->bio_dev = dev; + bp->bio_caller1 = (void *)aiocbe; + + prot = VM_PROT_READ; + if (cb->aio_lio_opcode == LIO_READ) + prot |= VM_PROT_WRITE; /* Less backwards than it looks */ + if ((aiocbe->npages = vm_fault_quick_hold_pages( + &curproc->p_vmspace->vm_map, + (vm_offset_t)bp->bio_data, bp->bio_length, prot, aiocbe->pages, + sizeof(aiocbe->pages)/sizeof(aiocbe->pages[0]))) < 0) { + error = EFAULT; + goto doerror; + } + if (!unmap) { + pmap_qenter((vm_offset_t)pbuf->b_data, + aiocbe->pages, aiocbe->npages); + bp->bio_data = pbuf->b_data + poff; + } else { + bp->bio_ma = aiocbe->pages; + bp->bio_ma_n = aiocbe->npages; + bp->bio_ma_offset = poff; + bp->bio_data = unmapped_buf; + bp->bio_flags |= BIO_UNMAPPED; + } - TASK_INIT(&aiocbe->biotask, 0, biohelper, aiocbe); + atomic_add_int(&num_queue_count, 1); + if (!unmap) + atomic_add_int(&num_buf_aio, 1); /* Perform transfer. */ - dev_strategy_csw(dev, csw, bp); + csw->d_strategy(bp); dev_relthread(dev, ref); return (0); doerror: AIO_LOCK(ki); + aiocbe->jobstate = JOBST_NULL; + TAILQ_REMOVE(&ki->kaio_bufqueue, aiocbe, plist); + TAILQ_REMOVE(&ki->kaio_all, aiocbe, allist); ki->kaio_count--; - ki->kaio_buffer_count--; + if (!unmap) + ki->kaio_buffer_count--; if (lj) lj->lioj_count--; - aiocbe->bp = NULL; AIO_UNLOCK(ki); - relpbuf(bp, NULL); + if (pbuf) { + relpbuf(pbuf, NULL); + aiocbe->pbuf = NULL; + } + g_destroy_bio(bp); + aiocbe->bp = NULL; unref: dev_relthread(dev, ref); return (error); @@ -1787,8 +1804,6 @@ no_kqueue: } #endif queueit: - /* No buffer for daemon I/O. */ - aiocbe->bp = NULL; atomic_add_int(&num_queue_count, 1); AIO_LOCK(ki); @@ -2425,54 +2440,43 @@ sys_lio_listio(struct thread *td, struct lio_listio_args *uap) return (error); } -/* - * Called from interrupt thread for physio, we should return as fast - * as possible, so we schedule a biohelper task. 
- */ static void -aio_physwakeup(struct buf *bp) +aio_physwakeup(struct bio *bp) { - struct aiocblist *aiocbe; - - aiocbe = (struct aiocblist *)bp->b_caller1; - taskqueue_enqueue(taskqueue_aiod_bio, &aiocbe->biotask); -} - -/* - * Task routine to perform heavy tasks, process wakeup, and signals. - */ -static void -biohelper(void *context, int pending) -{ - struct aiocblist *aiocbe = context; - struct buf *bp; + struct aiocblist *aiocbe = (struct aiocblist *)bp->bio_caller1; struct proc *userp; struct kaioinfo *ki; int nblks; + /* Release mapping into kernel space. */ + if (aiocbe->pbuf) { + pmap_qremove((vm_offset_t)aiocbe->pbuf->b_data, aiocbe->npages); + relpbuf(aiocbe->pbuf, NULL); + aiocbe->pbuf = NULL; + atomic_subtract_int(&num_buf_aio, 1); + } + vm_page_unhold_pages(aiocbe->pages, aiocbe->npages); + bp = aiocbe->bp; + aiocbe->bp = NULL; userp = aiocbe->userproc; ki = userp->p_aioinfo; AIO_LOCK(ki); - aiocbe->uaiocb._aiocb_private.status -= bp->b_resid; + aiocbe->uaiocb._aiocb_private.status -= bp->bio_resid; aiocbe->uaiocb._aiocb_private.error = 0; - if (bp->b_ioflags & BIO_ERROR) - aiocbe->uaiocb._aiocb_private.error = bp->b_error; + if (bp->bio_flags & BIO_ERROR) + aiocbe->uaiocb._aiocb_private.error = bp->bio_error; nblks = btodb(aiocbe->uaiocb.aio_nbytes); if (aiocbe->uaiocb.aio_lio_opcode == LIO_WRITE) aiocbe->outputcharge += nblks; else aiocbe->inputcharge += nblks; - aiocbe->bp = NULL; TAILQ_REMOVE(&userp->p_aioinfo->kaio_bufqueue, aiocbe, plist); ki->kaio_buffer_count--; aio_bio_done_notify(userp, aiocbe, DONE_BUF); AIO_UNLOCK(ki); - /* Release mapping into kernel space. 
*/ - vunmapbuf(bp); - relpbuf(bp, NULL); - atomic_subtract_int(&num_buf_aio, 1); + g_destroy_bio(bp); } /* syscall - wait for the next completion of an aio request */ diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index dfe2997..5ac04ac 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -113,8 +113,8 @@ static void vfs_setdirty_locked_object(struct buf *bp); static void vfs_vmio_release(struct buf *bp); static int vfs_bio_clcheck(struct vnode *vp, int size, daddr_t lblkno, daddr_t blkno); -static int buf_flush(int); -static int flushbufqueues(int, int); +static int buf_flush(struct vnode *vp, int); +static int flushbufqueues(struct vnode *, int, int); static void buf_daemon(void); static void bremfreel(struct buf *bp); static __inline void bd_wakeup(void); @@ -805,6 +805,7 @@ bufinit(void) struct buf *bp; int i; + CTASSERT(MAXBCACHEBUF >= MAXBSIZE); mtx_init(&bqclean, "bufq clean lock", NULL, MTX_DEF); mtx_init(&bqdirty, "bufq dirty lock", NULL, MTX_DEF); mtx_init(&rbreqlock, "runningbufspace lock", NULL, MTX_DEF); @@ -846,8 +847,8 @@ bufinit(void) * by the system. */ maxbufspace = (long)nbuf * BKVASIZE; - hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - MAXBSIZE * 10); - lobufspace = hibufspace - MAXBSIZE; + hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - MAXBCACHEBUF * 10); + lobufspace = hibufspace - MAXBCACHEBUF; /* * Note: The 16 MiB upper limit for hirunningspace was chosen @@ -857,9 +858,9 @@ bufinit(void) * The lower 1 MiB limit is the historical upper limit for * hirunningspace. 
*/ - hirunningspace = lmax(lmin(roundup(hibufspace / 64, MAXBSIZE), + hirunningspace = lmax(lmin(roundup(hibufspace / 64, MAXBCACHEBUF), 16 * 1024 * 1024), 1024 * 1024); - lorunningspace = roundup((hirunningspace * 2) / 3, MAXBSIZE); + lorunningspace = roundup((hirunningspace * 2) / 3, MAXBCACHEBUF); /* * Limit the amount of malloc memory since it is wired permanently into @@ -2096,7 +2097,7 @@ getnewbuf_bufd_help(struct vnode *vp, int gbflags, int slpflag, int slptimeo, { struct thread *td; char *waitmsg; - int cnt, error, flags, norunbuf, wait; + int error, fl, flags, norunbuf; mtx_assert(&bqclean, MA_OWNED); @@ -2118,8 +2119,6 @@ getnewbuf_bufd_help(struct vnode *vp, int gbflags, int slpflag, int slptimeo, return; td = curthread; - cnt = 0; - wait = MNT_NOWAIT; rw_wlock(&nblock); while ((needsbuffer & flags) != 0) { if (vp != NULL && vp->v_type != VCHR && @@ -2133,20 +2132,23 @@ getnewbuf_bufd_help(struct vnode *vp, int gbflags, int slpflag, int slptimeo, * cannot be achieved by the buf_daemon, that * cannot lock the vnode. */ - if (cnt++ > 2) - wait = MNT_WAIT; - ASSERT_VOP_LOCKED(vp, "bufd_helper"); - error = VOP_ISLOCKED(vp) == LK_EXCLUSIVE ? 0 : - vn_lock(vp, LK_TRYUPGRADE); - if (error == 0) { - /* play bufdaemon */ - norunbuf = curthread_pflags_set(TDP_BUFNEED | - TDP_NORUNNINGBUF); - VOP_FSYNC(vp, wait, td); - atomic_add_long(&notbufdflushes, 1); - curthread_pflags_restore(norunbuf); - } + norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) | + (td->td_pflags & TDP_NORUNNINGBUF); + + /* + * Play bufdaemon. The getnewbuf() function + * may be called while the thread owns lock + * for another dirty buffer for the same + * vnode, which makes it impossible to use + * VOP_FSYNC() there, due to the buffer lock + * recursion.
+ */ + td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF; + fl = buf_flush(vp, flushbufqtarget); + td->td_pflags &= norunbuf; rw_wlock(&nblock); + if (fl != 0) + continue; if ((needsbuffer & flags) == 0) break; } @@ -2565,18 +2567,20 @@ static struct kproc_desc buf_kp = { SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp); static int -buf_flush(int target) +buf_flush(struct vnode *vp, int target) { int flushed; - flushed = flushbufqueues(target, 0); + flushed = flushbufqueues(vp, target, 0); if (flushed == 0) { /* * Could not find any buffers without rollback * dependencies, so just write the first one * in the hopes of eventually making progress. */ - flushed = flushbufqueues(target, 1); + if (vp != NULL && target > 2) + target /= 2; + flushbufqueues(vp, target, 1); } return (flushed); } @@ -2613,7 +2617,7 @@ buf_daemon() * the I/O system. */ while (numdirtybuffers > lodirty) { - if (buf_flush(numdirtybuffers - lodirty) == 0) + if (buf_flush(NULL, numdirtybuffers - lodirty) == 0) break; kern_yield(PRI_USER); } @@ -2668,7 +2672,7 @@ SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps, CTLFLAG_RW, &flushwithdeps, 0, "Number of buffers flushed with dependecies that require rollbacks"); static int -flushbufqueues(int target, int flushdeps) +flushbufqueues(struct vnode *lvp, int target, int flushdeps) { struct buf *sentinel; struct vnode *vp; @@ -2678,6 +2682,7 @@ flushbufqueues(int target, int flushdeps) int flushed; int queue; int error; + bool unlock; flushed = 0; queue = QUEUE_DIRTY; @@ -2699,8 +2704,18 @@ flushbufqueues(int target, int flushdeps) mtx_unlock(&bqdirty); break; } - KASSERT(bp->b_qindex != QUEUE_SENTINEL, - ("parallel calls to flushbufqueues() bp %p", bp)); + /* + * Skip sentinels inserted by other invocations of the + * flushbufqueues(), taking care to not reorder them. + * + * Only flush the buffers that belong to the + * vnode locked by the curthread. 
+ */ + if (bp->b_qindex == QUEUE_SENTINEL || (lvp != NULL && + bp->b_vp != lvp)) { + mtx_unlock(&bqdirty); + continue; + } error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL); mtx_unlock(&bqdirty); if (error != 0) @@ -2748,16 +2763,37 @@ flushbufqueues(int target, int flushdeps) BUF_UNLOCK(bp); continue; } - error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT); + if (lvp == NULL) { + unlock = true; + error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT); + } else { + ASSERT_VOP_LOCKED(vp, "getbuf"); + unlock = false; + error = VOP_ISLOCKED(vp) == LK_EXCLUSIVE ? 0 : + vn_lock(vp, LK_TRYUPGRADE); + } if (error == 0) { CTR3(KTR_BUF, "flushbufqueue(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); - vfs_bio_awrite(bp); + if (curproc == bufdaemonproc) { + vfs_bio_awrite(bp); + } else { + bremfree(bp); + bwrite(bp); + notbufdflushes++; + } vn_finished_write(mp); - VOP_UNLOCK(vp, 0); + if (unlock) + VOP_UNLOCK(vp, 0); flushwithdeps += hasdeps; flushed++; - if (runningbufspace > hirunningspace) + + /* + * Sleeping on runningbufspace while holding + * vnode lock leads to deadlock. + */ + if (curproc == bufdaemonproc && + runningbufspace > hirunningspace) waitrunningbufspace(); continue; } @@ -3073,8 +3109,9 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo, KASSERT((flags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC, ("GB_KVAALLOC only makes sense with GB_UNMAPPED")); ASSERT_VOP_LOCKED(vp, "getblk"); - if (size > MAXBSIZE) - panic("getblk: size(%d) > MAXBSIZE(%d)\n", size, MAXBSIZE); + if (size > MAXBCACHEBUF) + panic("getblk: size(%d) > MAXBCACHEBUF(%d)\n", size, + MAXBCACHEBUF); if (!unmapped_buf_allowed) flags &= ~(GB_UNMAPPED | GB_KVAALLOC); |