From 2bf12d0c7ceced74a813d6940dd07310a11d6a2a Mon Sep 17 00:00:00 2001 From: davide Date: Thu, 28 Feb 2013 10:46:54 +0000 Subject: MFcalloutng: When CPU becomes idle, cpu_idleclock() calculates time to the next timer event in order to reprogram hw timer. Return that time in sbintime_t to the caller and pass it to acpi_cpu_idle(), where it can be used as one more factor (quite precise) to estimate further sleep time and choose optimal sleep state. This is a preparatory change for further callout improvements that will be committed in the next days. The commit is not targeted for MFC. --- sys/kern/kern_clocksource.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'sys/kern') diff --git a/sys/kern/kern_clocksource.c b/sys/kern/kern_clocksource.c index 33f21e9..ac7e2de 100644 --- a/sys/kern/kern_clocksource.c +++ b/sys/kern/kern_clocksource.c @@ -767,7 +767,7 @@ cpu_stopprofclock(void) /* * Switch to idle mode (all ticks handled). */ -void +sbintime_t cpu_idleclock(void) { struct bintime now, t; @@ -779,7 +779,7 @@ cpu_idleclock(void) || curcpu == CPU_FIRST() #endif ) - return; + return (-1); state = DPCPU_PTR(timerstate); if (periodic) now = state->now; @@ -795,6 +795,8 @@ cpu_idleclock(void) if (!periodic) loadtimer(&now, 0); ET_HW_UNLOCK(state); + bintime_sub(&t, &now); + return (MAX(bttosbt(t), 0)); } /* -- cgit v1.1 From 6cf7cc6e4d8da1cf9aba1c481b914e4ca5e9f38f Mon Sep 17 00:00:00 2001 From: mav Date: Thu, 28 Feb 2013 13:46:03 +0000 Subject: MFcalloutng: Switch eventtimers(9) from using struct bintime to sbintime_t. Even before this not a single driver really supported full dynamic range of struct bintime even in theory, not speaking about practical inexpediency. This change legitimates the status quo and cleans up the code. 
--- sys/kern/kern_clocksource.c | 16 +++++++------- sys/kern/kern_et.c | 51 +++++++++++++++++---------------------------- 2 files changed, 28 insertions(+), 39 deletions(-) (limited to 'sys/kern') diff --git a/sys/kern/kern_clocksource.c b/sys/kern/kern_clocksource.c index ac7e2de..10732d9 100644 --- a/sys/kern/kern_clocksource.c +++ b/sys/kern/kern_clocksource.c @@ -153,6 +153,8 @@ static DPCPU_DEFINE(struct pcpu_state, timerstate); (((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \ ((bt)->frac >> 1)) +#define SBT2FREQ(sbt) ((SBT_1S + ((sbt) >> 1)) / (sbt)) + /* * Timer broadcast IPI handler. */ @@ -442,7 +444,7 @@ loadtimer(struct bintime *now, int start) new.sec, (u_int)(new.frac >> 32)); *next = new; bintime_add(next, now); - et_start(timer, &new, &timerperiod); + et_start(timer, bttosbt(new), bttosbt(timerperiod)); } } else { getnextevent(&new); @@ -454,7 +456,7 @@ loadtimer(struct bintime *now, int start) if (!eq) { *next = new; bintime_sub(&new, now); - et_start(timer, &new, NULL); + et_start(timer, bttosbt(new), 0); } } } @@ -603,13 +605,13 @@ round_freq(struct eventtimer *et, int freq) div = 1 << (flsl(div + div / 2) - 1); freq = (et->et_frequency + div / 2) / div; } - if (et->et_min_period.sec > 0) + if (et->et_min_period > SBT_1S) panic("Event timer \"%s\" doesn't support sub-second periods!", et->et_name); - else if (et->et_min_period.frac != 0) - freq = min(freq, BT2FREQ(&et->et_min_period)); - if (et->et_max_period.sec == 0 && et->et_max_period.frac != 0) - freq = max(freq, BT2FREQ(&et->et_max_period)); + else if (et->et_min_period != 0) + freq = min(freq, SBT2FREQ(et->et_min_period)); + if (et->et_max_period < SBT_1S && et->et_max_period != 0) + freq = max(freq, SBT2FREQ(et->et_max_period)); return (freq); } diff --git a/sys/kern/kern_et.c b/sys/kern/kern_et.c index 3156c81..472db79 100644 --- a/sys/kern/kern_et.c +++ b/sys/kern/kern_et.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2010 Alexander Motin + * Copyright (c) 2010-2013 Alexander Motin * All 
rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -62,6 +62,7 @@ et_register(struct eventtimer *et) et->et_quality); } } + KASSERT(et->et_start, ("et_register: timer has no start function")); et->et_sysctl = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_kern_eventtimer_et), OID_AUTO, et->et_name, CTLFLAG_RW, 0, "event timer description"); @@ -159,43 +160,29 @@ et_init(struct eventtimer *et, et_event_cb_t *event, * period - period of subsequent periodic ticks. */ int -et_start(struct eventtimer *et, - struct bintime *first, struct bintime *period) +et_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { if (!et->et_active) return (ENXIO); - if (first == NULL && period == NULL) - return (EINVAL); - if ((et->et_flags & ET_FLAGS_PERIODIC) == 0 && - period != NULL) - return (ENODEV); - if ((et->et_flags & ET_FLAGS_ONESHOT) == 0 && - period == NULL) - return (ENODEV); - if (first != NULL) { - if (first->sec < et->et_min_period.sec || - (first->sec == et->et_min_period.sec && - first->frac < et->et_min_period.frac)) - first = &et->et_min_period; - if (first->sec > et->et_max_period.sec || - (first->sec == et->et_max_period.sec && - first->frac > et->et_max_period.frac)) - first = &et->et_max_period; + KASSERT(period >= 0, ("et_start: negative period")); + KASSERT((et->et_flags & ET_FLAGS_PERIODIC) || period == 0, + ("et_start: period specified for oneshot-only timer")); + KASSERT((et->et_flags & ET_FLAGS_ONESHOT) && period == 0, + ("et_start: period not specified for periodic-only timer")); + if (period != 0) { + if (period < et->et_min_period) + period = et->et_min_period; + else if (period > et->et_max_period) + period = et->et_max_period; } - if (period != NULL) { - if (period->sec < et->et_min_period.sec || - (period->sec == et->et_min_period.sec && - period->frac < et->et_min_period.frac)) - period = &et->et_min_period; - if (period->sec > et->et_max_period.sec || - (period->sec == et->et_max_period.sec && - 
period->frac > et->et_max_period.frac)) - period = &et->et_max_period; + if (period == 0 || first != 0) { + if (first < et->et_min_period) + first = et->et_min_period; + else if (first > et->et_max_period) + first = et->et_max_period; } - if (et->et_start) - return (et->et_start(et, first, period)); - return (0); + return (et->et_start(et, first, period)); } /* Stop event timer hardware. */ -- cgit v1.1 From 9f70262c678d34ac22ebb61da9b7ae1f7b9dffd4 Mon Sep 17 00:00:00 2001 From: davide Date: Thu, 28 Feb 2013 16:22:49 +0000 Subject: MFcalloutng: Style fixes. --- sys/kern/kern_timeout.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'sys/kern') diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index 80933fa..9cdc39b 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -100,7 +100,7 @@ struct cc_mig_ent { int ce_migration_ticks; #endif }; - + /* * There is one struct callout_cpu per cpu, holding all relevant * state for the callout processing thread on the individual CPU. @@ -613,7 +613,7 @@ skip: } /* - * The callout mechanism is based on the work of Adam M. Costello and + * The callout mechanism is based on the work of Adam M. Costello and * George Varghese, published in a technical report entitled "Redesigning * the BSD Callout and Timer Facilities" and modified slightly for inclusion * in FreeBSD by Justin T. Gibbs. The original work on the data structures -- cgit v1.1 From 0da1880cbc10809b659cabb782a330c021999d1a Mon Sep 17 00:00:00 2001 From: kib Date: Fri, 1 Mar 2013 18:40:14 +0000 Subject: Make the default implementation of the VOP_VPTOCNP() fail if the directory entry, matched by the inode number, is ".". NFSv4 client might instantiate the distinct vnodes which have the same inode number, since single v4 export can be combined from several filesystems on the server. 
For instance, a case when the nested server mount point is exactly one directory below the top of the export, causes directory and its parent to have the same inode number 2. The vop_stdvptocnp() algorithm then returns "." as the name of the lower directory. Filtering out the "." entry with ENOENT works around this behaviour, the error forces getcwd(3) to fall back to usermode implementation, which compares both st_dev and st_ino. Based on the submission by: rmacklem Tested by: rmacklem MFC after: 1 week --- sys/kern/vfs_default.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'sys/kern') diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 00d064e..1dd0185 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -856,8 +856,12 @@ vop_stdvptocnp(struct vop_vptocnp_args *ap) error = ENOMEM; goto out; } - bcopy(dp->d_name, buf + i, dp->d_namlen); - error = 0; + if (dp->d_namlen == 1 && dp->d_name[0] == '.') { + error = ENOENT; + } else { + bcopy(dp->d_name, buf + i, dp->d_namlen); + error = 0; + } goto out; } } while (len > 0 || !eofflag); -- cgit v1.1 From 11e6f714beffd0a6169d8c891ae993508fafba2c Mon Sep 17 00:00:00 2001 From: marius Date: Fri, 1 Mar 2013 18:49:14 +0000 Subject: - Use strdup(9) instead of reimplementing it. - Use __DECONST instead of strange casts. - Reduce code duplication and simplify name2oid(). 
PR: 176373 Submitted by: Christoph Mallon MFC after: 1 week --- sys/kern/kern_sysctl.c | 70 +++++++++++++------------------------------------- 1 file changed, 18 insertions(+), 52 deletions(-) (limited to 'sys/kern') diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 33296d3..68bf453 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -444,9 +444,9 @@ sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0); } if (oidp->oid_descr) - free((void *)(uintptr_t)(const void *)oidp->oid_descr, M_SYSCTLOID); - free((void *)(uintptr_t)(const void *)oidp->oid_name, - M_SYSCTLOID); + free(__DECONST(char *, oidp->oid_descr), + M_SYSCTLOID); + free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID); free(oidp, M_SYSCTLOID); } } @@ -462,8 +462,6 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr) { struct sysctl_oid *oidp; - ssize_t len; - char *newname; /* You have to hook up somewhere.. 
*/ if (parent == NULL) @@ -490,11 +488,7 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, SLIST_NEXT(oidp, oid_link) = NULL; oidp->oid_number = number; oidp->oid_refcnt = 1; - len = strlen(name); - newname = malloc(len + 1, M_SYSCTLOID, M_WAITOK); - bcopy(name, newname, len + 1); - newname[len] = '\0'; - oidp->oid_name = newname; + oidp->oid_name = strdup(name, M_SYSCTLOID); oidp->oid_handler = handler; oidp->oid_kind = CTLFLAG_DYN | kind; if ((kind & CTLTYPE) == CTLTYPE_NODE) { @@ -508,12 +502,8 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, oidp->oid_arg2 = arg2; } oidp->oid_fmt = fmt; - if (descr) { - int len = strlen(descr) + 1; - oidp->oid_descr = malloc(len, M_SYSCTLOID, M_WAITOK); - if (oidp->oid_descr) - strcpy((char *)(uintptr_t)(const void *)oidp->oid_descr, descr); - } + if (descr) + oidp->oid_descr = strdup(descr, M_SYSCTLOID); /* Update the context, if used */ if (clist != NULL) sysctl_ctx_entry_add(clist, oidp); @@ -529,16 +519,12 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, void sysctl_rename_oid(struct sysctl_oid *oidp, const char *name) { - ssize_t len; char *newname; - void *oldname; + char *oldname; - len = strlen(name); - newname = malloc(len + 1, M_SYSCTLOID, M_WAITOK); - bcopy(name, newname, len + 1); - newname[len] = '\0'; + newname = strdup(name, M_SYSCTLOID); SYSCTL_XLOCK(); - oldname = (void *)(uintptr_t)(const void *)oidp->oid_name; + oldname = __DECONST(char *, oidp->oid_name); oidp->oid_name = newname; SYSCTL_XUNLOCK(); free(oldname, M_SYSCTLOID); @@ -823,39 +809,26 @@ static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_CAPRD, static int name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp) { - int i; struct sysctl_oid *oidp; struct sysctl_oid_list *lsp = &sysctl__children; char *p; SYSCTL_ASSERT_XLOCKED(); - if (!*name) - return (ENOENT); - - p = name + strlen(name) - 1 ; - if (*p == '.') - *p = '\0'; - - *len = 0; 
+ for (*len = 0; *len < CTL_MAXNAME;) { + p = strsep(&name, "."); - for (p = name; *p && *p != '.'; p++) - ; - i = *p; - if (i == '.') - *p = '\0'; - - oidp = SLIST_FIRST(lsp); - - while (oidp && *len < CTL_MAXNAME) { - if (strcmp(name, oidp->oid_name)) { - oidp = SLIST_NEXT(oidp, oid_link); - continue; + oidp = SLIST_FIRST(lsp); + for (;; oidp = SLIST_NEXT(oidp, oid_link)) { + if (oidp == NULL) + return (ENOENT); + if (strcmp(p, oidp->oid_name) == 0) + break; } *oid++ = oidp->oid_number; (*len)++; - if (!i) { + if (name == NULL || *name == '\0') { if (oidpp) *oidpp = oidp; return (0); @@ -868,13 +841,6 @@ name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp) break; lsp = SYSCTL_CHILDREN(oidp); - oidp = SLIST_FIRST(lsp); - name = p+1; - for (p = name; *p && *p != '.'; p++) - ; - i = *p; - if (i == '.') - *p = '\0'; } return (ENOENT); } -- cgit v1.1 From ffd2079a18a79d32aee02f64cfe1de0ccd450177 Mon Sep 17 00:00:00 2001 From: pjd Date: Fri, 1 Mar 2013 21:57:02 +0000 Subject: Reduce lock scope a little. --- sys/kern/vfs_syscalls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'sys/kern') diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index bbda70d..bd44a3a 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -2633,9 +2633,9 @@ setfflags(td, vp, flags) if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); VATTR_NULL(&vattr); vattr.va_flags = flags; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); #ifdef MAC error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); if (error == 0) -- cgit v1.1 From 54fb726d9b32179cee99e8e32cc6fffe36dfca7c Mon Sep 17 00:00:00 2001 From: pjd Date: Fri, 1 Mar 2013 21:58:56 +0000 Subject: Remove unnecessary variables. 
--- sys/kern/vfs_vnops.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'sys/kern') diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 32c0978..96ce9e2 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -1860,7 +1860,6 @@ vn_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) { struct vnode *vp; - int error; vp = fp->f_vnode; #ifdef AUDIT @@ -1868,8 +1867,7 @@ vn_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, AUDIT_ARG_VNODE1(vp); VOP_UNLOCK(vp, 0); #endif - error = setfmode(td, active_cred, vp, mode); - return (error); + return (setfmode(td, active_cred, vp, mode)); } int @@ -1877,7 +1875,6 @@ vn_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td) { struct vnode *vp; - int error; vp = fp->f_vnode; #ifdef AUDIT @@ -1885,8 +1882,7 @@ vn_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, AUDIT_ARG_VNODE1(vp); VOP_UNLOCK(vp, 0); #endif - error = setfown(td, active_cred, vp, uid, gid); - return (error); + return (setfown(td, active_cred, vp, uid, gid)); } void -- cgit v1.1 From 8857575b13cf118cc89efb1b462dd314df09c180 Mon Sep 17 00:00:00 2001 From: jhb Date: Fri, 1 Mar 2013 22:03:31 +0000 Subject: Replace the TDP_NOSLEEPING flag with a counter so that the THREAD_NO_SLEEPING() and THREAD_SLEEPING_OK() macros can nest. Reviewed by: attilio --- sys/kern/subr_sleepqueue.c | 4 ++-- sys/kern/subr_trap.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'sys/kern') diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c index b6bd8fc..f187544 100644 --- a/sys/kern/subr_sleepqueue.c +++ b/sys/kern/subr_sleepqueue.c @@ -296,8 +296,8 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags, MPASS((queue >= 0) && (queue < NR_SLEEPQS)); /* If this thread is not allowed to sleep, die a horrible death. 
*/ - KASSERT(!(td->td_pflags & TDP_NOSLEEPING), - ("%s: td %p to sleep on wchan %p with TDP_NOSLEEPING on", + KASSERT(td->td_no_sleeping == 0, + ("%s: td %p to sleep on wchan %p with sleeping prohibited", __func__, td, wchan)); /* Look up the sleep queue associated with the wait channel 'wchan'. */ diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index bd06f20..1f24e88 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -158,7 +158,7 @@ userret(struct thread *td, struct trapframe *frame) ("userret: Returning with %d locks held", td->td_locks)); KASSERT((td->td_pflags & TDP_NOFAULTING) == 0, ("userret: Returning with pagefaults disabled")); - KASSERT((td->td_pflags & TDP_NOSLEEPING) == 0, + KASSERT(td->td_no_sleeping == 0, ("userret: Returning with sleep disabled")); KASSERT(td->td_pinned == 0 || (td->td_pflags & TDP_CALLCHAIN) != 0, ("userret: Returning with with pinned thread")); -- cgit v1.1 From f07ebb8888ea42f744890a727e8f6799a1086915 Mon Sep 17 00:00:00 2001 From: pjd Date: Sat, 2 Mar 2013 00:53:12 +0000 Subject: Merge Capsicum overhaul: - Capability is no longer separate descriptor type. Now every descriptor has set of its own capability rights. - The cap_new(2) system call is left, but it is no longer documented and should not be used in new code. - The new syscall cap_rights_limit(2) should be used instead of cap_new(2), which limits capability rights of the given descriptor without creating a new one. - The cap_getrights(2) syscall is renamed to cap_rights_get(2). - If CAP_IOCTL capability right is present we can further reduce allowed ioctls list with the new cap_ioctls_limit(2) syscall. List of allowed ioctls can be retrieved with cap_ioctls_get(2) syscall. - If CAP_FCNTL capability right is present we can further reduce fcntls that can be used with the new cap_fcntls_limit(2) syscall and retrieve them with cap_fcntls_get(2). - To support ioctl and fcntl white-listing the filedesc structure was heavily modified. 
- The audit subsystem, kdump and procstat tools were updated to recognize new syscalls. - Capability rights were revised and even though I tried hard to provide backward API and ABI compatibility there are some incompatible changes that are described in detail below: CAP_CREATE old behaviour: - Allow for openat(2)+O_CREAT. - Allow for linkat(2). - Allow for symlinkat(2). CAP_CREATE new behaviour: - Allow for openat(2)+O_CREAT. Added CAP_LINKAT: - Allow for linkat(2). ABI: Reuses CAP_RMDIR bit. - Allow to be target for renameat(2). Added CAP_SYMLINKAT: - Allow for symlinkat(2). Removed CAP_DELETE. Old behaviour: - Allow for unlinkat(2) when removing non-directory object. - Allow to be source for renameat(2). Removed CAP_RMDIR. Old behaviour: - Allow for unlinkat(2) when removing directory. Added CAP_RENAMEAT: - Required for source directory for the renameat(2) syscall. Added CAP_UNLINKAT (effectively it replaces CAP_DELETE and CAP_RMDIR): - Allow for unlinkat(2) on any object. - Required if target of renameat(2) exists and will be removed by this call. Removed CAP_MAPEXEC. CAP_MMAP old behaviour: - Allow for mmap(2) with any combination of PROT_NONE, PROT_READ and PROT_WRITE. CAP_MMAP new behaviour: - Allow for mmap(2)+PROT_NONE. Added CAP_MMAP_R: - Allow for mmap(PROT_READ). Added CAP_MMAP_W: - Allow for mmap(PROT_WRITE). Added CAP_MMAP_X: - Allow for mmap(PROT_EXEC). Added CAP_MMAP_RW: - Allow for mmap(PROT_READ | PROT_WRITE). Added CAP_MMAP_RX: - Allow for mmap(PROT_READ | PROT_EXEC). Added CAP_MMAP_WX: - Allow for mmap(PROT_WRITE | PROT_EXEC). Added CAP_MMAP_RWX: - Allow for mmap(PROT_READ | PROT_WRITE | PROT_EXEC). Renamed CAP_MKDIR to CAP_MKDIRAT. Renamed CAP_MKFIFO to CAP_MKFIFOAT. Renamed CAP_MKNOD to CAP_MKNODAT. CAP_READ old behaviour: - Allow pread(2). - Disallow read(2), readv(2) (if there is no CAP_SEEK). CAP_READ new behaviour: - Allow read(2), readv(2). - Disallow pread(2) (CAP_SEEK was also required). CAP_WRITE old behaviour: - Allow pwrite(2). 
- Disallow write(2), writev(2) (if there is no CAP_SEEK). CAP_WRITE new behaviour: - Allow write(2), writev(2). - Disallow pwrite(2) (CAP_SEEK was also required). Added convenient defines: #define CAP_PREAD (CAP_SEEK | CAP_READ) #define CAP_PWRITE (CAP_SEEK | CAP_WRITE) #define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ) #define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE) #define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL) #define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W) #define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X) #define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X) #define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X) #define CAP_RECV CAP_READ #define CAP_SEND CAP_WRITE #define CAP_SOCK_CLIENT \ (CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \ CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN) #define CAP_SOCK_SERVER \ (CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \ CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \ CAP_SETSOCKOPT | CAP_SHUTDOWN) Added defines for backward API compatibility: #define CAP_MAPEXEC CAP_MMAP_X #define CAP_DELETE CAP_UNLINKAT #define CAP_MKDIR CAP_MKDIRAT #define CAP_RMDIR CAP_UNLINKAT #define CAP_MKFIFO CAP_MKFIFOAT #define CAP_MKNOD CAP_MKNODAT #define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER) Sponsored by: The FreeBSD Foundation Reviewed by: Christoph Mallon Many aspects discussed with: rwatson, benl, jonathan ABI compatibility discussed with: kib --- sys/kern/capabilities.conf | 18 +- sys/kern/kern_descrip.c | 663 ++++++++++++++++++++++----------------------- sys/kern/kern_exec.c | 3 - sys/kern/kern_exit.c | 2 +- sys/kern/kern_fork.c | 2 +- sys/kern/sys_capability.c | 645 +++++++++++++++++++++++-------------------- sys/kern/sys_generic.c | 131 +++++---- sys/kern/syscalls.master | 12 +- sys/kern/tty.c | 16 +- sys/kern/uipc_mqueue.c | 9 +- sys/kern/uipc_sem.c | 4 +- sys/kern/uipc_shm.c | 4 +- sys/kern/uipc_syscalls.c | 46 ++-- 
sys/kern/uipc_usrreq.c | 89 +++--- sys/kern/vfs_aio.c | 6 +- sys/kern/vfs_lookup.c | 13 +- sys/kern/vfs_syscalls.c | 88 +++--- 17 files changed, 908 insertions(+), 843 deletions(-) (limited to 'sys/kern') diff --git a/sys/kern/capabilities.conf b/sys/kern/capabilities.conf index 11aad16..3c08782 100644 --- a/sys/kern/capabilities.conf +++ b/sys/kern/capabilities.conf @@ -110,9 +110,14 @@ aio_write ## Allow capability mode and capability system calls. ## cap_enter +cap_fcntls_get +cap_fcntls_limit cap_getmode -cap_getrights +cap_ioctls_get +cap_ioctls_limit cap_new +cap_rights_get +cap_rights_limit ## ## Allow read-only clock operations. @@ -239,7 +244,7 @@ getcontext ## Allow directory I/O on a file descriptor, subject to capability rights. ## Originally we had separate capabilities for directory-specific read ## operations, but on BSD we allow reading the raw directory data, so we just -## rely on CAP_READ and CAP_SEEK now. +## rely on CAP_READ now. ## getdents getdirentries @@ -317,13 +322,10 @@ gettimeofday getuid ## -## Disallow ioctl(2) for now, as frequently ioctl(2) operations have global -## scope, but this is a tricky one as it is also required for tty control. -## We do have a capability right for this operation. +## Allow ioctl(2), which hopefully will be limited by applications only to +## required commands with cap_ioctls_limit(2) syscall. ## -## XXXRW: This needs to be revisited. -## -#ioctl +ioctl ## ## Allow querying current process credential state. diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index acdea40..b146bab 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -110,15 +110,8 @@ MALLOC_DECLARE(M_FADVISE); static uma_zone_t file_zone; -/* Flags for do_dup() */ -#define DUP_FIXED 0x1 /* Force fixed allocation */ -#define DUP_FCNTL 0x2 /* fcntl()-style errors */ -#define DUP_CLOEXEC 0x4 /* Atomically set FD_CLOEXEC. 
*/ - static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders); -static int do_dup(struct thread *td, int flags, int old, int new, - register_t *retval); static int fd_first_free(struct filedesc *fdp, int low, int size); static int fd_last_used(struct filedesc *fdp, int size); static void fdgrowtable(struct filedesc *fdp, int nfd); @@ -166,7 +159,7 @@ static int fill_vnode_info(struct vnode *vp, struct kinfo_file *kif); * the process exits. */ struct freetable { - struct file **ft_table; + struct filedescent *ft_table; SLIST_ENTRY(freetable) ft_next; }; @@ -177,8 +170,7 @@ struct freetable { struct filedesc0 { struct filedesc fd_fd; SLIST_HEAD(, freetable) fd_free; - struct file *fd_dfiles[NDFILE]; - char fd_dfileflags[NDFILE]; + struct filedescent fd_dfiles[NDFILE]; NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)]; }; @@ -284,7 +276,8 @@ fdunused(struct filedesc *fdp, int fd) FILEDESC_XLOCK_ASSERT(fdp); KASSERT(fdisused(fdp, fd), ("fd=%d is already unused", fd)); - KASSERT(fdp->fd_ofiles[fd] == NULL, ("fd=%d is still in use", fd)); + KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, + ("fd=%d is still in use", fd)); fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); if (fd < fdp->fd_freefile) @@ -294,6 +287,20 @@ fdunused(struct filedesc *fdp, int fd) } /* + * Free a file descriptor. + */ +static inline void +fdfree(struct filedesc *fdp, int fd) +{ + struct filedescent *fde; + + fde = &fdp->fd_ofiles[fd]; + filecaps_free(&fde->fde_caps); + bzero(fde, sizeof(*fde)); + fdunused(fdp, fd); +} + +/* * System calls on descriptors. 
*/ #ifndef _SYS_SYSPROTO_H_ @@ -434,36 +441,14 @@ sys_fcntl(struct thread *td, struct fcntl_args *uap) return (error); } -static inline int -fdunwrap(int fd, cap_rights_t rights, struct filedesc *fdp, struct file **fpp) -{ - - FILEDESC_LOCK_ASSERT(fdp); - - *fpp = fget_locked(fdp, fd); - if (*fpp == NULL) - return (EBADF); - -#ifdef CAPABILITIES - if ((*fpp)->f_type == DTYPE_CAPABILITY) { - int err = cap_funwrap(*fpp, rights, fpp); - if (err != 0) { - *fpp = NULL; - return (err); - } - } -#endif /* CAPABILITIES */ - return (0); -} - int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) { struct filedesc *fdp; struct flock *flp; - struct file *fp; + struct file *fp, *fp2; + struct filedescent *fde; struct proc *p; - char *pop; struct vnode *vp; int error, flg, tmp; u_int old, new; @@ -505,8 +490,9 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) error = EBADF; break; } - pop = &fdp->fd_ofileflags[fd]; - td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0; + fde = &fdp->fd_ofiles[fd]; + td->td_retval[0] = + (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0; FILEDESC_SUNLOCK(fdp); break; @@ -517,32 +503,24 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) error = EBADF; break; } - pop = &fdp->fd_ofileflags[fd]; - *pop = (*pop &~ UF_EXCLOSE) | + fde = &fdp->fd_ofiles[fd]; + fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) | (arg & FD_CLOEXEC ? 
UF_EXCLOSE : 0); FILEDESC_XUNLOCK(fdp); break; case F_GETFL: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, F_GETFL, &fp, NULL); + if (error != 0) break; - } td->td_retval[0] = OFLAGS(fp->f_flag); - FILEDESC_SUNLOCK(fdp); + fdrop(fp, td); break; case F_SETFL: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, F_SETFL, &fp, NULL); + if (error != 0) break; - } - fhold(fp); - FILEDESC_SUNLOCK(fdp); do { tmp = flg = fp->f_flag; tmp &= ~FCNTLFLAGS; @@ -550,7 +528,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) } while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0); tmp = fp->f_flag & FNONBLOCK; error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); - if (error) { + if (error != 0) { fdrop(fp, td); break; } @@ -567,14 +545,9 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) break; case F_GETOWN: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, F_GETOWN, &fp, NULL); + if (error != 0) break; - } - fhold(fp); - FILEDESC_SUNLOCK(fdp); error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); if (error == 0) td->td_retval[0] = tmp; @@ -582,14 +555,9 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) break; case F_SETOWN: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, F_SETOWN, &fp, NULL); + if (error != 0) break; - } - fhold(fp); - FILEDESC_SUNLOCK(fdp); tmp = arg; error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); fdrop(fp, td); @@ -608,17 +576,15 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) case F_SETLK: do_setlk: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FLOCK, fdp, &fp); 
- if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FLOCK, 0, &fp, NULL); + if (error != 0) break; - } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_SUNLOCK(fdp); error = EBADF; + fdrop(fp, td); break; } + flp = (struct flock *)arg; if (flp->l_whence == SEEK_CUR) { foffset = foffset_get(fp); @@ -627,16 +593,12 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) foffset > OFF_MAX - flp->l_start)) { FILEDESC_SUNLOCK(fdp); error = EOVERFLOW; + fdrop(fp, td); break; } flp->l_start += foffset; } - /* - * VOP_ADVLOCK() may block. - */ - fhold(fp); - FILEDESC_SUNLOCK(fdp); vp = fp->f_vnode; switch (flp->l_type) { case F_RDLCK: @@ -703,37 +665,37 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) * that the closing thread was a bit slower and that the * advisory lock succeeded before the close. */ - FILEDESC_SLOCK(fdp); - if (fget_locked(fdp, fd) != fp) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, 0, 0, &fp2, NULL); + if (error != 0) { + fdrop(fp, td); + break; + } + if (fp != fp2) { flp->l_whence = SEEK_SET; flp->l_start = 0; flp->l_len = 0; flp->l_type = F_UNLCK; (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, flp, F_POSIX); - } else - FILEDESC_SUNLOCK(fdp); + } fdrop(fp, td); + fdrop(fp2, td); break; case F_GETLK: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FLOCK, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FLOCK, 0, &fp, NULL); + if (error != 0) break; - } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_SUNLOCK(fdp); error = EBADF; + fdrop(fp, td); break; } flp = (struct flock *)arg; if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && flp->l_type != F_UNLCK) { - FILEDESC_SUNLOCK(fdp); error = EINVAL; + fdrop(fp, td); break; } if (flp->l_whence == SEEK_CUR) { @@ -744,15 +706,11 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) foffset < OFF_MIN - flp->l_start)) { FILEDESC_SUNLOCK(fdp); error = EOVERFLOW; + fdrop(fp, td); break; } 
flp->l_start += foffset; } - /* - * VOP_ADVLOCK() may block. - */ - fhold(fp); - FILEDESC_SUNLOCK(fdp); vp = fp->f_vnode; error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, F_POSIX); @@ -763,19 +721,14 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) arg = arg ? 128 * 1024: 0; /* FALLTHROUGH */ case F_READAHEAD: - FILEDESC_SLOCK(fdp); - if ((fp = fget_locked(fdp, fd)) == NULL) { - FILEDESC_SUNLOCK(fdp); - error = EBADF; + error = fget_unlocked(fdp, fd, 0, 0, &fp, NULL); + if (error != 0) break; - } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_SUNLOCK(fdp); + fdrop(fp, td); error = EBADF; break; } - fhold(fp); - FILEDESC_SUNLOCK(fdp); if (arg >= 0) { vp = fp->f_vnode; error = vn_lock(vp, LK_SHARED); @@ -809,11 +762,12 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) /* * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD). */ -static int +int do_dup(struct thread *td, int flags, int old, int new, register_t *retval) { struct filedesc *fdp; + struct filedescent *oldfde, *newfde; struct proc *p; struct file *fp; struct file *delfp; @@ -842,14 +796,15 @@ do_dup(struct thread *td, int flags, int old, int new, FILEDESC_XUNLOCK(fdp); return (EBADF); } + oldfde = &fdp->fd_ofiles[old]; if (flags & DUP_FIXED && old == new) { *retval = new; if (flags & DUP_CLOEXEC) - fdp->fd_ofileflags[new] |= UF_EXCLOSE; + fdp->fd_ofiles[new].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); return (0); } - fp = fdp->fd_ofiles[old]; + fp = oldfde->fde_file; fhold(fp); /* @@ -878,8 +833,10 @@ do_dup(struct thread *td, int flags, int old, int new, } #endif fdgrowtable(fdp, new + 1); + oldfde = &fdp->fd_ofiles[old]; } - if (fdp->fd_ofiles[new] == NULL) + newfde = &fdp->fd_ofiles[new]; + if (newfde->fde_file == NULL) fdused(fdp, new); } else { if ((error = fdalloc(td, new, &new)) != 0) { @@ -887,20 +844,23 @@ do_dup(struct thread *td, int flags, int old, int new, fdrop(fp, td); return (error); } + newfde = &fdp->fd_ofiles[new]; } - KASSERT(fp == 
fdp->fd_ofiles[old], ("old fd has been modified")); + KASSERT(fp == oldfde->fde_file, ("old fd has been modified")); KASSERT(old != new, ("new fd is same as old")); - delfp = fdp->fd_ofiles[new]; + delfp = newfde->fde_file; + /* * Duplicate the source descriptor. */ - fdp->fd_ofiles[new] = fp; + *newfde = *oldfde; + filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps); if ((flags & DUP_CLOEXEC) != 0) - fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] | UF_EXCLOSE; + newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE; else - fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; + newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE; if (new > fdp->fd_lastfile) fdp->fd_lastfile = new; *retval = new; @@ -1141,7 +1101,6 @@ static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders) { - struct file *fp_object; int error; FILEDESC_XLOCK_ASSERT(fdp); @@ -1167,12 +1126,10 @@ closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, knote_fdclose(td, fd); /* - * When we're closing an fd with a capability, we need to notify - * mqueue if the underlying object is of type mqueue. + * We need to notify mqueue if the object is of type mqueue. */ - (void)cap_funwrap(fp, 0, &fp_object); - if (fp_object->f_type == DTYPE_MQUEUE) - mq_fdclose(td, fd, fp_object); + if (fp->f_type == DTYPE_MQUEUE) + mq_fdclose(td, fd, fp); FILEDESC_XUNLOCK(fdp); error = closef(fp, td); @@ -1224,9 +1181,7 @@ kern_close(td, fd) FILEDESC_XUNLOCK(fdp); return (EBADF); } - fdp->fd_ofiles[fd] = NULL; - fdp->fd_ofileflags[fd] = 0; - fdunused(fdp, fd); + fdfree(fdp, fd); /* closefp() drops the FILEDESC lock for us. 
*/ return (closefp(fdp, fd, fp, td, 1)); @@ -1258,7 +1213,7 @@ sys_closefrom(struct thread *td, struct closefrom_args *uap) uap->lowfd = 0; FILEDESC_SLOCK(fdp); for (fd = uap->lowfd; fd < fdp->fd_nfiles; fd++) { - if (fdp->fd_ofiles[fd] != NULL) { + if (fdp->fd_ofiles[fd].fde_file != NULL) { FILEDESC_SUNLOCK(fdp); (void)kern_close(td, fd); FILEDESC_SLOCK(fdp); @@ -1410,6 +1365,91 @@ out: } /* + * Initialize filecaps structure. + */ +void +filecaps_init(struct filecaps *fcaps) +{ + + bzero(fcaps, sizeof(*fcaps)); + fcaps->fc_nioctls = -1; +} + +/* + * Copy filecaps structure allocating memory for ioctls array if needed. + */ +void +filecaps_copy(const struct filecaps *src, struct filecaps *dst) +{ + size_t size; + + *dst = *src; + if (src->fc_ioctls != NULL) { + KASSERT(src->fc_nioctls > 0, + ("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls)); + + size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls; + dst->fc_ioctls = malloc(size, M_TEMP, M_WAITOK); + bcopy(src->fc_ioctls, dst->fc_ioctls, size); + } +} + +/* + * Move filecaps structure to the new place and clear the old place. + */ +static void +filecaps_move(struct filecaps *src, struct filecaps *dst) +{ + + *dst = *src; + bzero(src, sizeof(*src)); +} + +/* + * Fill the given filecaps structure with full rights. + */ +static void +filecaps_fill(struct filecaps *fcaps) +{ + + fcaps->fc_rights = CAP_ALL; + fcaps->fc_ioctls = NULL; + fcaps->fc_nioctls = -1; + fcaps->fc_fcntls = CAP_FCNTL_ALL; +} + +/* + * Free memory allocated within filecaps structure. + */ +void +filecaps_free(struct filecaps *fcaps) +{ + + free(fcaps->fc_ioctls, M_TEMP); + bzero(fcaps, sizeof(*fcaps)); +} + +/* + * Validate the given filecaps structure. 
+ */ +static void +filecaps_validate(const struct filecaps *fcaps, const char *func) +{ + + KASSERT((fcaps->fc_rights & ~CAP_MASK_VALID) == 0, + ("%s: invalid rights", func)); + KASSERT((fcaps->fc_fcntls & ~CAP_FCNTL_ALL) == 0, + ("%s: invalid fcntls", func)); + KASSERT(fcaps->fc_fcntls == 0 || (fcaps->fc_rights & CAP_FCNTL) != 0, + ("%s: fcntls without CAP_FCNTL", func)); + KASSERT(fcaps->fc_ioctls != NULL ? fcaps->fc_nioctls > 0 : + (fcaps->fc_nioctls == -1 || fcaps->fc_nioctls == 0), + ("%s: invalid ioctls", func)); + KASSERT(fcaps->fc_nioctls == 0 || (fcaps->fc_rights & CAP_IOCTL) != 0, + ("%s: ioctls without CAP_IOCTL", func)); +} + +/* * Grow the file table to accomodate (at least) nfd descriptors. */ static void @@ -1417,9 +1457,8 @@ fdgrowtable(struct filedesc *fdp, int nfd) { struct filedesc0 *fdp0; struct freetable *ft; - struct file **ntable; - struct file **otable; - char *nfileflags, *ofileflags; + struct filedescent *ntable; + struct filedescent *otable; int nnfiles, onfiles; NDSLOTTYPE *nmap, *omap; @@ -1430,7 +1469,6 @@ fdgrowtable(struct filedesc *fdp, int nfd) /* save old values */ onfiles = fdp->fd_nfiles; otable = fdp->fd_ofiles; - ofileflags = fdp->fd_ofileflags; omap = fdp->fd_map; /* compute the size of the new table */ @@ -1440,27 +1478,25 @@ fdgrowtable(struct filedesc *fdp, int nfd) return; /* - * Allocate a new table and map. We need enough space for a) the - * file entries themselves, b) the file flags, and c) the struct - * freetable we will use when we decommission the table and place - * it on the freelist. We place the struct freetable in the - * middle so we don't have to worry about padding. + * Allocate a new table and map. We need enough space for the + * file entries themselves and the struct freetable we will use + * when we decommission the table and place it on the freelist. + * We place the struct freetable in the middle so we don't have + * to worry about padding. 
*/ - ntable = malloc(nnfiles * sizeof(*ntable) + sizeof(struct freetable) + - nnfiles * sizeof(*nfileflags), M_FILEDESC, M_ZERO | M_WAITOK); - nfileflags = (char *)&ntable[nnfiles] + sizeof(struct freetable); + ntable = malloc(nnfiles * sizeof(ntable[0]) + sizeof(struct freetable), + M_FILEDESC, M_ZERO | M_WAITOK); nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE, M_FILEDESC, M_ZERO | M_WAITOK); /* copy the old data over and point at the new tables */ memcpy(ntable, otable, onfiles * sizeof(*otable)); - memcpy(nfileflags, ofileflags, onfiles * sizeof(*ofileflags)); memcpy(nmap, omap, NDSLOTS(onfiles) * sizeof(*omap)); /* update the pointers and counters */ fdp->fd_nfiles = nnfiles; + memcpy(ntable, otable, onfiles * sizeof(ntable[0])); fdp->fd_ofiles = ntable; - fdp->fd_ofileflags = nfileflags; fdp->fd_map = nmap; /* @@ -1536,8 +1572,9 @@ fdalloc(struct thread *td, int minfd, int *result) ("invalid descriptor %d", fd)); KASSERT(!fdisused(fdp, fd), ("fd_first_free() returned non-free descriptor")); - KASSERT(fdp->fd_ofiles[fd] == NULL, ("file descriptor isn't free")); - KASSERT(fdp->fd_ofileflags[fd] == 0, ("file flags are set")); + KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, + ("file descriptor isn't free")); + KASSERT(fdp->fd_ofiles[fd].fde_flags == 0, ("file flags are set")); fdused(fdp, fd); *result = fd; return (0); @@ -1568,7 +1605,7 @@ fdavail(struct thread *td, int n) return (1); last = min(fdp->fd_nfiles, lim); for (i = fdp->fd_freefile; i < last; i++) { - if (fdp->fd_ofiles[i] == NULL && --n <= 0) + if (fdp->fd_ofiles[i].fde_file == NULL && --n <= 0) return (1); } return (0); @@ -1591,7 +1628,7 @@ falloc(struct thread *td, struct file **resultfp, int *resultfd, int flags) if (error) return (error); /* no reference held on error */ - error = finstall(td, fp, &fd, flags); + error = finstall(td, fp, &fd, flags, NULL); if (error) { fdrop(fp, td); /* one reference (fp only) */ return (error); @@ -1645,13 +1682,17 @@ falloc_noinstall(struct thread *td, struct file 
**resultfp) * Install a file in a file descriptor table. */ int -finstall(struct thread *td, struct file *fp, int *fd, int flags) +finstall(struct thread *td, struct file *fp, int *fd, int flags, + struct filecaps *fcaps) { struct filedesc *fdp = td->td_proc->p_fd; + struct filedescent *fde; int error; KASSERT(fd != NULL, ("%s: fd == NULL", __func__)); KASSERT(fp != NULL, ("%s: fp == NULL", __func__)); + if (fcaps != NULL) + filecaps_validate(fcaps, __func__); FILEDESC_XLOCK(fdp); if ((error = fdalloc(td, 0, fd))) { @@ -1659,9 +1700,14 @@ finstall(struct thread *td, struct file *fp, int *fd, int flags) return (error); } fhold(fp); - fdp->fd_ofiles[*fd] = fp; + fde = &fdp->fd_ofiles[*fd]; + fde->fde_file = fp; if ((flags & O_CLOEXEC) != 0) - fdp->fd_ofileflags[*fd] |= UF_EXCLOSE; + fde->fde_flags |= UF_EXCLOSE; + if (fcaps != NULL) + filecaps_move(fcaps, &fde->fde_caps); + else + filecaps_fill(&fde->fde_caps); FILEDESC_XUNLOCK(fdp); return (0); } @@ -1696,7 +1742,6 @@ fdinit(struct filedesc *fdp) newfdp->fd_fd.fd_holdcnt = 1; newfdp->fd_fd.fd_cmask = CMASK; newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; - newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; newfdp->fd_fd.fd_nfiles = NDFILE; newfdp->fd_fd.fd_map = newfdp->fd_dmap; newfdp->fd_fd.fd_lastfile = -1; @@ -1764,7 +1809,7 @@ fdunshare(struct proc *p, struct thread *td) FILEDESC_XUNLOCK(p->p_fd); tmp = fdcopy(p->p_fd); - fdfree(td); + fdescfree(td); p->p_fd = tmp; } else FILEDESC_XUNLOCK(p->p_fd); @@ -1778,6 +1823,7 @@ struct filedesc * fdcopy(struct filedesc *fdp) { struct filedesc *newfdp; + struct filedescent *nfde, *ofde; int i; /* Certain daemons might not have file descriptors. */ @@ -1796,12 +1842,14 @@ fdcopy(struct filedesc *fdp) /* copy all passable descriptors (i.e. 
not kqueue) */ newfdp->fd_freefile = -1; for (i = 0; i <= fdp->fd_lastfile; ++i) { + ofde = &fdp->fd_ofiles[i]; if (fdisused(fdp, i) && - (fdp->fd_ofiles[i]->f_ops->fo_flags & DFLAG_PASSABLE) && - fdp->fd_ofiles[i]->f_ops != &badfileops) { - newfdp->fd_ofiles[i] = fdp->fd_ofiles[i]; - newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i]; - fhold(newfdp->fd_ofiles[i]); + (ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) && + ofde->fde_file->f_ops != &badfileops) { + nfde = &newfdp->fd_ofiles[i]; + *nfde = *ofde; + filecaps_copy(&ofde->fde_caps, &nfde->fde_caps); + fhold(nfde->fde_file); newfdp->fd_lastfile = i; } else { if (newfdp->fd_freefile == -1) @@ -1811,9 +1859,10 @@ fdcopy(struct filedesc *fdp) newfdp->fd_cmask = fdp->fd_cmask; FILEDESC_SUNLOCK(fdp); FILEDESC_XLOCK(newfdp); - for (i = 0; i <= newfdp->fd_lastfile; ++i) - if (newfdp->fd_ofiles[i] != NULL) + for (i = 0; i <= newfdp->fd_lastfile; ++i) { + if (newfdp->fd_ofiles[i].fde_file != NULL) fdused(newfdp, i); + } if (newfdp->fd_freefile == -1) newfdp->fd_freefile = i; FILEDESC_XUNLOCK(newfdp); @@ -1824,7 +1873,7 @@ fdcopy(struct filedesc *fdp) * Release a filedesc structure. 
*/ void -fdfree(struct thread *td) +fdescfree(struct thread *td) { struct filedesc *fdp; int i; @@ -1849,12 +1898,12 @@ fdfree(struct thread *td) if (fdtol != NULL) { FILEDESC_XLOCK(fdp); KASSERT(fdtol->fdl_refcount > 0, - ("filedesc_to_refcount botch: fdl_refcount=%d", - fdtol->fdl_refcount)); + ("filedesc_to_refcount botch: fdl_refcount=%d", + fdtol->fdl_refcount)); if (fdtol->fdl_refcount == 1 && (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { for (i = 0; i <= fdp->fd_lastfile; i++) { - fp = fdp->fd_ofiles[i]; + fp = fdp->fd_ofiles[i].fde_file; if (fp == NULL || fp->f_type != DTYPE_VNODE) continue; fhold(fp); @@ -1914,10 +1963,10 @@ fdfree(struct thread *td) return; for (i = 0; i <= fdp->fd_lastfile; i++) { - fp = fdp->fd_ofiles[i]; + fp = fdp->fd_ofiles[i].fde_file; if (fp != NULL) { FILEDESC_XLOCK(fdp); - fdp->fd_ofiles[i] = NULL; + fdfree(fdp, i); FILEDESC_XUNLOCK(fdp); (void) closef(fp, td); } @@ -1982,6 +2031,7 @@ void setugidsafety(struct thread *td) { struct filedesc *fdp; + struct file *fp; int i; /* Certain daemons might not have file descriptors. */ @@ -1997,18 +2047,14 @@ setugidsafety(struct thread *td) for (i = 0; i <= fdp->fd_lastfile; i++) { if (i > 2) break; - if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { - struct file *fp; - + fp = fdp->fd_ofiles[i].fde_file; + if (fp != NULL && is_unsafe(fp)) { knote_fdclose(td, i); /* * NULL-out descriptor prior to close to avoid * a race while close blocks. 
*/ - fp = fdp->fd_ofiles[i]; - fdp->fd_ofiles[i] = NULL; - fdp->fd_ofileflags[i] = 0; - fdunused(fdp, i); + fdfree(fdp, i); FILEDESC_XUNLOCK(fdp); (void) closef(fp, td); FILEDESC_XLOCK(fdp); @@ -2029,9 +2075,8 @@ fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td) { FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[idx] == fp) { - fdp->fd_ofiles[idx] = NULL; - fdunused(fdp, idx); + if (fdp->fd_ofiles[idx].fde_file == fp) { + fdfree(fdp, idx); FILEDESC_XUNLOCK(fdp); fdrop(fp, td); } else @@ -2045,6 +2090,7 @@ void fdcloseexec(struct thread *td) { struct filedesc *fdp; + struct filedescent *fde; struct file *fp; int i; @@ -2054,17 +2100,16 @@ fdcloseexec(struct thread *td) return; /* - * We cannot cache fd_ofiles or fd_ofileflags since operations + * We cannot cache fd_ofiles since operations * may block and rip them out from under us. */ FILEDESC_XLOCK(fdp); for (i = 0; i <= fdp->fd_lastfile; i++) { - fp = fdp->fd_ofiles[i]; + fde = &fdp->fd_ofiles[i]; + fp = fde->fde_file; if (fp != NULL && (fp->f_type == DTYPE_MQUEUE || - (fdp->fd_ofileflags[i] & UF_EXCLOSE))) { - fdp->fd_ofiles[i] = NULL; - fdp->fd_ofileflags[i] = 0; - fdunused(fdp, i); + (fde->fde_flags & UF_EXCLOSE))) { + fdfree(fdp, i); (void) closefp(fdp, i, fp, td, 0); /* closefp() drops the FILEDESC lock. */ FILEDESC_XLOCK(fdp); @@ -2094,7 +2139,7 @@ fdcheckstd(struct thread *td) devnull = -1; error = 0; for (i = 0; i < 3; i++) { - if (fdp->fd_ofiles[i] != NULL) + if (fdp->fd_ofiles[i].fde_file != NULL) continue; if (devnull < 0) { save = td->td_retval[0]; @@ -2129,7 +2174,6 @@ closef(struct file *fp, struct thread *td) struct flock lf; struct filedesc_to_leader *fdtol; struct filedesc *fdp; - struct file *fp_object; /* * POSIX record locking dictates that any close releases ALL @@ -2142,13 +2186,9 @@ closef(struct file *fp, struct thread *td) * NULL thread pointer when there really is no owning * context that might have locks, or the locks will be * leaked. 
- * - * If this is a capability, we do lock processing under the underlying - * node, not the capability itself. */ - (void)cap_funwrap(fp, 0, &fp_object); - if (fp_object->f_type == DTYPE_VNODE && td != NULL) { - vp = fp_object->f_vnode; + if (fp->f_type == DTYPE_VNODE && td != NULL) { + vp = fp->f_vnode; if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { lf.l_whence = SEEK_SET; lf.l_start = 0; @@ -2177,7 +2217,7 @@ closef(struct file *fp, struct thread *td) lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; - vp = fp_object->f_vnode; + vp = fp->f_vnode; (void) VOP_ADVLOCK(vp, (caddr_t)fdtol->fdl_leader, F_UNLCK, &lf, F_POSIX); @@ -2211,14 +2251,19 @@ finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops) atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops); } -struct file * -fget_unlocked(struct filedesc *fdp, int fd) +int +fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t needrights, + int needfcntl, struct file **fpp, cap_rights_t *haverightsp) { struct file *fp; u_int count; +#ifdef CAPABILITIES + cap_rights_t haverights; + int error; +#endif if (fd < 0 || fd >= fdp->fd_nfiles) - return (NULL); + return (EBADF); /* * Fetch the descriptor locklessly. We avoid fdrop() races by * never raising a refcount above 0. To accomplish this we have @@ -2228,9 +2273,20 @@ fget_unlocked(struct filedesc *fdp, int fd) * due to preemption. 
*/ for (;;) { - fp = fdp->fd_ofiles[fd]; + fp = fdp->fd_ofiles[fd].fde_file; if (fp == NULL) - break; + return (EBADF); +#ifdef CAPABILITIES + haverights = cap_rights(fdp, fd); + error = cap_check(haverights, needrights); + if (error != 0) + return (error); + if ((needrights & CAP_FCNTL) != 0) { + error = cap_fcntl_check(fdp, fd, needfcntl); + if (error != 0) + return (error); + } +#endif count = fp->f_count; if (count == 0) continue; @@ -2240,12 +2296,19 @@ fget_unlocked(struct filedesc *fdp, int fd) */ if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1) continue; - if (fp == fdp->fd_ofiles[fd]) + if (fp == fdp->fd_ofiles[fd].fde_file) break; fdrop(fp, curthread); } - - return (fp); + *fpp = fp; + if (haverightsp != NULL) { +#ifdef CAPABILITIES + *haverightsp = haverights; +#else + *haverightsp = CAP_ALL; +#endif + } + return (0); } /* @@ -2255,33 +2318,29 @@ fget_unlocked(struct filedesc *fdp, int fd) * If the descriptor doesn't exist or doesn't match 'flags', EBADF is * returned. * - * If the FGET_GETCAP flag is set, the capability itself will be returned. - * Calling _fget() with FGET_GETCAP on a non-capability will return EINVAL. - * Otherwise, if the file is a capability, its rights will be checked against - * the capability rights mask, and if successful, the object will be unwrapped. + * File's rights will be checked against the capability rights mask. * * If an error occured the non-zero error is returned and *fpp is set to * NULL. Otherwise *fpp is held and set and zero is returned. Caller is * responsible for fdrop(). 
*/ -#define FGET_GETCAP 0x00000001 static __inline int _fget(struct thread *td, int fd, struct file **fpp, int flags, - cap_rights_t needrights, cap_rights_t *haverightsp, u_char *maxprotp, - int fget_flags) + cap_rights_t needrights, u_char *maxprotp) { struct filedesc *fdp; struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; -#endif + cap_rights_t haverights; int error; *fpp = NULL; if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) return (EBADF); - if ((fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); + if (maxprotp != NULL) + needrights |= CAP_MMAP; + error = fget_unlocked(fdp, fd, needrights, 0, &fp, &haverights); + if (error != 0) + return (error); if (fp->f_ops == &badfileops) { fdrop(fp, td); return (EBADF); @@ -2289,50 +2348,11 @@ _fget(struct thread *td, int fd, struct file **fpp, int flags, #ifdef CAPABILITIES /* - * If this is a capability, what rights does it have? + * If requested, convert capability rights to access flags. */ - if (haverightsp != NULL) { - if (fp->f_type == DTYPE_CAPABILITY) - *haverightsp = cap_rights(fp); - else - *haverightsp = CAP_MASK_VALID; - } - - /* - * If a capability has been requested, return the capability directly. - * Otherwise, check capability rights, extract the underlying object, - * and check its access flags. - */ - if (fget_flags & FGET_GETCAP) { - if (fp->f_type != DTYPE_CAPABILITY) { - fdrop(fp, td); - return (EINVAL); - } - } else { - if (maxprotp == NULL) - error = cap_funwrap(fp, needrights, &fp_fromcap); - else - error = cap_funwrap_mmap(fp, needrights, maxprotp, - &fp_fromcap); - if (error != 0) { - fdrop(fp, td); - return (error); - } - - /* - * If we've unwrapped a file, drop the original capability - * and hold the new descriptor. fp after this point refers to - * the actual (unwrapped) object, not the capability. 
- */ - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, td); - fp = fp_fromcap; - } - } + if (maxprotp != NULL) + *maxprotp = cap_rights_to_vmprot(haverights); #else /* !CAPABILITIES */ - KASSERT(fp->f_type != DTYPE_CAPABILITY, - ("%s: saw capability", __func__)); if (maxprotp != NULL) *maxprotp = VM_PROT_ALL; #endif /* CAPABILITIES */ @@ -2371,7 +2391,7 @@ int fget(struct thread *td, int fd, cap_rights_t rights, struct file **fpp) { - return(_fget(td, fd, fpp, 0, rights, NULL, NULL, 0)); + return(_fget(td, fd, fpp, 0, rights, NULL)); } int @@ -2379,37 +2399,24 @@ fget_mmap(struct thread *td, int fd, cap_rights_t rights, u_char *maxprotp, struct file **fpp) { - return (_fget(td, fd, fpp, 0, rights, NULL, maxprotp, 0)); + return (_fget(td, fd, fpp, 0, rights, maxprotp)); } int fget_read(struct thread *td, int fd, cap_rights_t rights, struct file **fpp) { - return(_fget(td, fd, fpp, FREAD, rights, NULL, NULL, 0)); + return(_fget(td, fd, fpp, FREAD, rights, NULL)); } int fget_write(struct thread *td, int fd, cap_rights_t rights, struct file **fpp) { - return (_fget(td, fd, fpp, FWRITE, rights, NULL, NULL, 0)); + return (_fget(td, fd, fpp, FWRITE, rights, NULL)); } /* - * Unlike the other fget() calls, which accept and check capability rights - * but never return capabilities, fgetcap() returns the capability but doesn't - * check capability rights. - */ -int -fgetcap(struct thread *td, int fd, struct file **fpp) -{ - - return (_fget(td, fd, fpp, 0, 0, NULL, NULL, FGET_GETCAP)); -} - - -/* * Like fget() but loads the underlying vnode, or returns an error if the * descriptor does not represent a vnode. Note that pipes use vnodes but * never have VM objects. The returned vnode will be vref()'d. 
@@ -2418,14 +2425,14 @@ fgetcap(struct thread *td, int fd, struct file **fpp) */ static __inline int _fgetvp(struct thread *td, int fd, int flags, cap_rights_t needrights, - cap_rights_t *haverightsp, struct vnode **vpp) + struct vnode **vpp) { struct file *fp; int error; *vpp = NULL; - if ((error = _fget(td, fd, &fp, flags, needrights, haverightsp, - NULL, 0)) != 0) + error = _fget(td, fd, &fp, flags, needrights, NULL); + if (error) return (error); if (fp->f_vnode == NULL) { error = EINVAL; @@ -2442,28 +2449,54 @@ int fgetvp(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp) { - return (_fgetvp(td, fd, 0, rights, NULL, vpp)); + return (_fgetvp(td, fd, 0, rights, vpp)); } int -fgetvp_rights(struct thread *td, int fd, cap_rights_t need, cap_rights_t *have, - struct vnode **vpp) +fgetvp_rights(struct thread *td, int fd, cap_rights_t need, + struct filecaps *havecaps, struct vnode **vpp) { - return (_fgetvp(td, fd, 0, need, have, vpp)); + struct filedesc *fdp; + struct file *fp; +#ifdef CAPABILITIES + int error; +#endif + + if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) + return (EBADF); + + fp = fget_locked(fdp, fd); + if (fp == NULL || fp->f_ops == &badfileops) + return (EBADF); + +#ifdef CAPABILITIES + error = cap_check(cap_rights(fdp, fd), need); + if (error != 0) + return (error); +#endif + + if (fp->f_vnode == NULL) + return (EINVAL); + + *vpp = fp->f_vnode; + vref(*vpp); + filecaps_copy(&fdp->fd_ofiles[fd].fde_caps, havecaps); + + return (0); } int fgetvp_read(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp) { - return (_fgetvp(td, fd, FREAD, rights, NULL, vpp)); + return (_fgetvp(td, fd, FREAD, rights, vpp)); } int fgetvp_exec(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp) { - return (_fgetvp(td, fd, FEXEC, rights, NULL, vpp)); + return (_fgetvp(td, fd, FEXEC, rights, vpp)); } #ifdef notyet @@ -2472,7 +2505,7 @@ fgetvp_write(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp) { - 
return (_fgetvp(td, fd, FWRITE, rights, NULL, vpp)); + return (_fgetvp(td, fd, FWRITE, rights, vpp)); } #endif @@ -2497,7 +2530,7 @@ fgetsock(struct thread *td, int fd, cap_rights_t rights, struct socket **spp, *spp = NULL; if (fflagp != NULL) *fflagp = 0; - if ((error = _fget(td, fd, &fp, 0, rights, NULL, NULL, 0)) != 0) + if ((error = _fget(td, fd, &fp, 0, rights, NULL)) != 0) return (error); if (fp->f_type != DTYPE_SOCKET) { error = ENOTSOCK; @@ -2533,9 +2566,6 @@ fputsock(struct socket *so) /* * Handle the last reference to a file being closed. - * - * No special capability handling here, as the capability's fo_close will run - * instead of the object here, and perform any necessary drop on the object. */ int _fdrop(struct file *fp, struct thread *td) @@ -2612,7 +2642,8 @@ done2: * Duplicate the specified descriptor to a free descriptor. */ int -dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int openerror, int *indxp) +dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, + int openerror, int *indxp) { struct file *fp; int error, indx; @@ -2656,18 +2687,17 @@ dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int opener FILEDESC_XUNLOCK(fdp); return (EACCES); } - fdp->fd_ofiles[indx] = fp; - fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; fhold(fp); + fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; + filecaps_copy(&fdp->fd_ofiles[dfd].fde_caps, + &fdp->fd_ofiles[indx].fde_caps); break; case ENXIO: /* * Steal away the file pointer from dfd and stuff it into indx. 
*/ - fdp->fd_ofiles[indx] = fp; - fdp->fd_ofiles[dfd] = NULL; - fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; - fdp->fd_ofileflags[dfd] = 0; + fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; + bzero(&fdp->fd_ofiles[dfd], sizeof(fdp->fd_ofiles[dfd])); fdunused(fdp, dfd); break; } @@ -2823,7 +2853,7 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS) continue; FILEDESC_SLOCK(fdp); for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) { - if ((fp = fdp->fd_ofiles[n]) == NULL) + if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) continue; xf.xf_fd = n; xf.xf_file = fp; @@ -2935,7 +2965,7 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS) export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif, fdp, req); for (i = 0; i < fdp->fd_nfiles; i++) { - if ((fp = fdp->fd_ofiles[i]) == NULL) + if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) continue; bzero(kif, sizeof(*kif)); kif->kf_structsize = sizeof(*kif); @@ -2945,21 +2975,6 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS) shmfd = NULL; kif->kf_fd = i; -#ifdef CAPABILITIES - /* - * When reporting a capability, most fields will be from the - * underlying object, but do mark as a capability. With - * ofiledesc, we don't have a field to export the cap_rights_t, - * but we do with the new filedesc. 
- */ - if (fp->f_type == DTYPE_CAPABILITY) { - kif->kf_flags |= KF_FLAG_CAPABILITY; - (void)cap_funwrap(fp, 0, &fp); - } -#else - KASSERT(fp->f_type != DTYPE_CAPABILITY, - ("sysctl_kern_proc_ofiledesc: saw capability")); -#endif switch (fp->f_type) { case DTYPE_VNODE: kif->kf_type = KF_TYPE_VNODE; @@ -3128,8 +3143,8 @@ CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE); static int export_fd_for_sysctl(void *data, int type, int fd, int fflags, int refcnt, - int64_t offset, int fd_is_cap, cap_rights_t fd_cap_rights, - struct kinfo_file *kif, struct sysctl_req *req) + int64_t offset, cap_rights_t fd_cap_rights, struct kinfo_file *kif, + struct sysctl_req *req) { struct { int fflag; @@ -3191,10 +3206,7 @@ export_fd_for_sysctl(void *data, int type, int fd, int fflags, int refcnt, for (i = 0; i < NFFLAGS; i++) if (fflags & fflags_table[i].fflag) kif->kf_flags |= fflags_table[i].kf_fflag; - if (fd_is_cap) - kif->kf_flags |= KF_FLAG_CAPABILITY; - if (fd_is_cap) - kif->kf_cap_rights = fd_cap_rights; + kif->kf_cap_rights = fd_cap_rights; kif->kf_fd = fd; kif->kf_type = type; kif->kf_ref_count = refcnt; @@ -3222,7 +3234,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) int64_t offset; void *data; int error, i, *name; - int fd_is_cap, type, refcnt, fflags; + int type, refcnt, fflags; cap_rights_t fd_cap_rights; name = (int *)arg1; @@ -3252,13 +3264,13 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); if (tracevp != NULL) export_fd_for_sysctl(tracevp, KF_TYPE_VNODE, KF_FD_TYPE_TRACE, - FREAD | FWRITE, -1, -1, 0, 0, kif, req); + FREAD | FWRITE, -1, -1, 0, kif, req); if (textvp != NULL) export_fd_for_sysctl(textvp, KF_TYPE_VNODE, KF_FD_TYPE_TEXT, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); if (cttyvp != NULL) export_fd_for_sysctl(cttyvp, KF_TYPE_VNODE, KF_FD_TYPE_CTTY, - FREAD | FWRITE, -1, -1, 0, 0, kif, req); + FREAD | FWRITE, -1, -1, 0, kif, req); if (fdp == NULL) goto fail; FILEDESC_SLOCK(fdp); @@ 
-3268,7 +3280,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) data = fdp->fd_cdir; FILEDESC_SUNLOCK(fdp); export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_CWD, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); FILEDESC_SLOCK(fdp); } /* root directory */ @@ -3277,7 +3289,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) data = fdp->fd_rdir; FILEDESC_SUNLOCK(fdp); export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_ROOT, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); FILEDESC_SLOCK(fdp); } /* jail directory */ @@ -3286,30 +3298,17 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) data = fdp->fd_jdir; FILEDESC_SUNLOCK(fdp); export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_JAIL, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); FILEDESC_SLOCK(fdp); } for (i = 0; i < fdp->fd_nfiles; i++) { - if ((fp = fdp->fd_ofiles[i]) == NULL) + if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) continue; data = NULL; - fd_is_cap = 0; - fd_cap_rights = 0; - #ifdef CAPABILITIES - /* - * When reporting a capability, most fields will be from the - * underlying object, but do mark as a capability and export - * the capability rights mask. 
- */ - if (fp->f_type == DTYPE_CAPABILITY) { - fd_is_cap = 1; - fd_cap_rights = cap_rights(fp); - (void)cap_funwrap(fp, 0, &fp); - } + fd_cap_rights = cap_rights(fdp, i); #else /* !CAPABILITIES */ - KASSERT(fp->f_type != DTYPE_CAPABILITY, - ("sysctl_kern_proc_filedesc: saw capability")); + fd_cap_rights = 0; #endif switch (fp->f_type) { case DTYPE_VNODE: @@ -3385,7 +3384,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) if (type == KF_TYPE_VNODE || type == KF_TYPE_FIFO) FILEDESC_SUNLOCK(fdp); error = export_fd_for_sysctl(data, type, i, fflags, refcnt, - offset, fd_is_cap, fd_cap_rights, kif, req); + offset, fd_cap_rights, kif, req); if (type == KF_TYPE_VNODE || type == KF_TYPE_FIFO) FILEDESC_SLOCK(fdp); if (error) { @@ -3644,7 +3643,7 @@ file_to_first_proc(struct file *fp) if (fdp == NULL) continue; for (n = 0; n < fdp->fd_nfiles; n++) { - if (fp == fdp->fd_ofiles[n]) + if (fp == fdp->fd_ofiles[n].fde_file) return (p); } } @@ -3694,7 +3693,7 @@ DB_SHOW_COMMAND(files, db_show_files) if ((fdp = p->p_fd) == NULL) continue; for (n = 0; n < fdp->fd_nfiles; ++n) { - if ((fp = fdp->fd_ofiles[n]) == NULL) + if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) continue; db_print_file(fp, header); header = 0; diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 965ce31..7c0d2d6 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -438,9 +438,6 @@ interpret: } else { AUDIT_ARG_FD(args->fd); /* - * Some might argue that CAP_READ and/or CAP_MMAP should also - * be required here; such arguments will be entertained. - * * Descriptors opened only with O_EXEC or O_RDONLY are allowed. */ error = fgetvp_exec(td, args->fd, CAP_FEXECVE, &binvp); diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 82f0344..5bd2daa 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -297,7 +297,7 @@ exit1(struct thread *td, int rv) * Close open files and release open-file table. * This may block! 
*/ - fdfree(td); + fdescfree(td); /* * If this thread tickled GEOM, we need to wait for the giggling to diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 287d202..b5a4934 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -342,7 +342,7 @@ fork_norfproc(struct thread *td, int flags) if (flags & RFCFDG) { struct filedesc *fdtmp; fdtmp = fdinit(td->td_proc->p_fd); - fdfree(td); + fdescfree(td); p1->p_fd = fdtmp; } diff --git a/sys/kern/sys_capability.c b/sys/kern/sys_capability.c index 6fb4fee..ba168e9 100644 --- a/sys/kern/sys_capability.c +++ b/sys/kern/sys_capability.c @@ -1,11 +1,15 @@ /*- * Copyright (c) 2008-2011 Robert N. M. Watson * Copyright (c) 2010-2011 Jonathan Anderson + * Copyright (c) 2012 FreeBSD Foundation * All rights reserved. * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * + * Portions of this software were developed by Pawel Jakub Dawidek under + * sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -62,6 +66,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -139,90 +144,48 @@ sys_cap_getmode(struct thread *td, struct cap_getmode_args *uap) FEATURE(security_capabilities, "Capsicum Capabilities"); -/* - * struct capability describes a capability, and is hung off of its struct - * file f_data field. cap_file and cap_rightss are static once hooked up, as - * neither the object it references nor the rights it encapsulates are - * permitted to change. - */ -struct capability { - struct file *cap_object; /* Underlying object's file. */ - struct file *cap_file; /* Back-pointer to cap's file. */ - cap_rights_t cap_rights; /* Mask of rights on object. 
*/ -}; +static inline int +_cap_check(cap_rights_t have, cap_rights_t need, enum ktr_cap_fail_type type) +{ + + + if ((need & ~have) != 0) { +#ifdef KTRACE + if (KTRPOINT(curthread, KTR_CAPFAIL)) + ktrcapfail(type, need, have); +#endif + return (ENOTCAPABLE); + } + return (0); +} /* - * Capabilities have a fileops vector, but in practice none should ever be - * called except for fo_close, as the capability will normally not be - * returned during a file descriptor lookup in the system call code. + * Test whether a capability grants the requested rights. */ -static fo_rdwr_t capability_read; -static fo_rdwr_t capability_write; -static fo_truncate_t capability_truncate; -static fo_ioctl_t capability_ioctl; -static fo_poll_t capability_poll; -static fo_kqfilter_t capability_kqfilter; -static fo_stat_t capability_stat; -static fo_close_t capability_close; -static fo_chmod_t capability_chmod; -static fo_chown_t capability_chown; - -static struct fileops capability_ops = { - .fo_read = capability_read, - .fo_write = capability_write, - .fo_truncate = capability_truncate, - .fo_ioctl = capability_ioctl, - .fo_poll = capability_poll, - .fo_kqfilter = capability_kqfilter, - .fo_stat = capability_stat, - .fo_close = capability_close, - .fo_chmod = capability_chmod, - .fo_chown = capability_chown, - .fo_flags = DFLAG_PASSABLE, -}; - -static struct fileops capability_ops_unpassable = { - .fo_read = capability_read, - .fo_write = capability_write, - .fo_truncate = capability_truncate, - .fo_ioctl = capability_ioctl, - .fo_poll = capability_poll, - .fo_kqfilter = capability_kqfilter, - .fo_stat = capability_stat, - .fo_close = capability_close, - .fo_chmod = capability_chmod, - .fo_chown = capability_chown, - .fo_flags = 0, -}; - -static uma_zone_t capability_zone; - -static void -capability_init(void *dummy __unused) +int +cap_check(cap_rights_t have, cap_rights_t need) { - capability_zone = uma_zcreate("capability", sizeof(struct capability), - NULL, NULL, NULL, NULL, 
UMA_ALIGN_PTR, 0); - if (capability_zone == NULL) - panic("capability_init: capability_zone not initialized"); + return (_cap_check(have, need, CAPFAIL_NOTCAPABLE)); } -SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, capability_init, NULL); /* - * Test whether a capability grants the requested rights. + * Convert capability rights into VM access flags. */ -static int -cap_check(struct capability *c, cap_rights_t rights) +u_char +cap_rights_to_vmprot(cap_rights_t have) { + u_char maxprot; - if ((c->cap_rights | rights) != c->cap_rights) { -#ifdef KTRACE - if (KTRPOINT(curthread, KTR_CAPFAIL)) - ktrcapfail(CAPFAIL_NOTCAPABLE, rights, c->cap_rights); -#endif - return (ENOTCAPABLE); - } - return (0); + maxprot = VM_PROT_NONE; + if (have & CAP_MMAP_R) + maxprot |= VM_PROT_READ; + if (have & CAP_MMAP_W) + maxprot |= VM_PROT_WRITE; + if (have & CAP_MMAP_X) + maxprot |= VM_PROT_EXECUTE; + + return (maxprot); } /* @@ -231,43 +194,49 @@ cap_check(struct capability *c, cap_rights_t rights) * this one file. */ cap_rights_t -cap_rights(struct file *fp_cap) +cap_rights(struct filedesc *fdp, int fd) { - struct capability *c; - - KASSERT(fp_cap->f_type == DTYPE_CAPABILITY, - ("cap_rights: !capability")); - c = fp_cap->f_data; - return (c->cap_rights); + return (fdp->fd_ofiles[fd].fde_rights); } /* - * System call to create a new capability reference to either an existing - * file object or an an existing capability. + * System call to limit rights of the given capability. 
*/ int -sys_cap_new(struct thread *td, struct cap_new_args *uap) +sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap) { - int error, capfd; - int fd = uap->fd; - struct file *fp; - cap_rights_t rights = uap->rights; + struct filedesc *fdp; + cap_rights_t rights; + int error, fd; + + fd = uap->fd; + rights = uap->rights; AUDIT_ARG_FD(fd); AUDIT_ARG_RIGHTS(rights); - error = fget(td, fd, rights, &fp); - if (error) - return (error); - AUDIT_ARG_FILE(td->td_proc, fp); - error = kern_capwrap(td, fp, rights, &capfd); - /* - * Release our reference to the file (kern_capwrap has held a reference - * for the filedesc array). - */ - fdrop(fp, td); - if (error == 0) - td->td_retval[0] = capfd; + + if ((rights & ~CAP_ALL) != 0) + return (EINVAL); + + fdp = td->td_proc->p_fd; + FILEDESC_XLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_XUNLOCK(fdp); + return (EBADF); + } + error = _cap_check(cap_rights(fdp, fd), rights, CAPFAIL_INCREASE); + if (error == 0) { + fdp->fd_ofiles[fd].fde_rights = rights; + if ((rights & CAP_IOCTL) == 0) { + free(fdp->fd_ofiles[fd].fde_ioctls, M_TEMP); + fdp->fd_ofiles[fd].fde_ioctls = NULL; + fdp->fd_ofiles[fd].fde_nioctls = 0; + } + if ((rights & CAP_FCNTL) == 0) + fdp->fd_ofiles[fd].fde_fcntls = 0; + } + FILEDESC_XUNLOCK(fdp); return (error); } @@ -275,247 +244,321 @@ sys_cap_new(struct thread *td, struct cap_new_args *uap) * System call to query the rights mask associated with a capability. 
*/ int -sys_cap_getrights(struct thread *td, struct cap_getrights_args *uap) +sys_cap_rights_get(struct thread *td, struct cap_rights_get_args *uap) { - struct capability *cp; - struct file *fp; - int error; + struct filedesc *fdp; + cap_rights_t rights; + int fd; - AUDIT_ARG_FD(uap->fd); - error = fgetcap(td, uap->fd, &fp); - if (error) - return (error); - cp = fp->f_data; - error = copyout(&cp->cap_rights, uap->rightsp, sizeof(*uap->rightsp)); - fdrop(fp, td); - return (error); + fd = uap->fd; + + AUDIT_ARG_FD(fd); + + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_SUNLOCK(fdp); + return (EBADF); + } + rights = cap_rights(fdp, fd); + FILEDESC_SUNLOCK(fdp); + return (copyout(&rights, uap->rightsp, sizeof(*uap->rightsp))); } /* - * Create a capability to wrap around an existing file. + * Test whether a capability grants the given ioctl command. + * If descriptor doesn't have CAP_IOCTL, then ioctls list is empty and + * ENOTCAPABLE will be returned. */ int -kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights, - int *capfdp) +cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd) { - struct capability *cp, *cp_old; - struct file *fp_object, *fcapp; - int error; - - if ((rights | CAP_MASK_VALID) != CAP_MASK_VALID) - return (EINVAL); + u_long *cmds; + ssize_t ncmds; + long i; - /* - * If a new capability is being derived from an existing capability, - * then the new capability rights must be a subset of the existing - * rights. - */ - if (fp->f_type == DTYPE_CAPABILITY) { - cp_old = fp->f_data; - if ((cp_old->cap_rights | rights) != cp_old->cap_rights) { -#ifdef KTRACE - if (KTRPOINT(curthread, KTR_CAPFAIL)) - ktrcapfail(CAPFAIL_INCREASE, - rights, cp_old->cap_rights); -#endif - return (ENOTCAPABLE); - } - } + FILEDESC_LOCK_ASSERT(fdp); + KASSERT(fd >= 0 && fd < fdp->fd_nfiles, + ("%s: invalid fd=%d", __func__, fd)); - /* - * Allocate a new file descriptor to hang the capability off of. 
- */ - error = falloc(td, &fcapp, capfdp, fp->f_flag); - if (error) - return (error); + ncmds = fdp->fd_ofiles[fd].fde_nioctls; + if (ncmds == -1) + return (0); - /* - * Rather than nesting capabilities, directly reference the object an - * existing capability references. There's nothing else interesting - * to preserve for future use, as we've incorporated the previous - * rights mask into the new one. This prevents us from having to - * deal with capability chains. - */ - if (fp->f_type == DTYPE_CAPABILITY) - fp_object = ((struct capability *)fp->f_data)->cap_object; - else - fp_object = fp; - fhold(fp_object); - cp = uma_zalloc(capability_zone, M_WAITOK | M_ZERO); - cp->cap_rights = rights; - cp->cap_object = fp_object; - cp->cap_file = fcapp; - if (fp->f_flag & DFLAG_PASSABLE) - finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp, - &capability_ops); - else - finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp, - &capability_ops_unpassable); + cmds = fdp->fd_ofiles[fd].fde_ioctls; + for (i = 0; i < ncmds; i++) { + if (cmds[i] == cmd) + return (0); + } - /* - * Release our private reference (the proc filedesc still has one). - */ - fdrop(fcapp, td); - return (0); + return (ENOTCAPABLE); } /* - * Given a file descriptor, test it against a capability rights mask and then - * return the file descriptor on which to actually perform the requested - * operation. As long as the reference to fp_cap remains valid, the returned - * pointer in *fp will remain valid, so no extra reference management is - * required, and the caller should fdrop() fp_cap as normal when done with - * both. + * Check if the current ioctls list can be replaced by the new one. 
*/ -int -cap_funwrap(struct file *fp_cap, cap_rights_t rights, struct file **fpp) +static int +cap_ioctl_limit_check(struct filedesc *fdp, int fd, const u_long *cmds, + size_t ncmds) { - struct capability *c; - int error; + u_long *ocmds; + ssize_t oncmds; + u_long i; + long j; - if (fp_cap->f_type != DTYPE_CAPABILITY) { - *fpp = fp_cap; + oncmds = fdp->fd_ofiles[fd].fde_nioctls; + if (oncmds == -1) return (0); + if (oncmds < (ssize_t)ncmds) + return (ENOTCAPABLE); + + ocmds = fdp->fd_ofiles[fd].fde_ioctls; + for (i = 0; i < ncmds; i++) { + for (j = 0; j < oncmds; j++) { + if (cmds[i] == ocmds[j]) + break; + } + if (j == oncmds) + return (ENOTCAPABLE); } - c = fp_cap->f_data; - error = cap_check(c, rights); - if (error) - return (error); - *fpp = c->cap_object; + return (0); } -/* - * Slightly different routine for memory mapping file descriptors: unwrap the - * capability and check CAP_MMAP, but also return a bitmask representing the - * maximum mapping rights the capability allows on the object. - */ int -cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, u_char *maxprotp, - struct file **fpp) +sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap) { - struct capability *c; - u_char maxprot; - int error; + struct filedesc *fdp; + u_long *cmds, *ocmds; + size_t ncmds; + int error, fd; - if (fp_cap->f_type != DTYPE_CAPABILITY) { - *fpp = fp_cap; - *maxprotp = VM_PROT_ALL; - return (0); + fd = uap->fd; + ncmds = uap->ncmds; + + AUDIT_ARG_FD(fd); + + if (ncmds > 256) /* XXX: Is 256 sane? 
*/ + return (EINVAL); + + if (ncmds == 0) { + cmds = NULL; + } else { + cmds = malloc(sizeof(cmds[0]) * ncmds, M_TEMP, M_WAITOK); + error = copyin(uap->cmds, cmds, sizeof(cmds[0]) * ncmds); + if (error != 0) { + free(cmds, M_TEMP); + return (error); + } } - c = fp_cap->f_data; - error = cap_check(c, rights | CAP_MMAP); - if (error) - return (error); - *fpp = c->cap_object; - maxprot = 0; - if (c->cap_rights & CAP_READ) - maxprot |= VM_PROT_READ; - if (c->cap_rights & CAP_WRITE) - maxprot |= VM_PROT_WRITE; - if (c->cap_rights & CAP_MAPEXEC) - maxprot |= VM_PROT_EXECUTE; - *maxprotp = maxprot; - return (0); + + fdp = td->td_proc->p_fd; + FILEDESC_XLOCK(fdp); + + if (fget_locked(fdp, fd) == NULL) { + error = EBADF; + goto out; + } + + error = cap_ioctl_limit_check(fdp, fd, cmds, ncmds); + if (error != 0) + goto out; + + ocmds = fdp->fd_ofiles[fd].fde_ioctls; + fdp->fd_ofiles[fd].fde_ioctls = cmds; + fdp->fd_ofiles[fd].fde_nioctls = ncmds; + + cmds = ocmds; + error = 0; +out: + FILEDESC_XUNLOCK(fdp); + free(cmds, M_TEMP); + return (error); } -/* - * When a capability is closed, simply drop the reference on the underlying - * object and free the capability. fdrop() will handle the case where the - * underlying object also needs to close, and the caller will have already - * performed any object-specific lock or mqueue handling. 
- */ -static int -capability_close(struct file *fp, struct thread *td) +int +sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap) { - struct capability *c; - struct file *fp_object; - - KASSERT(fp->f_type == DTYPE_CAPABILITY, - ("capability_close: !capability")); - - c = fp->f_data; - fp->f_ops = &badfileops; - fp->f_data = NULL; - fp_object = c->cap_object; - uma_zfree(capability_zone, c); - return (fdrop(fp_object, td)); + struct filedesc *fdp; + struct filedescent *fdep; + u_long *cmds; + size_t maxcmds; + int error, fd; + + fd = uap->fd; + cmds = uap->cmds; + maxcmds = uap->maxcmds; + + AUDIT_ARG_FD(fd); + + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + + if (fget_locked(fdp, fd) == NULL) { + error = EBADF; + goto out; + } + + /* + * If all ioctls are allowed (fde_nioctls == -1 && fde_ioctls == NULL) + * the only sane thing we can do is to not populate the given array and + * return CAP_IOCTLS_ALL. + */ + + fdep = &fdp->fd_ofiles[fd]; + if (cmds != NULL && fdep->fde_ioctls != NULL) { + error = copyout(fdep->fde_ioctls, cmds, + sizeof(cmds[0]) * MIN(fdep->fde_nioctls, maxcmds)); + if (error != 0) + goto out; + } + if (fdep->fde_nioctls == -1) + td->td_retval[0] = CAP_IOCTLS_ALL; + else + td->td_retval[0] = fdep->fde_nioctls; + + error = 0; +out: + FILEDESC_SUNLOCK(fdp); + return (error); } /* - * In general, file descriptor operations should never make it to the - * capability, only the underlying file descriptor operation vector, so panic - * if we do turn up here. + * Test whether a capability grants the given fcntl command. 
*/ -static int -capability_read(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) +int +cap_fcntl_check(struct filedesc *fdp, int fd, int cmd) { + uint32_t fcntlcap; - panic("capability_read"); -} - -static int -capability_write(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) -{ + KASSERT(fd >= 0 && fd < fdp->fd_nfiles, + ("%s: invalid fd=%d", __func__, fd)); - panic("capability_write"); -} + fcntlcap = (1 << cmd); + KASSERT((CAP_FCNTL_ALL & fcntlcap) != 0, + ("Unsupported fcntl=%d.", cmd)); -static int -capability_truncate(struct file *fp, off_t length, struct ucred *active_cred, - struct thread *td) -{ + if ((fdp->fd_ofiles[fd].fde_fcntls & fcntlcap) != 0) + return (0); - panic("capability_truncate"); + return (ENOTCAPABLE); } -static int -capability_ioctl(struct file *fp, u_long com, void *data, - struct ucred *active_cred, struct thread *td) +int +sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap) { + struct filedesc *fdp; + uint32_t fcntlrights; + int fd; - panic("capability_ioctl"); -} + fd = uap->fd; + fcntlrights = uap->fcntlrights; -static int -capability_poll(struct file *fp, int events, struct ucred *active_cred, - struct thread *td) -{ + AUDIT_ARG_FD(fd); + AUDIT_ARG_FCNTL_RIGHTS(fcntlrights); - panic("capability_poll"); -} + if ((fcntlrights & ~CAP_FCNTL_ALL) != 0) + return (EINVAL); -static int -capability_kqfilter(struct file *fp, struct knote *kn) -{ + fdp = td->td_proc->p_fd; + FILEDESC_XLOCK(fdp); - panic("capability_kqfilter"); -} + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_XUNLOCK(fdp); + return (EBADF); + } -static int -capability_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, - struct thread *td) -{ + if ((fcntlrights & ~fdp->fd_ofiles[fd].fde_fcntls) != 0) { + FILEDESC_XUNLOCK(fdp); + return (ENOTCAPABLE); + } - panic("capability_stat"); + fdp->fd_ofiles[fd].fde_fcntls = fcntlrights; + FILEDESC_XUNLOCK(fdp); 
+ + return (0); } int -capability_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, - struct thread *td) +sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap) { + struct filedesc *fdp; + uint32_t rights; + int fd; + + fd = uap->fd; - panic("capability_chmod"); + AUDIT_ARG_FD(fd); + + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_SUNLOCK(fdp); + return (EBADF); + } + rights = fdp->fd_ofiles[fd].fde_fcntls; + FILEDESC_SUNLOCK(fdp); + + return (copyout(&rights, uap->fcntlrightsp, sizeof(rights))); } +/* + * For backward compatibility. + */ int -capability_chown(struct file *fp, uid_t uid, gid_t gid, - struct ucred *active_cred, struct thread *td) +sys_cap_new(struct thread *td, struct cap_new_args *uap) { + struct filedesc *fdp; + cap_rights_t rights; + register_t newfd; + int error, fd; + + fd = uap->fd; + rights = uap->rights; + + AUDIT_ARG_FD(fd); + AUDIT_ARG_RIGHTS(rights); + + if ((rights & ~CAP_ALL) != 0) + return (EINVAL); + + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_SUNLOCK(fdp); + return (EBADF); + } + error = _cap_check(cap_rights(fdp, fd), rights, CAPFAIL_INCREASE); + FILEDESC_SUNLOCK(fdp); + if (error != 0) + return (error); + + error = do_dup(td, 0, fd, 0, &newfd); + if (error != 0) + return (error); - panic("capability_chown"); + FILEDESC_XLOCK(fdp); + /* + * We don't really care about the race between checking capability + * rights for the source descriptor and now. If capability rights + * were ok at that earlier point, the process had this descriptor + * with those rights, so we don't increase them in security sense, + * the process might have done the cap_new(2) a bit earlier to get + * the same effect. 
+ */ + fdp->fd_ofiles[newfd].fde_rights = rights; + if ((rights & CAP_IOCTL) == 0) { + free(fdp->fd_ofiles[newfd].fde_ioctls, M_TEMP); + fdp->fd_ofiles[newfd].fde_ioctls = NULL; + fdp->fd_ofiles[newfd].fde_nioctls = 0; + } + if ((rights & CAP_FCNTL) == 0) + fdp->fd_ofiles[newfd].fde_fcntls = 0; + FILEDESC_XUNLOCK(fdp); + + td->td_retval[0] = newfd; + + return (0); } #else /* !CAPABILITIES */ @@ -524,42 +567,54 @@ capability_chown(struct file *fp, uid_t uid, gid_t gid, * Stub Capability functions for when options CAPABILITIES isn't compiled * into the kernel. */ + int -sys_cap_new(struct thread *td, struct cap_new_args *uap) +sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap) { return (ENOSYS); } int -sys_cap_getrights(struct thread *td, struct cap_getrights_args *uap) +sys_cap_rights_get(struct thread *td, struct cap_rights_get_args *uap) { return (ENOSYS); } int -cap_funwrap(struct file *fp_cap, cap_rights_t rights, struct file **fpp) +sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap) { - KASSERT(fp_cap->f_type != DTYPE_CAPABILITY, - ("cap_funwrap: saw capability")); + return (ENOSYS); +} - *fpp = fp_cap; - return (0); +int +sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap) +{ + + return (ENOSYS); } int -cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, u_char *maxprotp, - struct file **fpp) +sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap) { - KASSERT(fp_cap->f_type != DTYPE_CAPABILITY, - ("cap_funwrap_mmap: saw capability")); + return (ENOSYS); +} - *fpp = fp_cap; - *maxprotp = VM_PROT_ALL; - return (0); +int +sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap) +{ + + return (ENOSYS); +} + +int +sys_cap_new(struct thread *td, struct cap_new_args *uap) +{ + + return (ENOSYS); } #endif /* CAPABILITIES */ diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index b97ff7f..39f33f3 100644 --- a/sys/kern/sys_generic.c +++ 
b/sys/kern/sys_generic.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -244,7 +245,7 @@ kern_readv(struct thread *td, int fd, struct uio *auio) struct file *fp; int error; - error = fget_read(td, fd, CAP_READ | CAP_SEEK, &fp); + error = fget_read(td, fd, CAP_READ, &fp); if (error) return (error); error = dofileread(td, fd, fp, auio, (off_t)-1, 0); @@ -287,7 +288,7 @@ kern_preadv(td, fd, auio, offset) struct file *fp; int error; - error = fget_read(td, fd, CAP_READ, &fp); + error = fget_read(td, fd, CAP_PREAD, &fp); if (error) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) @@ -453,7 +454,7 @@ kern_writev(struct thread *td, int fd, struct uio *auio) struct file *fp; int error; - error = fget_write(td, fd, CAP_WRITE | CAP_SEEK, &fp); + error = fget_write(td, fd, CAP_WRITE, &fp); if (error) return (error); error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0); @@ -496,7 +497,7 @@ kern_pwritev(td, fd, auio, offset) struct file *fp; int error; - error = fget_write(td, fd, CAP_WRITE, &fp); + error = fget_write(td, fd, CAP_PWRITE, &fp); if (error) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) @@ -704,28 +705,60 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data) { struct file *fp; struct filedesc *fdp; - int error; - int tmp; + int error, tmp, locked; AUDIT_ARG_FD(fd); AUDIT_ARG_CMD(com); - if ((error = fget(td, fd, CAP_IOCTL, &fp)) != 0) - return (error); - if ((fp->f_flag & (FREAD | FWRITE)) == 0) { - fdrop(fp, td); - return (EBADF); - } + fdp = td->td_proc->p_fd; + switch (com) { case FIONCLEX: + case FIOCLEX: FILEDESC_XLOCK(fdp); - fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; - FILEDESC_XUNLOCK(fdp); + locked = LA_XLOCKED; + break; + default: +#ifdef CAPABILITIES + FILEDESC_SLOCK(fdp); + locked = LA_SLOCKED; +#else + locked = LA_UNLOCKED; +#endif + break; + } + +#ifdef CAPABILITIES + if ((fp = fget_locked(fdp, fd)) == NULL) { + error = EBADF; + goto out; + } + if ((error = 
cap_ioctl_check(fdp, fd, com)) != 0) { + fp = NULL; /* fhold() was not called yet */ + goto out; + } + fhold(fp); + if (locked == LA_SLOCKED) { + FILEDESC_SUNLOCK(fdp); + locked = LA_UNLOCKED; + } +#else + if ((error = fget(td, fd, CAP_IOCTL, &fp)) != 0) { + fp = NULL; + goto out; + } +#endif + if ((fp->f_flag & (FREAD | FWRITE)) == 0) { + error = EBADF; + goto out; + } + + switch (com) { + case FIONCLEX: + fdp->fd_ofiles[fd].fde_flags &= ~UF_EXCLOSE; goto out; case FIOCLEX: - FILEDESC_XLOCK(fdp); - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; - FILEDESC_XUNLOCK(fdp); + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; goto out; case FIONBIO: if ((tmp = *(int *)data)) @@ -745,7 +778,21 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data) error = fo_ioctl(fp, com, data, td->td_ucred, td); out: - fdrop(fp, td); + switch (locked) { + case LA_XLOCKED: + FILEDESC_XUNLOCK(fdp); + break; +#ifdef CAPABILITIES + case LA_SLOCKED: + FILEDESC_SUNLOCK(fdp); + break; +#endif + default: + FILEDESC_UNLOCK_ASSERT(fdp); + break; + } + if (fp != NULL) + fdrop(fp, td); return (error); } @@ -1130,32 +1177,8 @@ selsetbits(fd_mask **ibits, fd_mask **obits, int idx, fd_mask bit, int events) static __inline int getselfd_cap(struct filedesc *fdp, int fd, struct file **fpp) { - struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; - int error; -#endif - if ((fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); -#ifdef CAPABILITIES - /* - * If the file descriptor is for a capability, test rights and use - * the file descriptor references by the capability. 
- */ - error = cap_funwrap(fp, CAP_POLL_EVENT, &fp_fromcap); - if (error) { - fdrop(fp, curthread); - return (error); - } - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, curthread); - fp = fp_fromcap; - } -#endif /* CAPABILITIES */ - *fpp = fp; - return (0); + return (fget_unlocked(fdp, fd, CAP_POLL_EVENT, 0, fpp, NULL)); } /* @@ -1349,13 +1372,14 @@ pollrescan(struct thread *td) /* If the selinfo wasn't cleared the event didn't fire. */ if (si != NULL) continue; - fp = fdp->fd_ofiles[fd->fd]; + fp = fdp->fd_ofiles[fd->fd].fde_file; #ifdef CAPABILITIES - if ((fp == NULL) - || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) { + if (fp == NULL || + cap_check(cap_rights(fdp, fd->fd), CAP_POLL_EVENT) != 0) #else - if (fp == NULL) { + if (fp == NULL) #endif + { fd->revents = POLLNVAL; n++; continue; @@ -1408,9 +1432,8 @@ pollscan(td, fds, nfd) u_int nfd; { struct filedesc *fdp = td->td_proc->p_fd; - int i; struct file *fp; - int n = 0; + int i, n = 0; FILEDESC_SLOCK(fdp); for (i = 0; i < nfd; i++, fds++) { @@ -1420,13 +1443,15 @@ pollscan(td, fds, nfd) } else if (fds->fd < 0) { fds->revents = 0; } else { - fp = fdp->fd_ofiles[fds->fd]; + fp = fdp->fd_ofiles[fds->fd].fde_file; #ifdef CAPABILITIES - if ((fp == NULL) - || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) { + if (fp == NULL || + cap_check(cap_rights(fdp, fds->fd), + CAP_POLL_EVENT) != 0) #else - if (fp == NULL) { + if (fp == NULL) #endif + { fds->revents = POLLNVAL; n++; } else { diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 148dea3..1a89010 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -917,7 +917,7 @@ struct shmid_ds *buf); } 513 AUE_LPATHCONF STD { int lpathconf(char *path, int name); } 514 AUE_CAP_NEW STD { int cap_new(int fd, uint64_t rights); } -515 AUE_CAP_GETRIGHTS STD { int cap_getrights(int fd, \ +515 AUE_CAP_RIGHTS_GET STD { int cap_rights_get(int fd, \ uint64_t *rightsp); } 516 AUE_CAP_ENTER STD { int cap_enter(void); } 517 AUE_CAP_GETMODE 
STD { int cap_getmode(u_int *modep); } @@ -955,5 +955,15 @@ int *status, int options, \ struct __wrusage *wrusage, \ siginfo_t *info); } +533 AUE_CAP_RIGHTS_LIMIT STD { int cap_rights_limit(int fd, \ + uint64_t rights); } +534 AUE_CAP_IOCTLS_LIMIT STD { int cap_ioctls_limit(int fd, \ + const u_long *cmds, size_t ncmds); } +535 AUE_CAP_IOCTLS_GET STD { ssize_t cap_ioctls_get(int fd, \ + u_long *cmds, size_t maxcmds); } +536 AUE_CAP_FCNTLS_LIMIT STD { int cap_fcntls_limit(int fd, \ + uint32_t fcntlrights); } +537 AUE_CAP_FCNTLS_GET STD { int cap_fcntls_get(int fd, \ + uint32_t *fcntlrightsp); } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master diff --git a/sys/kern/tty.c b/sys/kern/tty.c index 5c7b753..02eccd7 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -1840,23 +1840,15 @@ ttyhook_register(struct tty **rtp, struct proc *p, int fd, int error, ref; /* Validate the file descriptor. */ - if ((fdp = p->p_fd) == NULL) - return (EBADF); - - fp = fget_unlocked(fdp, fd); - if (fp == NULL) - return (EBADF); + fdp = p->p_fd; + error = fget_unlocked(fdp, fd, CAP_TTYHOOK, 0, &fp, NULL); + if (error != 0) + return (error); if (fp->f_ops == &badfileops) { error = EBADF; goto done1; } -#ifdef CAPABILITIES - error = cap_funwrap(fp, CAP_TTYHOOK, &fp); - if (error) - goto done1; -#endif - /* * Make sure the vnode is bound to a character device. 
* Unlocked check for the vnode type is ok there, because we diff --git a/sys/kern/uipc_mqueue.c b/sys/kern/uipc_mqueue.c index 9da464c..2d18e77 100644 --- a/sys/kern/uipc_mqueue.c +++ b/sys/kern/uipc_mqueue.c @@ -45,6 +45,7 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_capsicum.h" #include "opt_compat.h" #include @@ -2032,8 +2033,8 @@ kern_kmq_open(struct thread *td, const char *upath, int flags, mode_t mode, &mqueueops); FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[fd] == fp) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + if (fdp->fd_ofiles[fd].fde_file == fp) + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); td->td_retval[0] = fd; fdrop(fp, td); @@ -2275,11 +2276,13 @@ again: error = EBADF; goto out; } - error = cap_funwrap(fp2, CAP_POLL_EVENT, &fp2); +#ifdef CAPABILITIES + error = cap_check(cap_rights(fdp, uap->mqd), CAP_POLL_EVENT); if (error) { FILEDESC_SUNLOCK(fdp); goto out; } +#endif if (fp2 != fp) { FILEDESC_SUNLOCK(fdp); error = EBADF; diff --git a/sys/kern/uipc_sem.c b/sys/kern/uipc_sem.c index c219844..2de3409 100644 --- a/sys/kern/uipc_sem.c +++ b/sys/kern/uipc_sem.c @@ -579,8 +579,8 @@ ksem_create(struct thread *td, const char *name, semid_t *semidp, mode_t mode, finit(fp, FREAD | FWRITE, DTYPE_SEM, ks, &ksem_ops); FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[fd] == fp) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + if (fdp->fd_ofiles[fd].fde_file == fp) + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); fdrop(fp, td); diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c index 7f75bdc..0cbb8b3 100644 --- a/sys/kern/uipc_shm.c +++ b/sys/kern/uipc_shm.c @@ -629,8 +629,8 @@ sys_shm_open(struct thread *td, struct shm_open_args *uap) finit(fp, FFLAGS(uap->flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops); FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[fd] == fp) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + if (fdp->fd_ofiles[fd].fde_file == fp) + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); td->td_retval[0] = fd; 
fdrop(fp, td); diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 665eb6d..847db35 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -121,38 +121,20 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, "Number of sendfile(2) sf_bufs in use"); /* - * Convert a user file descriptor to a kernel file entry and check that, if - * it is a capability, the right rights are present. A reference on the file - * entry is held upon returning. + * Convert a user file descriptor to a kernel file entry and check if required + * capability rights are present. + * A reference on the file entry is held upon returning. */ static int getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights, struct file **fpp, u_int *fflagp) { struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; int error; -#endif - if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); -#ifdef CAPABILITIES - /* - * If the file descriptor is for a capability, test rights and use - * the file descriptor referenced by the capability. 
- */ - error = cap_funwrap(fp, rights, &fp_fromcap); - if (error) { - fdrop(fp, curthread); + error = fget_unlocked(fdp, fd, rights, 0, &fp, NULL); + if (error != 0) return (error); - } - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, curthread); - fp = fp_fromcap; - } -#endif /* CAPABILITIES */ if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, curthread); return (ENOTSOCK); @@ -765,7 +747,7 @@ kern_sendit(td, s, mp, flags, control, segflg) #endif AUDIT_ARG_FD(s); - rights = CAP_WRITE; + rights = CAP_SEND; if (mp->msg_name != NULL) { AUDIT_ARG_SOCKADDR(td, mp->msg_name); rights |= CAP_CONNECT; @@ -974,7 +956,7 @@ kern_recvit(td, s, mp, fromseg, controlp) *controlp = NULL; AUDIT_ARG_FD(s); - error = getsock_cap(td->td_proc->p_fd, s, CAP_READ, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, s, CAP_RECV, &fp, NULL); if (error) return (error); so = fp->f_data; @@ -1850,7 +1832,11 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, * we send only the header/trailer and no payload data. */ AUDIT_ARG_FD(uap->fd); - if ((error = fgetvp_read(td, uap->fd, CAP_READ, &vp)) != 0) + /* + * sendfile(2) can start at any offset within a file so we require + * CAP_READ+CAP_SEEK = CAP_PREAD. + */ + if ((error = fgetvp_read(td, uap->fd, CAP_PREAD, &vp)) != 0) goto out; vn_lock(vp, LK_SHARED | LK_RETRY); if (vp->v_type == VREG) { @@ -1886,7 +1872,7 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, * The socket must be a stream socket and connected. * Remember if it a blocking or non-blocking socket. 
*/ - if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_WRITE, + if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SEND, &sock_fp, NULL)) != 0) goto out; so = sock_fp->f_data; @@ -2423,7 +2409,7 @@ sys_sctp_generic_sendmsg (td, uap) u_sinfo = &sinfo; } - rights = CAP_WRITE; + rights = CAP_SEND; if (uap->tolen) { error = getsockaddr(&to, uap->to, uap->tolen); if (error) { @@ -2534,7 +2520,7 @@ sys_sctp_generic_sendmsg_iov(td, uap) return (error); u_sinfo = &sinfo; } - rights = CAP_WRITE; + rights = CAP_SEND; if (uap->tolen) { error = getsockaddr(&to, uap->to, uap->tolen); if (error) { @@ -2658,7 +2644,7 @@ sys_sctp_generic_recvmsg(td, uap) #endif AUDIT_ARG_FD(uap->sd); - error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_READ, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_RECV, &fp, NULL); if (error) { return (error); } diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index a6c308f..dcfd009 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -279,7 +279,7 @@ static void unp_drop(struct unpcb *, int); static void unp_gc(__unused void *, int); static void unp_scan(struct mbuf *, void (*)(struct file *)); static void unp_discard(struct file *); -static void unp_freerights(struct file **, int); +static void unp_freerights(struct filedescent *, int); static void unp_init(void); static int unp_internalize(struct mbuf **, struct thread *); static void unp_internalize_fp(struct file *); @@ -1642,14 +1642,14 @@ unp_drop(struct unpcb *unp, int errno) } static void -unp_freerights(struct file **rp, int fdcount) +unp_freerights(struct filedescent *fde, int fdcount) { - int i; struct file *fp; + int i; - for (i = 0; i < fdcount; i++) { - fp = *rp; - *rp++ = NULL; + for (i = 0; i < fdcount; i++, fde++) { + fp = fde->fde_file; + bzero(fde, sizeof(*fde)); unp_discard(fp); } } @@ -1661,8 +1661,8 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp) struct cmsghdr *cm = mtod(control, struct cmsghdr *); int i; int 
*fdp; - struct file **rp; - struct file *fp; + struct filedesc *fdesc = td->td_proc->p_fd; + struct filedescent *fde, *fdep; void *data; socklen_t clen = control->m_len, datalen; int error, newfds; @@ -1683,20 +1683,20 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp) datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { - newfds = datalen / sizeof(struct file *); - rp = data; + newfds = datalen / sizeof(*fdep); + fdep = data; /* If we're not outputting the descriptors free them. */ if (error || controlp == NULL) { - unp_freerights(rp, newfds); + unp_freerights(fdep, newfds); goto next; } - FILEDESC_XLOCK(td->td_proc->p_fd); + FILEDESC_XLOCK(fdesc); /* if the new FD's will not fit free them. */ if (!fdavail(td, newfds)) { - FILEDESC_XUNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(fdesc); error = EMSGSIZE; - unp_freerights(rp, newfds); + unp_freerights(fdep, newfds); goto next; } @@ -1710,23 +1710,24 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp) *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { - FILEDESC_XUNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(fdesc); error = E2BIG; - unp_freerights(rp, newfds); + unp_freerights(fdep, newfds); goto next; } fdp = (int *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); - for (i = 0; i < newfds; i++) { + for (i = 0; i < newfds; i++, fdep++, fdp++) { if (fdalloc(td, 0, &f)) panic("unp_externalize fdalloc failed"); - fp = *rp++; - td->td_proc->p_fd->fd_ofiles[f] = fp; - unp_externalize_fp(fp); - *fdp++ = f; + fde = &fdesc->fd_ofiles[f]; + fde->fde_file = fdep->fde_file; + filecaps_copy(&fdep->fde_caps, &fde->fde_caps); + unp_externalize_fp(fde->fde_file); + *fdp = f; } - FILEDESC_XUNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(fdesc); } else { /* We can just copy anything else across. 
*/ if (error || controlp == NULL) @@ -1797,11 +1798,11 @@ unp_internalize(struct mbuf **controlp, struct thread *td) { struct mbuf *control = *controlp; struct proc *p = td->td_proc; - struct filedesc *fdescp = p->p_fd; + struct filedesc *fdesc = p->p_fd; struct bintime *bt; struct cmsghdr *cm = mtod(control, struct cmsghdr *); struct cmsgcred *cmcred; - struct file **rp; + struct filedescent *fde, *fdep; struct file *fp; struct timeval *tv; int i, fd, *fdp; @@ -1854,18 +1855,17 @@ unp_internalize(struct mbuf **controlp, struct thread *td) * files. If not, reject the entire operation. */ fdp = data; - FILEDESC_SLOCK(fdescp); + FILEDESC_SLOCK(fdesc); for (i = 0; i < oldfds; i++) { fd = *fdp++; - if (fd < 0 || fd >= fdescp->fd_nfiles || - fdescp->fd_ofiles[fd] == NULL) { - FILEDESC_SUNLOCK(fdescp); + if (fget_locked(fdesc, fd) == NULL) { + FILEDESC_SUNLOCK(fdesc); error = EBADF; goto out; } - fp = fdescp->fd_ofiles[fd]; + fp = fdesc->fd_ofiles[fd].fde_file; if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { - FILEDESC_SUNLOCK(fdescp); + FILEDESC_SUNLOCK(fdesc); error = EOPNOTSUPP; goto out; } @@ -1874,25 +1874,26 @@ unp_internalize(struct mbuf **controlp, struct thread *td) /* * Now replace the integer FDs with pointers to the - * associated global file table entry.. + * file structure and capability rights. 
*/ - newlen = oldfds * sizeof(struct file *); + newlen = oldfds * sizeof(*fdep); *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { - FILEDESC_SUNLOCK(fdescp); + FILEDESC_SUNLOCK(fdesc); error = E2BIG; goto out; } fdp = data; - rp = (struct file **) + fdep = (struct filedescent *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); - for (i = 0; i < oldfds; i++) { - fp = fdescp->fd_ofiles[*fdp++]; - *rp++ = fp; - unp_internalize_fp(fp); + for (i = 0; i < oldfds; i++, fdep++, fdp++) { + fde = &fdesc->fd_ofiles[*fdp]; + fdep->fde_file = fde->fde_file; + filecaps_copy(&fde->fde_caps, &fdep->fde_caps); + unp_internalize_fp(fdep->fde_file); } - FILEDESC_SUNLOCK(fdescp); + FILEDESC_SUNLOCK(fdesc); break; case SCM_TIMESTAMP: @@ -2252,7 +2253,7 @@ static void unp_scan(struct mbuf *m0, void (*op)(struct file *)) { struct mbuf *m; - struct file **rp; + struct filedescent *fdep; struct cmsghdr *cm; void *data; int i; @@ -2277,10 +2278,10 @@ unp_scan(struct mbuf *m0, void (*op)(struct file *)) if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { - qfds = datalen / sizeof (struct file *); - rp = data; - for (i = 0; i < qfds; i++) - (*op)(*rp++); + qfds = datalen / sizeof(*fdep); + fdep = data; + for (i = 0; i < qfds; i++, fdep++) + (*op)(fdep->fde_file); } if (CMSG_SPACE(datalen) < clen) { diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 99b0197..cba1638 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -1593,16 +1593,16 @@ aio_aqueue(struct thread *td, struct aiocb *job, struct aioliojob *lj, fd = aiocbe->uaiocb.aio_fildes; switch (opcode) { case LIO_WRITE: - error = fget_write(td, fd, CAP_WRITE | CAP_SEEK, &fp); + error = fget_write(td, fd, CAP_PWRITE, &fp); break; case LIO_READ: - error = fget_read(td, fd, CAP_READ | CAP_SEEK, &fp); + error = fget_read(td, fd, CAP_PREAD, &fp); break; case LIO_SYNC: error = fget(td, fd, CAP_FSYNC, &fp); break; case LIO_NOP: - error = fget(td, fd, 0, &fp); + error = fget(td, 
fd, CAP_NONE, &fp); break; default: error = EINVAL; diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index fbde152..94d11f2 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -227,17 +227,18 @@ namei(struct nameidata *ndp) AUDIT_ARG_ATFD2(ndp->ni_dirfd); error = fgetvp_rights(td, ndp->ni_dirfd, ndp->ni_rightsneeded | CAP_LOOKUP, - &(ndp->ni_baserights), &dp); + &ndp->ni_filecaps, &dp); #ifdef CAPABILITIES /* - * Lookups relative to a capability must also be + * If file descriptor doesn't have all rights, + * all lookups relative to it must also be * strictly relative. - * - * Note that a capability with rights CAP_MASK_VALID - * is treated exactly like a regular file descriptor. */ - if (ndp->ni_baserights != CAP_MASK_VALID) + if (ndp->ni_filecaps.fc_rights != CAP_ALL || + ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL || + ndp->ni_filecaps.fc_nioctls != -1) { ndp->ni_strictrelative = 1; + } #endif } if (error != 0 || dp != NULL) { diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index bd44a3a..787399a 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -970,6 +970,8 @@ flags_to_rights(int flags) /* FALLTHROUGH */ case O_WRONLY: rights |= CAP_WRITE; + if (!(flags & O_APPEND)) + rights |= CAP_SEEK; break; } } @@ -1143,19 +1145,22 @@ success: * If we haven't already installed the FD (for dupfdopen), do so now. */ if (indx == -1) { + struct filecaps *fcaps; + #ifdef CAPABILITIES - if (nd.ni_strictrelative == 1) { - /* - * We are doing a strict relative lookup; wrap the - * result in a capability. - */ - if ((error = kern_capwrap(td, fp, nd.ni_baserights, - &indx)) != 0) - goto bad; - } else + if (nd.ni_strictrelative == 1) + fcaps = &nd.ni_filecaps; + else #endif - if ((error = finstall(td, fp, &indx, flags)) != 0) - goto bad; + fcaps = NULL; + error = finstall(td, fp, &indx, flags, fcaps); + /* On success finstall() consumes fcaps. 
*/ + if (error != 0) { + filecaps_free(&nd.ni_filecaps); + goto bad; + } + } else { + filecaps_free(&nd.ni_filecaps); } /* @@ -1279,7 +1284,7 @@ kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, - pathseg, path, fd, CAP_MKNOD, td); + pathseg, path, fd, CAP_MKNODAT, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; @@ -1399,7 +1404,7 @@ kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, - pathseg, path, fd, CAP_MKFIFO, td); + pathseg, path, fd, CAP_MKFIFOAT, td); if ((error = namei(&nd)) != 0) return (error); if (nd.ni_vp != NULL) { @@ -1553,7 +1558,7 @@ kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, return (error); } NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, - segflg, path2, fd2, CAP_CREATE, td); + segflg, path2, fd2, CAP_LINKAT, td); if ((error = namei(&nd)) == 0) { if (nd.ni_vp != NULL) { if (nd.ni_dvp == nd.ni_vp) @@ -1646,7 +1651,7 @@ kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, - segflg, path2, fd, CAP_CREATE, td); + segflg, path2, fd, CAP_SYMLINKAT, td); if ((error = namei(&nd)) != 0) goto out; if (nd.ni_vp) { @@ -1798,7 +1803,7 @@ kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, - pathseg, path, fd, CAP_DELETE, td); + pathseg, path, fd, CAP_UNLINKAT, td); if ((error = namei(&nd)) != 0) return (error == EINVAL ? 
EPERM : error); vp = nd.ni_vp; @@ -3502,10 +3507,10 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, bwillwrite(); #ifdef MAC NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | - AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td); + AUDITVNODE1, pathseg, old, oldfd, CAP_RENAMEAT, td); #else NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, - pathseg, old, oldfd, CAP_DELETE, td); + pathseg, old, oldfd, CAP_RENAMEAT, td); #endif if ((error = namei(&fromnd)) != 0) @@ -3527,7 +3532,7 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, goto out1; } NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | - SAVESTART | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE, td); + SAVESTART | AUDITVNODE2, pathseg, new, newfd, CAP_LINKAT, td); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&tond)) != 0) { @@ -3550,6 +3555,15 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, error = EISDIR; goto out; } +#ifdef CAPABILITIES + /* + * If the target already exists we require CAP_UNLINKAT + * from 'newfd'. 
+ */ + error = cap_check(tond.ni_filecaps.fc_rights, CAP_UNLINKAT); + if (error != 0) + goto out; +#endif } if (fvp == tdvp) { error = EINVAL; @@ -3650,7 +3664,7 @@ kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, - segflg, path, fd, CAP_MKDIR, td); + segflg, path, fd, CAP_MKDIRAT, td); nd.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&nd)) != 0) return (error); @@ -3734,7 +3748,7 @@ kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, - pathseg, path, fd, CAP_RMDIR, td); + pathseg, path, fd, CAP_UNLINKAT, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; @@ -3987,8 +4001,7 @@ kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, if (count > IOSIZE_MAX) return (EINVAL); auio.uio_resid = count; - if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ | CAP_SEEK, - &fp)) != 0) + if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); @@ -4151,33 +4164,14 @@ out: * entry is held upon returning. */ int -getvnode(struct filedesc *fdp, int fd, cap_rights_t rights, - struct file **fpp) +getvnode(struct filedesc *fdp, int fd, cap_rights_t rights, struct file **fpp) { struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; int error; -#endif - if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); -#ifdef CAPABILITIES - /* - * If the file descriptor is for a capability, test rights and use the - * file descriptor referenced by the capability. 
- */ - error = cap_funwrap(fp, rights, &fp_fromcap); - if (error) { - fdrop(fp, curthread); + error = fget_unlocked(fdp, fd, rights, 0, &fp, NULL); + if (error != 0) return (error); - } - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, curthread); - fp = fp_fromcap; - } -#endif /* CAPABILITIES */ /* * The file could be not of the vnode type, or it may be not @@ -4361,7 +4355,7 @@ sys_fhopen(td, uap) goto bad; } - error = finstall(td, fp, &indx, fmode); + error = finstall(td, fp, &indx, fmode, NULL); bad: fdrop(fp, td); td->td_retval[0] = indx; @@ -4614,7 +4608,7 @@ kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, return (EINVAL); } /* XXX: CAP_POSIX_FADVISE? */ - error = fget(td, fd, 0, &fp); + error = fget(td, fd, CAP_NONE, &fp); if (error != 0) goto out; -- cgit v1.1 From 48e0f13795816420f399e2606735c7ef9dab3d7c Mon Sep 17 00:00:00 2001 From: pjd Date: Sat, 2 Mar 2013 00:55:09 +0000 Subject: Regen after r247602. --- sys/kern/init_sysent.c | 11 +++- sys/kern/syscalls.c | 9 ++- sys/kern/systrace_args.c | 146 +++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 157 insertions(+), 9 deletions(-) (limited to 'sys/kern') diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index ce47f56..b5ed57b 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib + * created from FreeBSD: head/sys/kern/syscalls.master 247602 2013-03-02 00:53:12Z pjd */ #include "opt_compat.h" @@ -88,7 +88,7 @@ struct sysent sysent[] = { { AS(acct_args), (sy_call_t *)sys_acct, AUE_ACCT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 51 = acct */ { compat(0,sigpending), AUE_SIGPENDING, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 52 = old sigpending */ { AS(sigaltstack_args), (sy_call_t *)sys_sigaltstack, AUE_SIGALTSTACK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 53 = sigaltstack */ - { AS(ioctl_args), (sy_call_t *)sys_ioctl, AUE_IOCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 54 = ioctl */ + { AS(ioctl_args), (sy_call_t *)sys_ioctl, AUE_IOCTL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 54 = ioctl */ { AS(reboot_args), (sy_call_t *)sys_reboot, AUE_REBOOT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 55 = reboot */ { AS(revoke_args), (sy_call_t *)sys_revoke, AUE_REVOKE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 56 = revoke */ { AS(symlink_args), (sy_call_t *)sys_symlink, AUE_SYMLINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 57 = symlink */ @@ -549,7 +549,7 @@ struct sysent sysent[] = { { AS(shmctl_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 512 = shmctl */ { AS(lpathconf_args), (sy_call_t *)sys_lpathconf, AUE_LPATHCONF, NULL, 0, 0, 0, SY_THR_STATIC }, /* 513 = lpathconf */ { AS(cap_new_args), (sy_call_t *)sys_cap_new, AUE_CAP_NEW, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 514 = cap_new */ - { AS(cap_getrights_args), (sy_call_t *)sys_cap_getrights, AUE_CAP_GETRIGHTS, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 515 = cap_getrights */ + { AS(cap_rights_get_args), (sy_call_t *)sys_cap_rights_get, AUE_CAP_RIGHTS_GET, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 515 = cap_rights_get */ { 0, (sy_call_t *)sys_cap_enter, AUE_CAP_ENTER, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 516 = cap_enter */ { AS(cap_getmode_args), (sy_call_t 
*)sys_cap_getmode, AUE_CAP_GETMODE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 517 = cap_getmode */ { AS(pdfork_args), (sy_call_t *)sys_pdfork, AUE_PDFORK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 518 = pdfork */ @@ -567,4 +567,9 @@ struct sysent sysent[] = { { AS(posix_fallocate_args), (sy_call_t *)sys_posix_fallocate, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 530 = posix_fallocate */ { AS(posix_fadvise_args), (sy_call_t *)sys_posix_fadvise, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 531 = posix_fadvise */ { AS(wait6_args), (sy_call_t *)sys_wait6, AUE_WAIT6, NULL, 0, 0, 0, SY_THR_STATIC }, /* 532 = wait6 */ + { AS(cap_rights_limit_args), (sy_call_t *)sys_cap_rights_limit, AUE_CAP_RIGHTS_LIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 533 = cap_rights_limit */ + { AS(cap_ioctls_limit_args), (sy_call_t *)sys_cap_ioctls_limit, AUE_CAP_IOCTLS_LIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 534 = cap_ioctls_limit */ + { AS(cap_ioctls_get_args), (sy_call_t *)sys_cap_ioctls_get, AUE_CAP_IOCTLS_GET, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 535 = cap_ioctls_get */ + { AS(cap_fcntls_limit_args), (sy_call_t *)sys_cap_fcntls_limit, AUE_CAP_FCNTLS_LIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 536 = cap_fcntls_limit */ + { AS(cap_fcntls_get_args), (sy_call_t *)sys_cap_fcntls_get, AUE_CAP_FCNTLS_GET, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 537 = cap_fcntls_get */ }; diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c index 96f2400..e066014 100644 --- a/sys/kern/syscalls.c +++ b/sys/kern/syscalls.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib + * created from FreeBSD: head/sys/kern/syscalls.master 247602 2013-03-02 00:53:12Z pjd */ const char *syscallnames[] = { @@ -522,7 +522,7 @@ const char *syscallnames[] = { "shmctl", /* 512 = shmctl */ "lpathconf", /* 513 = lpathconf */ "cap_new", /* 514 = cap_new */ - "cap_getrights", /* 515 = cap_getrights */ + "cap_rights_get", /* 515 = cap_rights_get */ "cap_enter", /* 516 = cap_enter */ "cap_getmode", /* 517 = cap_getmode */ "pdfork", /* 518 = pdfork */ @@ -540,4 +540,9 @@ const char *syscallnames[] = { "posix_fallocate", /* 530 = posix_fallocate */ "posix_fadvise", /* 531 = posix_fadvise */ "wait6", /* 532 = wait6 */ + "cap_rights_limit", /* 533 = cap_rights_limit */ + "cap_ioctls_limit", /* 534 = cap_ioctls_limit */ + "cap_ioctls_get", /* 535 = cap_ioctls_get */ + "cap_fcntls_limit", /* 536 = cap_fcntls_limit */ + "cap_fcntls_get", /* 537 = cap_fcntls_get */ }; diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c index c755f92..d811da2 100644 --- a/sys/kern/systrace_args.c +++ b/sys/kern/systrace_args.c @@ -3134,9 +3134,9 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 2; break; } - /* cap_getrights */ + /* cap_rights_get */ case 515: { - struct cap_getrights_args *p = params; + struct cap_rights_get_args *p = params; iarg[0] = p->fd; /* int */ uarg[1] = (intptr_t) p->rightsp; /* uint64_t * */ *n_args = 2; @@ -3286,6 +3286,48 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 6; break; } + /* cap_rights_limit */ + case 533: { + struct cap_rights_limit_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = p->rights; /* uint64_t */ + *n_args = 2; + break; + } + /* cap_ioctls_limit */ + case 534: { + struct cap_ioctls_limit_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = (intptr_t) p->cmds; /* const u_long * */ + uarg[2] = p->ncmds; /* size_t */ + *n_args = 3; + break; + 
} + /* cap_ioctls_get */ + case 535: { + struct cap_ioctls_get_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = (intptr_t) p->cmds; /* u_long * */ + uarg[2] = p->maxcmds; /* size_t */ + *n_args = 3; + break; + } + /* cap_fcntls_limit */ + case 536: { + struct cap_fcntls_limit_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = p->fcntlrights; /* uint32_t */ + *n_args = 2; + break; + } + /* cap_fcntls_get */ + case 537: { + struct cap_fcntls_get_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = (intptr_t) p->fcntlrightsp; /* uint32_t * */ + *n_args = 2; + break; + } default: *n_args = 0; break; @@ -8477,7 +8519,7 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; - /* cap_getrights */ + /* cap_rights_get */ case 515: switch(ndx) { case 0: @@ -8745,6 +8787,77 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* cap_rights_limit */ + case 533: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "uint64_t"; + break; + default: + break; + }; + break; + /* cap_ioctls_limit */ + case 534: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "const u_long *"; + break; + case 2: + p = "size_t"; + break; + default: + break; + }; + break; + /* cap_ioctls_get */ + case 535: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "u_long *"; + break; + case 2: + p = "size_t"; + break; + default: + break; + }; + break; + /* cap_fcntls_limit */ + case 536: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; + break; + /* cap_fcntls_get */ + case 537: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "uint32_t *"; + break; + default: + break; + }; + break; default: break; }; @@ -10556,7 +10669,7 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; - /* cap_getrights */ + /* cap_rights_get */ case 
515: if (ndx == 0 || ndx == 1) p = "int"; @@ -10638,6 +10751,31 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* cap_rights_limit */ + case 533: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* cap_ioctls_limit */ + case 534: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* cap_ioctls_get */ + case 535: + if (ndx == 0 || ndx == 1) + p = "ssize_t"; + break; + /* cap_fcntls_limit */ + case 536: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* cap_fcntls_get */ + case 537: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; -- cgit v1.1 From 73650b4f3af1f7c8b2448569209fcdff427c08db Mon Sep 17 00:00:00 2001 From: pjd Date: Sat, 2 Mar 2013 09:58:47 +0000 Subject: If the target file already exists, check for the CAP_UNLINKAT capability right on the target directory descriptor, but only if this is renameat(2) and real target directory descriptor is given (not AT_FDCWD). Without this fix regular rename(2) fails if the target file already exists. Reported by: Michael Butler Reported by: Larry Rosenman Sponsored by: The FreeBSD Foundation --- sys/kern/vfs_syscalls.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'sys/kern') diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 787399a..4c1d97c 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -3556,13 +3556,16 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, goto out; } #ifdef CAPABILITIES - /* - * If the target already exists we require CAP_UNLINKAT - * from 'newfd'. - */ - error = cap_check(tond.ni_filecaps.fc_rights, CAP_UNLINKAT); - if (error != 0) - goto out; + if (newfd != AT_FDCWD) { + /* + * If the target already exists we require CAP_UNLINKAT + * from 'newfd'. + */ + error = cap_check(tond.ni_filecaps.fc_rights, + CAP_UNLINKAT); + if (error != 0) + goto out; + } #endif } if (fvp == tdvp) { -- cgit v1.1