summary refs log tree commit diff stats
path: root/sys/kern
diff options
context:
space:
mode:
author Renato Botelho <renato@netgate.com> 2016-05-03 08:53:59 -0300
committer Renato Botelho <renato@netgate.com> 2016-05-03 08:53:59 -0300
commit 501575fb1206644a3ea8c2cd64a81084745445cc (patch)
tree e07e5ad3f3ff6f6cf2841dd2d2eb0dcb0e54521a /sys/kern
parent 91f599cbc0d103dd112a2472b589573724b8d70a (diff)
parent 04acf11bf47629b82fc88ce0e6d6dc642b1e641b (diff)
download FreeBSD-src-501575fb1206644a3ea8c2cd64a81084745445cc.zip
FreeBSD-src-501575fb1206644a3ea8c2cd64a81084745445cc.tar.gz
Merge remote-tracking branch 'origin/stable/10' into devel
Diffstat (limited to 'sys/kern')
-rw-r--r-- sys/kern/kern_jail.c 214
-rw-r--r-- sys/kern/kern_osd.c 263
-rw-r--r-- sys/kern/sysv_msg.c 378
-rw-r--r-- sys/kern/sysv_sem.c 390
-rw-r--r-- sys/kern/sysv_shm.c 330
5 files changed, 1274 insertions, 301 deletions
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 0d52c7b..0ea7276 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -560,8 +560,9 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
void *op;
#endif
unsigned long hid;
- size_t namelen, onamelen;
- int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos;
+ size_t namelen, onamelen, pnamelen;
+ int born, created, cuflags, descend, enforce;
+ int error, errmsg_len, errmsg_pos;
int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
int fi, jid, jsys, len, level;
int childmax, osreldt, rsnum, slevel;
@@ -584,7 +585,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
error = priv_check(td, PRIV_JAIL_ATTACH);
if (error)
return (error);
- mypr = ppr = td->td_ucred->cr_prison;
+ mypr = td->td_ucred->cr_prison;
if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
return (EPERM);
if (flags & ~JAIL_SET_MASK)
@@ -611,6 +612,13 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
#endif
g_path = NULL;
+ cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
+ if (!cuflags) {
+ error = EINVAL;
+ vfs_opterror(opts, "no valid operation (create or update)");
+ goto done_errmsg;
+ }
+
error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
if (error == ENOENT)
jid = 0;
@@ -1020,42 +1028,18 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
/*
- * Grab the allprison lock before letting modules check their
- * parameters. Once we have it, do not let go so we'll have a
- * consistent view of the OSD list.
- */
- sx_xlock(&allprison_lock);
- error = osd_jail_call(NULL, PR_METHOD_CHECK, opts);
- if (error)
- goto done_unlock_list;
-
- /* By now, all parameters should have been noted. */
- TAILQ_FOREACH(opt, opts, link) {
- if (!opt->seen && strcmp(opt->name, "errmsg")) {
- error = EINVAL;
- vfs_opterror(opts, "unknown parameter: %s", opt->name);
- goto done_unlock_list;
- }
- }
-
- /*
- * See if we are creating a new record or updating an existing one.
+ * Find the specified jail, or at least its parent.
* This abuses the file error codes ENOENT and EEXIST.
*/
- cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
- if (!cuflags) {
- error = EINVAL;
- vfs_opterror(opts, "no valid operation (create or update)");
- goto done_unlock_list;
- }
pr = NULL;
- namelc = NULL;
+ ppr = mypr;
if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) {
namelc = strrchr(name, '.');
jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10);
if (*p != '\0')
jid = 0;
}
+ sx_xlock(&allprison_lock);
if (jid != 0) {
/*
* See if a requested jid already exists. There is an
@@ -1121,6 +1105,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* and updates keyed by the name itself (where the name must exist
* because that is the jail being updated).
*/
+ namelc = NULL;
if (name != NULL) {
namelc = strrchr(name, '.');
if (namelc == NULL)
@@ -1131,7 +1116,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* parent and child names, and make sure the parent
* exists or matches an already found jail.
*/
- *namelc = '\0';
if (pr != NULL) {
if (strncmp(name, ppr->pr_name, namelc - name)
|| ppr->pr_name[namelc - name] != '\0') {
@@ -1142,6 +1126,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
goto done_unlock_list;
}
} else {
+ *namelc = '\0';
ppr = prison_find_name(mypr, name);
if (ppr == NULL) {
error = ENOENT;
@@ -1150,17 +1135,18 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
goto done_unlock_list;
}
mtx_unlock(&ppr->pr_mtx);
+ *namelc = '.';
}
- name = ++namelc;
+ namelc++;
}
- if (name[0] != '\0') {
- namelen =
+ if (namelc[0] != '\0') {
+ pnamelen =
(ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
name_again:
deadpr = NULL;
FOREACH_PRISON_CHILD(ppr, tpr) {
if (tpr != pr && tpr->pr_ref > 0 &&
- !strcmp(tpr->pr_name + namelen, name)) {
+ !strcmp(tpr->pr_name + pnamelen, namelc)) {
if (pr == NULL &&
cuflags != JAIL_CREATE) {
mtx_lock(&tpr->pr_mtx);
@@ -1234,10 +1220,11 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
created = 1;
mtx_lock(&ppr->pr_mtx);
- if (ppr->pr_ref == 0 || (ppr->pr_flags & PR_REMOVE)) {
+ if (ppr->pr_ref == 0) {
mtx_unlock(&ppr->pr_mtx);
error = ENOENT;
- vfs_opterror(opts, "parent jail went away!");
+ vfs_opterror(opts, "jail \"%s\" not found",
+ prison_name(mypr, ppr));
goto done_unlock_list;
}
ppr->pr_ref++;
@@ -1291,8 +1278,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
pr->pr_id = jid;
/* Set some default values, and inherit some from the parent. */
- if (name == NULL)
- name = "";
+ if (namelc == NULL)
+ namelc = "";
if (path == NULL) {
path = "/";
root = mypr->pr_root;
@@ -1355,6 +1342,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
LIST_INIT(&pr->pr_children);
mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
+ TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
#ifdef VIMAGE
/* Allocate a new vnet if specified. */
@@ -1374,7 +1362,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
mtx_lock(&pr->pr_mtx);
/*
* New prisons do not yet have a reference, because we do not
- * want other to see the incomplete prison once the
+ * want others to see the incomplete prison once the
* allprison_lock is downgraded.
*/
} else {
@@ -1588,13 +1576,13 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
#endif
onamelen = namelen = 0;
- if (name != NULL) {
+ if (namelc != NULL) {
/* Give a default name of the jid. Also allow the name to be
* explicitly the jid - but not any other number, and only in
* normal form (no leading zero/etc).
*/
- if (name[0] == '\0')
- snprintf(name = numbuf, sizeof(numbuf), "%d", jid);
+ if (namelc[0] == '\0')
+ snprintf(namelc = numbuf, sizeof(numbuf), "%d", jid);
else if ((strtoul(namelc, &p, 10) != jid ||
namelc[0] < '1' || namelc[0] > '9') && *p == '\0') {
error = EINVAL;
@@ -1606,9 +1594,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* Make sure the name isn't too long for the prison or its
* children.
*/
- onamelen = strlen(pr->pr_name);
- namelen = strlen(name);
- if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) {
+ pnamelen = (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
+ onamelen = strlen(pr->pr_name + pnamelen);
+ namelen = strlen(namelc);
+ if (pnamelen + namelen + 1 > sizeof(pr->pr_name)) {
error = ENAMETOOLONG;
goto done_deref_locked;
}
@@ -1625,6 +1614,30 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
goto done_deref_locked;
}
+ /*
+ * Let modules check their parameters. This requires unlocking and
+ * then re-locking the prison, but this is still a valid state as long
+ * as allprison_lock remains xlocked.
+ */
+ mtx_unlock(&pr->pr_mtx);
+ error = osd_jail_call(pr, PR_METHOD_CHECK, opts);
+ if (error != 0) {
+ prison_deref(pr, created
+ ? PD_LIST_XLOCKED
+ : PD_DEREF | PD_LIST_XLOCKED);
+ goto done_releroot;
+ }
+ mtx_lock(&pr->pr_mtx);
+
+ /* At this point, all valid parameters should have been noted. */
+ TAILQ_FOREACH(opt, opts, link) {
+ if (!opt->seen && strcmp(opt->name, "errmsg")) {
+ error = EINVAL;
+ vfs_opterror(opts, "unknown parameter: %s", opt->name);
+ goto done_deref_locked;
+ }
+ }
+
/* Set the parameters of the prison. */
#ifdef INET
redo_ip4 = 0;
@@ -1698,12 +1711,12 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
tpr->pr_devfs_rsnum = rsnum;
}
- if (name != NULL) {
+ if (namelc != NULL) {
if (ppr == &prison0)
- strlcpy(pr->pr_name, name, sizeof(pr->pr_name));
+ strlcpy(pr->pr_name, namelc, sizeof(pr->pr_name));
else
snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s",
- ppr->pr_name, name);
+ ppr->pr_name, namelc);
/* Change this component of child names. */
FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen,
@@ -1781,6 +1794,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* for now, so new ones will remain unseen until after the module
* handlers have completed.
*/
+ born = pr->pr_uref == 0;
if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
if (pr_flags & PR_PERSIST) {
pr->pr_ref++;
@@ -1850,15 +1864,20 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
/* Let the modules do their work. */
sx_downgrade(&allprison_lock);
- if (created) {
+ if (born) {
error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
if (error) {
- prison_deref(pr, PD_LIST_SLOCKED);
+ (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
+ prison_deref(pr, created
+ ? PD_LIST_SLOCKED
+ : PD_DEREF | PD_LIST_SLOCKED);
goto done_errmsg;
}
}
error = osd_jail_call(pr, PR_METHOD_SET, opts);
if (error) {
+ if (born)
+ (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
prison_deref(pr, created
? PD_LIST_SLOCKED
: PD_DEREF | PD_LIST_SLOCKED);
@@ -1910,7 +1929,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
sx_sunlock(&allprison_lock);
}
- goto done_errmsg;
+ goto done_free;
done_deref_locked:
prison_deref(pr, created
@@ -2290,7 +2309,6 @@ sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
/* Remove all descendants of this prison, then remove this prison. */
pr->pr_ref++;
- pr->pr_flags |= PR_REMOVE;
if (!LIST_EMPTY(&pr->pr_children)) {
mtx_unlock(&pr->pr_mtx);
lpr = NULL;
@@ -2299,7 +2317,6 @@ sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
if (cpr->pr_ref > 0) {
tpr = cpr;
cpr->pr_ref++;
- cpr->pr_flags |= PR_REMOVE;
} else {
/* Already removed - do not do it again. */
tpr = NULL;
@@ -2406,7 +2423,6 @@ sys_jail_attach(struct thread *td, struct jail_attach_args *uap)
static int
do_jail_attach(struct thread *td, struct prison *pr)
{
- struct prison *ppr;
struct proc *p;
struct ucred *newcred, *oldcred;
int error;
@@ -2434,7 +2450,6 @@ do_jail_attach(struct thread *td, struct prison *pr)
/*
* Reparent the newly attached process to this jail.
*/
- ppr = td->td_ucred->cr_prison;
p = td->td_proc;
error = cpuset_setproc_update_set(p, pr->pr_cpuset);
if (error)
@@ -2453,23 +2468,23 @@ do_jail_attach(struct thread *td, struct prison *pr)
newcred = crget();
PROC_LOCK(p);
- oldcred = p->p_ucred;
- setsugid(p);
- crcopy(newcred, oldcred);
+ oldcred = crcopysafe(p, newcred);
newcred->cr_prison = pr;
p->p_ucred = newcred;
+ setsugid(p);
PROC_UNLOCK(p);
#ifdef RACCT
racct_proc_ucred_changed(p, oldcred, newcred);
#endif
+ prison_deref(oldcred->cr_prison, PD_DEREF | PD_DEUREF);
crfree(oldcred);
- prison_deref(ppr, PD_DEREF | PD_DEUREF);
return (0);
+
e_unlock:
VOP_UNLOCK(pr->pr_root, 0);
e_revert_osd:
/* Tell modules this thread is still in its old jail after all. */
- (void)osd_jail_call(ppr, PR_METHOD_ATTACH, td);
+ (void)osd_jail_call(td->td_ucred->cr_prison, PR_METHOD_ATTACH, td);
prison_deref(pr, PD_DEREF | PD_DEUREF);
return (error);
}
@@ -2578,16 +2593,13 @@ prison_allow(struct ucred *cred, unsigned flag)
void
prison_free_locked(struct prison *pr)
{
+ int ref;
mtx_assert(&pr->pr_mtx, MA_OWNED);
- pr->pr_ref--;
- if (pr->pr_ref == 0) {
- mtx_unlock(&pr->pr_mtx);
- TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
- taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
- return;
- }
+ ref = --pr->pr_ref;
mtx_unlock(&pr->pr_mtx);
+ if (ref == 0)
+ taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
}
void
@@ -2598,11 +2610,17 @@ prison_free(struct prison *pr)
prison_free_locked(pr);
}
+/*
+ * Complete a call to either prison_free or prison_proc_free.
+ */
static void
prison_complete(void *context, int pending)
{
+ struct prison *pr = context;
- prison_deref((struct prison *)context, 0);
+ mtx_lock(&pr->pr_mtx);
+ prison_deref(pr, pr->pr_uref
+ ? PD_DEREF | PD_DEUREF | PD_LOCKED : PD_LOCKED);
}
/*
@@ -2615,19 +2633,53 @@ static void
prison_deref(struct prison *pr, int flags)
{
struct prison *ppr, *tpr;
+ int ref, lasturef;
if (!(flags & PD_LOCKED))
mtx_lock(&pr->pr_mtx);
for (;;) {
if (flags & PD_DEUREF) {
+ KASSERT(pr->pr_uref > 0,
+ ("prison_deref PD_DEUREF on a dead prison (jid=%d)",
+ pr->pr_id));
pr->pr_uref--;
+ lasturef = pr->pr_uref == 0;
+ if (lasturef)
+ pr->pr_ref++;
KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0"));
- }
- if (flags & PD_DEREF)
+ } else
+ lasturef = 0;
+ if (flags & PD_DEREF) {
+ KASSERT(pr->pr_ref > 0,
+ ("prison_deref PD_DEREF on a dead prison (jid=%d)",
+ pr->pr_id));
pr->pr_ref--;
- /* If the prison still has references, nothing else to do. */
- if (pr->pr_ref > 0) {
+ }
+ ref = pr->pr_ref;
+ mtx_unlock(&pr->pr_mtx);
+
+ /*
+ * Tell the modules if the last user reference was removed
+ * (even if it sticks around in a dying state).
+ */
+ if (lasturef) {
+ if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) {
+ if (ref > 1) {
+ sx_slock(&allprison_lock);
+ flags |= PD_LIST_SLOCKED;
+ } else {
+ sx_xlock(&allprison_lock);
+ flags |= PD_LIST_XLOCKED;
+ }
+ }
+ (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
+ mtx_lock(&pr->pr_mtx);
+ ref = --pr->pr_ref;
mtx_unlock(&pr->pr_mtx);
+ }
+
+ /* If the prison still has references, nothing else to do. */
+ if (ref > 0) {
if (flags & PD_LIST_SLOCKED)
sx_sunlock(&allprison_lock);
else if (flags & PD_LIST_XLOCKED)
@@ -2635,7 +2687,6 @@ prison_deref(struct prison *pr, int flags)
return;
}
- mtx_unlock(&pr->pr_mtx);
if (flags & PD_LIST_SLOCKED) {
if (!sx_try_upgrade(&allprison_lock)) {
sx_sunlock(&allprison_lock);
@@ -2717,7 +2768,20 @@ prison_proc_free(struct prison *pr)
mtx_lock(&pr->pr_mtx);
KASSERT(pr->pr_uref > 0,
("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
- prison_deref(pr, PD_DEUREF | PD_LOCKED);
+ if (pr->pr_uref > 1)
+ pr->pr_uref--;
+ else {
+ /*
+ * Don't remove the last user reference in this context, which
+ * is expected to be a process that is not only locked, but
+ * also half dead.
+ */
+ pr->pr_ref++;
+ mtx_unlock(&pr->pr_mtx);
+ taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
+ return;
+ }
+ mtx_unlock(&pr->pr_mtx);
}
diff --git a/sys/kern/kern_osd.c b/sys/kern/kern_osd.c
index 184c4f0..26831c3 100644
--- a/sys/kern/kern_osd.c
+++ b/sys/kern/kern_osd.c
@@ -44,6 +44,23 @@ __FBSDID("$FreeBSD$");
/* OSD (Object Specific Data) */
+/*
+ * Lock key:
+ * (m) osd_module_lock
+ * (o) osd_object_lock
+ * (l) osd_list_lock
+ */
+struct osd_master {
+ struct sx osd_module_lock;
+ struct rmlock osd_object_lock;
+ struct mtx osd_list_lock;
+ LIST_HEAD(, osd) osd_list; /* (l) */
+ osd_destructor_t *osd_destructors; /* (o) */
+ osd_method_t *osd_methods; /* (m) */
+ u_int osd_ntslots; /* (m) */
+ const u_int osd_nmethods;
+};
+
static MALLOC_DEFINE(M_OSD, "osd", "Object Specific Data");
static int osd_debug = 0;
@@ -62,25 +79,12 @@ static void do_osd_del(u_int type, struct osd *osd, u_int slot,
int list_locked);
/*
- * Lists of objects with OSD.
- *
- * Lock key:
- * (m) osd_module_lock
- * (o) osd_object_lock
- * (l) osd_list_lock
+ * List of objects with OSD.
*/
-static LIST_HEAD(, osd) osd_list[OSD_LAST + 1]; /* (m) */
-static osd_method_t *osd_methods[OSD_LAST + 1]; /* (m) */
-static u_int osd_nslots[OSD_LAST + 1]; /* (m) */
-static osd_destructor_t *osd_destructors[OSD_LAST + 1]; /* (o) */
-static const u_int osd_nmethods[OSD_LAST + 1] = {
- [OSD_JAIL] = PR_MAXMETHOD,
+struct osd_master osdm[OSD_LAST + 1] = {
+ [OSD_JAIL] = { .osd_nmethods = PR_MAXMETHOD },
};
-static struct sx osd_module_lock[OSD_LAST + 1];
-static struct rmlock osd_object_lock[OSD_LAST + 1];
-static struct mtx osd_list_lock[OSD_LAST + 1];
-
static void
osd_default_destructor(void *value __unused)
{
@@ -102,12 +106,12 @@ osd_register(u_int type, osd_destructor_t destructor, osd_method_t *methods)
if (destructor == NULL)
destructor = osd_default_destructor;
- sx_xlock(&osd_module_lock[type]);
+ sx_xlock(&osdm[type].osd_module_lock);
/*
* First, we try to find unused slot.
*/
- for (i = 0; i < osd_nslots[type]; i++) {
- if (osd_destructors[type][i] == NULL) {
+ for (i = 0; i < osdm[type].osd_ntslots; i++) {
+ if (osdm[type].osd_destructors[i] == NULL) {
OSD_DEBUG("Unused slot found (type=%u, slot=%u).",
type, i);
break;
@@ -116,31 +120,31 @@ osd_register(u_int type, osd_destructor_t destructor, osd_method_t *methods)
/*
* If no unused slot was found, allocate one.
*/
- if (i == osd_nslots[type]) {
- osd_nslots[type]++;
- if (osd_nmethods[type] != 0)
- osd_methods[type] = realloc(osd_methods[type],
- sizeof(osd_method_t) * osd_nslots[type] *
- osd_nmethods[type], M_OSD, M_WAITOK);
- newptr = malloc(sizeof(osd_destructor_t) * osd_nslots[type],
- M_OSD, M_WAITOK);
- rm_wlock(&osd_object_lock[type]);
- bcopy(osd_destructors[type], newptr,
+ if (i == osdm[type].osd_ntslots) {
+ osdm[type].osd_ntslots++;
+ if (osdm[type].osd_nmethods != 0)
+ osdm[type].osd_methods = realloc(osdm[type].osd_methods,
+ sizeof(osd_method_t) * osdm[type].osd_ntslots *
+ osdm[type].osd_nmethods, M_OSD, M_WAITOK);
+ newptr = malloc(sizeof(osd_destructor_t) *
+ osdm[type].osd_ntslots, M_OSD, M_WAITOK);
+ rm_wlock(&osdm[type].osd_object_lock);
+ bcopy(osdm[type].osd_destructors, newptr,
sizeof(osd_destructor_t) * i);
- free(osd_destructors[type], M_OSD);
- osd_destructors[type] = newptr;
- rm_wunlock(&osd_object_lock[type]);
+ free(osdm[type].osd_destructors, M_OSD);
+ osdm[type].osd_destructors = newptr;
+ rm_wunlock(&osdm[type].osd_object_lock);
OSD_DEBUG("New slot allocated (type=%u, slot=%u).",
type, i + 1);
}
- osd_destructors[type][i] = destructor;
- if (osd_nmethods[type] != 0) {
- for (m = 0; m < osd_nmethods[type]; m++)
- osd_methods[type][i * osd_nmethods[type] + m] =
- methods != NULL ? methods[m] : NULL;
+ osdm[type].osd_destructors[i] = destructor;
+ if (osdm[type].osd_nmethods != 0) {
+ for (m = 0; m < osdm[type].osd_nmethods; m++)
+ osdm[type].osd_methods[i * osdm[type].osd_nmethods + m]
+ = methods != NULL ? methods[m] : NULL;
}
- sx_xunlock(&osd_module_lock[type]);
+ sx_xunlock(&osdm[type].osd_module_lock);
return (i + 1);
}
@@ -151,105 +155,142 @@ osd_deregister(u_int type, u_int slot)
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
KASSERT(slot > 0, ("Invalid slot."));
- KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+ KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
- sx_xlock(&osd_module_lock[type]);
- rm_wlock(&osd_object_lock[type]);
+ sx_xlock(&osdm[type].osd_module_lock);
+ rm_wlock(&osdm[type].osd_object_lock);
/*
* Free all OSD for the given slot.
*/
- mtx_lock(&osd_list_lock[type]);
- LIST_FOREACH_SAFE(osd, &osd_list[type], osd_next, tosd)
+ mtx_lock(&osdm[type].osd_list_lock);
+ LIST_FOREACH_SAFE(osd, &osdm[type].osd_list, osd_next, tosd)
do_osd_del(type, osd, slot, 1);
- mtx_unlock(&osd_list_lock[type]);
+ mtx_unlock(&osdm[type].osd_list_lock);
/*
* Set destructor to NULL to free the slot.
*/
- osd_destructors[type][slot - 1] = NULL;
- if (slot == osd_nslots[type]) {
- osd_nslots[type]--;
- osd_destructors[type] = realloc(osd_destructors[type],
- sizeof(osd_destructor_t) * osd_nslots[type], M_OSD,
+ osdm[type].osd_destructors[slot - 1] = NULL;
+ if (slot == osdm[type].osd_ntslots) {
+ osdm[type].osd_ntslots--;
+ osdm[type].osd_destructors = realloc(osdm[type].osd_destructors,
+ sizeof(osd_destructor_t) * osdm[type].osd_ntslots, M_OSD,
M_NOWAIT | M_ZERO);
- if (osd_nmethods[type] != 0)
- osd_methods[type] = realloc(osd_methods[type],
- sizeof(osd_method_t) * osd_nslots[type] *
- osd_nmethods[type], M_OSD, M_NOWAIT | M_ZERO);
+ if (osdm[type].osd_nmethods != 0)
+ osdm[type].osd_methods = realloc(osdm[type].osd_methods,
+ sizeof(osd_method_t) * osdm[type].osd_ntslots *
+ osdm[type].osd_nmethods, M_OSD, M_NOWAIT | M_ZERO);
/*
* We always reallocate to smaller size, so we assume it will
* always succeed.
*/
- KASSERT(osd_destructors[type] != NULL &&
- (osd_nmethods[type] == 0 || osd_methods[type] != NULL),
- ("realloc() failed"));
+ KASSERT(osdm[type].osd_destructors != NULL &&
+ (osdm[type].osd_nmethods == 0 ||
+ osdm[type].osd_methods != NULL), ("realloc() failed"));
OSD_DEBUG("Deregistration of the last slot (type=%u, slot=%u).",
type, slot);
} else {
OSD_DEBUG("Slot deregistration (type=%u, slot=%u).",
type, slot);
}
- rm_wunlock(&osd_object_lock[type]);
- sx_xunlock(&osd_module_lock[type]);
+ rm_wunlock(&osdm[type].osd_object_lock);
+ sx_xunlock(&osdm[type].osd_module_lock);
}
int
osd_set(u_int type, struct osd *osd, u_int slot, void *value)
{
+
+ return (osd_set_reserved(type, osd, slot, NULL, value));
+}
+
+void *
+osd_reserve(u_int slot)
+{
+
+ KASSERT(slot > 0, ("Invalid slot."));
+
+ OSD_DEBUG("Reserving slot array (slot=%u).", slot);
+ return (malloc(sizeof(void *) * slot, M_OSD, M_WAITOK | M_ZERO));
+}
+
+int
+osd_set_reserved(u_int type, struct osd *osd, u_int slot, void *rsv,
+ void *value)
+{
struct rm_priotracker tracker;
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
KASSERT(slot > 0, ("Invalid slot."));
- KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+ KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
- rm_rlock(&osd_object_lock[type], &tracker);
+ rm_rlock(&osdm[type].osd_object_lock, &tracker);
if (slot > osd->osd_nslots) {
+ void *newptr;
+
if (value == NULL) {
OSD_DEBUG(
"Not allocating null slot (type=%u, slot=%u).",
type, slot);
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
+ if (rsv)
+ osd_free_reserved(rsv);
return (0);
- } else if (osd->osd_nslots == 0) {
+ }
+
+ /*
+ * Too few slots allocated here, so we need to extend or create
+ * the array.
+ */
+ if (rsv) {
/*
- * First OSD for this object, so we need to allocate
- * space and put it onto the list.
+ * Use the reserve passed in (assumed to be
+ * the right size).
*/
- osd->osd_slots = malloc(sizeof(void *) * slot, M_OSD,
- M_NOWAIT | M_ZERO);
- if (osd->osd_slots == NULL) {
- rm_runlock(&osd_object_lock[type], &tracker);
- return (ENOMEM);
+ newptr = rsv;
+ if (osd->osd_nslots != 0) {
+ memcpy(newptr, osd->osd_slots,
+ sizeof(void *) * osd->osd_nslots);
+ free(osd->osd_slots, M_OSD);
}
- osd->osd_nslots = slot;
- mtx_lock(&osd_list_lock[type]);
- LIST_INSERT_HEAD(&osd_list[type], osd, osd_next);
- mtx_unlock(&osd_list_lock[type]);
- OSD_DEBUG("Setting first slot (type=%u).", type);
} else {
- void *newptr;
-
- /*
- * Too few slots allocated here, needs to extend
- * the array.
- */
newptr = realloc(osd->osd_slots, sizeof(void *) * slot,
M_OSD, M_NOWAIT | M_ZERO);
if (newptr == NULL) {
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock,
+ &tracker);
return (ENOMEM);
}
- osd->osd_slots = newptr;
- osd->osd_nslots = slot;
- OSD_DEBUG("Growing slots array (type=%u).", type);
}
- }
+ if (osd->osd_nslots == 0) {
+ /*
+ * First OSD for this object, so we need to put it
+ * onto the list.
+ */
+ mtx_lock(&osdm[type].osd_list_lock);
+ LIST_INSERT_HEAD(&osdm[type].osd_list, osd, osd_next);
+ mtx_unlock(&osdm[type].osd_list_lock);
+ OSD_DEBUG("Setting first slot (type=%u).", type);
+ } else
+ OSD_DEBUG("Growing slots array (type=%u).", type);
+ osd->osd_slots = newptr;
+ osd->osd_nslots = slot;
+ } else if (rsv)
+ osd_free_reserved(rsv);
OSD_DEBUG("Setting slot value (type=%u, slot=%u, value=%p).", type,
slot, value);
osd->osd_slots[slot - 1] = value;
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
return (0);
}
+void
+osd_free_reserved(void *rsv)
+{
+
+ OSD_DEBUG("Discarding reserved slot array.");
+ free(rsv, M_OSD);
+}
+
void *
osd_get(u_int type, struct osd *osd, u_int slot)
{
@@ -258,9 +299,9 @@ osd_get(u_int type, struct osd *osd, u_int slot)
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
KASSERT(slot > 0, ("Invalid slot."));
- KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+ KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
- rm_rlock(&osd_object_lock[type], &tracker);
+ rm_rlock(&osdm[type].osd_object_lock, &tracker);
if (slot > osd->osd_nslots) {
value = NULL;
OSD_DEBUG("Slot doesn't exist (type=%u, slot=%u).", type, slot);
@@ -269,7 +310,7 @@ osd_get(u_int type, struct osd *osd, u_int slot)
OSD_DEBUG("Returning slot value (type=%u, slot=%u, value=%p).",
type, slot, value);
}
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
return (value);
}
@@ -278,9 +319,9 @@ osd_del(u_int type, struct osd *osd, u_int slot)
{
struct rm_priotracker tracker;
- rm_rlock(&osd_object_lock[type], &tracker);
+ rm_rlock(&osdm[type].osd_object_lock, &tracker);
do_osd_del(type, osd, slot, 0);
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
}
static void
@@ -290,7 +331,7 @@ do_osd_del(u_int type, struct osd *osd, u_int slot, int list_locked)
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
KASSERT(slot > 0, ("Invalid slot."));
- KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+ KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
OSD_DEBUG("Deleting slot (type=%u, slot=%u).", type, slot);
@@ -299,7 +340,7 @@ do_osd_del(u_int type, struct osd *osd, u_int slot, int list_locked)
return;
}
if (osd->osd_slots[slot - 1] != NULL) {
- osd_destructors[type][slot - 1](osd->osd_slots[slot - 1]);
+ osdm[type].osd_destructors[slot - 1](osd->osd_slots[slot - 1]);
osd->osd_slots[slot - 1] = NULL;
}
for (i = osd->osd_nslots - 1; i >= 0; i--) {
@@ -313,10 +354,10 @@ do_osd_del(u_int type, struct osd *osd, u_int slot, int list_locked)
/* No values left for this object. */
OSD_DEBUG("No more slots left (type=%u).", type);
if (!list_locked)
- mtx_lock(&osd_list_lock[type]);
+ mtx_lock(&osdm[type].osd_list_lock);
LIST_REMOVE(osd, osd_next);
if (!list_locked)
- mtx_unlock(&osd_list_lock[type]);
+ mtx_unlock(&osdm[type].osd_list_lock);
free(osd->osd_slots, M_OSD);
osd->osd_slots = NULL;
osd->osd_nslots = 0;
@@ -342,21 +383,21 @@ osd_call(u_int type, u_int method, void *obj, void *data)
int error, i;
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
- KASSERT(method < osd_nmethods[type], ("Invalid method."));
+ KASSERT(method < osdm[type].osd_nmethods, ("Invalid method."));
/*
* Call this method for every slot that defines it, stopping if an
* error is encountered.
*/
error = 0;
- sx_slock(&osd_module_lock[type]);
- for (i = 0; i < osd_nslots[type]; i++) {
- methodfun =
- osd_methods[type][i * osd_nmethods[type] + method];
+ sx_slock(&osdm[type].osd_module_lock);
+ for (i = 0; i < osdm[type].osd_ntslots; i++) {
+ methodfun = osdm[type].osd_methods[i * osdm[type].osd_nmethods +
+ method];
if (methodfun != NULL && (error = methodfun(obj, data)) != 0)
break;
}
- sx_sunlock(&osd_module_lock[type]);
+ sx_sunlock(&osdm[type].osd_module_lock);
return (error);
}
@@ -374,14 +415,14 @@ osd_exit(u_int type, struct osd *osd)
return;
}
- rm_rlock(&osd_object_lock[type], &tracker);
+ rm_rlock(&osdm[type].osd_object_lock, &tracker);
for (i = 1; i <= osd->osd_nslots; i++) {
- if (osd_destructors[type][i - 1] != NULL)
+ if (osdm[type].osd_destructors[i - 1] != NULL)
do_osd_del(type, osd, i, 0);
else
OSD_DEBUG("Unused slot (type=%u, slot=%u).", type, i);
}
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
OSD_DEBUG("Object exit (type=%u).", type);
}
@@ -391,13 +432,13 @@ osd_init(void *arg __unused)
u_int i;
for (i = OSD_FIRST; i <= OSD_LAST; i++) {
- osd_nslots[i] = 0;
- LIST_INIT(&osd_list[i]);
- sx_init(&osd_module_lock[i], "osd_module");
- rm_init(&osd_object_lock[i], "osd_object");
- mtx_init(&osd_list_lock[i], "osd_list", NULL, MTX_DEF);
- osd_destructors[i] = NULL;
- osd_methods[i] = NULL;
+ sx_init(&osdm[i].osd_module_lock, "osd_module");
+ rm_init(&osdm[i].osd_object_lock, "osd_object");
+ mtx_init(&osdm[i].osd_list_lock, "osd_list", NULL, MTX_DEF);
+ LIST_INIT(&osdm[i].osd_list);
+ osdm[i].osd_destructors = NULL;
+ osdm[i].osd_ntslots = 0;
+ osdm[i].osd_methods = NULL;
}
}
SYSINIT(osd, SI_SUB_LOCK, SI_ORDER_ANY, osd_init, NULL);
diff --git a/sys/kern/sysv_msg.c b/sys/kern/sysv_msg.c
index 3248278..dcbe092 100644
--- a/sys/kern/sysv_msg.c
+++ b/sys/kern/sysv_msg.c
@@ -62,8 +62,10 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/module.h>
+#include <sys/mount.h>
#include <sys/msg.h>
#include <sys/racct.h>
+#include <sys/sx.h>
#include <sys/syscall.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
@@ -80,6 +82,14 @@ static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
static int msginit(void);
static int msgunload(void);
static int sysvmsg_modload(struct module *, int, void *);
+static void msq_remove(struct msqid_kernel *);
+static struct prison *msg_find_prison(struct ucred *);
+static int msq_prison_cansee(struct prison *, struct msqid_kernel *);
+static int msg_prison_check(void *, void *);
+static int msg_prison_set(void *, void *);
+static int msg_prison_get(void *, void *);
+static int msg_prison_remove(void *, void *);
+static void msg_prison_cleanup(struct prison *);
#ifdef MSG_DEBUG
@@ -155,6 +165,7 @@ static struct msgmap *msgmaps; /* MSGSEG msgmap structures */
static struct msg *msghdrs; /* MSGTQL msg headers */
static struct msqid_kernel *msqids; /* MSGMNI msqid_kernel struct's */
static struct mtx msq_mtx; /* global mutex for message queues. */
+static unsigned msg_prison_slot;/* prison OSD slot */
static struct syscall_helper_data msg_syscalls[] = {
SYSCALL_INIT_HELPER(msgctl),
@@ -194,7 +205,15 @@ static struct syscall_helper_data msg32_syscalls[] = {
static int
msginit()
{
+ struct prison *pr;
+ void *rsv;
int i, error;
+ osd_method_t methods[PR_MAXMETHOD] = {
+ [PR_METHOD_CHECK] = msg_prison_check,
+ [PR_METHOD_SET] = msg_prison_set,
+ [PR_METHOD_GET] = msg_prison_get,
+ [PR_METHOD_REMOVE] = msg_prison_remove,
+ };
TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
@@ -258,6 +277,29 @@ msginit()
}
mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
+ /* Set current prisons according to their allow.sysvipc. */
+ msg_prison_slot = osd_jail_register(NULL, methods);
+ rsv = osd_reserve(msg_prison_slot);
+ prison_lock(&prison0);
+ (void)osd_jail_set_reserved(&prison0, msg_prison_slot, rsv, &prison0);
+ prison_unlock(&prison0);
+ rsv = NULL;
+ sx_slock(&allprison_lock);
+ TAILQ_FOREACH(pr, &allprison, pr_list) {
+ if (rsv == NULL)
+ rsv = osd_reserve(msg_prison_slot);
+ prison_lock(pr);
+ if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
+ (void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
+ &prison0);
+ rsv = NULL;
+ }
+ prison_unlock(pr);
+ }
+ if (rsv != NULL)
+ osd_free_reserved(rsv);
+ sx_sunlock(&allprison_lock);
+
error = syscall_helper_register(msg_syscalls);
if (error != 0)
return (error);
@@ -298,6 +340,8 @@ msgunload()
if (msqid != msginfo.msgmni)
return (EBUSY);
+ if (msg_prison_slot != 0)
+ osd_jail_deregister(msg_prison_slot);
#ifdef MAC
for (i = 0; i < msginfo.msgtql; i++)
mac_sysvmsg_destroy(&msghdrs[i]);
@@ -372,6 +416,67 @@ msg_freehdr(msghdr)
#endif
}
+/*
+ * Tear down one message queue slot: return its resource-accounting charges,
+ * drop the credential reference, free every queued message header, mark the
+ * slot free (msg_qbytes == 0) and issue a wakeup on the queue.
+ * msg_prison_cleanup() calls this with msq_mtx held.
+ */
+static void
+msq_remove(struct msqid_kernel *msqkptr)
+{
+	struct msg *cur, *following;
+
+	/* Uncharge while msg_qnum/msg_cbytes still hold the queued totals. */
+	racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
+	racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
+	racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
+	crfree(msqkptr->cred);
+	msqkptr->cred = NULL;
+
+	/* Walk the chain, saving each link before its header is freed. */
+	for (cur = msqkptr->u.msg_first; cur != NULL; cur = following) {
+		following = cur->msg_next;
+		msqkptr->u.msg_cbytes -= cur->msg_ts;
+		msqkptr->u.msg_qnum--;
+		msg_freehdr(cur);
+	}
+
+	/* Both counters must have drained to zero along with the chain. */
+	if (msqkptr->u.msg_cbytes != 0)
+		panic("msg_cbytes is screwed up");
+	if (msqkptr->u.msg_qnum != 0)
+		panic("msg_qnum is screwed up");
+
+	/* A zero msg_qbytes is what marks the slot as free. */
+	msqkptr->u.msg_qbytes = 0;
+
+#ifdef MAC
+	mac_sysvmsq_cleanup(msqkptr);
+#endif
+
+	wakeup(msqkptr);
+}
+
+/*
+ * Return the value stored in the msg_prison_slot OSD slot of the
+ * credential's prison, read under that prison's lock.  Callers in this file
+ * treat a NULL result as "message queues unavailable" and return ENOSYS.
+ */
+static struct prison *
+msg_find_prison(struct ucred *cred)
+{
+	struct prison *owner = cred->cr_prison;
+	struct prison *root;
+
+	prison_lock(owner);
+	root = osd_jail_get(owner, msg_prison_slot);
+	prison_unlock(owner);
+	return (root);
+}
+
+/*
+ * Decide whether root prison rpr may see the given queue.  Returns 0 when
+ * the queue's credential belongs to rpr itself or to a prison for which
+ * prison_ischild(rpr, ...) is true; returns EINVAL otherwise, including
+ * when the queue has no credential.
+ */
+static int
+msq_prison_cansee(struct prison *rpr, struct msqid_kernel *msqkptr)
+{
+	struct prison *owner;
+
+	if (msqkptr->cred == NULL)
+		return (EINVAL);
+	owner = msqkptr->cred->cr_prison;
+	if (rpr == owner || prison_ischild(rpr, owner))
+		return (0);
+	return (EINVAL);
+}
+
#ifndef _SYS_SYSPROTO_H_
struct msgctl_args {
int msqid;
@@ -408,8 +513,10 @@ kern_msgctl(td, msqid, cmd, msqbuf)
{
int rval, error, msqix;
register struct msqid_kernel *msqkptr;
+ struct prison *rpr;
- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+ rpr = msg_find_prison(td->td_ucred);
+ if (rpr == NULL)
return (ENOSYS);
msqix = IPCID_TO_IX(msqid);
@@ -433,6 +540,13 @@ kern_msgctl(td, msqid, cmd, msqbuf)
error = EINVAL;
goto done2;
}
+
+ error = msq_prison_cansee(rpr, msqkptr);
+ if (error != 0) {
+ DPRINTF(("requester can't see prison\n"));
+ goto done2;
+ }
+
#ifdef MAC
error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd);
if (error != 0)
@@ -446,7 +560,9 @@ kern_msgctl(td, msqid, cmd, msqbuf)
case IPC_RMID:
{
+#ifdef MAC
struct msg *msghdr;
+#endif
if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
goto done2;
@@ -468,37 +584,7 @@ kern_msgctl(td, msqid, cmd, msqbuf)
}
#endif
- racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
- racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
- racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
- crfree(msqkptr->cred);
- msqkptr->cred = NULL;
-
- /* Free the message headers */
- msghdr = msqkptr->u.msg_first;
- while (msghdr != NULL) {
- struct msg *msghdr_tmp;
-
- /* Free the segments of each message */
- msqkptr->u.msg_cbytes -= msghdr->msg_ts;
- msqkptr->u.msg_qnum--;
- msghdr_tmp = msghdr;
- msghdr = msghdr->msg_next;
- msg_freehdr(msghdr_tmp);
- }
-
- if (msqkptr->u.msg_cbytes != 0)
- panic("msg_cbytes is screwed up");
- if (msqkptr->u.msg_qnum != 0)
- panic("msg_qnum is screwed up");
-
- msqkptr->u.msg_qbytes = 0; /* Mark it as free */
-
-#ifdef MAC
- mac_sysvmsq_cleanup(msqkptr);
-#endif
-
- wakeup(msqkptr);
+ msq_remove(msqkptr);
}
break;
@@ -535,6 +621,8 @@ kern_msgctl(td, msqid, cmd, msqbuf)
goto done2;
}
*msqbuf = msqkptr->u;
+ if (td->td_ucred->cr_prison != msqkptr->cred->cr_prison)
+ msqbuf->msg_perm.key = IPC_PRIVATE;
break;
default:
@@ -570,7 +658,7 @@ sys_msgget(td, uap)
DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+ if (msg_find_prison(cred) == NULL)
return (ENOSYS);
mtx_lock(&msq_mtx);
@@ -578,6 +666,8 @@ sys_msgget(td, uap)
for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
msqkptr = &msqids[msqid];
if (msqkptr->u.msg_qbytes != 0 &&
+ msqkptr->cred != NULL &&
+ msqkptr->cred->cr_prison == cred->cr_prison &&
msqkptr->u.msg_perm.key == key)
break;
}
@@ -690,12 +780,14 @@ kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
int msqix, segs_needed, error = 0;
register struct msqid_kernel *msqkptr;
register struct msg *msghdr;
+ struct prison *rpr;
short next;
#ifdef RACCT
size_t saved_msgsz;
#endif
- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+ rpr = msg_find_prison(td->td_ucred);
+ if (rpr == NULL)
return (ENOSYS);
mtx_lock(&msq_mtx);
@@ -720,6 +812,11 @@ kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
goto done2;
}
+ if ((error = msq_prison_cansee(rpr, msqkptr))) {
+ DPRINTF(("requester can't see prison\n"));
+ goto done2;
+ }
+
if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
DPRINTF(("requester doesn't have write access\n"));
goto done2;
@@ -1058,10 +1155,12 @@ kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype)
size_t len;
register struct msqid_kernel *msqkptr;
register struct msg *msghdr;
+ struct prison *rpr;
int msqix, error = 0;
short next;
- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+ rpr = msg_find_prison(td->td_ucred);
+ if (rpr == NULL)
return (ENOSYS);
msqix = IPCID_TO_IX(msqid);
@@ -1085,6 +1184,11 @@ kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype)
goto done2;
}
+ if ((error = msq_prison_cansee(rpr, msqkptr))) {
+ DPRINTF(("requester can't see prison\n"));
+ goto done2;
+ }
+
if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
DPRINTF(("requester doesn't have read access\n"));
goto done2;
@@ -1324,9 +1428,29 @@ sys_msgrcv(td, uap)
+/*
+ * Sysctl handler for the msqids node: copies out one struct msqid_kernel per
+ * queue slot (msginfo.msgmni of them).  A slot is copied out zero-filled
+ * when it is free, when the requester has no root prison, or when
+ * msq_prison_cansee() rejects it.  For visible queues owned by a prison
+ * other than the requester's own, the key is replaced with IPC_PRIVATE.
+ * Returns 0 or the first error from SYSCTL_OUT().
+ */
static int
sysctl_msqids(SYSCTL_HANDLER_ARGS)
{
+ struct msqid_kernel tmsqk;
+ struct prison *pr, *rpr;
+ int error, i;
- return (SYSCTL_OUT(req, msqids,
- sizeof(struct msqid_kernel) * msginfo.msgmni));
+ pr = req->td->td_ucred->cr_prison;
+ rpr = msg_find_prison(req->td->td_ucred);
+ error = 0;
+ for (i = 0; i < msginfo.msgmni; i++) {
+ /* Snapshot the slot under msq_mtx; copy out after unlocking. */
+ mtx_lock(&msq_mtx);
+ if (msqids[i].u.msg_qbytes == 0 || rpr == NULL ||
+ msq_prison_cansee(rpr, &msqids[i]) != 0)
+ bzero(&tmsqk, sizeof(tmsqk));
+ else {
+ tmsqk = msqids[i];
+ /* cred is non-NULL here: msq_prison_cansee() checked it. */
+ if (tmsqk.cred->cr_prison != pr)
+ tmsqk.u.msg_perm.key = IPC_PRIVATE;
+ }
+ mtx_unlock(&msq_mtx);
+ error = SYSCTL_OUT(req, &tmsqk, sizeof(tmsqk));
+ if (error != 0)
+ break;
+ }
+ return (error);
}
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
@@ -1341,9 +1465,185 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0,
"Size of a message segment");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0,
"Number of message segments");
-SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLTYPE_OPAQUE | CTLFLAG_RD,
+SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids,
+ CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
NULL, 0, sysctl_msqids, "", "Message queue IDs");
+/*
+ * PR_METHOD_CHECK handler: validate a requested "sysvmsg" jail parameter.
+ *
+ * sysvmsg is a jailsys integer.  "disable" is always accepted; "new" and
+ * "inherit" are accepted only when the parent jail has a root prison in
+ * msg_prison_slot (EPERM otherwise); any other value is EINVAL.  When the
+ * parameter is absent there is nothing to check.
+ */
+static int
+msg_prison_check(void *obj, void *data)
+{
+	struct prison *pr = obj;
+	struct vfsoptlist *opts = data;
+	struct prison *parent_root;
+	int error, jsys;
+
+	error = vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys));
+	if (error == ENOENT)
+		return (0);
+	if (error != 0)
+		return (error);
+
+	if (jsys == JAIL_SYS_DISABLE)
+		return (0);
+	if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
+		return (EINVAL);
+
+	/* Enabling requires that the parent jail is itself enabled. */
+	prison_lock(pr->pr_parent);
+	parent_root = osd_jail_get(pr->pr_parent, msg_prison_slot);
+	prison_unlock(pr->pr_parent);
+	return (parent_root == NULL ? EPERM : 0);
+}
+
+/*
+ * PR_METHOD_SET handler: apply the "sysvmsg" jail parameter to prison obj.
+ *
+ * Local names: orpr is the root prison previously stored in this jail's
+ * msg_prison_slot, nrpr the root being installed, and tpr/trpr a descendant
+ * jail and its stored root.  Always returns 0.
+ */
+static int
+msg_prison_set(void *obj, void *data)
+{
+ struct prison *pr = obj;
+ struct prison *tpr, *orpr, *nrpr, *trpr;
+ struct vfsoptlist *opts = data;
+ void *rsv;
+ int jsys, descend;
+
+ /*
+ * sysvmsg controls which jail is the root of the associated msgs (this
+ * jail or same as the parent), or if the feature is available at all.
+ */
+ /*
+ * Without an explicit sysvmsg, fall back to the allow.sysvipc flag:
+ * set means inherit, "no" form means disable, neither means leave the
+ * jail untouched (jsys == -1).
+ */
+ if (vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)) == ENOENT)
+ jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0)
+ ? JAIL_SYS_INHERIT
+ : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0)
+ ? JAIL_SYS_DISABLE
+ : -1;
+ if (jsys == JAIL_SYS_DISABLE) {
+ prison_lock(pr);
+ orpr = osd_jail_get(pr, msg_prison_slot);
+ if (orpr != NULL)
+ osd_jail_del(pr, msg_prison_slot);
+ prison_unlock(pr);
+ if (orpr != NULL) {
+ /* This jail was its own root: its queues go away with it. */
+ if (orpr == pr)
+ msg_prison_cleanup(pr);
+ /* Disable all child jails as well. */
+ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
+ prison_lock(tpr);
+ trpr = osd_jail_get(tpr, msg_prison_slot);
+ if (trpr != NULL) {
+ osd_jail_del(tpr, msg_prison_slot);
+ prison_unlock(tpr);
+ if (trpr == tpr)
+ msg_prison_cleanup(tpr);
+ } else {
+ prison_unlock(tpr);
+ /* NOTE(review): presumably stops the walk from entering tpr's children; confirm against FOREACH_PRISON_DESCENDANT. */
+ descend = 0;
+ }
+ }
+ }
+ } else if (jsys != -1) {
+ if (jsys == JAIL_SYS_NEW)
+ nrpr = pr;
+ else {
+ prison_lock(pr->pr_parent);
+ nrpr = osd_jail_get(pr->pr_parent, msg_prison_slot);
+ prison_unlock(pr->pr_parent);
+ }
+ /* Reserve the OSD entry before taking the prison lock. */
+ rsv = osd_reserve(msg_prison_slot);
+ prison_lock(pr);
+ orpr = osd_jail_get(pr, msg_prison_slot);
+ if (orpr != nrpr)
+ (void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
+ nrpr);
+ else
+ osd_free_reserved(rsv);
+ prison_unlock(pr);
+ if (orpr != nrpr) {
+ /* The jail stopped being its own root: drop its queues. */
+ if (orpr == pr)
+ msg_prison_cleanup(pr);
+ if (orpr != NULL) {
+ /* Change child jails matching the old root. */
+ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
+ prison_lock(tpr);
+ trpr = osd_jail_get(tpr,
+ msg_prison_slot);
+ if (trpr == orpr) {
+ (void)osd_jail_set(tpr,
+ msg_prison_slot, nrpr);
+ prison_unlock(tpr);
+ if (trpr == tpr)
+ msg_prison_cleanup(tpr);
+ } else {
+ prison_unlock(tpr);
+ descend = 0;
+ }
+ }
+ }
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * PR_METHOD_GET handler: report "sysvmsg" for prison obj, derived from the
+ * root prison stored in msg_prison_slot: none means disable, the jail
+ * itself means new, anything else means inherit.  A vfs_setopt() result of
+ * ENOENT is not treated as an error.
+ */
+static int
+msg_prison_get(void *obj, void *data)
+{
+	struct prison *pr = obj;
+	struct vfsoptlist *opts = data;
+	struct prison *rpr;
+	int error, jsys;
+
+	prison_lock(pr);
+	rpr = osd_jail_get(pr, msg_prison_slot);
+	prison_unlock(pr);
+
+	if (rpr == NULL)
+		jsys = JAIL_SYS_DISABLE;
+	else if (rpr == pr)
+		jsys = JAIL_SYS_NEW;
+	else
+		jsys = JAIL_SYS_INHERIT;
+
+	error = vfs_setopt(opts, "sysvmsg", &jsys, sizeof(jsys));
+	return (error == ENOENT ? 0 : error);
+}
+
+/*
+ * PR_METHOD_REMOVE handler: when prison obj is its own message-queue root,
+ * remove the queues that belong to it.  Always returns 0.
+ */
+static int
+msg_prison_remove(void *obj, void *data __unused)
+{
+	struct prison *pr = obj;
+	int own_root;
+
+	prison_lock(pr);
+	own_root = (osd_jail_get(pr, msg_prison_slot) == pr);
+	prison_unlock(pr);
+
+	if (own_root)
+		msg_prison_cleanup(pr);
+	return (0);
+}
+
+/*
+ * Remove every in-use message queue whose credential belongs to prison pr.
+ * The scan and the removals run with msq_mtx held.
+ */
+static void
+msg_prison_cleanup(struct prison *pr)
+{
+	struct msqid_kernel *slot;
+	int idx;
+
+	mtx_lock(&msq_mtx);
+	for (idx = 0; idx < msginfo.msgmni; idx++) {
+		slot = &msqids[idx];
+		/* Skip free slots and slots without a credential. */
+		if (slot->u.msg_qbytes == 0 || slot->cred == NULL)
+			continue;
+		if (slot->cred->cr_prison == pr)
+			msq_remove(slot);
+	}
+	mtx_unlock(&msq_mtx);
+}
+
+SYSCTL_JAIL_PARAM_SYS_NODE(sysvmsg, CTLFLAG_RW, "SYSV message queues");
+
#ifdef COMPAT_FREEBSD32
int
freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap)
@@ -1522,8 +1822,6 @@ sys_msgsys(td, uap)
{
int error;
- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
- return (ENOSYS);
if (uap->which < 0 ||
uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
return (EINVAL);
diff --git a/sys/kern/sysv_sem.c b/sys/kern/sysv_sem.c
index 4337d4d..c39d93d 100644
--- a/sys/kern/sysv_sem.c
+++ b/sys/kern/sysv_sem.c
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/racct.h>
#include <sys/sem.h>
+#include <sys/sx.h>
#include <sys/syscall.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
@@ -78,7 +79,16 @@ static int sysvsem_modload(struct module *, int, void *);
static int semunload(void);
static void semexit_myhook(void *arg, struct proc *p);
static int sysctl_sema(SYSCTL_HANDLER_ARGS);
-static int semvalid(int semid, struct semid_kernel *semakptr);
+static int semvalid(int semid, struct prison *rpr,
+ struct semid_kernel *semakptr);
+static void sem_remove(int semidx, struct ucred *cred);
+static struct prison *sem_find_prison(struct ucred *);
+static int sem_prison_cansee(struct prison *, struct semid_kernel *);
+static int sem_prison_check(void *, void *);
+static int sem_prison_set(void *, void *);
+static int sem_prison_get(void *, void *);
+static int sem_prison_remove(void *, void *);
+static void sem_prison_cleanup(struct prison *);
#ifndef _SYS_SYSPROTO_H_
struct __semctl_args;
@@ -104,6 +114,7 @@ LIST_HEAD(, sem_undo) semu_list; /* list of active undo structures */
LIST_HEAD(, sem_undo) semu_free_list; /* list of free undo structures */
static int *semu; /* undo structure pool */
static eventhandler_tag semexit_tag;
+static unsigned sem_prison_slot; /* prison OSD slot */
#define SEMUNDO_MTX sem_undo_mtx
#define SEMUNDO_LOCK() mtx_lock(&SEMUNDO_MTX);
@@ -208,7 +219,8 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, semvmx, CTLFLAG_RW, &seminfo.semvmx, 0,
"Semaphore maximum value");
SYSCTL_INT(_kern_ipc, OID_AUTO, semaem, CTLFLAG_RW, &seminfo.semaem, 0,
"Adjust on exit max value");
-SYSCTL_PROC(_kern_ipc, OID_AUTO, sema, CTLTYPE_OPAQUE | CTLFLAG_RD,
+SYSCTL_PROC(_kern_ipc, OID_AUTO, sema,
+ CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
NULL, 0, sysctl_sema, "", "Semaphore id pool");
static struct syscall_helper_data sem_syscalls[] = {
@@ -247,7 +259,15 @@ static struct syscall_helper_data sem32_syscalls[] = {
static int
seminit(void)
{
+ struct prison *pr;
+ void *rsv;
int i, error;
+ osd_method_t methods[PR_MAXMETHOD] = {
+ [PR_METHOD_CHECK] = sem_prison_check,
+ [PR_METHOD_SET] = sem_prison_set,
+ [PR_METHOD_GET] = sem_prison_get,
+ [PR_METHOD_REMOVE] = sem_prison_remove,
+ };
TUNABLE_INT_FETCH("kern.ipc.semmni", &seminfo.semmni);
TUNABLE_INT_FETCH("kern.ipc.semmns", &seminfo.semmns);
@@ -288,6 +308,29 @@ seminit(void)
semexit_tag = EVENTHANDLER_REGISTER(process_exit, semexit_myhook, NULL,
EVENTHANDLER_PRI_ANY);
+ /* Set current prisons according to their allow.sysvipc. */
+ sem_prison_slot = osd_jail_register(NULL, methods);
+ rsv = osd_reserve(sem_prison_slot);
+ prison_lock(&prison0);
+ (void)osd_jail_set_reserved(&prison0, sem_prison_slot, rsv, &prison0);
+ prison_unlock(&prison0);
+ rsv = NULL;
+ sx_slock(&allprison_lock);
+ TAILQ_FOREACH(pr, &allprison, pr_list) {
+ if (rsv == NULL)
+ rsv = osd_reserve(sem_prison_slot);
+ prison_lock(pr);
+ if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
+ (void)osd_jail_set_reserved(pr, sem_prison_slot, rsv,
+ &prison0);
+ rsv = NULL;
+ }
+ prison_unlock(pr);
+ }
+ if (rsv != NULL)
+ osd_free_reserved(rsv);
+ sx_sunlock(&allprison_lock);
+
error = syscall_helper_register(sem_syscalls);
if (error != 0)
return (error);
@@ -313,6 +356,8 @@ semunload(void)
#endif
syscall_helper_unregister(sem_syscalls);
EVENTHANDLER_DEREGISTER(process_exit, semexit_tag);
+ if (sem_prison_slot != 0)
+ osd_jail_deregister(sem_prison_slot);
#ifdef MAC
for (i = 0; i < seminfo.semmni; i++)
mac_sysvsem_destroy(&sema[i]);
@@ -499,11 +544,74 @@ semundo_clear(int semid, int semnum)
}
+/*
+ * Validate a semaphore set against a caller-supplied id.  Returns EINVAL if
+ * the set is not allocated, if its sequence number differs from the one
+ * encoded in semid, or if sem_prison_cansee() rejects it for root prison
+ * rpr; returns 0 otherwise.
+ */
static int
-semvalid(int semid, struct semid_kernel *semakptr)
+semvalid(int semid, struct prison *rpr, struct semid_kernel *semakptr)
{
return ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
- semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) ? EINVAL : 0);
+ semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) ||
+ sem_prison_cansee(rpr, semakptr) ? EINVAL : 0);
+}
+
+/*
+ * Release semaphore set sema[semidx] and compact the shared sem[] pool.
+ *
+ * The set's cuid/uid are overwritten with cred's uid (0 when cred is NULL,
+ * which is what sem_prison_cleanup() passes), its mode is cleared, the
+ * RACCT_NSEM charge and the credential reference are dropped, its undo
+ * entries are cleared and a wakeup is issued on the set.  The semaphores
+ * stored after the set in sem[] are then moved down over the hole and the
+ * sem_base of every later allocated set is adjusted, with that set's mutex
+ * held across the move.
+ */
+static void
+sem_remove(int semidx, struct ucred *cred)
+{
+	struct semid_kernel *semakptr;
+	int i;
+
+	KASSERT(semidx >= 0 && semidx < seminfo.semmni,
+	    ("semidx out of bounds"));
+	semakptr = &sema[semidx];
+	semakptr->u.sem_perm.cuid = cred != NULL ? cred->cr_uid : 0;
+	semakptr->u.sem_perm.uid = cred != NULL ? cred->cr_uid : 0;
+	semakptr->u.sem_perm.mode = 0;
+	racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems);
+	crfree(semakptr->cred);
+	semakptr->cred = NULL;
+	SEMUNDO_LOCK();
+	semundo_clear(semidx, -1);
+	SEMUNDO_UNLOCK();
+#ifdef MAC
+	mac_sysvsem_cleanup(semakptr);
+#endif
+	wakeup(semakptr);
+
+	/* Lock every allocated set whose semaphores sit above the hole. */
+	for (i = 0; i < seminfo.semmni; i++) {
+		if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
+		    sema[i].u.sem_base > semakptr->u.sem_base)
+			mtx_lock_flags(&sema_mtx[i], LOP_DUPOK);
+	}
+
+	/*
+	 * Slide the tail of the pool down over the hole.  The last source
+	 * element is sem[semtot - 1], so the destination index must stop at
+	 * semtot - sem_nsems; letting it run to semtot would read sem_nsems
+	 * entries beyond the in-use region (and beyond the pool itself when
+	 * the pool is full).
+	 */
+	for (i = semakptr->u.sem_base - sem;
+	    i < semtot - semakptr->u.sem_nsems; i++)
+		sem[i] = sem[i + semakptr->u.sem_nsems];
+
+	/* Rebase the moved sets and release the locks taken above. */
+	for (i = 0; i < seminfo.semmni; i++) {
+		if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
+		    sema[i].u.sem_base > semakptr->u.sem_base) {
+			sema[i].u.sem_base -= semakptr->u.sem_nsems;
+			mtx_unlock(&sema_mtx[i]);
+		}
+	}
+	semtot -= semakptr->u.sem_nsems;
+}
+
+/*
+ * Fetch, under the prison lock, the root prison recorded in sem_prison_slot
+ * for the prison that owns cred.  The result is NULL when nothing is stored
+ * in the slot.
+ */
+static struct prison *
+sem_find_prison(struct ucred *cred)
+{
+	struct prison *root;
+
+	prison_lock(cred->cr_prison);
+	root = osd_jail_get(cred->cr_prison, sem_prison_slot);
+	prison_unlock(cred->cr_prison);
+	return (root);
+}
+
+/*
+ * Visibility check for a semaphore set: 0 when the set's credential belongs
+ * to root prison rpr or to a prison accepted by prison_ischild(rpr, ...),
+ * EINVAL otherwise (including a set with no credential).
+ */
+static int
+sem_prison_cansee(struct prison *rpr, struct semid_kernel *semakptr)
+{
+	struct prison *owner;
+
+	if (semakptr->cred == NULL)
+		return (EINVAL);
+	owner = semakptr->cred->cr_prison;
+	if (rpr != owner && !prison_ischild(rpr, owner))
+		return (EINVAL);
+	return (0);
}
/*
@@ -582,6 +690,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
u_short *array;
struct ucred *cred = td->td_ucred;
int i, error;
+ struct prison *rpr;
struct semid_ds *sbuf;
struct semid_kernel *semakptr;
struct mtx *sema_mtxp;
@@ -590,7 +699,9 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
DPRINTF(("call to semctl(%d, %d, %d, 0x%p)\n",
semid, semnum, cmd, arg));
- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+
+ /* No root prison in sem_prison_slot means semaphores are disabled here. */
+ rpr = sem_find_prison(td->td_ucred);
+ if (rpr == NULL)
return (ENOSYS);
array = NULL;
@@ -610,6 +721,8 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
error = EINVAL;
goto done2;
}
+ if ((error = sem_prison_cansee(rpr, semakptr)))
+ goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
goto done2;
#ifdef MAC
@@ -618,6 +731,8 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
goto done2;
#endif
bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds));
+ if (cred->cr_prison != semakptr->cred->cr_prison)
+ arg->buf->sem_perm.key = IPC_PRIVATE;
*rval = IXSEQ_TO_IPCID(semid, semakptr->u.sem_perm);
mtx_unlock(sema_mtxp);
return (0);
@@ -632,6 +747,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
if (cmd == IPC_RMID)
mtx_lock(&sem_mtx);
mtx_lock(sema_mtxp);
+
#ifdef MAC
error = mac_sysvsem_check_semctl(cred, semakptr, cmd);
if (error != 0)
@@ -643,42 +759,15 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
switch (cmd) {
case IPC_RMID:
- if ((error = semvalid(semid, semakptr)) != 0)
+ if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M)))
goto done2;
- semakptr->u.sem_perm.cuid = cred->cr_uid;
- semakptr->u.sem_perm.uid = cred->cr_uid;
- semakptr->u.sem_perm.mode = 0;
- racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems);
- crfree(semakptr->cred);
- semakptr->cred = NULL;
- SEMUNDO_LOCK();
- semundo_clear(semidx, -1);
- SEMUNDO_UNLOCK();
-#ifdef MAC
- mac_sysvsem_cleanup(semakptr);
-#endif
- wakeup(semakptr);
- for (i = 0; i < seminfo.semmni; i++) {
- if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
- sema[i].u.sem_base > semakptr->u.sem_base)
- mtx_lock_flags(&sema_mtx[i], LOP_DUPOK);
- }
- for (i = semakptr->u.sem_base - sem; i < semtot; i++)
- sem[i] = sem[i + semakptr->u.sem_nsems];
- for (i = 0; i < seminfo.semmni; i++) {
- if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
- sema[i].u.sem_base > semakptr->u.sem_base) {
- sema[i].u.sem_base -= semakptr->u.sem_nsems;
- mtx_unlock(&sema_mtx[i]);
- }
- }
- semtot -= semakptr->u.sem_nsems;
+ sem_remove(semidx, cred);
break;
case IPC_SET:
- if ((error = semvalid(semid, semakptr)) != 0)
+ if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M)))
goto done2;
@@ -691,15 +780,17 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
break;
case IPC_STAT:
- if ((error = semvalid(semid, semakptr)) != 0)
+ if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
goto done2;
bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds));
+ if (cred->cr_prison != semakptr->cred->cr_prison)
+ arg->buf->sem_perm.key = IPC_PRIVATE;
break;
case GETNCNT:
- if ((error = semvalid(semid, semakptr)) != 0)
+ if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
goto done2;
@@ -711,7 +802,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
break;
case GETPID:
- if ((error = semvalid(semid, semakptr)) != 0)
+ if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
goto done2;
@@ -723,7 +814,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
break;
case GETVAL:
- if ((error = semvalid(semid, semakptr)) != 0)
+ if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
goto done2;
@@ -759,7 +850,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
mtx_unlock(sema_mtxp);
array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK);
mtx_lock(sema_mtxp);
- if ((error = semvalid(semid, semakptr)) != 0)
+ if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
KASSERT(count == semakptr->u.sem_nsems, ("nsems changed"));
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
@@ -772,7 +863,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
break;
case GETZCNT:
- if ((error = semvalid(semid, semakptr)) != 0)
+ if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
goto done2;
@@ -784,7 +875,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
break;
case SETVAL:
- if ((error = semvalid(semid, semakptr)) != 0)
+ if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W)))
goto done2;
@@ -815,7 +906,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
mtx_lock(sema_mtxp);
if (error)
break;
- if ((error = semvalid(semid, semakptr)) != 0)
+ if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
KASSERT(count == semakptr->u.sem_nsems, ("nsems changed"));
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W)))
@@ -865,13 +956,16 @@ sys_semget(struct thread *td, struct semget_args *uap)
struct ucred *cred = td->td_ucred;
DPRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg));
- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+
+ if (sem_find_prison(cred) == NULL)
return (ENOSYS);
mtx_lock(&sem_mtx);
if (key != IPC_PRIVATE) {
for (semid = 0; semid < seminfo.semmni; semid++) {
if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) &&
+ sema[semid].cred != NULL &&
+ sema[semid].cred->cr_prison == cred->cr_prison &&
sema[semid].u.sem_perm.key == key)
break;
}
@@ -988,6 +1082,7 @@ sys_semop(struct thread *td, struct semop_args *uap)
struct sembuf small_sops[SMALL_SOPS];
int semid = uap->semid;
size_t nsops = uap->nsops;
+ struct prison *rpr;
struct sembuf *sops;
struct semid_kernel *semakptr;
struct sembuf *sopptr = 0;
@@ -1004,7 +1099,8 @@ sys_semop(struct thread *td, struct semop_args *uap)
#endif
DPRINTF(("call to semop(%d, %p, %u)\n", semid, sops, nsops));
- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+ /* No root prison in sem_prison_slot means semaphores are disabled here. */
+ rpr = sem_find_prison(td->td_ucred);
+ if (rpr == NULL)
return (ENOSYS);
semid = IPCID_TO_IX(semid); /* Convert back to zero origin */
@@ -1054,6 +1150,8 @@ sys_semop(struct thread *td, struct semop_args *uap)
error = EINVAL;
goto done2;
}
+ if ((error = sem_prison_cansee(rpr, semakptr)) != 0)
+ goto done2;
/*
* Initial pass thru sops to see what permissions are needed.
* Also perform any checks that don't need repeating on each
@@ -1377,11 +1475,207 @@ semexit_myhook(void *arg, struct proc *p)
+/*
+ * Sysctl handler for the sema node: copies out one struct semid_kernel per
+ * slot (seminfo.semmni of them).  Slots that are unallocated, or that the
+ * requester's root prison cannot see, are copied out zero-filled; visible
+ * sets owned by a prison other than the requester's own have their key
+ * replaced with IPC_PRIVATE.  Returns 0 or the first SYSCTL_OUT() error.
+ */
static int
sysctl_sema(SYSCTL_HANDLER_ARGS)
{
+	struct semid_kernel tsemak;
+	struct prison *pr, *rpr;
+	int error, i;
+
+	pr = req->td->td_ucred->cr_prison;
+	rpr = sem_find_prison(req->td->td_ucred);
+	for (error = 0, i = 0; error == 0 && i < seminfo.semmni; i++) {
+		/* Snapshot under the per-set mutex; copy out unlocked. */
+		mtx_lock(&sema_mtx[i]);
+		if (rpr != NULL &&
+		    (sema[i].u.sem_perm.mode & SEM_ALLOC) != 0 &&
+		    sem_prison_cansee(rpr, &sema[i]) == 0) {
+			tsemak = sema[i];
+			if (tsemak.cred->cr_prison != pr)
+				tsemak.u.sem_perm.key = IPC_PRIVATE;
+		} else
+			bzero(&tsemak, sizeof(tsemak));
+		mtx_unlock(&sema_mtx[i]);
+		error = SYSCTL_OUT(req, &tsemak, sizeof(tsemak));
+	}
+	return (error);
+}
+
+/*
+ * PR_METHOD_CHECK handler: validate a requested "sysvsem" jail parameter.
+ * Returns 0 when the parameter is absent or acceptable, the vfs_copyopt()
+ * error when it cannot be read, EPERM when enabling is requested but the
+ * parent jail has no root prison in sem_prison_slot, and EINVAL for an
+ * unrecognized value.
+ */
+static int
+sem_prison_check(void *obj, void *data)
+{
+ struct prison *pr = obj;
+ struct prison *prpr;
+ struct vfsoptlist *opts = data;
+ int error, jsys;
+
+ /*
+ * sysvsem is a jailsys integer.
+ * It must be "disable" if the parent jail is disabled.
+ */
+ error = vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys));
+ if (error != ENOENT) {
+ if (error != 0)
+ return (error);
+ switch (jsys) {
+ case JAIL_SYS_DISABLE:
+ break;
+ case JAIL_SYS_NEW:
+ case JAIL_SYS_INHERIT:
+ /* prpr is the parent's root prison; NULL means the parent is disabled. */
+ prison_lock(pr->pr_parent);
+ prpr = osd_jail_get(pr->pr_parent, sem_prison_slot);
+ prison_unlock(pr->pr_parent);
+ if (prpr == NULL)
+ return (EPERM);
+ break;
+ default:
+ return (EINVAL);
+ }
+ }
- return (SYSCTL_OUT(req, sema,
- sizeof(struct semid_kernel) * seminfo.semmni));
+ return (0);
}
+/*
+ * PR_METHOD_SET handler: apply the "sysvsem" jail parameter to prison obj.
+ *
+ * Local names: orpr is the root prison previously stored in this jail's
+ * sem_prison_slot, nrpr the root being installed, and tpr/trpr a descendant
+ * jail and its stored root.  Always returns 0.
+ */
+static int
+sem_prison_set(void *obj, void *data)
+{
+ struct prison *pr = obj;
+ struct prison *tpr, *orpr, *nrpr, *trpr;
+ struct vfsoptlist *opts = data;
+ void *rsv;
+ int jsys, descend;
+
+ /*
+ * sysvsem controls which jail is the root of the associated sems (this
+ * jail or same as the parent), or if the feature is available at all.
+ */
+ /*
+ * Without an explicit sysvsem, fall back to the allow.sysvipc flag:
+ * set means inherit, "no" form means disable, neither means leave the
+ * jail untouched (jsys == -1).
+ */
+ if (vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys)) == ENOENT)
+ jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0)
+ ? JAIL_SYS_INHERIT
+ : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0)
+ ? JAIL_SYS_DISABLE
+ : -1;
+ if (jsys == JAIL_SYS_DISABLE) {
+ prison_lock(pr);
+ orpr = osd_jail_get(pr, sem_prison_slot);
+ if (orpr != NULL)
+ osd_jail_del(pr, sem_prison_slot);
+ prison_unlock(pr);
+ if (orpr != NULL) {
+ /* This jail was its own root: its semaphores go away with it. */
+ if (orpr == pr)
+ sem_prison_cleanup(pr);
+ /* Disable all child jails as well. */
+ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
+ prison_lock(tpr);
+ trpr = osd_jail_get(tpr, sem_prison_slot);
+ if (trpr != NULL) {
+ osd_jail_del(tpr, sem_prison_slot);
+ prison_unlock(tpr);
+ if (trpr == tpr)
+ sem_prison_cleanup(tpr);
+ } else {
+ prison_unlock(tpr);
+ /* NOTE(review): presumably stops the walk from entering tpr's children; confirm against FOREACH_PRISON_DESCENDANT. */
+ descend = 0;
+ }
+ }
+ }
+ } else if (jsys != -1) {
+ if (jsys == JAIL_SYS_NEW)
+ nrpr = pr;
+ else {
+ prison_lock(pr->pr_parent);
+ nrpr = osd_jail_get(pr->pr_parent, sem_prison_slot);
+ prison_unlock(pr->pr_parent);
+ }
+ /* Reserve the OSD entry before taking the prison lock. */
+ rsv = osd_reserve(sem_prison_slot);
+ prison_lock(pr);
+ orpr = osd_jail_get(pr, sem_prison_slot);
+ if (orpr != nrpr)
+ (void)osd_jail_set_reserved(pr, sem_prison_slot, rsv,
+ nrpr);
+ else
+ osd_free_reserved(rsv);
+ prison_unlock(pr);
+ if (orpr != nrpr) {
+ /* The jail stopped being its own root: drop its semaphores. */
+ if (orpr == pr)
+ sem_prison_cleanup(pr);
+ if (orpr != NULL) {
+ /* Change child jails matching the old root. */
+ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
+ prison_lock(tpr);
+ trpr = osd_jail_get(tpr,
+ sem_prison_slot);
+ if (trpr == orpr) {
+ (void)osd_jail_set(tpr,
+ sem_prison_slot, nrpr);
+ prison_unlock(tpr);
+ if (trpr == tpr)
+ sem_prison_cleanup(tpr);
+ } else {
+ prison_unlock(tpr);
+ descend = 0;
+ }
+ }
+ }
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * PR_METHOD_GET handler: report "sysvsem" for prison obj based on the root
+ * prison stored in sem_prison_slot (none: disable, the jail itself: new,
+ * otherwise: inherit).  ENOENT from vfs_setopt() is mapped to success.
+ */
+static int
+sem_prison_get(void *obj, void *data)
+{
+	struct prison *pr = obj;
+	struct vfsoptlist *opts = data;
+	struct prison *rpr;
+	int error, jsys;
+
+	prison_lock(pr);
+	rpr = osd_jail_get(pr, sem_prison_slot);
+	prison_unlock(pr);
+
+	if (rpr == NULL)
+		jsys = JAIL_SYS_DISABLE;
+	else
+		jsys = (rpr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
+
+	error = vfs_setopt(opts, "sysvsem", &jsys, sizeof(jsys));
+	if (error != ENOENT)
+		return (error);
+	return (0);
+}
+
+/*
+ * PR_METHOD_REMOVE handler: if prison obj is recorded as its own semaphore
+ * root, remove the semaphore sets that belong to it.  Always returns 0.
+ */
+static int
+sem_prison_remove(void *obj, void *data __unused)
+{
+	struct prison *pr = obj;
+	struct prison *root;
+
+	prison_lock(pr);
+	root = osd_jail_get(pr, sem_prison_slot);
+	prison_unlock(pr);
+
+	if (root != pr)
+		return (0);
+	sem_prison_cleanup(pr);
+	return (0);
+}
+
+/*
+ * Remove every allocated semaphore set whose credential belongs to prison
+ * pr.  The scan holds sem_mtx; each removal additionally holds that set's
+ * own mutex around sem_remove().
+ */
+static void
+sem_prison_cleanup(struct prison *pr)
+{
+	int idx;
+
+	mtx_lock(&sem_mtx);
+	for (idx = 0; idx < seminfo.semmni; idx++) {
+		if ((sema[idx].u.sem_perm.mode & SEM_ALLOC) == 0)
+			continue;
+		if (sema[idx].cred == NULL || sema[idx].cred->cr_prison != pr)
+			continue;
+		mtx_lock(&sema_mtx[idx]);
+		sem_remove(idx, NULL);
+		mtx_unlock(&sema_mtx[idx]);
+	}
+	mtx_unlock(&sem_mtx);
+}
+
+SYSCTL_JAIL_PARAM_SYS_NODE(sysvsem, CTLFLAG_RW, "SYSV semaphores");
+
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
@@ -1408,8 +1702,6 @@ sys_semsys(td, uap)
{
int error;
- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
- return (ENOSYS);
if (uap->which < 0 ||
uap->which >= sizeof(semcalls)/sizeof(semcalls[0]))
return (EINVAL);
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index 613a462..cced3e3 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -112,7 +112,8 @@ static int shmget_existing(struct thread *td, struct shmget_args *uap,
static int shm_last_free, shm_nused, shmalloced;
vm_size_t shm_committed;
-static struct shmid_kernel *shmsegs;
+static struct shmid_kernel *shmsegs;
+static unsigned shm_prison_slot;
struct shmmap_state {
vm_offset_t va;
@@ -120,8 +121,8 @@ struct shmmap_state {
};
static void shm_deallocate_segment(struct shmid_kernel *);
-static int shm_find_segment_by_key(key_t);
-static struct shmid_kernel *shm_find_segment(int, bool);
+static int shm_find_segment_by_key(struct prison *, key_t);
+static struct shmid_kernel *shm_find_segment(struct prison *, int, bool);
static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *);
static void shmrealloc(void);
static int shminit(void);
@@ -130,6 +131,14 @@ static int shmunload(void);
static void shmexit_myhook(struct vmspace *vm);
static void shmfork_myhook(struct proc *p1, struct proc *p2);
static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS);
+static void shm_remove(struct shmid_kernel *, int);
+static struct prison *shm_find_prison(struct ucred *);
+static int shm_prison_cansee(struct prison *, struct shmid_kernel *);
+static int shm_prison_check(void *, void *);
+static int shm_prison_set(void *, void *);
+static int shm_prison_get(void *, void *);
+static int shm_prison_remove(void *, void *);
+static void shm_prison_cleanup(struct prison *);
/*
* Tuneable values.
@@ -189,12 +198,14 @@ static struct sx sysvshmsx;
#define SYSVSHM_ASSERT_LOCKED() sx_assert(&sysvshmsx, SA_XLOCKED)
static int
-shm_find_segment_by_key(key_t key)
+shm_find_segment_by_key(struct prison *pr, key_t key)
{
int i;
for (i = 0; i < shmalloced; i++)
if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) &&
+ shmsegs[i].cred != NULL &&
+ shmsegs[i].cred->cr_prison == pr &&
shmsegs[i].u.shm_perm.key == key)
return (i);
return (-1);
@@ -205,7 +216,7 @@ shm_find_segment_by_key(key_t key)
* is_shmid is false.
*/
static struct shmid_kernel *
-shm_find_segment(int arg, bool is_shmid)
+shm_find_segment(struct prison *rpr, int arg, bool is_shmid)
{
struct shmid_kernel *shmseg;
int segnum;
@@ -217,7 +228,8 @@ shm_find_segment(int arg, bool is_shmid)
if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
(!shm_allow_removed &&
(shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0) ||
- (is_shmid && shmseg->u.shm_perm.seq != IPCID_TO_SEQ(arg)))
+ (is_shmid && shmseg->u.shm_perm.seq != IPCID_TO_SEQ(arg)) ||
+ shm_prison_cansee(rpr, shmseg) != 0)
return (NULL);
return (shmseg);
}
@@ -271,6 +283,41 @@ shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
return (0);
}
+/*
+ * Mark a shared memory segment as removed: its key becomes IPC_PRIVATE and
+ * SHMSEG_REMOVED is set in its mode.  If nothing is attached the segment is
+ * deallocated right away and segnum is recorded in shm_last_free.
+ */
+static void
+shm_remove(struct shmid_kernel *shmseg, int segnum)
+{
+
+	shmseg->u.shm_perm.key = IPC_PRIVATE;
+	shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
+	if (shmseg->u.shm_nattch > 0)
+		return;
+	shm_deallocate_segment(shmseg);
+	shm_last_free = segnum;
+}
+
+/*
+ * Return the root prison stored in shm_prison_slot for the prison that owns
+ * cred, read while holding that prison's lock.  Callers in this file return
+ * ENOSYS when the result is NULL.
+ */
+static struct prison *
+shm_find_prison(struct ucred *cred)
+{
+	struct prison *holder, *root;
+
+	holder = cred->cr_prison;
+	prison_lock(holder);
+	root = osd_jail_get(holder, shm_prison_slot);
+	prison_unlock(holder);
+	return (root);
+}
+
+/*
+ * Visibility check for a shared memory segment: returns 0 when the
+ * segment's credential belongs to root prison rpr or to a prison accepted
+ * by prison_ischild(rpr, ...); returns EINVAL otherwise, including for a
+ * segment that has no credential.
+ */
+static int
+shm_prison_cansee(struct prison *rpr, struct shmid_kernel *shmseg)
+{
+	struct prison *owner;
+
+	if (shmseg->cred == NULL)
+		return (EINVAL);
+	owner = shmseg->cred->cr_prison;
+	return ((rpr == owner || prison_ischild(rpr, owner)) ? 0 : EINVAL);
+}
+
static int
kern_shmdt_locked(struct thread *td, const void *shmaddr)
{
@@ -282,7 +329,7 @@ kern_shmdt_locked(struct thread *td, const void *shmaddr)
int error, i;
SYSVSHM_ASSERT_LOCKED();
- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+ if (shm_find_prison(td->td_ucred) == NULL)
return (ENOSYS);
shmmap_s = p->p_vmspace->vm_shm;
if (shmmap_s == NULL)
@@ -325,6 +372,7 @@ static int
kern_shmat_locked(struct thread *td, int shmid, const void *shmaddr,
int shmflg)
{
+ struct prison *rpr;
struct proc *p = td->td_proc;
struct shmid_kernel *shmseg;
struct shmmap_state *shmmap_s;
@@ -334,7 +382,8 @@ kern_shmat_locked(struct thread *td, int shmid, const void *shmaddr,
int error, i, rv;
SYSVSHM_ASSERT_LOCKED();
- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+ rpr = shm_find_prison(td->td_ucred);
+ if (rpr == NULL)
return (ENOSYS);
shmmap_s = p->p_vmspace->vm_shm;
if (shmmap_s == NULL) {
@@ -345,7 +394,7 @@ kern_shmat_locked(struct thread *td, int shmid, const void *shmaddr,
KASSERT(p->p_vmspace->vm_shm == NULL, ("raced"));
p->p_vmspace->vm_shm = shmmap_s;
}
- shmseg = shm_find_segment(shmid, true);
+ shmseg = shm_find_segment(rpr, shmid, true);
if (shmseg == NULL)
return (EINVAL);
error = ipcperm(td, &shmseg->u.shm_perm,
@@ -433,6 +482,7 @@ static int
kern_shmctl_locked(struct thread *td, int shmid, int cmd, void *buf,
size_t *bufsz)
{
+ struct prison *rpr;
struct shmid_kernel *shmseg;
struct shmid_ds *shmidp;
struct shm_info shm_info;
@@ -440,7 +490,8 @@ kern_shmctl_locked(struct thread *td, int shmid, int cmd, void *buf,
SYSVSHM_ASSERT_LOCKED();
- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+ rpr = shm_find_prison(td->td_ucred);
+ if (rpr == NULL)
return (ENOSYS);
error = 0;
@@ -474,7 +525,7 @@ kern_shmctl_locked(struct thread *td, int shmid, int cmd, void *buf,
return (0);
}
}
- shmseg = shm_find_segment(shmid, cmd != SHM_STAT);
+ shmseg = shm_find_segment(rpr, shmid, cmd != SHM_STAT);
if (shmseg == NULL)
return (EINVAL);
#ifdef MAC
@@ -485,10 +536,13 @@ kern_shmctl_locked(struct thread *td, int shmid, int cmd, void *buf,
switch (cmd) {
case SHM_STAT:
case IPC_STAT:
+ shmidp = (struct shmid_ds *)buf;
error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
if (error != 0)
return (error);
- memcpy(buf, &shmseg->u, sizeof(struct shmid_ds));
+ memcpy(shmidp, &shmseg->u, sizeof(struct shmid_ds));
+ if (td->td_ucred->cr_prison != shmseg->cred->cr_prison)
+ shmidp->shm_perm.key = IPC_PRIVATE;
if (bufsz != NULL)
*bufsz = sizeof(struct shmid_ds);
if (cmd == SHM_STAT) {
@@ -512,12 +566,7 @@ kern_shmctl_locked(struct thread *td, int shmid, int cmd, void *buf,
error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
if (error != 0)
return (error);
- shmseg->u.shm_perm.key = IPC_PRIVATE;
- shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
- if (shmseg->u.shm_nattch <= 0) {
- shm_deallocate_segment(shmseg);
- shm_last_free = IPCID_TO_IX(shmid);
- }
+ shm_remove(shmseg, IPCID_TO_IX(shmid));
break;
#if 0
case SHM_LOCK:
@@ -724,14 +773,15 @@ sys_shmget(struct thread *td, struct shmget_args *uap)
int segnum, mode;
int error;
- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+ if (shm_find_prison(td->td_ucred) == NULL)
return (ENOSYS);
mode = uap->shmflg & ACCESSPERMS;
SYSVSHM_LOCK();
if (uap->key == IPC_PRIVATE) {
error = shmget_allocate_segment(td, uap, mode);
} else {
- segnum = shm_find_segment_by_key(uap->key);
+ segnum = shm_find_segment_by_key(td->td_ucred->cr_prison,
+ uap->key);
if (segnum >= 0)
error = shmget_existing(td, uap, mode, segnum);
else if ((uap->shmflg & IPC_CREAT) == 0)
@@ -852,7 +902,15 @@ static struct syscall_helper_data shm32_syscalls[] = {
static int
shminit(void)
{
+ struct prison *pr;
+ void *rsv;
int i, error;
+ osd_method_t methods[PR_MAXMETHOD] = {
+ [PR_METHOD_CHECK] = shm_prison_check,
+ [PR_METHOD_SET] = shm_prison_set,
+ [PR_METHOD_GET] = shm_prison_get,
+ [PR_METHOD_REMOVE] = shm_prison_remove,
+ };
#ifndef BURN_BRIDGES
if (TUNABLE_ULONG_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall) != 0)
@@ -888,6 +946,29 @@ shminit(void)
shmexit_hook = &shmexit_myhook;
shmfork_hook = &shmfork_myhook;
+ /* Set current prisons according to their allow.sysvipc. */
+ shm_prison_slot = osd_jail_register(NULL, methods);
+ rsv = osd_reserve(shm_prison_slot);
+ prison_lock(&prison0);
+ (void)osd_jail_set_reserved(&prison0, shm_prison_slot, rsv, &prison0);
+ prison_unlock(&prison0);
+ rsv = NULL;
+ sx_slock(&allprison_lock);
+ TAILQ_FOREACH(pr, &allprison, pr_list) {
+ if (rsv == NULL)
+ rsv = osd_reserve(shm_prison_slot);
+ prison_lock(pr);
+ if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
+ (void)osd_jail_set_reserved(pr, shm_prison_slot, rsv,
+ &prison0);
+ rsv = NULL;
+ }
+ prison_unlock(pr);
+ }
+ if (rsv != NULL)
+ osd_free_reserved(rsv);
+ sx_sunlock(&allprison_lock);
+
error = syscall_helper_register(shm_syscalls);
if (error != 0)
return (error);
@@ -911,6 +992,8 @@ shmunload(void)
syscall32_helper_unregister(shm32_syscalls);
#endif
syscall_helper_unregister(shm_syscalls);
+ if (shm_prison_slot != 0)
+ osd_jail_deregister(shm_prison_slot);
for (i = 0; i < shmalloced; i++) {
#ifdef MAC
@@ -934,14 +1017,209 @@ shmunload(void)
static int
sysctl_shmsegs(SYSCTL_HANDLER_ARGS)
{
- int error;
+ struct shmid_kernel tshmseg; /* per-slot copy; the handler emits one of these for each shmsegs[] entry */
+ struct prison *pr, *rpr;
+ int error, i;
SYSVSHM_LOCK();
- error = SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0]));
+ pr = req->td->td_ucred->cr_prison;
+ rpr = shm_find_prison(req->td->td_ucred);
+ error = 0;
+ for (i = 0; i < shmalloced; i++) {
+ if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
+ rpr == NULL || shm_prison_cansee(rpr, &shmsegs[i]) != 0) {
+ bzero(&tshmseg, sizeof(tshmseg)); /* unallocated or not visible: report a zeroed, free slot */
+ tshmseg.u.shm_perm.mode = SHMSEG_FREE;
+ } else {
+ tshmseg = shmsegs[i];
+ if (tshmseg.cred->cr_prison != pr)
+ tshmseg.u.shm_perm.key = IPC_PRIVATE; /* key is masked when the owner is a different jail */
+ }
+ error = SYSCTL_OUT(req, &tshmseg, sizeof(tshmseg)); /* every slot is written, so output index == slot index */
+ if (error != 0)
+ break;
+ }
SYSVSHM_UNLOCK();
return (error);
}
+static int
+shm_prison_check(void *obj, void *data)
+{
+	struct vfsoptlist *opts = data;
+	struct prison *pr = obj;
+	struct prison *parent_root;
+	int error, jsys;
+
+	/*
+	 * Validate the "sysvshm" jailsys integer when the request carries one.
+	 * "disable" always passes; "new" and "inherit" need the parent jail
+	 * to have an entry in the shm OSD slot, otherwise EPERM.
+	 * Any other value is EINVAL.
+	 */
+	error = vfs_copyopt(opts, "sysvshm", &jsys, sizeof(jsys));
+	if (error == ENOENT)
+		return (0);	/* option absent: nothing to validate */
+	if (error != 0)
+		return (error);
+
+	if (jsys == JAIL_SYS_DISABLE)
+		return (0);
+	if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
+		return (EINVAL);	/* none of the three accepted values */
+
+	/* Peek at the parent's slot under the parent's prison lock. */
+	prison_lock(pr->pr_parent);
+	parent_root = osd_jail_get(pr->pr_parent, shm_prison_slot);
+	prison_unlock(pr->pr_parent);
+	if (parent_root == NULL)
+		return (EPERM);
+
+	return (0);
+}
+/* Jail OSD "set" method: apply sysvshm / allow.[no]sysvipc to pr's shm slot. */
+static int
+shm_prison_set(void *obj, void *data)
+{
+ struct prison *pr = obj;
+ struct prison *tpr, *orpr, *nrpr, *trpr;
+ struct vfsoptlist *opts = data;
+ void *rsv;
+ int jsys, descend;
+
+ /*
+ * sysvshm selects the root jail for this jail's segments (this jail
+ * or the same root as the parent), or disables the feature.  When it
+ * is absent, allow.sysvipc / allow.nosysvipc stand in for it.
+ */
+ if (vfs_copyopt(opts, "sysvshm", &jsys, sizeof(jsys)) == ENOENT)
+ jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0)
+ ? JAIL_SYS_INHERIT
+ : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0)
+ ? JAIL_SYS_DISABLE
+ : -1; /* neither option supplied: both branches below are skipped */
+ if (jsys == JAIL_SYS_DISABLE) {
+ prison_lock(pr);
+ orpr = osd_jail_get(pr, shm_prison_slot);
+ if (orpr != NULL)
+ osd_jail_del(pr, shm_prison_slot);
+ prison_unlock(pr);
+ if (orpr != NULL) {
+ if (orpr == pr)
+ shm_prison_cleanup(pr); /* NOTE(review): no SYSVSHM_LOCK on this path, unlike shm_prison_remove() -- confirm */
+ /* Disable all child jails as well. */
+ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
+ prison_lock(tpr);
+ trpr = osd_jail_get(tpr, shm_prison_slot);
+ if (trpr != NULL) {
+ osd_jail_del(tpr, shm_prison_slot);
+ prison_unlock(tpr);
+ if (trpr == tpr)
+ shm_prison_cleanup(tpr);
+ } else {
+ prison_unlock(tpr);
+ descend = 0; /* presumably keeps the walk out of tpr's children -- confirm */
+ }
+ }
+ }
+ } else if (jsys != -1) {
+ if (jsys == JAIL_SYS_NEW)
+ nrpr = pr; /* "new": this jail becomes its own root */
+ else {
+ prison_lock(pr->pr_parent);
+ nrpr = osd_jail_get(pr->pr_parent, shm_prison_slot);
+ prison_unlock(pr->pr_parent);
+ }
+ rsv = osd_reserve(shm_prison_slot);
+ prison_lock(pr);
+ orpr = osd_jail_get(pr, shm_prison_slot);
+ if (orpr != nrpr)
+ (void)osd_jail_set_reserved(pr, shm_prison_slot, rsv,
+ nrpr);
+ else
+ osd_free_reserved(rsv); /* root unchanged: give the reservation back */
+ prison_unlock(pr);
+ if (orpr != nrpr) {
+ if (orpr == pr)
+ shm_prison_cleanup(pr);
+ if (orpr != NULL) {
+ /* Change child jails matching the old root. */
+ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
+ prison_lock(tpr);
+ trpr = osd_jail_get(tpr,
+ shm_prison_slot);
+ if (trpr == orpr) {
+ (void)osd_jail_set(tpr,
+ shm_prison_slot, nrpr);
+ prison_unlock(tpr);
+ if (trpr == tpr)
+ shm_prison_cleanup(tpr);
+ } else {
+ prison_unlock(tpr);
+ descend = 0;
+ }
+ }
+ }
+ }
+ }
+
+ return (0);
+}
+
+static int
+shm_prison_get(void *obj, void *data)
+{
+	struct vfsoptlist *opts = data;
+	struct prison *pr = obj;
+	struct prison *root;
+	int error, jsys;
+
+	/* Report "sysvshm" from what the jail's shm OSD slot holds. */
+	prison_lock(pr);
+	root = osd_jail_get(pr, shm_prison_slot);
+	prison_unlock(pr);
+	if (root == pr)
+		jsys = JAIL_SYS_NEW;
+	else
+		jsys = root == NULL ? JAIL_SYS_DISABLE : JAIL_SYS_INHERIT;
+	error = vfs_setopt(opts, "sysvshm", &jsys, sizeof(jsys));
+	return (error == ENOENT ? 0 : error);	/* ENOENT is not a failure */
+}
+/* Jail OSD "remove" method: purge pr's segments if pr is its own shm root. */
+static int
+shm_prison_remove(void *obj, void *data __unused)
+{
+ struct prison *pr = obj;
+ struct prison *rpr;
+
+ SYSVSHM_LOCK(); /* taken before the prison lock and held across the cleanup */
+ prison_lock(pr);
+ rpr = osd_jail_get(pr, shm_prison_slot);
+ prison_unlock(pr);
+ if (rpr == pr) /* slot points back at pr itself */
+ shm_prison_cleanup(pr);
+ SYSVSHM_UNLOCK();
+ return (0);
+}
+
+static void
+shm_prison_cleanup(struct prison *pr)
+{
+	struct shmid_kernel *seg;
+	int idx;
+
+	/* Walk every slot and shm_remove() those whose cred sits in pr. */
+	for (idx = 0; idx < shmalloced; idx++) {
+		seg = &shmsegs[idx];
+		if ((seg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
+		    seg->cred == NULL || seg->cred->cr_prison != pr)
+			continue;
+		shm_remove(seg, idx);
+	}
+}
+/* Presumably declares the "sysvshm" jail parameter the shm_prison_* hooks parse -- verify. */
+SYSCTL_JAIL_PARAM_SYS_NODE(sysvshm, CTLFLAG_RW, "SYSV shared memory");
+
#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
struct oshmid_ds {
struct ipc_perm_old shm_perm; /* operation perms */
@@ -966,17 +1244,19 @@ oshmctl(struct thread *td, struct oshmctl_args *uap)
{
#ifdef COMPAT_43
int error = 0;
+ struct prison *rpr;
struct shmid_kernel *shmseg;
struct oshmid_ds outbuf;
- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+ rpr = shm_find_prison(td->td_ucred);
+ if (rpr == NULL)
return (ENOSYS);
if (uap->cmd != IPC_STAT) {
return (freebsd7_shmctl(td,
(struct freebsd7_shmctl_args *)uap));
}
SYSVSHM_LOCK();
- shmseg = shm_find_segment(uap->shmid, true);
+ shmseg = shm_find_segment(rpr, uap->shmid, true);
if (shmseg == NULL) {
SYSVSHM_UNLOCK();
return (EINVAL);
@@ -1031,8 +1311,6 @@ sys_shmsys(struct thread *td, struct shmsys_args *uap)
{
int error;
- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
- return (ENOSYS);
if (uap->which < 0 || uap->which >= nitems(shmcalls))
return (EINVAL);
error = (*shmcalls[uap->which])(td, &uap->a2);
OpenPOWER on IntegriCloud