diff options
author | zec <zec@FreeBSD.org> | 2009-05-31 12:10:04 +0000 |
---|---|---|
committer | zec <zec@FreeBSD.org> | 2009-05-31 12:10:04 +0000 |
commit | 861b77b0175f90ae49506e05c337ab56500751eb (patch) | |
tree | 6885e54172e0694f4186029864fdc53c37aa9631 /sys/kern | |
parent | ab797d42e47bd545852aab8db4db5d162e9a0e38 (diff) | |
download | FreeBSD-src-861b77b0175f90ae49506e05c337ab56500751eb.zip FreeBSD-src-861b77b0175f90ae49506e05c337ab56500751eb.tar.gz |
Introduce an interm userland-kernel API for creating vnets and
assigning ifnets from one vnet to another. Deletion of vnets is not
yet supported.
The interface is implemented as an ioctl extension so that no syscalls
had to be introduced. This should be acceptable given that the new
interface will be used for a short / interim period only, until the
new jail management framwork gains the capability of managing vnets.
This method for managing vimages / vnets has been in use for the past
7 years without any observable issues.
The userland tool to be used in conjunction with the interim API can be
found in p4: //depot/projects/vimage-commit2/src/usr.sbin/vimage/... and
will most probably never get commited to svn.
While here, bump copyright notices in kern_vimage.c and vimage.h to
cover work done in year 2009.
Approved by: julian (mentor)
Discussed with: bz, rwatson
Diffstat (limited to 'sys/kern')
-rw-r--r-- | sys/kern/kern_prot.c | 6 | ||||
-rw-r--r-- | sys/kern/kern_vimage.c | 404 |
2 files changed, 382 insertions, 28 deletions
diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c index 1c5f68b..a1a55f2 100644 --- a/sys/kern/kern_prot.c +++ b/sys/kern/kern_prot.c @@ -1748,7 +1748,11 @@ p_canwait(struct thread *td, struct proc *p) KASSERT(td == curthread, ("%s: td not curthread", __func__)); PROC_LOCK_ASSERT(p, MA_OWNED); - if ((error = prison_check(td->td_ucred, p->p_ucred))) + if ( +#ifdef VIMAGE /* XXX temporary until struct vimage goes away */ + !vi_child_of(TD_TO_VIMAGE(td), P_TO_VIMAGE(p)) && +#endif + (error = prison_check(td->td_ucred, p->p_ucred))) return (error); #ifdef MAC if ((error = mac_proc_check_wait(td->td_ucred, p))) diff --git a/sys/kern/kern_vimage.c b/sys/kern/kern_vimage.c index 7cb6d84..c78217a 100644 --- a/sys/kern/kern_vimage.c +++ b/sys/kern/kern_vimage.c @@ -1,6 +1,6 @@ /*- - * Copyright (c) 2004-2008 University of Zagreb - * Copyright (c) 2006-2008 FreeBSD Foundation + * Copyright (c) 2004-2009 University of Zagreb + * Copyright (c) 2006-2009 FreeBSD Foundation * * This software was developed by the University of Zagreb and the * FreeBSD Foundation under sponsorship by the Stichting NLnet and the @@ -34,16 +34,24 @@ __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include <sys/param.h> -#include <sys/types.h> #include <sys/kernel.h> #include <sys/linker.h> +#include <sys/lock.h> #include <sys/malloc.h> -#include <sys/systm.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/sx.h> +#include <sys/priv.h> +#include <sys/refcount.h> #include <sys/vimage.h> #ifdef DDB #include <ddb/ddb.h> #endif +#include <net/if.h> +#include <net/route.h> +#include <net/vnet.h> + #ifndef VIMAGE_GLOBALS MALLOC_DEFINE(M_VIMAGE, "vimage", "vimage resource container"); @@ -57,6 +65,22 @@ static int vnet_mod_constructor(struct vnet_modlink *); static int vnet_mod_destructor(struct vnet_modlink *); #ifdef VIMAGE +static struct vimage *vimage_by_name(struct vimage *, char *); +static struct vimage *vi_alloc(struct vimage *, char *); +static struct vimage *vimage_get_next(struct vimage *, struct vimage *, int); +static void vimage_relative_name(struct vimage *, struct vimage *, + char *, int); +#endif + +#define VNET_LIST_WLOCK() \ + mtx_lock(&vnet_list_refc_mtx); \ + while (vnet_list_refc != 0) \ + cv_wait(&vnet_list_condvar, &vnet_list_refc_mtx); + +#define VNET_LIST_WUNLOCK() \ + mtx_unlock(&vnet_list_refc_mtx); + +#ifdef VIMAGE struct vimage_list_head vimage_head; struct vnet_list_head vnet_head; struct vprocg_list_head vprocg_head; @@ -67,9 +91,294 @@ struct vprocg vprocg_0; #endif #ifdef VIMAGE +struct cv vnet_list_condvar; +struct mtx vnet_list_refc_mtx; +int vnet_list_refc = 0; + +static u_int last_vi_id = 0; +static u_int last_vnet_id = 0; +static u_int last_vprocg_id = 0; + struct vnet *vnet0; #endif +#ifdef VIMAGE + +/* + * Interim userspace interface - will be replaced by jail soon. + */ + +/* + * Move an ifnet to another vnet. The ifnet can be specified either + * by ifp argument, or by name contained in vi_req->vi_if_xname if NULL is + * passed as ifp. The target vnet can be specified either by vnet + * argument or by name. If vnet name equals to ".." or vi_req is set to + * NULL the interface is moved to the parent vnet. + */ +int +vi_if_move(struct vi_req *vi_req, struct ifnet *ifp, struct vimage *vip) +{ + struct vimage *new_vip; + struct vnet *new_vnet = NULL; + + /* Check for API / ABI version mismatch. */ + if (vi_req->vi_api_cookie != VI_API_COOKIE) + return (EDOOFUS); + + /* Find the target vnet. */ + if (vi_req == NULL || strcmp(vi_req->vi_name, "..") == 0) { + if (IS_DEFAULT_VIMAGE(vip)) + return (ENXIO); + new_vnet = vip->vi_parent->v_net; + } else { + new_vip = vimage_by_name(vip, vi_req->vi_name); + if (new_vip == NULL) + return (ENXIO); + new_vnet = new_vip->v_net; + } + + /* Try to find the target ifnet by name. */ + if (ifp == NULL) + ifp = ifunit(vi_req->vi_if_xname); + + if (ifp == NULL) + return (ENXIO); + + /* + * Check for naming clashes in target vnet. Not locked so races + * are possible. + */ + if (vi_req != NULL) { + struct ifnet *t_ifp; + + CURVNET_SET_QUIET(new_vnet); + t_ifp = ifunit(vi_req->vi_if_xname); + CURVNET_RESTORE(); + if (t_ifp != NULL) + return (EEXIST); + } + + /* Detach from curvnet and attach to new_vnet. */ + if_vmove(ifp, new_vnet); + + /* Report the new if_xname back to the userland */ + if (vi_req != NULL) + sprintf(vi_req->vi_if_xname, "%s", ifp->if_xname); + + return (0); +} + +int +vi_td_ioctl(u_long cmd, struct vi_req *vi_req, struct thread *td) +{ + int error = 0; + struct vimage *vip = TD_TO_VIMAGE(td); + struct vimage *vip_r = NULL; + + /* Check for API / ABI version mismatch. */ + if (vi_req->vi_api_cookie != VI_API_COOKIE) + return (EDOOFUS); + + error = priv_check(td, PRIV_REBOOT); /* XXX temp. priv abuse */ + if (error) + return (error); + + vip_r = vimage_by_name(vip, vi_req->vi_name); + if (vip_r == NULL && !(vi_req->vi_req_action & VI_CREATE)) + return (ESRCH); + if (vip_r != NULL && vi_req->vi_req_action & VI_CREATE) + return (EADDRINUSE); + if (vi_req->vi_req_action == VI_GETNEXT) { + vip_r = vimage_get_next(vip, vip_r, 0); + if (vip_r == NULL) + return (ESRCH); + } + if (vi_req->vi_req_action == VI_GETNEXT_RECURSE) { + vip_r = vimage_get_next(vip, vip_r, 1); + if (vip_r == NULL) + return (ESRCH); + } + + if (vip_r && !vi_child_of(vip, vip_r) && /* XXX delete the rest? */ + vi_req->vi_req_action != VI_GET && + vi_req->vi_req_action != VI_GETNEXT) + return (EPERM); + + switch (cmd) { + + case SIOCGPVIMAGE: + vimage_relative_name(vip, vip_r, vi_req->vi_name, + sizeof (vi_req->vi_name)); + vi_req->vi_proc_count = vip_r->v_procg->nprocs; + vi_req->vi_if_count = vip_r->v_net->ifcnt; + vi_req->vi_sock_count = vip_r->v_net->sockcnt; + break; + + case SIOCSPVIMAGE: + if (vi_req->vi_req_action == VI_DESTROY) { +#ifdef NOTYET + error = vi_destroy(vip_r); +#else + error = EOPNOTSUPP; +#endif + break; + } + + if (vi_req->vi_req_action == VI_SWITCHTO) { + struct proc *p = td->td_proc; + struct ucred *oldcred, *newcred; + + /* + * XXX priv_check()? + * XXX allow only a single td per proc here? + */ + newcred = crget(); + PROC_LOCK(p); + oldcred = p->p_ucred; + setsugid(p); + crcopy(newcred, oldcred); + refcount_release(&newcred->cr_vimage->vi_ucredrefc); + newcred->cr_vimage = vip_r; + refcount_acquire(&newcred->cr_vimage->vi_ucredrefc); + p->p_ucred = newcred; + PROC_UNLOCK(p); + sx_xlock(&allproc_lock); + oldcred->cr_vimage->v_procg->nprocs--; + refcount_release(&oldcred->cr_vimage->vi_ucredrefc); + P_TO_VPROCG(p)->nprocs++; + sx_xunlock(&allproc_lock); + crfree(oldcred); + break; + } + + if (vi_req->vi_req_action & VI_CREATE) { + char *dotpos; + + dotpos = strrchr(vi_req->vi_name, '.'); + if (dotpos != NULL) { + *dotpos = 0; + vip = vimage_by_name(vip, vi_req->vi_name); + if (vip == NULL) + return (ESRCH); + dotpos++; + vip_r = vi_alloc(vip, dotpos); + } else + vip_r = vi_alloc(vip, vi_req->vi_name); + if (vip_r == NULL) + return (ENOMEM); + } + } + return (error); +} + +int +vi_child_of(struct vimage *parent, struct vimage *child) +{ + + if (child == parent) + return (0); + for (; child; child = child->vi_parent) + if (child == parent) + return (1); + return (0); +} + +static struct vimage * +vimage_by_name(struct vimage *top, char *name) +{ + struct vimage *vip; + char *next_name; + int namelen; + + next_name = strchr(name, '.'); + if (next_name != NULL) { + namelen = next_name - name; + next_name++; + if (namelen == 0) { + if (strlen(next_name) == 0) + return (top); /* '.' == this vimage */ + else + return (NULL); + } + } else + namelen = strlen(name); + if (namelen == 0) + return (NULL); + LIST_FOREACH(vip, &top->vi_child_head, vi_sibling) { + if (strlen(vip->vi_name) == namelen && + strncmp(name, vip->vi_name, namelen) == 0) { + if (next_name != NULL) + return (vimage_by_name(vip, next_name)); + else + return (vip); + } + } + return (NULL); +} + +static void +vimage_relative_name(struct vimage *top, struct vimage *where, + char *buffer, int bufflen) +{ + int used = 1; + + if (where == top) { + sprintf(buffer, "."); + return; + } else + *buffer = 0; + + do { + int namelen = strlen(where->vi_name); + + if (namelen + used + 1 >= bufflen) + panic("buffer overflow"); + + if (used > 1) { + bcopy(buffer, &buffer[namelen + 1], used); + buffer[namelen] = '.'; + used++; + } else + bcopy(buffer, &buffer[namelen], used); + bcopy(where->vi_name, buffer, namelen); + used += namelen; + where = where->vi_parent; + } while (where != top); +} + +static struct vimage * +vimage_get_next(struct vimage *top, struct vimage *where, int recurse) +{ + struct vimage *next; + + if (recurse) { + /* Try to go deeper in the hierarchy */ + next = LIST_FIRST(&where->vi_child_head); + if (next != NULL) + return (next); + } + + do { + /* Try to find next sibling */ + next = LIST_NEXT(where, vi_sibling); + if (!recurse || next != NULL) + return (next); + + /* Nothing left on this level, go one level up */ + where = where->vi_parent; + } while (where != top->vi_parent); + + /* Nothing left to be visited, we are done */ + return (NULL); +} + +#endif /* VIMAGE */ /* User interface block */ + + +/* + * Kernel interfaces and handlers. + */ + void vnet_mod_register(const struct vnet_modinfo *vmi) { @@ -221,7 +530,7 @@ vnet_mod_constructor(struct vnet_modlink *vml) void *mem = malloc(vmi->vmi_size, M_VNET, M_NOWAIT | M_ZERO); if (mem == NULL) /* XXX should return error, not panic. */ - panic("vi_alloc: malloc for %s\n", vmi->vmi_name); + panic("malloc for %s\n", vmi->vmi_name); curvnet->mod_data[vmi->vmi_id] = mem; } #endif @@ -301,43 +610,84 @@ vi_symlookup(struct kld_sym_lookup *lookup, char *symstr) return (ENOENT); } -static void -vi_init(void *unused) -{ #ifdef VIMAGE +static struct vimage * +vi_alloc(struct vimage *parent, char *name) +{ struct vimage *vip; struct vprocg *vprocg; struct vnet *vnet; -#endif - - TAILQ_INIT(&vnet_modlink_head); - TAILQ_INIT(&vnet_modpending_head); - -#ifdef VIMAGE - LIST_INIT(&vimage_head); - LIST_INIT(&vprocg_head); - LIST_INIT(&vnet_head); + struct vnet_modlink *vml; vip = malloc(sizeof(struct vimage), M_VIMAGE, M_NOWAIT | M_ZERO); if (vip == NULL) - panic("malloc failed for struct vimage"); + panic("vi_alloc: malloc failed for vimage \"%s\"\n", name); + vip->vi_id = last_vi_id++; + LIST_INIT(&vip->vi_child_head); + sprintf(vip->vi_name, "%s", name); + vip->vi_parent = parent; + /* XXX locking */ + if (parent != NULL) + LIST_INSERT_HEAD(&parent->vi_child_head, vip, vi_sibling); + else if (!LIST_EMPTY(&vimage_head)) + panic("there can be only one default vimage!"); LIST_INSERT_HEAD(&vimage_head, vip, vi_le); + vnet = malloc(sizeof(struct vnet), M_VNET, M_NOWAIT | M_ZERO); + if (vnet == NULL) + panic("vi_alloc: malloc failed for vnet \"%s\"\n", name); + vip->v_net = vnet; + vnet->vnet_id = last_vnet_id++; + if (vnet->vnet_id == 0) + vnet0 = vnet; + vnet->vnet_magic_n = VNET_MAGIC_N; + vprocg = malloc(sizeof(struct vprocg), M_VPROCG, M_NOWAIT | M_ZERO); if (vprocg == NULL) - panic("malloc failed for struct vprocg"); + panic("vi_alloc: malloc failed for vprocg \"%s\"\n", name); vip->v_procg = vprocg; - LIST_INSERT_HEAD(&vprocg_head, vprocg, vprocg_le); + vprocg->vprocg_id = last_vprocg_id++; - vnet = malloc(sizeof(struct vnet), M_VNET, M_NOWAIT | M_ZERO); - if (vnet == NULL) - panic("vi_alloc: malloc failed"); + /* Initialize / attach vnet module instances. */ + CURVNET_SET_QUIET(vnet); + TAILQ_FOREACH(vml, &vnet_modlink_head, vml_mod_le) + vnet_mod_constructor(vml); + CURVNET_RESTORE(); + + VNET_LIST_WLOCK(); LIST_INSERT_HEAD(&vnet_head, vnet, vnet_le); - vnet->vnet_magic_n = VNET_MAGIC_N; - vip->v_net = vnet; - vnet0 = vnet; + VNET_LIST_WUNLOCK(); + + /* XXX locking */ + LIST_INSERT_HEAD(&vprocg_head, vprocg, vprocg_le); + + return (vip); +} +#endif /* VIMAGE */ - /* We MUST clear curvnet in vi_init_done before going SMP. */ +static void +vi_init(void *unused) +{ + + TAILQ_INIT(&vnet_modlink_head); + TAILQ_INIT(&vnet_modpending_head); + +#ifdef VIMAGE + LIST_INIT(&vimage_head); + LIST_INIT(&vprocg_head); + LIST_INIT(&vnet_head); + + mtx_init(&vnet_list_refc_mtx, "vnet_list_refc_mtx", NULL, MTX_DEF); + cv_init(&vnet_list_condvar, "vnet_list_condvar"); + + /* Default image has no parent and no name. */ + vi_alloc(NULL, ""); + + /* + * We MUST clear curvnet in vi_init_done() before going SMP, + * otherwise CURVNET_SET() macros would scream about unnecessary + * curvnet recursions. + */ curvnet = LIST_FIRST(&vnet_head); #endif } |