diff options
-rw-r--r-- | sys/kern/kern_jail.c | 29 | ||||
-rw-r--r-- | sys/kern/kern_vimage.c | 214 | ||||
-rw-r--r-- | sys/net/if.c | 18 | ||||
-rw-r--r-- | sys/net/if.h | 2 | ||||
-rw-r--r-- | sys/sys/jail.h | 2 | ||||
-rw-r--r-- | sys/sys/priv.h | 1 | ||||
-rw-r--r-- | sys/sys/sockio.h | 3 | ||||
-rw-r--r-- | sys/sys/vimage.h | 23 |
8 files changed, 192 insertions, 100 deletions
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index c13b7d4..68d8dc2 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -126,6 +126,9 @@ static char *pr_flag_names[] = { #ifdef INET6 [3] = "ip6", #endif +#ifdef VIMAGE + [4] = "vnet", +#endif }; static char *pr_flag_nonames[] = { @@ -137,6 +140,9 @@ static char *pr_flag_nonames[] = { #ifdef INET6 [3] = "noip6", #endif +#ifdef VIMAGE + [4] = "novnet", +#endif }; static char *pr_allow_names[] = { @@ -561,6 +567,13 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) vfs_opterror(opts, "new jail must persist or attach"); goto done_errmsg; } +#ifdef VIMAGE + if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) { + error = EINVAL; + vfs_opterror(opts, "vnet cannot be changed after creation"); + goto done_errmsg; + } +#endif pr_allow = ch_allow = 0; for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); @@ -1113,6 +1126,11 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) LIST_INIT(&pr->pr_children); mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); +#ifdef VIMAGE + /* Allocate a new vnet if specified. */ + pr->pr_vnet = (pr_flags & PR_VNET) + ? vnet_alloc() : ppr->pr_vnet; +#endif /* * Allocate a dedicated cpuset for each jail. * Unlike other initial settings, this may return an erorr. @@ -2410,6 +2428,10 @@ prison_deref(struct prison *pr, int flags) tpr->pr_prisoncount--; sx_downgrade(&allprison_lock); +#ifdef VIMAGE + if (pr->pr_flags & PR_VNET) + vnet_destroy(pr->pr_vnet); +#endif if (pr->pr_root != NULL) { vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); vrele(pr->pr_root); @@ -3849,6 +3871,10 @@ SYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail cannot see all mounted file systems"); SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail persistence"); +#ifdef VIMAGE +SYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN, + "B", "Virtual network stack"); +#endif SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD, "B", "Jail is in the process of shutting down"); @@ -3923,6 +3949,9 @@ db_show_prison(struct prison *pr) db_printf(" path = %s\n", pr->pr_path); db_printf(" cpuset = %d\n", pr->pr_cpuset ? pr->pr_cpuset->cs_id : -1); +#ifdef VIMAGE + db_printf(" vnet = %p\n", pr->pr_vnet); +#endif db_printf(" root = %p\n", pr->pr_root); db_printf(" securelevel = %d\n", pr->pr_securelevel); db_printf(" child = %p\n", LIST_FIRST(&pr->pr_children)); diff --git a/sys/kern/kern_vimage.c b/sys/kern/kern_vimage.c index bb2c509..90af1ad 100644 --- a/sys/kern/kern_vimage.c +++ b/sys/kern/kern_vimage.c @@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include <sys/param.h> +#include <sys/jail.h> #include <sys/kernel.h> #include <sys/linker.h> #include <sys/lock.h> @@ -96,7 +97,6 @@ struct mtx vnet_list_refc_mtx; int vnet_list_refc = 0; static u_int last_vi_id = 0; -static u_int last_vnet_id = 0; static u_int last_vprocg_id = 0; struct vnet *vnet0; @@ -105,69 +105,90 @@ struct vnet *vnet0; #ifdef VIMAGE /* - * Interim userspace interface - will be replaced by jail soon. - */ - -/* - * Move an ifnet to another vnet. The ifnet can be specified either - * by ifp argument, or by name contained in vi_req->vi_if_xname if NULL is - * passed as ifp. The target vnet can be specified either by vnet - * argument or by name. If vnet name equals to ".." or vi_req is set to - * NULL the interface is moved to the parent vnet. + * Move an ifnet to or from another vnet, specified by the jail id. If a + * vi_req is passed in, it is used to find the interface and a vimage + * containing the vnet (a vimage name of ".." stands for the parent vnet). */ int -vi_if_move(struct vi_req *vi_req, struct ifnet *ifp, struct vimage *vip) +vi_if_move(struct thread *td, struct ifnet *ifp, char *ifname, int jid, + struct vi_req *vi_req) { - struct vimage *new_vip; - struct vnet *new_vnet = NULL; + struct ifnet *t_ifp; + struct prison *pr; + struct vimage *new_vip, *my_vip; + struct vnet *new_vnet; - /* Check for API / ABI version mismatch. */ - if (vi_req != NULL && vi_req->vi_api_cookie != VI_API_COOKIE) - return (EDOOFUS); + if (vi_req != NULL) { + /* SIOCSIFVIMAGE */ + /* Check for API / ABI version mismatch. */ + if (vi_req->vi_api_cookie != VI_API_COOKIE) + return (EDOOFUS); + + /* Find the target vnet. */ + my_vip = TD_TO_VIMAGE(td); + if (strcmp(vi_req->vi_name, "..") == 0) { + if (IS_DEFAULT_VIMAGE(my_vip)) + return (ENXIO); + new_vnet = my_vip->vi_parent->v_net; + } else { + new_vip = vimage_by_name(my_vip, vi_req->vi_name); + if (new_vip == NULL) + return (ENXIO); + new_vnet = new_vip->v_net; + } - /* Find the target vnet. */ - if (vi_req == NULL || strcmp(vi_req->vi_name, "..") == 0) { - if (IS_DEFAULT_VIMAGE(vip)) + /* Try to find the target ifnet by name. */ + ifname = vi_req->vi_if_xname; + ifp = ifunit(ifname); + if (ifp == NULL) return (ENXIO); - new_vnet = vip->vi_parent->v_net; } else { - new_vip = vimage_by_name(vip, vi_req->vi_name); - if (new_vip == NULL) + sx_slock(&allprison_lock); + pr = prison_find_child(td->td_ucred->cr_prison, jid); + sx_sunlock(&allprison_lock); + if (pr == NULL) return (ENXIO); - new_vnet = new_vip->v_net; - } - - /* Try to find the target ifnet by name. */ - if (ifp == NULL) - ifp = ifunit(vi_req->vi_if_xname); + mtx_unlock(&pr->pr_mtx); + if (ifp != NULL) { + /* SIOCSIFVNET */ + new_vnet = pr->pr_vnet; + } else { + /* SIOCSIFRVNET */ + new_vnet = TD_TO_VNET(td); + CURVNET_SET(pr->pr_vnet); + ifp = ifunit(ifname); + CURVNET_RESTORE(); + if (ifp == NULL) + return (ENXIO); + } - if (ifp == NULL) - return (ENXIO); + /* No-op if the target jail has the same vnet. */ + if (new_vnet == ifp->if_vnet) + return (0); + } /* * Check for naming clashes in target vnet. Not locked so races * are possible. */ - if (vi_req != NULL) { - struct ifnet *t_ifp; - - CURVNET_SET_QUIET(new_vnet); - t_ifp = ifunit(vi_req->vi_if_xname); - CURVNET_RESTORE(); - if (t_ifp != NULL) - return (EEXIST); - } + CURVNET_SET_QUIET(new_vnet); + t_ifp = ifunit(ifname); + CURVNET_RESTORE(); + if (t_ifp != NULL) + return (EEXIST); /* Detach from curvnet and attach to new_vnet. */ if_vmove(ifp, new_vnet); /* Report the new if_xname back to the userland */ - if (vi_req != NULL) - sprintf(vi_req->vi_if_xname, "%s", ifp->if_xname); - + sprintf(ifname, "%s", ifp->if_xname); return (0); } +/* + * Interim userspace interface - will be replaced by jail soon. + */ + int vi_td_ioctl(u_long cmd, struct vi_req *vi_req, struct thread *td) { @@ -606,13 +627,66 @@ vi_symlookup(struct kld_sym_lookup *lookup, char *symstr) } #ifdef VIMAGE +struct vnet * +vnet_alloc(void) +{ + struct vnet *vnet; + struct vnet_modlink *vml; + + vnet = malloc(sizeof(struct vnet), M_VNET, M_WAITOK | M_ZERO); + vnet->vnet_magic_n = VNET_MAGIC_N; + + /* Initialize / attach vnet module instances. */ + CURVNET_SET_QUIET(vnet); + TAILQ_FOREACH(vml, &vnet_modlink_head, vml_mod_le) + vnet_mod_constructor(vml); + CURVNET_RESTORE(); + + VNET_LIST_WLOCK(); + LIST_INSERT_HEAD(&vnet_head, vnet, vnet_le); + VNET_LIST_WUNLOCK(); + + return (vnet); +} + +void +vnet_destroy(struct vnet *vnet) +{ + struct ifnet *ifp, *nifp; + struct vnet_modlink *vml; + + KASSERT(vnet->sockcnt == 0, ("%s: vnet still has sockets", __func__)); + + VNET_LIST_WLOCK(); + LIST_REMOVE(vnet, vnet_le); + VNET_LIST_WUNLOCK(); + + CURVNET_SET_QUIET(vnet); + INIT_VNET_NET(vnet); + + /* Return all inherited interfaces to their parent vnets. */ + TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { + if (ifp->if_home_vnet != ifp->if_vnet) + if_vmove(ifp, ifp->if_home_vnet); + } + + /* Detach / free per-module state instances. */ + TAILQ_FOREACH_REVERSE(vml, &vnet_modlink_head, + vnet_modlink_head, vml_mod_le) + vnet_mod_destructor(vml); + + CURVNET_RESTORE(); + + /* Hopefully, we are OK to free the vnet container itself. */ + vnet->vnet_magic_n = 0xdeadbeef; + free(vnet, M_VNET); +} + static struct vimage * vi_alloc(struct vimage *parent, char *name) { struct vimage *vip; struct vprocg *vprocg; - struct vnet *vnet; - struct vnet_modlink *vml; vip = malloc(sizeof(struct vimage), M_VIMAGE, M_NOWAIT | M_ZERO); if (vip == NULL) @@ -628,14 +702,7 @@ vi_alloc(struct vimage *parent, char *name) panic("there can be only one default vimage!"); LIST_INSERT_HEAD(&vimage_head, vip, vi_le); - vnet = malloc(sizeof(struct vnet), M_VNET, M_NOWAIT | M_ZERO); - if (vnet == NULL) - panic("vi_alloc: malloc failed for vnet \"%s\"\n", name); - vip->v_net = vnet; - vnet->vnet_id = last_vnet_id++; - if (vnet->vnet_id == 0) - vnet0 = vnet; - vnet->vnet_magic_n = VNET_MAGIC_N; + vip->v_net = vnet_alloc(); vprocg = malloc(sizeof(struct vprocg), M_VPROCG, M_NOWAIT | M_ZERO); if (vprocg == NULL) @@ -643,16 +710,6 @@ vi_alloc(struct vimage *parent, char *name) vip->v_procg = vprocg; vprocg->vprocg_id = last_vprocg_id++; - /* Initialize / attach vnet module instances. */ - CURVNET_SET_QUIET(vnet); - TAILQ_FOREACH(vml, &vnet_modlink_head, vml_mod_le) - vnet_mod_constructor(vml); - CURVNET_RESTORE(); - - VNET_LIST_WLOCK(); - LIST_INSERT_HEAD(&vnet_head, vnet, vnet_le); - VNET_LIST_WUNLOCK(); - /* XXX locking */ LIST_INSERT_HEAD(&vprocg_head, vprocg, vprocg_le); @@ -666,10 +723,7 @@ vi_alloc(struct vimage *parent, char *name) static int vi_destroy(struct vimage *vip) { - struct vnet *vnet = vip->v_net; struct vprocg *vprocg = vip->v_procg; - struct ifnet *ifp, *nifp; - struct vnet_modlink *vml; /* XXX Beware of races -> more locking to be done... */ if (!LIST_EMPTY(&vip->vi_child_head)) @@ -678,9 +732,6 @@ vi_destroy(struct vimage *vip) if (vprocg->nprocs != 0) return (EBUSY); - if (vnet->sockcnt != 0) - return (EBUSY); - #ifdef INVARIANTS if (vip->vi_ucredrefc != 0) printf("vi_destroy: %s ucredrefc %d\n", @@ -688,33 +739,12 @@ vi_destroy(struct vimage *vip) #endif /* Point with no return - cleanup MUST succeed! */ + vnet_destroy(vip->v_net); + LIST_REMOVE(vip, vi_le); LIST_REMOVE(vip, vi_sibling); LIST_REMOVE(vprocg, vprocg_le); - VNET_LIST_WLOCK(); - LIST_REMOVE(vnet, vnet_le); - VNET_LIST_WUNLOCK(); - - CURVNET_SET_QUIET(vnet); - INIT_VNET_NET(vnet); - - /* Return all inherited interfaces to their parent vnets. */ - TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { - if (ifp->if_home_vnet != ifp->if_vnet) - vi_if_move(NULL, ifp, vip); - } - - /* Detach / free per-module state instances. */ - TAILQ_FOREACH_REVERSE(vml, &vnet_modlink_head, - vnet_modlink_head, vml_mod_le) - vnet_mod_destructor(vml); - - CURVNET_RESTORE(); - - /* Hopefully, we are OK to free the vnet container itself. */ - vnet->vnet_magic_n = 0xdeadbeef; - free(vnet, M_VNET); free(vprocg, M_VPROCG); free(vip, M_VIMAGE); @@ -745,7 +775,7 @@ vi_init(void *unused) * otherwise CURVNET_SET() macros would scream about unnecessary * curvnet recursions. */ - curvnet = LIST_FIRST(&vnet_head); + curvnet = prison0.pr_vnet = vnet0 = LIST_FIRST(&vnet_head); #endif } diff --git a/sys/net/if.c b/sys/net/if.c index baf6939..2679be3 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -2127,6 +2127,15 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) rt_ifannouncemsg(ifp, IFAN_ARRIVAL); break; +#ifdef VIMAGE + case SIOCSIFVNET: + error = priv_check(td, PRIV_NET_SETIFVNET); + if (error) + return (error); + error = vi_if_move(td, ifp, ifr->ifr_name, ifr->ifr_jid, NULL); + break; +#endif + case SIOCSIFMETRIC: error = priv_check(td, PRIV_NET_SETIFMETRIC); if (error) @@ -2313,14 +2322,19 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) switch (cmd) { #ifdef VIMAGE + case SIOCSIFRVNET: + error = priv_check(td, PRIV_NET_SETIFVNET); + if (error) + return (error); + return (vi_if_move(td, NULL, ifr->ifr_name, ifr->ifr_jid, + NULL)); /* * XXX vnet creation will be implemented through the new jail * framework - this is just a temporary hack for testing the * vnet create / destroy mechanisms. */ case SIOCSIFVIMAGE: - error = vi_if_move((struct vi_req *) data, NULL, - TD_TO_VIMAGE(td)); + error = vi_if_move(td, NULL, NULL, 0, (struct vi_req *) data); return (error); case SIOCSPVIMAGE: case SIOCGPVIMAGE: diff --git a/sys/net/if.h b/sys/net/if.h index b73b46f..857ab7f 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -296,6 +296,7 @@ struct ifreq { struct sockaddr ifru_broadaddr; short ifru_flags[2]; short ifru_index; + int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; @@ -308,6 +309,7 @@ struct ifreq { #define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */ #define ifr_flags ifr_ifru.ifru_flags[0] /* flags (low 16 bits) */ #define ifr_flagshigh ifr_ifru.ifru_flags[1] /* flags (high 16 bits) */ +#define ifr_jid ifr_ifru.ifru_jid /* jail/vnet */ #define ifr_metric ifr_ifru.ifru_metric /* metric */ #define ifr_mtu ifr_ifru.ifru_mtu /* mtu */ #define ifr_phys ifr_ifru.ifru_phys /* physical wire */ diff --git a/sys/sys/jail.h b/sys/sys/jail.h index 494a677..667ca9c 100644 --- a/sys/sys/jail.h +++ b/sys/sys/jail.h @@ -171,6 +171,7 @@ struct prison { char pr_domainname[MAXHOSTNAMELEN]; /* (p) jail domainname */ char pr_hostuuid[HOSTUUIDLEN]; /* (p) jail hostuuid */ unsigned long pr_hostid; /* (p) jail hostid */ + struct vnet *pr_vnet; /* (c) network stack */ }; #endif /* _KERNEL || _WANT_PRISON */ @@ -180,6 +181,7 @@ struct prison { #define PR_HOST 0x00000002 /* Virtualize hostname et al */ #define PR_IP4_USER 0x00000004 /* Virtualize IPv4 addresses */ #define PR_IP6_USER 0x00000008 /* Virtualize IPv6 addresses */ +#define PR_VNET 0x00000010 /* Virtual network stack */ /* Internal flag bits */ #define PR_REMOVE 0x01000000 /* In process of being removed */ diff --git a/sys/sys/priv.h b/sys/sys/priv.h index 7936de0..d82341f 100644 --- a/sys/sys/priv.h +++ b/sys/sys/priv.h @@ -325,6 +325,7 @@ #define PRIV_NET_DELIFADDR 414 /* Delete protocol addr on interface. */ #define PRIV_NET_LAGG 415 /* Administer lagg interface. */ #define PRIV_NET_GIF 416 /* Administer gif interface. */ +#define PRIV_NET_SETIFVNET 417 /* Move interface to vnet. */ /* * 802.11-related privileges. diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h index 5d92ec2..efaab91 100644 --- a/sys/sys/sockio.h +++ b/sys/sys/sockio.h @@ -108,6 +108,9 @@ #define SIOCGPRIVATE_0 _IOWR('i', 80, struct ifreq) /* device private 0 */ #define SIOCGPRIVATE_1 _IOWR('i', 81, struct ifreq) /* device private 1 */ +#define SIOCSIFVNET _IOWR('i', 90, struct ifreq) /* move IF jail/vnet */ +#define SIOCSIFRVNET _IOWR('i', 91, struct ifreq) /* reclaim vnet IF */ + #define SIOCSPVIMAGE _IOW('i', 101, struct vi_req) /* set proc vimage */ #define SIOCGPVIMAGE _IOWR('i', 102, struct vi_req) /* get proc vimage */ #define SIOCSIFVIMAGE _IOWR('i', 103, struct vi_req) /* set ifc vi/net */ diff --git a/sys/sys/vimage.h b/sys/sys/vimage.h index 28a244e..a2027ae 100644 --- a/sys/sys/vimage.h +++ b/sys/sys/vimage.h @@ -35,6 +35,9 @@ #include <sys/proc.h> #include <sys/queue.h> +#ifdef VIMAGE +#include <sys/jail.h> +#endif /* Interim userspace API. */ struct vi_req { @@ -69,11 +72,13 @@ struct vi_req { #define VNET_DEBUG #endif +struct vimage; struct vprocg; struct vnet; struct vi_req; struct ifnet; struct kld_sym_lookup; +struct thread; typedef int vnet_attach_fn(const void *); typedef int vnet_detach_fn(const void *); @@ -157,13 +162,16 @@ struct vnet_modlink { int vi_symlookup(struct kld_sym_lookup *, char *); int vi_td_ioctl(u_long, struct vi_req *, struct thread *); -int vi_if_move(struct vi_req *, struct ifnet *, struct vimage *); +int vi_if_move(struct thread *, struct ifnet *, char *, int, + struct vi_req *); int vi_child_of(struct vimage *, struct vimage *); struct vimage *vimage_by_name(struct vimage *, char *); void vnet_mod_register(const struct vnet_modinfo *); void vnet_mod_register_multi(const struct vnet_modinfo *, void *, char *); void vnet_mod_deregister(const struct vnet_modinfo *); void vnet_mod_deregister_multi(const struct vnet_modinfo *, void *, char *); +struct vnet *vnet_alloc(void); +void vnet_destroy(struct vnet *); #endif /* !VIMAGE_GLOBALS */ @@ -199,7 +207,7 @@ struct vimage { LIST_HEAD(, vimage) vi_child_head; /* direct offspring list */ struct vimage *vi_parent; /* ptr to parent vimage */ u_int vi_id; /* ID num */ - u_int vi_ucredrefc; /* # of ucreds pointing to us */ + volatile u_int vi_ucredrefc; /* # of ucreds pointing to us */ char vi_name[MAXHOSTNAMELEN]; struct vnet *v_net; struct vprocg *v_procg; @@ -209,7 +217,6 @@ struct vnet { void *mod_data[VNET_MOD_MAX]; LIST_ENTRY(vnet) vnet_le; /* all vnets list */ u_int vnet_magic_n; - u_int vnet_id; /* ID num */ u_int ifcnt; u_int sockcnt; }; @@ -319,20 +326,24 @@ extern struct vprocg_list_head vprocg_head; #ifdef VIMAGE #define IS_DEFAULT_VIMAGE(arg) ((arg)->vi_id == 0) -#define IS_DEFAULT_VNET(arg) ((arg)->vnet_id == 0) +#define IS_DEFAULT_VNET(arg) ((arg) == vnet0) #else #define IS_DEFAULT_VIMAGE(arg) 1 #define IS_DEFAULT_VNET(arg) 1 #endif #ifdef VIMAGE +#define CRED_TO_VNET(cr) \ + (IS_DEFAULT_VIMAGE((cr)->cr_vimage) ? (cr)->cr_prison->pr_vnet \ + : (cr)->cr_vimage->v_net) #define TD_TO_VIMAGE(td) (td)->td_ucred->cr_vimage -#define TD_TO_VNET(td) (td)->td_ucred->cr_vimage->v_net +#define TD_TO_VNET(td) CRED_TO_VNET((td)->td_ucred) #define TD_TO_VPROCG(td) (td)->td_ucred->cr_vimage->v_procg #define P_TO_VIMAGE(p) (p)->p_ucred->cr_vimage -#define P_TO_VNET(p) (p)->p_ucred->cr_vimage->v_net +#define P_TO_VNET(p) CRED_TO_VNET((p)->p_ucred) #define P_TO_VPROCG(p) (p)->p_ucred->cr_vimage->v_procg #else /* !VIMAGE */ +#define CRED_TO_VNET(cr) NULL #define TD_TO_VIMAGE(td) NULL #define TD_TO_VNET(td) NULL #define P_TO_VIMAGE(p) NULL |