diff options
35 files changed, 319 insertions, 63 deletions
@@ -22,6 +22,14 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.x IS SLOW: to maximize performance. (To disable malloc debugging, run ln -s aj /etc/malloc.conf.) +20090430: + The layout of the following structs has changed: sysctl_oid, + socket, ifnet, inpcbinfo, tcpcb, syncache_head, vnet_inet, + vnet_inet6 and vnet_ipfw. Most modules need to be rebuild or + panics may be experienced. World rebuild is required for + correctly checking networking state from userland. + Bump __FreeBSD_version to 800085. + 20090429: MLDv2 and Source-Specific Multicast (SSM) have been merged to the IPv6 stack. VIMAGE hooks are in but not yet used. diff --git a/sys/kern/kern_mib.c b/sys/kern/kern_mib.c index 80c1789..05f9031 100644 --- a/sys/kern/kern_mib.c +++ b/sys/kern/kern_mib.c @@ -208,9 +208,8 @@ static char machine_arch[] = MACHINE_ARCH; SYSCTL_STRING(_hw, HW_MACHINE_ARCH, machine_arch, CTLFLAG_RD, machine_arch, 0, "System architecture"); -#ifndef VIMAGE +/* should become #ifndef VIMAGE */ char hostname[MAXHOSTNAMELEN]; -#endif /* * This mutex is used to protect the hostname and domainname variables, and @@ -349,9 +348,8 @@ SYSCTL_PROC(_kern, OID_AUTO, conftxt, CTLTYPE_STRING|CTLFLAG_RW, 0, 0, sysctl_kern_config, "", "Kernel configuration file"); #endif -#ifndef VIMAGE +/* should become #ifndef VIMAGE */ char domainname[MAXHOSTNAMELEN]; /* Protected by hostname_mtx. */ -#endif static int sysctl_domainname(SYSCTL_HANDLER_ARGS) diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index d39db26..b69ac8f 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -934,6 +934,30 @@ sysctl_handle_int(SYSCTL_HANDLER_ARGS) return (error); } +#ifdef VIMAGE +int +sysctl_handle_v_int(SYSCTL_HANDLER_ARGS) +{ + int tmpout, error = 0; + + SYSCTL_RESOLVE_V_ARG1(); + + /* + * Attempt to get a coherent snapshot by making a copy of the data. + */ + tmpout = *(int *)arg1; + error = SYSCTL_OUT(req, &tmpout, sizeof(int)); + + if (error || !req->newptr) + return (error); + + if (!arg1) + error = EPERM; + else + error = SYSCTL_IN(req, arg1, sizeof(int)); + return (error); +} +#endif /* * Based on on sysctl_handle_int() convert milliseconds into ticks. @@ -944,7 +968,9 @@ sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS) { int error, s, tt; - tt = *(int *)oidp->oid_arg1; + SYSCTL_RESOLVE_V_ARG1(); + + tt = *(int *)arg1; s = (int)((int64_t)tt * 1000 / hz); error = sysctl_handle_int(oidp, &s, 0, req); @@ -955,7 +981,7 @@ sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS) if (tt < 1) return (EINVAL); - *(int *)oidp->oid_arg1 = tt; + *(int *)arg1 = tt; return (0); } @@ -1069,6 +1095,47 @@ retry: return (error); } +#ifdef VIMAGE +int +sysctl_handle_v_string(SYSCTL_HANDLER_ARGS) +{ + int error=0; + char *tmparg; + size_t outlen; + + SYSCTL_RESOLVE_V_ARG1(); + + /* + * Attempt to get a coherent snapshot by copying to a + * temporary kernel buffer. + */ +retry: + outlen = strlen((char *)arg1)+1; + tmparg = malloc(outlen, M_SYSCTLTMP, M_WAITOK); + + if (strlcpy(tmparg, (char *)arg1, outlen) >= outlen) { + free(tmparg, M_SYSCTLTMP); + goto retry; + } + + error = SYSCTL_OUT(req, tmparg, outlen); + free(tmparg, M_SYSCTLTMP); + + if (error || !req->newptr) + return (error); + + if ((req->newlen - req->newidx) >= arg2) { + error = EINVAL; + } else { + arg2 = (req->newlen - req->newidx); + error = SYSCTL_IN(req, arg1, arg2); + ((char *)arg1)[arg2] = '\0'; + } + + return (error); +} +#endif + /* * Handle any kind of opaque data. * arg1 points to it, arg2 is the size. @@ -1106,6 +1173,35 @@ retry: return (error); } +#ifdef VIMAGE +int +sysctl_handle_v_opaque(SYSCTL_HANDLER_ARGS) +{ + int error, tries; + u_int generation; + struct sysctl_req req2; + + SYSCTL_RESOLVE_V_ARG1(); + + tries = 0; + req2 = *req; +retry: + generation = curthread->td_generation; + error = SYSCTL_OUT(req, arg1, arg2); + if (error) + return (error); + tries++; + if (generation != curthread->td_generation && tries < 3) { + *req = req2; + goto retry; + } + + error = SYSCTL_IN(req, arg1, arg2); + + return (error); +} +#endif + /* * Transfer functions to/from kernel space. * XXX: rather untested at this point diff --git a/sys/kern/kern_vimage.c b/sys/kern/kern_vimage.c index 156efec..310e328 100644 --- a/sys/kern/kern_vimage.c +++ b/sys/kern/kern_vimage.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #ifndef VIMAGE_GLOBALS MALLOC_DEFINE(M_VIMAGE, "vimage", "vimage resource container"); +MALLOC_DEFINE(M_VNET, "vnet", "network stack control block"); static TAILQ_HEAD(vnet_modlink_head, vnet_modlink) vnet_modlink_head; static TAILQ_HEAD(vnet_modpending_head, vnet_modlink) vnet_modpending_head; @@ -49,6 +50,12 @@ static void vnet_mod_complete_registration(struct vnet_modlink *); static int vnet_mod_constructor(struct vnet_modlink *); static int vnet_mod_destructor(struct vnet_modlink *); +#ifdef VIMAGE +/* curvnet should be thread-local - this is only a temporary step. */ +struct vnet *curvnet; +struct vnet_list_head vnet_head; +#endif + void vnet_mod_register(const struct vnet_modinfo *vmi) { @@ -263,7 +270,14 @@ vi_symlookup(struct kld_sym_lookup *lookup, char *symstr) for (mapentry = vml->vml_modinfo->vmi_symmap; mapentry->name != NULL; mapentry++) { if (strcmp(symstr, mapentry->name) == 0) { - lookup->symvalue = (u_long) mapentry->base; +#ifdef VIMAGE + lookup->symvalue = + (u_long) curvnet->mod_data[ + vml->vml_modinfo->vmi_id]; + lookup->symvalue += mapentry->offset; +#else + lookup->symvalue = (u_long) mapentry->offset; +#endif lookup->symsize = mapentry->size; return (0); } @@ -275,9 +289,23 @@ vi_symlookup(struct kld_sym_lookup *lookup, char *symstr) static void vi_init(void *unused) { +#ifdef VIMAGE + struct vnet *vnet; +#endif TAILQ_INIT(&vnet_modlink_head); TAILQ_INIT(&vnet_modpending_head); + +#ifdef VIMAGE + LIST_INIT(&vnet_head); + + vnet = malloc(sizeof(struct vnet), M_VNET, M_NOWAIT | M_ZERO); + if (vnet == NULL) + panic("vi_alloc: malloc failed"); + LIST_INSERT_HEAD(&vnet_head, vnet, vnet_le); + + curvnet = LIST_FIRST(&vnet_head); +#endif } static void diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 9d9a731..a2f7c05 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -130,6 +130,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <sys/uio.h> #include <sys/jail.h> +#include <sys/vimage.h> #include <security/mac/mac_framework.h> @@ -284,6 +285,9 @@ soalloc(void) mtx_lock(&so_global_mtx); so->so_gencnt = ++so_gencnt; ++numopensockets; +#ifdef VIMAGE + so->so_vnet = curvnet; +#endif mtx_unlock(&so_global_mtx); return (so); } diff --git a/sys/net/if.c b/sys/net/if.c index 5c2e224..92bf8a6 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -186,6 +186,7 @@ static struct vnet_symmap vnet_net_symmap[] = { static const vnet_modinfo_t vnet_net_modinfo = { .vmi_id = VNET_MOD_NET, .vmi_name = "net", + .vmi_size = sizeof(struct vnet_net), .vmi_symmap = vnet_net_symmap, .vmi_iattach = vnet_net_iattach }; @@ -545,6 +546,7 @@ if_alloc(u_char type) static void if_free_internal(struct ifnet *ifp) { + INIT_VNET_NET(ifp->if_vnet); KASSERT((ifp->if_flags & IFF_DYING), ("if_free_internal: interface not dying")); @@ -582,7 +584,6 @@ if_free_internal(struct ifnet *ifp) void if_free_type(struct ifnet *ifp, u_char type) { - INIT_VNET_NET(curvnet); /* ifp->if_vnet can be NULL here ! */ KASSERT(ifp->if_alloctype == type, ("if_free_type: type (%d) != alloctype (%d)", type, @@ -673,6 +674,10 @@ if_attach(struct ifnet *ifp) panic ("%s: BUG: if_attach called without if_alloc'd input()\n", ifp->if_xname); +#ifdef VIMAGE + ifp->if_vnet = curvnet; +#endif + if_addgroup(ifp, IFG_ALL); getmicrotime(&ifp->if_lastchange); @@ -978,6 +983,9 @@ if_detach(struct ifnet *ifp) } IF_AFDATA_UNLOCK(ifp); ifq_detach(&ifp->if_snd); +#ifdef VIMAGE + ifp->if_vnet = NULL; +#endif splx(s); } diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c index a72d89c..50279a2 100644 --- a/sys/net/if_gif.c +++ b/sys/net/if_gif.c @@ -127,6 +127,7 @@ static int vnet_gif_iattach(const void *); static const vnet_modinfo_t vnet_gif_modinfo = { .vmi_id = VNET_MOD_GIF, .vmi_name = "gif", + .vmi_size = sizeof(struct vnet_gif), .vmi_dependson = VNET_MOD_NET, .vmi_iattach = vnet_gif_iattach }; @@ -303,8 +304,10 @@ gifmodevent(mod, type, data) if_clone_detach(&gif_cloner); mtx_destroy(&gif_mtx); #ifdef INET6 +#ifndef VIMAGE V_ip6_gif_hlim = 0; /* XXX -> vnet_gif_idetach() */ #endif +#endif break; default: return EOPNOTSUPP; diff --git a/sys/net/if_mib.c b/sys/net/if_mib.c index 9482eb9..4caa40f 100644 --- a/sys/net/if_mib.c +++ b/sys/net/if_mib.c @@ -77,7 +77,6 @@ SYSCTL_V_INT(V_NET, vnet_net, _net_link_generic_system, IFMIB_IFCOUNT, static int sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XXX bad syntax! */ { - INIT_VNET_NET(curvnet); int *name = (int *)arg1; int error; u_int namelen = arg2; diff --git a/sys/net/if_var.h b/sys/net/if_var.h index eb4986e..e6a6a26 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -117,6 +117,7 @@ struct ifqueue { struct ifnet { void *if_softc; /* pointer to driver state */ void *if_l2com; /* pointer to protocol bits */ + struct vnet *if_vnet; /* pointer to network stack instance */ TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */ char if_xname[IFNAMSIZ]; /* external name (name + unit) */ const char *if_dname; /* driver name */ diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 9ff531a..dbc5ca8 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -126,7 +126,9 @@ sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) INIT_VNET_INET(curvnet); int error; - error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); + SYSCTL_RESOLVE_V_ARG1(); + + error = sysctl_handle_int(oidp, arg1, arg2, req); if (error == 0) { RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1); diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 82b4126..c86f1ab 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -224,6 +224,8 @@ struct inpcb { #define in6p_icmp6filt inp_depend6.inp6_icmp6filt #define in6p_cksum inp_depend6.inp6_cksum +#define inp_vnet inp_pcbinfo->ipi_vnet + /* * The range of the generation count, as used in this implementation, is 9e19. * We would have to create 300 billion connections per second for this number @@ -301,8 +303,12 @@ struct inpcbinfo { struct rwlock ipi_lock; /* - * vimage 1 - * general use 1 + * Pointer to network stack instance + */ + struct vnet *ipi_vnet; + + /* + * general use 2 */ void *ipi_pspare[2]; }; diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index f88a8fd..3bd3049 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -162,6 +162,9 @@ div_init(void) INP_INFO_LOCK_INIT(&V_divcbinfo, "div"); LIST_INIT(&V_divcb); V_divcbinfo.ipi_listhead = &V_divcb; +#ifdef VIMAGE + V_divcbinfo.ipi_vnet = curvnet; +#endif /* * XXX We don't use the hash list for divert IP, but it's easier * to allocate a one entry hash list than it is to check all diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h index fa37a73..cfc3089 100644 --- a/sys/netinet/ip_fw.h +++ b/sys/netinet/ip_fw.h @@ -695,7 +695,6 @@ struct vnet_ipfw { int _fw_deny_unknown_exthdrs; int _fw_verbose; int _verbose_limit; - int _fw_debug; /* actually unused */ int _autoinc_step; ipfw_dyn_rule **_ipfw_dyn_v; uma_zone_t _ipfw_dyn_rule_zone; @@ -740,7 +739,6 @@ extern struct vnet_ipfw vnet_ipfw_0; #define V_fw_deny_unknown_exthdrs VNET_IPFW(fw_deny_unknown_exthdrs) #define V_fw_verbose VNET_IPFW(fw_verbose) #define V_verbose_limit VNET_IPFW(verbose_limit) -#define V_fw_debug VNET_IPFW(fw_debug) #define V_autoinc_step VNET_IPFW(autoinc_step) #define V_ipfw_dyn_v VNET_IPFW(ipfw_dyn_v) #define V_ipfw_dyn_rule_zone VNET_IPFW(ipfw_dyn_rule_zone) diff --git a/sys/netinet/ip_fw_pfil.c b/sys/netinet/ip_fw_pfil.c index c2f10b1..3064cd9 100644 --- a/sys/netinet/ip_fw_pfil.c +++ b/sys/netinet/ip_fw_pfil.c @@ -95,6 +95,7 @@ int ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, struct inpcb *inp) { + INIT_VNET_INET(curvnet); struct ip_fw_args args; struct ng_ipfw_tag *ng_tag; struct m_tag *dn_tag; @@ -224,6 +225,7 @@ int ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, struct inpcb *inp) { + INIT_VNET_INET(curvnet); struct ip_fw_args args; struct ng_ipfw_tag *ng_tag; struct m_tag *dn_tag; diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index a294d0a..7ee5a13 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -237,6 +237,7 @@ static void vnet_inet_register(void); static const vnet_modinfo_t vnet_inet_modinfo = { .vmi_id = VNET_MOD_INET, .vmi_name = "inet", + .vmi_size = sizeof(struct vnet_inet) }; static void vnet_inet_register() diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 0775168..695a9cb 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -187,6 +187,9 @@ rip_init(void) INP_INFO_LOCK_INIT(&V_ripcbinfo, "rip"); LIST_INIT(&V_ripcb); +#ifdef VIMAGE + V_ripcbinfo.ipi_vnet = curvnet; +#endif V_ripcbinfo.ipi_listhead = &V_ripcb; V_ripcbinfo.ipi_hashbase = hashinit(INP_PCBHASH_RAW_SIZE, M_PCB, &V_ripcbinfo.ipi_hashmask); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 9e92aab..c1dc4b3 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -359,6 +359,9 @@ tcp_init(void) INP_INFO_LOCK_INIT(&V_tcbinfo, "tcp"); LIST_INIT(&V_tcb); +#ifdef VIMAGE + V_tcbinfo.ipi_vnet = curvnet; +#endif V_tcbinfo.ipi_listhead = &V_tcb; hashsize = TCBHASHSIZE; TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize); @@ -703,6 +706,9 @@ tcp_newtcpcb(struct inpcb *inp) if (tm == NULL) return (NULL); tp = &tm->tcb; +#ifdef VIMAGE + tp->t_vnet = inp->inp_vnet; +#endif tp->t_timers = &tm->tt; /* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */ tp->t_maxseg = tp->t_maxopd = diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 18f3fb4..8e80842 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -259,6 +259,9 @@ syncache_init(void) /* Initialize the hash buckets. */ for (i = 0; i < V_tcp_syncache.hashsize; i++) { +#ifdef VIMAGE + V_tcp_syncache.hashbase[i].sch_vnet = curvnet; +#endif TAILQ_INIT(&V_tcp_syncache.hashbase[i].sch_bucket); mtx_init(&V_tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head", NULL, MTX_DEF); diff --git a/sys/netinet/tcp_syncache.h b/sys/netinet/tcp_syncache.h index c367d33..e4e3fac 100644 --- a/sys/netinet/tcp_syncache.h +++ b/sys/netinet/tcp_syncache.h @@ -96,6 +96,7 @@ struct syncache { #define SYNCOOKIE_LIFETIME 16 /* seconds */ struct syncache_head { + struct vnet *sch_vnet; struct mtx sch_mtx; TAILQ_HEAD(sch_head, syncache) sch_bucket; struct callout sch_timer; diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 268db7c..5dc840e 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -35,6 +35,8 @@ #include <netinet/tcp.h> +struct vnet; + /* * Kernel variables for tcp. */ @@ -106,6 +108,8 @@ struct tcpcb { int t_state; /* state of this connection */ u_int t_flags; + struct vnet *t_vnet; /* back pointer to parent vnet */ + tcp_seq snd_una; /* send unacknowledged */ tcp_seq snd_max; /* highest sequence number sent; * used to recognize retransmits @@ -186,8 +190,8 @@ struct tcpcb { int t_rttlow; /* smallest observerved RTT */ u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */ int rfbuf_cnt; /* recv buffer autoscaling byte count */ - void *t_pspare[3]; /* toe usrreqs / toepcb * / congestion algo / vimage / 1 general use */ - struct toe_usrreqs *t_tu; /* offload operations vector */ + void *t_pspare[3]; /* toe usrreqs / toepcb * / congestion algo / 1 general use */ + struct toe_usrreqs *t_tu; /* offload operations vector */ void *t_toe; /* TOE pcb pointer */ int t_bytes_acked; /* # bytes acked during current RTT */ }; diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index f2359eb..9aa83dd 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -179,6 +179,9 @@ udp_init(void) INP_INFO_LOCK_INIT(&V_udbinfo, "udp"); LIST_INIT(&V_udb); +#ifdef VIMAGE + V_udbinfo.ipi_vnet = curvnet; +#endif V_udbinfo.ipi_listhead = &V_udb; V_udbinfo.ipi_hashbase = hashinit(UDBHASHSIZE, M_PCB, &V_udbinfo.ipi_hashmask); diff --git a/sys/netinet/vinet.h b/sys/netinet/vinet.h index 20a36c5..0057eff 100644 --- a/sys/netinet/vinet.h +++ b/sys/netinet/vinet.h @@ -54,7 +54,6 @@ struct vnet_inet { struct in_ifaddrhashhead *_in_ifaddrhashtbl; struct in_ifaddrhead _in_ifaddrhead; u_long _in_ifaddrhmask; - struct in_multihead _in_multihead; /* XXX unused */ int _arpt_keep; int _arp_maxtries; @@ -269,7 +268,6 @@ extern struct vnet_inet vnet_inet_0; #define V_in_ifaddrhashtbl VNET_INET(in_ifaddrhashtbl) #define V_in_ifaddrhead VNET_INET(in_ifaddrhead) #define V_in_ifaddrhmask VNET_INET(in_ifaddrhmask) -#define V_in_multihead VNET_INET(in_multihead) #define V_ip_checkinterface VNET_INET(ip_checkinterface) #define V_ip_defttl VNET_INET(ip_defttl) #define V_ip_do_randomid VNET_INET(ip_do_randomid) diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c index 077014e..4738f91 100644 --- a/sys/netinet6/in6_ifattach.c +++ b/sys/netinet6/in6_ifattach.c @@ -888,8 +888,9 @@ in6_get_tmpifid(struct ifnet *ifp, u_int8_t *retbuf, } void -in6_tmpaddrtimer(void *ignored_arg) +in6_tmpaddrtimer(void *arg) { + CURVNET_SET((struct vnet *) arg); INIT_VNET_NET(curvnet); INIT_VNET_INET6(curvnet); struct nd_ifinfo *ndi; @@ -898,7 +899,7 @@ in6_tmpaddrtimer(void *ignored_arg) callout_reset(&V_in6_tmpaddrtimer_ch, (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor - - V_ip6_temp_regen_advance) * hz, in6_tmpaddrtimer, NULL); + V_ip6_temp_regen_advance) * hz, in6_tmpaddrtimer, curvnet); bzero(nullbuf, sizeof(nullbuf)); for (ifp = TAILQ_FIRST(&V_ifnet); ifp; @@ -914,12 +915,12 @@ in6_tmpaddrtimer(void *ignored_arg) } } + CURVNET_RESTORE(); } static void in6_purgemaddrs(struct ifnet *ifp) { - INIT_VNET_INET6(ifp->if_vnet); LIST_HEAD(,in6_multi) purgeinms; struct in6_multi *inm, *tinm; struct ifmultiaddr *ifma; diff --git a/sys/netinet6/in6_mcast.c b/sys/netinet6/in6_mcast.c index b3f272c..89fbedb 100644 --- a/sys/netinet6/in6_mcast.c +++ b/sys/netinet6/in6_mcast.c @@ -1302,7 +1302,6 @@ static int in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) { INIT_VNET_NET(curvnet); - INIT_VNET_INET6(curvnet); struct group_source_req gsr; sockunion_t *gsa, *ssa; struct ifnet *ifp; @@ -1463,6 +1462,7 @@ out_in6p_locked: static struct ip6_moptions * in6p_findmoptions(struct inpcb *inp) { + INIT_VNET_INET6(curvnet); struct ip6_moptions *imo; struct in6_multi **immp; struct in6_mfilter *imfp; @@ -1745,7 +1745,6 @@ static struct ifnet * in6p_lookup_mcast_ifp(const struct inpcb *in6p __unused, const struct sockaddr_in6 *gsin6) { - INIT_VNET_INET6(curvnet); struct route_in6 ro6; struct ifnet *ifp; @@ -2032,7 +2031,6 @@ static int in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) { INIT_VNET_NET(curvnet); - INIT_VNET_INET(curvnet); struct group_source_req gsr; sockunion_t *gsa, *ssa; struct ifnet *ifp; @@ -2249,7 +2247,6 @@ static int in6p_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) { INIT_VNET_NET(curvnet); - INIT_VNET_INET6(curvnet); struct ifnet *ifp; struct ip6_moptions *imo; u_int ifindex; @@ -2454,6 +2451,7 @@ out_in6p_locked: int ip6_setmoptions(struct inpcb *inp, struct sockopt *sopt) { + INIT_VNET_INET6(curvnet); struct ip6_moptions *im6o; int error; diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c index 622777c..c0b0b25 100644 --- a/sys/netinet6/in6_proto.c +++ b/sys/netinet6/in6_proto.c @@ -447,6 +447,8 @@ sysctl_ip6_temppltime(SYSCTL_HANDLER_ARGS) int error = 0; int old; + SYSCTL_RESOLVE_V_ARG1(); + error = SYSCTL_OUT(req, arg1, sizeof(int)); if (error || !req->newptr) return (error); @@ -467,6 +469,8 @@ sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS) int error = 0; int old; + SYSCTL_RESOLVE_V_ARG1(); + error = SYSCTL_OUT(req, arg1, sizeof(int)); if (error || !req->newptr) return (error); diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 69ac45c..b0cf344 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -161,6 +161,7 @@ static void vnet_inet6_register(void); static const vnet_modinfo_t vnet_inet6_modinfo = { .vmi_id = VNET_MOD_INET6, .vmi_name = "inet6", + .vmi_size = sizeof(struct vnet_inet6), .vmi_dependson = VNET_MOD_INET /* XXX revisit - TCP/UDP needs this? */ }; @@ -307,14 +308,14 @@ ip6_init2_vnet(const void *unused __unused) /* nd6_timer_init */ callout_init(&V_nd6_timer_ch, 0); - callout_reset(&V_nd6_timer_ch, hz, nd6_timer, NULL); + callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet); /* timer for regeneranation of temporary addresses randomize ID */ callout_init(&V_in6_tmpaddrtimer_ch, 0); callout_reset(&V_in6_tmpaddrtimer_ch, (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor - V_ip6_temp_regen_advance) * hz, - in6_tmpaddrtimer, NULL); + in6_tmpaddrtimer, curvnet); return (0); } diff --git a/sys/netinet6/mld6.c b/sys/netinet6/mld6.c index 17b1df8..4359322 100644 --- a/sys/netinet6/mld6.c +++ b/sys/netinet6/mld6.c @@ -435,7 +435,6 @@ mld_dispatch_queue(struct ifqueue *ifq, int limit) static __inline int mld_is_addr_reported(const struct in6_addr *addr) { - INIT_VNET_INET6(curvnet); KASSERT(IN6_IS_ADDR_MULTICAST(addr), ("%s: not multicast", __func__)); @@ -639,7 +638,6 @@ static int mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, const struct mld_hdr *mld) { - INIT_VNET_INET6(ifp->if_vnet); struct ifmultiaddr *ifma; struct mld_ifinfo *mli; struct in6_multi *inm; @@ -1034,7 +1032,6 @@ static int mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, const struct mld_hdr *mld) { - INIT_VNET_INET6(curvnet); struct in6_ifaddr *ia; struct in6_multi *inm; #ifdef KTR @@ -1646,7 +1643,6 @@ mld_slowtimo_vnet(void) static void mld_v1_process_querier_timers(struct mld_ifinfo *mli) { - INIT_VNET_INET6(curvnet); MLD_LOCK_ASSERT(); @@ -3009,7 +3005,6 @@ out: static struct mbuf * mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m) { - INIT_VNET_INET6(curvnet); struct mbuf *mh; struct mldv2_report *mld; struct ip6_hdr *ip6; diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index 387e77b..e387a7f 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -191,7 +191,7 @@ nd6_init(void) /* start timer */ callout_init(&V_nd6_slowtimo_ch, 0); callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, - nd6_slowtimo, NULL); + nd6_slowtimo, curvnet); nd6_init_done = 1; @@ -593,7 +593,7 @@ void nd6_timer(void *arg) { CURVNET_SET_QUIET((struct vnet *) arg); - INIT_VNET_INET6((struct vnet *) arg); + INIT_VNET_INET6(curvnet); int s; struct nd_defrouter *dr; struct nd_prefix *pr; @@ -601,7 +601,7 @@ nd6_timer(void *arg) struct in6_addrlifetime *lt6; callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz, - nd6_timer, NULL); + nd6_timer, curvnet); /* expire default router list */ s = splnet(); @@ -872,7 +872,6 @@ nd6_purge(struct ifnet *ifp) struct llentry * nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp) { - INIT_VNET_INET6(curvnet); struct sockaddr_in6 sin6; struct llentry *ln; int llflags = 0; @@ -1669,7 +1668,7 @@ nd6_slowtimo(void *arg) struct ifnet *ifp; callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, - nd6_slowtimo, NULL); + nd6_slowtimo, curvnet); IFNET_RLOCK(); for (ifp = TAILQ_FIRST(&V_ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) { diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c index c340ffd..43be628 100644 --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -651,6 +651,7 @@ rip6_attach(struct socket *so, int proto, struct thread *td) static void rip6_detach(struct socket *so) { + INIT_VNET_INET(so->so_vnet); INIT_VNET_INET6(so->so_vnet); struct inpcb *inp; diff --git a/sys/netinet6/vinet6.h b/sys/netinet6/vinet6.h index 76bbec9..2e96d04 100644 --- a/sys/netinet6/vinet6.h +++ b/sys/netinet6/vinet6.h @@ -54,8 +54,6 @@ struct vnet_inet6 { u_int _frag6_nfrags; struct ip6q _ip6q; - struct route_in6 _ip6_forward_rt; /* XXX remove */ - struct in6_addrpolicy _defaultaddrpolicy; TAILQ_HEAD(, addrsel_policyent) _addrsel_policytab; u_int _in6_maxmtu; @@ -122,10 +120,6 @@ struct vnet_inet6 { int _udp6_recvspace; int _ip6qmaxlen; int _ip6_prefer_tempaddr; - int _ip6_forward_srcrt; /* XXX remove */ - int _ip6_sourcecheck; /* XXX remove */ - int _ip6_sourcecheck_interval; /* XXX remove */ - int _ip6_ours_check_algorithm; /* XXX remove */ int _nd6_prune; int _nd6_delay; diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c index a76afd2..4124d9d 100644 --- a/sys/netipsec/ipsec.c +++ b/sys/netipsec/ipsec.c @@ -248,6 +248,7 @@ MALLOC_DEFINE(M_IPSEC_INPCB, "inpcbpolicy", "inpcb-resident ipsec policy"); static const vnet_modinfo_t vnet_ipsec_modinfo = { .vmi_id = VNET_MOD_IPSEC, .vmi_name = "ipsec", + .vmi_size = sizeof(struct vnet_ipsec), .vmi_dependson = VNET_MOD_INET, /* XXX revisit - INET6 ? */ .vmi_iattach = ipsec_iattach }; diff --git a/sys/sys/param.h b/sys/sys/param.h index 91f153b..fcf1a53 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -57,7 +57,7 @@ * is created, otherwise 1. */ #undef __FreeBSD_version -#define __FreeBSD_version 800084 /* Master, propagated to newvers */ +#define __FreeBSD_version 800085 /* Master, propagated to newvers */ #ifndef LOCORE #include <sys/types.h> diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index 8ce97e5..81e6b88 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -45,6 +45,8 @@ #include <sys/sockopt.h> #endif +struct vnet; + /* * Kernel structure per socket. * Contains send and receive buffer queues, @@ -72,6 +74,7 @@ struct socket { short so_state; /* (b) internal state flags SS_* */ int so_qstate; /* (e) internal state flags SQ_* */ void *so_pcb; /* protocol control block */ + struct vnet *so_vnet; /* network stack instance */ struct protosw *so_proto; /* (a) protocol handle */ /* * Variables for connection queuing. diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h index 7d0afb3..c6da7b9 100644 --- a/sys/sys/sysctl.h +++ b/sys/sys/sysctl.h @@ -163,6 +163,8 @@ struct sysctl_oid { const char *oid_fmt; int oid_refcnt; const char *oid_descr; + short oid_v_subs; + short oid_v_mod; }; #define SYSCTL_IN(r, p, l) (r->newfunc)(r, p, l) @@ -292,7 +294,8 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry); #ifdef VIMAGE #define SYSCTL_V_INT(subs, mod, parent, nbr, name, access, sym, val, descr) \ - SYSCTL_V_OID(subs, mod, parent, nbr, name, CTLTYPE_INT|(access), \ + SYSCTL_V_OID(subs, mod, parent, nbr, name, \ + CTLTYPE_INT|CTLFLAG_MPSAFE|(access), \ sym, val, sysctl_handle_v_int, "I", descr) #else #ifdef VIMAGE_GLOBALS @@ -317,7 +320,8 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry); #ifdef VIMAGE #define SYSCTL_V_UINT(subs, mod, parent, nbr, name, access, sym, val, descr) \ - SYSCTL_V_OID(subs, mod, parent, nbr, name, CTLTYPE_UINT|(access), \ + SYSCTL_V_OID(subs, mod, parent, nbr, name, \ + CTLTYPE_UINT|CTLFLAG_MPSAFE|(access), \ sym, val, sysctl_handle_v_int, "IU", descr) #else #ifdef VIMAGE_GLOBALS @@ -440,6 +444,29 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry); #define FEATURE(name, desc) \ SYSCTL_INT(_kern_features, OID_AUTO, name, CTLFLAG_RD, 0, 1, desc) +/* + * Resolve void *arg1 in a proper virtualization container. + */ +#ifdef VIMAGE +#define SYSCTL_RESOLVE_V_ARG1() do { \ + char *cp; \ + switch (oidp->oid_v_subs) { \ + case V_GLOBAL: \ + /* do nothing - this is NOT a virtualized variable! */ \ + break; \ + case V_NET: \ + cp = (char *) \ + TD_TO_VNET(curthread)->mod_data[oidp->oid_v_mod]; \ + arg1 = cp + (size_t) arg1; \ + break; \ + default: \ + panic("unsupported module id %d", oidp->oid_v_subs); \ + } \ +} while (0) +#else +#define SYSCTL_RESOLVE_V_ARG1() +#endif + #endif /* _KERNEL */ /* diff --git a/sys/sys/vimage.h b/sys/sys/vimage.h index 013d8bc..b5edd78 100644 --- a/sys/sys/vimage.h +++ b/sys/sys/vimage.h @@ -39,6 +39,10 @@ #error "You cannot have both option VIMAGE and option VIMAGE_GLOBALS!" #endif +#ifdef INVARIANTS +#define VNET_DEBUG +#endif + typedef int vnet_attach_fn(const void *); typedef int vnet_detach_fn(const void *); @@ -48,8 +52,8 @@ struct kld_sym_lookup; struct vnet_symmap { char *name; - void *base; - size_t size; + size_t offset; + size_t size; }; typedef struct vnet_symmap vnet_symmap_t; @@ -59,7 +63,7 @@ struct vnet_modinfo { char *vmi_name; vnet_attach_fn *vmi_iattach; vnet_detach_fn *vmi_idetach; - size_t vmi_struct_size; + size_t vmi_size; struct vnet_symmap *vmi_symmap; }; typedef struct vnet_modinfo vnet_modinfo_t; @@ -71,13 +75,7 @@ struct vnet_modlink { const char *vml_iname; }; -#define VNET_SYMMAP(mod, name) \ - { #name, &(vnet_ ## mod ## _0._ ## name), \ - sizeof(vnet_ ## mod ## _0._ ## name) } - -#define VNET_SYMMAP_END { NULL, 0 } - -/* stateful modules */ +/* Stateful modules. */ #define VNET_MOD_NET 0 /* MUST be 0 - implicit dependency */ #define VNET_MOD_NETGRAPH 1 #define VNET_MOD_INET 2 @@ -93,7 +91,7 @@ struct vnet_modlink { #define VNET_MOD_IGMP 12 #define VNET_MOD_MLD 13 -/* stateless modules */ +/* Stateless modules. */ #define VNET_MOD_NG_ETHER 20 #define VNET_MOD_NG_IFACE 21 #define VNET_MOD_NG_EIFACE 22 @@ -109,7 +107,11 @@ struct vnet_modlink { #define VNET_MOD_DYNAMIC_START 32 #define VNET_MOD_MAX 64 -/* Sysctl virtualization macros need these name mappings bellow */ +/* Major module IDs for vimage sysctl virtualization. */ +#define V_GLOBAL 0 /* global variable - no indirection */ +#define V_NET 1 + +/* Name mappings for minor module IDs in vimage sysctl virtualization. */ #define V_MOD_vnet_net VNET_MOD_NET #define V_MOD_vnet_netgraph VNET_MOD_NETGRAPH #define V_MOD_vnet_inet VNET_MOD_INET @@ -131,27 +133,82 @@ void vnet_mod_deregister_multi(const struct vnet_modinfo *, void *, char *); #define VSYM(base, sym) (sym) #else #ifdef VIMAGE -#error "No option VIMAGE yet!" +#define VSYM(base, sym) ((base)->_ ## sym) #else #define VSYM(base, sym) (base ## _0._ ## sym) #endif #endif +#ifndef VIMAGE_GLOBALS +#ifdef VIMAGE +/* + * Casted NULL hack is needed for harvesting sizeofs() of fields inside + * struct vnet_* containers at compile time. + */ +#define VNET_SYMMAP(mod, name) \ + { #name, offsetof(struct vnet_ ## mod, _ ## name), \ + sizeof(((struct vnet_ ## mod *) NULL)->_ ## name) } +#else +#define VNET_SYMMAP(mod, name) \ + { #name, (size_t) &(vnet_ ## mod ## _0._ ## name), \ + sizeof(vnet_ ## mod ## _0._ ## name) } +#endif +#define VNET_SYMMAP_END { NULL, 0 } +#endif /* !VIMAGE_GLOBALS */ + +#ifdef VIMAGE +struct vnet { + void *mod_data[VNET_MOD_MAX]; + LIST_ENTRY(vnet) vnet_le; /* all vnets list */ + u_int vnet_magic_n; +}; +#endif + +#ifdef VIMAGE +extern struct vnet *curvnet; /* XXX will become thread-local soon */ +#else +#define curvnet NULL +#endif + +#ifdef VIMAGE +#ifdef VNET_DEBUG +#define INIT_FROM_VNET(vnet, modindex, modtype, sym) \ + if (vnet == NULL || vnet != curvnet) \ + panic("in %s:%d %s()\n vnet=%p curvnet=%p", \ + __FILE__, __LINE__, __FUNCTION__, \ + vnet, curvnet); \ + modtype *sym = (vnet)->mod_data[modindex]; +#else /* !VNET_DEBUG */ +#define INIT_FROM_VNET(vnet, modindex, modtype, sym) \ + modtype *sym = (vnet)->mod_data[modindex]; +#endif /* !VNET_DEBUG */ +#else /* !VIMAGE */ +#define INIT_FROM_VNET(vnet, modindex, modtype, sym) +#endif + +#ifdef VIMAGE +LIST_HEAD(vnet_list_head, vnet); +extern struct vnet_list_head vnet_head; +#define VNET_ITERATOR_DECL(arg) struct vnet *arg; +#define VNET_FOREACH(arg) LIST_FOREACH(arg, &vnet_head, vnet_le) +#else +#define VNET_ITERATOR_DECL(arg) +#define VNET_FOREACH(arg) +#endif + +#define TD_TO_VNET(td) curvnet + /* Non-VIMAGE null-macros */ #define IS_DEFAULT_VNET(arg) 1 #define CURVNET_SET(arg) #define CURVNET_SET_QUIET(arg) #define CURVNET_RESTORE() #define VNET_ASSERT(condition) -#define INIT_FROM_VNET(vnet, modindex, modtype, sym) -#define VNET_ITERATOR_DECL(arg) -#define VNET_FOREACH(arg) #define VNET_LIST_RLOCK() #define VNET_LIST_RUNLOCK() #define INIT_VPROCG(arg) #define INIT_VCPU(arg) #define TD_TO_VIMAGE(td) -#define TD_TO_VNET(td) #define TD_TO_VPROCG(td) #define TD_TO_VCPU(td) #define P_TO_VIMAGE(p) |