summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--UPDATING8
-rw-r--r--sys/kern/kern_mib.c6
-rw-r--r--sys/kern/kern_sysctl.c100
-rw-r--r--sys/kern/kern_vimage.c30
-rw-r--r--sys/kern/uipc_socket.c4
-rw-r--r--sys/net/if.c10
-rw-r--r--sys/net/if_gif.c3
-rw-r--r--sys/net/if_mib.c1
-rw-r--r--sys/net/if_var.h1
-rw-r--r--sys/netinet/in_pcb.c4
-rw-r--r--sys/netinet/in_pcb.h10
-rw-r--r--sys/netinet/ip_divert.c3
-rw-r--r--sys/netinet/ip_fw.h2
-rw-r--r--sys/netinet/ip_fw_pfil.c2
-rw-r--r--sys/netinet/ip_input.c1
-rw-r--r--sys/netinet/raw_ip.c3
-rw-r--r--sys/netinet/tcp_subr.c6
-rw-r--r--sys/netinet/tcp_syncache.c3
-rw-r--r--sys/netinet/tcp_syncache.h1
-rw-r--r--sys/netinet/tcp_var.h8
-rw-r--r--sys/netinet/udp_usrreq.c3
-rw-r--r--sys/netinet/vinet.h2
-rw-r--r--sys/netinet6/in6_ifattach.c7
-rw-r--r--sys/netinet6/in6_mcast.c6
-rw-r--r--sys/netinet6/in6_proto.c4
-rw-r--r--sys/netinet6/ip6_input.c5
-rw-r--r--sys/netinet6/mld6.c5
-rw-r--r--sys/netinet6/nd6.c9
-rw-r--r--sys/netinet6/raw_ip6.c1
-rw-r--r--sys/netinet6/vinet6.h6
-rw-r--r--sys/netipsec/ipsec.c1
-rw-r--r--sys/sys/param.h2
-rw-r--r--sys/sys/socketvar.h3
-rw-r--r--sys/sys/sysctl.h31
-rw-r--r--sys/sys/vimage.h91
35 files changed, 319 insertions, 63 deletions
diff --git a/UPDATING b/UPDATING
index 270ef53..7fffa1b 100644
--- a/UPDATING
+++ b/UPDATING
@@ -22,6 +22,14 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.x IS SLOW:
to maximize performance. (To disable malloc debugging, run
ln -s aj /etc/malloc.conf.)
+20090430:
+ The layout of the following structs has changed: sysctl_oid,
+ socket, ifnet, inpcbinfo, tcpcb, syncache_head, vnet_inet,
+ vnet_inet6 and vnet_ipfw. Most modules need to be rebuilt or
+ panics may be experienced. World rebuild is required for
+ correctly checking networking state from userland.
+ Bump __FreeBSD_version to 800085.
+
20090429:
MLDv2 and Source-Specific Multicast (SSM) have been merged
to the IPv6 stack. VIMAGE hooks are in but not yet used.
diff --git a/sys/kern/kern_mib.c b/sys/kern/kern_mib.c
index 80c1789..05f9031 100644
--- a/sys/kern/kern_mib.c
+++ b/sys/kern/kern_mib.c
@@ -208,9 +208,8 @@ static char machine_arch[] = MACHINE_ARCH;
SYSCTL_STRING(_hw, HW_MACHINE_ARCH, machine_arch, CTLFLAG_RD,
machine_arch, 0, "System architecture");
-#ifndef VIMAGE
+/* should become #ifndef VIMAGE */
char hostname[MAXHOSTNAMELEN];
-#endif
/*
* This mutex is used to protect the hostname and domainname variables, and
@@ -349,9 +348,8 @@ SYSCTL_PROC(_kern, OID_AUTO, conftxt, CTLTYPE_STRING|CTLFLAG_RW,
0, 0, sysctl_kern_config, "", "Kernel configuration file");
#endif
-#ifndef VIMAGE
+/* should become #ifndef VIMAGE */
char domainname[MAXHOSTNAMELEN]; /* Protected by hostname_mtx. */
-#endif
static int
sysctl_domainname(SYSCTL_HANDLER_ARGS)
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
index d39db26..b69ac8f 100644
--- a/sys/kern/kern_sysctl.c
+++ b/sys/kern/kern_sysctl.c
@@ -934,6 +934,30 @@ sysctl_handle_int(SYSCTL_HANDLER_ARGS)
return (error);
}
+#ifdef VIMAGE
+/*
+ * Handle an int that may live in a virtualized container.
+ * arg1 is resolved by SYSCTL_RESOLVE_V_ARG1() before it is used.
+ * The current value is copied out first; if the request carries a new
+ * value it is then copied in over arg1.  A NULL arg1 has no backing
+ * variable: arg2 is reported as the value and writes fail with EPERM.
+ */
+int
+sysctl_handle_v_int(SYSCTL_HANDLER_ARGS)
+{
+ int tmpout, error = 0;
+
+ SYSCTL_RESOLVE_V_ARG1();
+
+ /*
+ * Attempt to get a coherent snapshot by making a copy of the data.
+ * Check arg1 before dereferencing it so that the EPERM path below
+ * is reachable instead of faulting on a NULL pointer here.
+ */
+ if (arg1)
+ tmpout = *(int *)arg1;
+ else
+ tmpout = arg2;
+ error = SYSCTL_OUT(req, &tmpout, sizeof(int));
+
+ if (error || !req->newptr)
+ return (error);
+
+ if (!arg1)
+ error = EPERM;
+ else
+ error = SYSCTL_IN(req, arg1, sizeof(int));
+ return (error);
+}
+#endif
/*
* Based on on sysctl_handle_int() convert milliseconds into ticks.
@@ -944,7 +968,9 @@ sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
{
int error, s, tt;
- tt = *(int *)oidp->oid_arg1;
+ SYSCTL_RESOLVE_V_ARG1();
+
+ tt = *(int *)arg1;
s = (int)((int64_t)tt * 1000 / hz);
error = sysctl_handle_int(oidp, &s, 0, req);
@@ -955,7 +981,7 @@ sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
if (tt < 1)
return (EINVAL);
- *(int *)oidp->oid_arg1 = tt;
+ *(int *)arg1 = tt;
return (0);
}
@@ -1069,6 +1095,47 @@ retry:
return (error);
}
+#ifdef VIMAGE
+/*
+ * Handle a string that may live in a virtualized container.
+ * arg1 is resolved by SYSCTL_RESOLVE_V_ARG1() and then points to the
+ * string.  The current string is copied out; if the request carries a
+ * new value, it must be shorter than arg2 bytes or EINVAL is returned.
+ */
+int
+sysctl_handle_v_string(SYSCTL_HANDLER_ARGS)
+{
+ int error=0;
+ char *tmparg;
+ size_t outlen;
+
+ SYSCTL_RESOLVE_V_ARG1();
+
+ /*
+ * Attempt to get a coherent snapshot by copying to a
+ * temporary kernel buffer.
+ */
+retry:
+ outlen = strlen((char *)arg1)+1;
+ tmparg = malloc(outlen, M_SYSCTLTMP, M_WAITOK);
+
+ /*
+ * strlcpy() returns the length of the source; a result >= outlen
+ * means the string grew after strlen() measured it, so start over.
+ */
+ if (strlcpy(tmparg, (char *)arg1, outlen) >= outlen) {
+ free(tmparg, M_SYSCTLTMP);
+ goto retry;
+ }
+
+ error = SYSCTL_OUT(req, tmparg, outlen);
+ free(tmparg, M_SYSCTLTMP);
+
+ /* Done unless the copy-out succeeded and a new value was supplied. */
+ if (error || !req->newptr)
+ return (error);
+
+ if ((req->newlen - req->newidx) >= arg2) {
+ error = EINVAL;
+ } else {
+ /* Copy in the remaining new bytes and terminate the string. */
+ arg2 = (req->newlen - req->newidx);
+ error = SYSCTL_IN(req, arg1, arg2);
+ /* NOTE(review): the terminator is written even if SYSCTL_IN() failed. */
+ ((char *)arg1)[arg2] = '\0';
+ }
+
+ return (error);
+}
+#endif
+
/*
* Handle any kind of opaque data.
* arg1 points to it, arg2 is the size.
@@ -1106,6 +1173,35 @@ retry:
return (error);
}
+#ifdef VIMAGE
+/*
+ * Handle opaque data that may live in a virtualized container.
+ * arg1 is resolved by SYSCTL_RESOLVE_V_ARG1() and then points to the
+ * data; arg2 is its size.
+ */
+int
+sysctl_handle_v_opaque(SYSCTL_HANDLER_ARGS)
+{
+ int error, tries;
+ u_int generation;
+ struct sysctl_req req2;
+
+ SYSCTL_RESOLVE_V_ARG1();
+
+ tries = 0;
+ /* Save the request so that a retry can restart the copy-out. */
+ req2 = *req;
+retry:
+ generation = curthread->td_generation;
+ error = SYSCTL_OUT(req, arg1, arg2);
+ if (error)
+ return (error);
+ tries++;
+ /*
+ * Redo the copy-out, at most three attempts in total, if
+ * td_generation changed while copying.
+ * NOTE(review): presumably a change means the thread was switched
+ * out mid-copy and the snapshot may be incoherent - confirm.
+ */
+ if (generation != curthread->td_generation && tries < 3) {
+ *req = req2;
+ goto retry;
+ }
+
+ /* SYSCTL_IN() is called unconditionally here. */
+ error = SYSCTL_IN(req, arg1, arg2);
+
+ return (error);
+}
+#endif
+
/*
* Transfer functions to/from kernel space.
* XXX: rather untested at this point
diff --git a/sys/kern/kern_vimage.c b/sys/kern/kern_vimage.c
index 156efec..310e328 100644
--- a/sys/kern/kern_vimage.c
+++ b/sys/kern/kern_vimage.c
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#ifndef VIMAGE_GLOBALS
MALLOC_DEFINE(M_VIMAGE, "vimage", "vimage resource container");
+MALLOC_DEFINE(M_VNET, "vnet", "network stack control block");
static TAILQ_HEAD(vnet_modlink_head, vnet_modlink) vnet_modlink_head;
static TAILQ_HEAD(vnet_modpending_head, vnet_modlink) vnet_modpending_head;
@@ -49,6 +50,12 @@ static void vnet_mod_complete_registration(struct vnet_modlink *);
static int vnet_mod_constructor(struct vnet_modlink *);
static int vnet_mod_destructor(struct vnet_modlink *);
+#ifdef VIMAGE
+/* curvnet should be thread-local - this is only a temporary step. */
+struct vnet *curvnet;
+struct vnet_list_head vnet_head;
+#endif
+
void
vnet_mod_register(const struct vnet_modinfo *vmi)
{
@@ -263,7 +270,14 @@ vi_symlookup(struct kld_sym_lookup *lookup, char *symstr)
for (mapentry = vml->vml_modinfo->vmi_symmap;
mapentry->name != NULL; mapentry++) {
if (strcmp(symstr, mapentry->name) == 0) {
- lookup->symvalue = (u_long) mapentry->base;
+#ifdef VIMAGE
+ lookup->symvalue =
+ (u_long) curvnet->mod_data[
+ vml->vml_modinfo->vmi_id];
+ lookup->symvalue += mapentry->offset;
+#else
+ lookup->symvalue = (u_long) mapentry->offset;
+#endif
lookup->symsize = mapentry->size;
return (0);
}
@@ -275,9 +289,23 @@ vi_symlookup(struct kld_sym_lookup *lookup, char *symstr)
+/*
+ * Initialize the vnet module link lists.  With options VIMAGE, also
+ * set up the list of vnets, allocate the first (zeroed) vnet, insert
+ * it on that list and make it curvnet.  Panics if the allocation fails.
+ */
static void
vi_init(void *unused)
{
+#ifdef VIMAGE
+ struct vnet *vnet;
+#endif
TAILQ_INIT(&vnet_modlink_head);
TAILQ_INIT(&vnet_modpending_head);
+
+#ifdef VIMAGE
+ LIST_INIT(&vnet_head);
+
+ vnet = malloc(sizeof(struct vnet), M_VNET, M_NOWAIT | M_ZERO);
+ if (vnet == NULL)
+ panic("vi_init: malloc failed");
+ LIST_INSERT_HEAD(&vnet_head, vnet, vnet_le);
+
+ curvnet = LIST_FIRST(&vnet_head);
+#endif
}
static void
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 9d9a731..a2f7c05 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -130,6 +130,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/jail.h>
+#include <sys/vimage.h>
#include <security/mac/mac_framework.h>
@@ -284,6 +285,9 @@ soalloc(void)
mtx_lock(&so_global_mtx);
so->so_gencnt = ++so_gencnt;
++numopensockets;
+#ifdef VIMAGE
+ so->so_vnet = curvnet;
+#endif
mtx_unlock(&so_global_mtx);
return (so);
}
diff --git a/sys/net/if.c b/sys/net/if.c
index 5c2e224..92bf8a6 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -186,6 +186,7 @@ static struct vnet_symmap vnet_net_symmap[] = {
static const vnet_modinfo_t vnet_net_modinfo = {
.vmi_id = VNET_MOD_NET,
.vmi_name = "net",
+ .vmi_size = sizeof(struct vnet_net),
.vmi_symmap = vnet_net_symmap,
.vmi_iattach = vnet_net_iattach
};
@@ -545,6 +546,7 @@ if_alloc(u_char type)
static void
if_free_internal(struct ifnet *ifp)
{
+ INIT_VNET_NET(ifp->if_vnet);
KASSERT((ifp->if_flags & IFF_DYING),
("if_free_internal: interface not dying"));
@@ -582,7 +584,6 @@ if_free_internal(struct ifnet *ifp)
void
if_free_type(struct ifnet *ifp, u_char type)
{
- INIT_VNET_NET(curvnet); /* ifp->if_vnet can be NULL here ! */
KASSERT(ifp->if_alloctype == type,
("if_free_type: type (%d) != alloctype (%d)", type,
@@ -673,6 +674,10 @@ if_attach(struct ifnet *ifp)
panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
ifp->if_xname);
+#ifdef VIMAGE
+ ifp->if_vnet = curvnet;
+#endif
+
if_addgroup(ifp, IFG_ALL);
getmicrotime(&ifp->if_lastchange);
@@ -978,6 +983,9 @@ if_detach(struct ifnet *ifp)
}
IF_AFDATA_UNLOCK(ifp);
ifq_detach(&ifp->if_snd);
+#ifdef VIMAGE
+ ifp->if_vnet = NULL;
+#endif
splx(s);
}
diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c
index a72d89c..50279a2 100644
--- a/sys/net/if_gif.c
+++ b/sys/net/if_gif.c
@@ -127,6 +127,7 @@ static int vnet_gif_iattach(const void *);
static const vnet_modinfo_t vnet_gif_modinfo = {
.vmi_id = VNET_MOD_GIF,
.vmi_name = "gif",
+ .vmi_size = sizeof(struct vnet_gif),
.vmi_dependson = VNET_MOD_NET,
.vmi_iattach = vnet_gif_iattach
};
@@ -303,8 +304,10 @@ gifmodevent(mod, type, data)
if_clone_detach(&gif_cloner);
mtx_destroy(&gif_mtx);
#ifdef INET6
+#ifndef VIMAGE
V_ip6_gif_hlim = 0; /* XXX -> vnet_gif_idetach() */
#endif
+#endif
break;
default:
return EOPNOTSUPP;
diff --git a/sys/net/if_mib.c b/sys/net/if_mib.c
index 9482eb9..4caa40f 100644
--- a/sys/net/if_mib.c
+++ b/sys/net/if_mib.c
@@ -77,7 +77,6 @@ SYSCTL_V_INT(V_NET, vnet_net, _net_link_generic_system, IFMIB_IFCOUNT,
static int
sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XXX bad syntax! */
{
- INIT_VNET_NET(curvnet);
int *name = (int *)arg1;
int error;
u_int namelen = arg2;
diff --git a/sys/net/if_var.h b/sys/net/if_var.h
index eb4986e..e6a6a26 100644
--- a/sys/net/if_var.h
+++ b/sys/net/if_var.h
@@ -117,6 +117,7 @@ struct ifqueue {
struct ifnet {
void *if_softc; /* pointer to driver state */
void *if_l2com; /* pointer to protocol bits */
+ struct vnet *if_vnet; /* pointer to network stack instance */
TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */
char if_xname[IFNAMSIZ]; /* external name (name + unit) */
const char *if_dname; /* driver name */
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 9ff531a..dbc5ca8 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -126,7 +126,9 @@ sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
INIT_VNET_INET(curvnet);
int error;
- error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
+ SYSCTL_RESOLVE_V_ARG1();
+
+ error = sysctl_handle_int(oidp, arg1, arg2, req);
if (error == 0) {
RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 82b4126..c86f1ab 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -224,6 +224,8 @@ struct inpcb {
#define in6p_icmp6filt inp_depend6.inp6_icmp6filt
#define in6p_cksum inp_depend6.inp6_cksum
+#define inp_vnet inp_pcbinfo->ipi_vnet
+
/*
* The range of the generation count, as used in this implementation, is 9e19.
* We would have to create 300 billion connections per second for this number
@@ -301,8 +303,12 @@ struct inpcbinfo {
struct rwlock ipi_lock;
/*
- * vimage 1
- * general use 1
+ * Pointer to network stack instance
+ */
+ struct vnet *ipi_vnet;
+
+ /*
+ * general use 2
*/
void *ipi_pspare[2];
};
diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c
index f88a8fd..3bd3049 100644
--- a/sys/netinet/ip_divert.c
+++ b/sys/netinet/ip_divert.c
@@ -162,6 +162,9 @@ div_init(void)
INP_INFO_LOCK_INIT(&V_divcbinfo, "div");
LIST_INIT(&V_divcb);
V_divcbinfo.ipi_listhead = &V_divcb;
+#ifdef VIMAGE
+ V_divcbinfo.ipi_vnet = curvnet;
+#endif
/*
* XXX We don't use the hash list for divert IP, but it's easier
* to allocate a one entry hash list than it is to check all
diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h
index fa37a73..cfc3089 100644
--- a/sys/netinet/ip_fw.h
+++ b/sys/netinet/ip_fw.h
@@ -695,7 +695,6 @@ struct vnet_ipfw {
int _fw_deny_unknown_exthdrs;
int _fw_verbose;
int _verbose_limit;
- int _fw_debug; /* actually unused */
int _autoinc_step;
ipfw_dyn_rule **_ipfw_dyn_v;
uma_zone_t _ipfw_dyn_rule_zone;
@@ -740,7 +739,6 @@ extern struct vnet_ipfw vnet_ipfw_0;
#define V_fw_deny_unknown_exthdrs VNET_IPFW(fw_deny_unknown_exthdrs)
#define V_fw_verbose VNET_IPFW(fw_verbose)
#define V_verbose_limit VNET_IPFW(verbose_limit)
-#define V_fw_debug VNET_IPFW(fw_debug)
#define V_autoinc_step VNET_IPFW(autoinc_step)
#define V_ipfw_dyn_v VNET_IPFW(ipfw_dyn_v)
#define V_ipfw_dyn_rule_zone VNET_IPFW(ipfw_dyn_rule_zone)
diff --git a/sys/netinet/ip_fw_pfil.c b/sys/netinet/ip_fw_pfil.c
index c2f10b1..3064cd9 100644
--- a/sys/netinet/ip_fw_pfil.c
+++ b/sys/netinet/ip_fw_pfil.c
@@ -95,6 +95,7 @@ int
ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
struct inpcb *inp)
{
+ INIT_VNET_INET(curvnet);
struct ip_fw_args args;
struct ng_ipfw_tag *ng_tag;
struct m_tag *dn_tag;
@@ -224,6 +225,7 @@ int
ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
struct inpcb *inp)
{
+ INIT_VNET_INET(curvnet);
struct ip_fw_args args;
struct ng_ipfw_tag *ng_tag;
struct m_tag *dn_tag;
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index a294d0a..7ee5a13 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -237,6 +237,7 @@ static void vnet_inet_register(void);
static const vnet_modinfo_t vnet_inet_modinfo = {
.vmi_id = VNET_MOD_INET,
.vmi_name = "inet",
+ .vmi_size = sizeof(struct vnet_inet)
};
static void vnet_inet_register()
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index 0775168..695a9cb 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -187,6 +187,9 @@ rip_init(void)
INP_INFO_LOCK_INIT(&V_ripcbinfo, "rip");
LIST_INIT(&V_ripcb);
+#ifdef VIMAGE
+ V_ripcbinfo.ipi_vnet = curvnet;
+#endif
V_ripcbinfo.ipi_listhead = &V_ripcb;
V_ripcbinfo.ipi_hashbase =
hashinit(INP_PCBHASH_RAW_SIZE, M_PCB, &V_ripcbinfo.ipi_hashmask);
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 9e92aab..c1dc4b3 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -359,6 +359,9 @@ tcp_init(void)
INP_INFO_LOCK_INIT(&V_tcbinfo, "tcp");
LIST_INIT(&V_tcb);
+#ifdef VIMAGE
+ V_tcbinfo.ipi_vnet = curvnet;
+#endif
V_tcbinfo.ipi_listhead = &V_tcb;
hashsize = TCBHASHSIZE;
TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize);
@@ -703,6 +706,9 @@ tcp_newtcpcb(struct inpcb *inp)
if (tm == NULL)
return (NULL);
tp = &tm->tcb;
+#ifdef VIMAGE
+ tp->t_vnet = inp->inp_vnet;
+#endif
tp->t_timers = &tm->tt;
/* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */
tp->t_maxseg = tp->t_maxopd =
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 18f3fb4..8e80842 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -259,6 +259,9 @@ syncache_init(void)
/* Initialize the hash buckets. */
for (i = 0; i < V_tcp_syncache.hashsize; i++) {
+#ifdef VIMAGE
+ V_tcp_syncache.hashbase[i].sch_vnet = curvnet;
+#endif
TAILQ_INIT(&V_tcp_syncache.hashbase[i].sch_bucket);
mtx_init(&V_tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head",
NULL, MTX_DEF);
diff --git a/sys/netinet/tcp_syncache.h b/sys/netinet/tcp_syncache.h
index c367d33..e4e3fac 100644
--- a/sys/netinet/tcp_syncache.h
+++ b/sys/netinet/tcp_syncache.h
@@ -96,6 +96,7 @@ struct syncache {
#define SYNCOOKIE_LIFETIME 16 /* seconds */
struct syncache_head {
+ struct vnet *sch_vnet;
struct mtx sch_mtx;
TAILQ_HEAD(sch_head, syncache) sch_bucket;
struct callout sch_timer;
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 268db7c..5dc840e 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -35,6 +35,8 @@
#include <netinet/tcp.h>
+struct vnet;
+
/*
* Kernel variables for tcp.
*/
@@ -106,6 +108,8 @@ struct tcpcb {
int t_state; /* state of this connection */
u_int t_flags;
+ struct vnet *t_vnet; /* back pointer to parent vnet */
+
tcp_seq snd_una; /* send unacknowledged */
tcp_seq snd_max; /* highest sequence number sent;
* used to recognize retransmits
@@ -186,8 +190,8 @@ struct tcpcb {
int t_rttlow; /* smallest observerved RTT */
u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */
int rfbuf_cnt; /* recv buffer autoscaling byte count */
- void *t_pspare[3]; /* toe usrreqs / toepcb * / congestion algo / vimage / 1 general use */
- struct toe_usrreqs *t_tu; /* offload operations vector */
+ void *t_pspare[3]; /* toe usrreqs / toepcb * / congestion algo / 1 general use */
+ struct toe_usrreqs *t_tu; /* offload operations vector */
void *t_toe; /* TOE pcb pointer */
int t_bytes_acked; /* # bytes acked during current RTT */
};
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index f2359eb..9aa83dd 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -179,6 +179,9 @@ udp_init(void)
INP_INFO_LOCK_INIT(&V_udbinfo, "udp");
LIST_INIT(&V_udb);
+#ifdef VIMAGE
+ V_udbinfo.ipi_vnet = curvnet;
+#endif
V_udbinfo.ipi_listhead = &V_udb;
V_udbinfo.ipi_hashbase = hashinit(UDBHASHSIZE, M_PCB,
&V_udbinfo.ipi_hashmask);
diff --git a/sys/netinet/vinet.h b/sys/netinet/vinet.h
index 20a36c5..0057eff 100644
--- a/sys/netinet/vinet.h
+++ b/sys/netinet/vinet.h
@@ -54,7 +54,6 @@ struct vnet_inet {
struct in_ifaddrhashhead *_in_ifaddrhashtbl;
struct in_ifaddrhead _in_ifaddrhead;
u_long _in_ifaddrhmask;
- struct in_multihead _in_multihead; /* XXX unused */
int _arpt_keep;
int _arp_maxtries;
@@ -269,7 +268,6 @@ extern struct vnet_inet vnet_inet_0;
#define V_in_ifaddrhashtbl VNET_INET(in_ifaddrhashtbl)
#define V_in_ifaddrhead VNET_INET(in_ifaddrhead)
#define V_in_ifaddrhmask VNET_INET(in_ifaddrhmask)
-#define V_in_multihead VNET_INET(in_multihead)
#define V_ip_checkinterface VNET_INET(ip_checkinterface)
#define V_ip_defttl VNET_INET(ip_defttl)
#define V_ip_do_randomid VNET_INET(ip_do_randomid)
diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c
index 077014e..4738f91 100644
--- a/sys/netinet6/in6_ifattach.c
+++ b/sys/netinet6/in6_ifattach.c
@@ -888,8 +888,9 @@ in6_get_tmpifid(struct ifnet *ifp, u_int8_t *retbuf,
}
void
-in6_tmpaddrtimer(void *ignored_arg)
+in6_tmpaddrtimer(void *arg)
{
+ CURVNET_SET((struct vnet *) arg);
INIT_VNET_NET(curvnet);
INIT_VNET_INET6(curvnet);
struct nd_ifinfo *ndi;
@@ -898,7 +899,7 @@ in6_tmpaddrtimer(void *ignored_arg)
callout_reset(&V_in6_tmpaddrtimer_ch,
(V_ip6_temp_preferred_lifetime - V_ip6_desync_factor -
- V_ip6_temp_regen_advance) * hz, in6_tmpaddrtimer, NULL);
+ V_ip6_temp_regen_advance) * hz, in6_tmpaddrtimer, curvnet);
bzero(nullbuf, sizeof(nullbuf));
for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
@@ -914,12 +915,12 @@ in6_tmpaddrtimer(void *ignored_arg)
}
}
+ CURVNET_RESTORE();
}
static void
in6_purgemaddrs(struct ifnet *ifp)
{
- INIT_VNET_INET6(ifp->if_vnet);
LIST_HEAD(,in6_multi) purgeinms;
struct in6_multi *inm, *tinm;
struct ifmultiaddr *ifma;
diff --git a/sys/netinet6/in6_mcast.c b/sys/netinet6/in6_mcast.c
index b3f272c..89fbedb 100644
--- a/sys/netinet6/in6_mcast.c
+++ b/sys/netinet6/in6_mcast.c
@@ -1302,7 +1302,6 @@ static int
in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
{
INIT_VNET_NET(curvnet);
- INIT_VNET_INET6(curvnet);
struct group_source_req gsr;
sockunion_t *gsa, *ssa;
struct ifnet *ifp;
@@ -1463,6 +1462,7 @@ out_in6p_locked:
static struct ip6_moptions *
in6p_findmoptions(struct inpcb *inp)
{
+ INIT_VNET_INET6(curvnet);
struct ip6_moptions *imo;
struct in6_multi **immp;
struct in6_mfilter *imfp;
@@ -1745,7 +1745,6 @@ static struct ifnet *
in6p_lookup_mcast_ifp(const struct inpcb *in6p __unused,
const struct sockaddr_in6 *gsin6)
{
- INIT_VNET_INET6(curvnet);
struct route_in6 ro6;
struct ifnet *ifp;
@@ -2032,7 +2031,6 @@ static int
in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
{
INIT_VNET_NET(curvnet);
- INIT_VNET_INET(curvnet);
struct group_source_req gsr;
sockunion_t *gsa, *ssa;
struct ifnet *ifp;
@@ -2249,7 +2247,6 @@ static int
in6p_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
{
INIT_VNET_NET(curvnet);
- INIT_VNET_INET6(curvnet);
struct ifnet *ifp;
struct ip6_moptions *imo;
u_int ifindex;
@@ -2454,6 +2451,7 @@ out_in6p_locked:
int
ip6_setmoptions(struct inpcb *inp, struct sockopt *sopt)
{
+ INIT_VNET_INET6(curvnet);
struct ip6_moptions *im6o;
int error;
diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c
index 622777c..c0b0b25 100644
--- a/sys/netinet6/in6_proto.c
+++ b/sys/netinet6/in6_proto.c
@@ -447,6 +447,8 @@ sysctl_ip6_temppltime(SYSCTL_HANDLER_ARGS)
int error = 0;
int old;
+ SYSCTL_RESOLVE_V_ARG1();
+
error = SYSCTL_OUT(req, arg1, sizeof(int));
if (error || !req->newptr)
return (error);
@@ -467,6 +469,8 @@ sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS)
int error = 0;
int old;
+ SYSCTL_RESOLVE_V_ARG1();
+
error = SYSCTL_OUT(req, arg1, sizeof(int));
if (error || !req->newptr)
return (error);
diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c
index 69ac45c..b0cf344 100644
--- a/sys/netinet6/ip6_input.c
+++ b/sys/netinet6/ip6_input.c
@@ -161,6 +161,7 @@ static void vnet_inet6_register(void);
static const vnet_modinfo_t vnet_inet6_modinfo = {
.vmi_id = VNET_MOD_INET6,
.vmi_name = "inet6",
+ .vmi_size = sizeof(struct vnet_inet6),
.vmi_dependson = VNET_MOD_INET /* XXX revisit - TCP/UDP needs this? */
};
@@ -307,14 +308,14 @@ ip6_init2_vnet(const void *unused __unused)
/* nd6_timer_init */
callout_init(&V_nd6_timer_ch, 0);
- callout_reset(&V_nd6_timer_ch, hz, nd6_timer, NULL);
+ callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet);
/* timer for regeneranation of temporary addresses randomize ID */
callout_init(&V_in6_tmpaddrtimer_ch, 0);
callout_reset(&V_in6_tmpaddrtimer_ch,
(V_ip6_temp_preferred_lifetime - V_ip6_desync_factor -
V_ip6_temp_regen_advance) * hz,
- in6_tmpaddrtimer, NULL);
+ in6_tmpaddrtimer, curvnet);
return (0);
}
diff --git a/sys/netinet6/mld6.c b/sys/netinet6/mld6.c
index 17b1df8..4359322 100644
--- a/sys/netinet6/mld6.c
+++ b/sys/netinet6/mld6.c
@@ -435,7 +435,6 @@ mld_dispatch_queue(struct ifqueue *ifq, int limit)
static __inline int
mld_is_addr_reported(const struct in6_addr *addr)
{
- INIT_VNET_INET6(curvnet);
KASSERT(IN6_IS_ADDR_MULTICAST(addr), ("%s: not multicast", __func__));
@@ -639,7 +638,6 @@ static int
mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
const struct mld_hdr *mld)
{
- INIT_VNET_INET6(ifp->if_vnet);
struct ifmultiaddr *ifma;
struct mld_ifinfo *mli;
struct in6_multi *inm;
@@ -1034,7 +1032,6 @@ static int
mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
const struct mld_hdr *mld)
{
- INIT_VNET_INET6(curvnet);
struct in6_ifaddr *ia;
struct in6_multi *inm;
#ifdef KTR
@@ -1646,7 +1643,6 @@ mld_slowtimo_vnet(void)
static void
mld_v1_process_querier_timers(struct mld_ifinfo *mli)
{
- INIT_VNET_INET6(curvnet);
MLD_LOCK_ASSERT();
@@ -3009,7 +3005,6 @@ out:
static struct mbuf *
mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
{
- INIT_VNET_INET6(curvnet);
struct mbuf *mh;
struct mldv2_report *mld;
struct ip6_hdr *ip6;
diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c
index 387e77b..e387a7f 100644
--- a/sys/netinet6/nd6.c
+++ b/sys/netinet6/nd6.c
@@ -191,7 +191,7 @@ nd6_init(void)
/* start timer */
callout_init(&V_nd6_slowtimo_ch, 0);
callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
- nd6_slowtimo, NULL);
+ nd6_slowtimo, curvnet);
nd6_init_done = 1;
@@ -593,7 +593,7 @@ void
nd6_timer(void *arg)
{
CURVNET_SET_QUIET((struct vnet *) arg);
- INIT_VNET_INET6((struct vnet *) arg);
+ INIT_VNET_INET6(curvnet);
int s;
struct nd_defrouter *dr;
struct nd_prefix *pr;
@@ -601,7 +601,7 @@ nd6_timer(void *arg)
struct in6_addrlifetime *lt6;
callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
- nd6_timer, NULL);
+ nd6_timer, curvnet);
/* expire default router list */
s = splnet();
@@ -872,7 +872,6 @@ nd6_purge(struct ifnet *ifp)
struct llentry *
nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp)
{
- INIT_VNET_INET6(curvnet);
struct sockaddr_in6 sin6;
struct llentry *ln;
int llflags = 0;
@@ -1669,7 +1668,7 @@ nd6_slowtimo(void *arg)
struct ifnet *ifp;
callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
- nd6_slowtimo, NULL);
+ nd6_slowtimo, curvnet);
IFNET_RLOCK();
for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
ifp = TAILQ_NEXT(ifp, if_list)) {
diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c
index c340ffd..43be628 100644
--- a/sys/netinet6/raw_ip6.c
+++ b/sys/netinet6/raw_ip6.c
@@ -651,6 +651,7 @@ rip6_attach(struct socket *so, int proto, struct thread *td)
static void
rip6_detach(struct socket *so)
{
+ INIT_VNET_INET(so->so_vnet);
INIT_VNET_INET6(so->so_vnet);
struct inpcb *inp;
diff --git a/sys/netinet6/vinet6.h b/sys/netinet6/vinet6.h
index 76bbec9..2e96d04 100644
--- a/sys/netinet6/vinet6.h
+++ b/sys/netinet6/vinet6.h
@@ -54,8 +54,6 @@ struct vnet_inet6 {
u_int _frag6_nfrags;
struct ip6q _ip6q;
- struct route_in6 _ip6_forward_rt; /* XXX remove */
-
struct in6_addrpolicy _defaultaddrpolicy;
TAILQ_HEAD(, addrsel_policyent) _addrsel_policytab;
u_int _in6_maxmtu;
@@ -122,10 +120,6 @@ struct vnet_inet6 {
int _udp6_recvspace;
int _ip6qmaxlen;
int _ip6_prefer_tempaddr;
- int _ip6_forward_srcrt; /* XXX remove */
- int _ip6_sourcecheck; /* XXX remove */
- int _ip6_sourcecheck_interval; /* XXX remove */
- int _ip6_ours_check_algorithm; /* XXX remove */
int _nd6_prune;
int _nd6_delay;
diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c
index a76afd2..4124d9d 100644
--- a/sys/netipsec/ipsec.c
+++ b/sys/netipsec/ipsec.c
@@ -248,6 +248,7 @@ MALLOC_DEFINE(M_IPSEC_INPCB, "inpcbpolicy", "inpcb-resident ipsec policy");
static const vnet_modinfo_t vnet_ipsec_modinfo = {
.vmi_id = VNET_MOD_IPSEC,
.vmi_name = "ipsec",
+ .vmi_size = sizeof(struct vnet_ipsec),
.vmi_dependson = VNET_MOD_INET, /* XXX revisit - INET6 ? */
.vmi_iattach = ipsec_iattach
};
diff --git a/sys/sys/param.h b/sys/sys/param.h
index 91f153b..fcf1a53 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -57,7 +57,7 @@
* is created, otherwise 1.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 800084 /* Master, propagated to newvers */
+#define __FreeBSD_version 800085 /* Master, propagated to newvers */
#ifndef LOCORE
#include <sys/types.h>
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index 8ce97e5..81e6b88 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -45,6 +45,8 @@
#include <sys/sockopt.h>
#endif
+struct vnet;
+
/*
* Kernel structure per socket.
* Contains send and receive buffer queues,
@@ -72,6 +74,7 @@ struct socket {
short so_state; /* (b) internal state flags SS_* */
int so_qstate; /* (e) internal state flags SQ_* */
void *so_pcb; /* protocol control block */
+ struct vnet *so_vnet; /* network stack instance */
struct protosw *so_proto; /* (a) protocol handle */
/*
* Variables for connection queuing.
diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h
index 7d0afb3..c6da7b9 100644
--- a/sys/sys/sysctl.h
+++ b/sys/sys/sysctl.h
@@ -163,6 +163,8 @@ struct sysctl_oid {
const char *oid_fmt;
int oid_refcnt;
const char *oid_descr;
+ short oid_v_subs;
+ short oid_v_mod;
};
#define SYSCTL_IN(r, p, l) (r->newfunc)(r, p, l)
@@ -292,7 +294,8 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#ifdef VIMAGE
#define SYSCTL_V_INT(subs, mod, parent, nbr, name, access, sym, val, descr) \
- SYSCTL_V_OID(subs, mod, parent, nbr, name, CTLTYPE_INT|(access), \
+ SYSCTL_V_OID(subs, mod, parent, nbr, name, \
+ CTLTYPE_INT|CTLFLAG_MPSAFE|(access), \
sym, val, sysctl_handle_v_int, "I", descr)
#else
#ifdef VIMAGE_GLOBALS
@@ -317,7 +320,8 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#ifdef VIMAGE
#define SYSCTL_V_UINT(subs, mod, parent, nbr, name, access, sym, val, descr) \
- SYSCTL_V_OID(subs, mod, parent, nbr, name, CTLTYPE_UINT|(access), \
+ SYSCTL_V_OID(subs, mod, parent, nbr, name, \
+ CTLTYPE_UINT|CTLFLAG_MPSAFE|(access), \
sym, val, sysctl_handle_v_int, "IU", descr)
#else
#ifdef VIMAGE_GLOBALS
@@ -440,6 +444,29 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#define FEATURE(name, desc) \
SYSCTL_INT(_kern_features, OID_AUTO, name, CTLFLAG_RD, 0, 1, desc)
+/*
+ * Resolve void *arg1 in a proper virtualization container.
+ *
+ * Expects oidp and arg1 to be in scope, as they are in a sysctl handler.
+ * For V_GLOBAL oids arg1 is left untouched.  For V_NET oids arg1 is
+ * taken as a byte offset and rebased onto the mod_data[] entry selected
+ * by oid_v_mod in the vnet returned by TD_TO_VNET(curthread).  Any other
+ * oid_v_subs value panics.  Without options VIMAGE the macro is empty.
+ */
+#ifdef VIMAGE
+#define SYSCTL_RESOLVE_V_ARG1() do { \
+ char *cp; \
+ switch (oidp->oid_v_subs) { \
+ case V_GLOBAL: \
+ /* do nothing - this is NOT a virtualized variable! */ \
+ break; \
+ case V_NET: \
+ cp = (char *) \
+ TD_TO_VNET(curthread)->mod_data[oidp->oid_v_mod]; \
+ arg1 = cp + (size_t) arg1; \
+ break; \
+ default: \
+ panic("unsupported module id %d", oidp->oid_v_subs); \
+ } \
+} while (0)
+#else
+#define SYSCTL_RESOLVE_V_ARG1()
+#endif
+
#endif /* _KERNEL */
/*
diff --git a/sys/sys/vimage.h b/sys/sys/vimage.h
index 013d8bc..b5edd78 100644
--- a/sys/sys/vimage.h
+++ b/sys/sys/vimage.h
@@ -39,6 +39,10 @@
#error "You cannot have both option VIMAGE and option VIMAGE_GLOBALS!"
#endif
+#ifdef INVARIANTS
+#define VNET_DEBUG
+#endif
+
typedef int vnet_attach_fn(const void *);
typedef int vnet_detach_fn(const void *);
@@ -48,8 +52,8 @@ struct kld_sym_lookup;
struct vnet_symmap {
char *name;
- void *base;
- size_t size;
+ size_t offset;
+ size_t size;
};
typedef struct vnet_symmap vnet_symmap_t;
@@ -59,7 +63,7 @@ struct vnet_modinfo {
char *vmi_name;
vnet_attach_fn *vmi_iattach;
vnet_detach_fn *vmi_idetach;
- size_t vmi_struct_size;
+ size_t vmi_size;
struct vnet_symmap *vmi_symmap;
};
typedef struct vnet_modinfo vnet_modinfo_t;
@@ -71,13 +75,7 @@ struct vnet_modlink {
const char *vml_iname;
};
-#define VNET_SYMMAP(mod, name) \
- { #name, &(vnet_ ## mod ## _0._ ## name), \
- sizeof(vnet_ ## mod ## _0._ ## name) }
-
-#define VNET_SYMMAP_END { NULL, 0 }
-
-/* stateful modules */
+/* Stateful modules. */
#define VNET_MOD_NET 0 /* MUST be 0 - implicit dependency */
#define VNET_MOD_NETGRAPH 1
#define VNET_MOD_INET 2
@@ -93,7 +91,7 @@ struct vnet_modlink {
#define VNET_MOD_IGMP 12
#define VNET_MOD_MLD 13
-/* stateless modules */
+/* Stateless modules. */
#define VNET_MOD_NG_ETHER 20
#define VNET_MOD_NG_IFACE 21
#define VNET_MOD_NG_EIFACE 22
@@ -109,7 +107,11 @@ struct vnet_modlink {
#define VNET_MOD_DYNAMIC_START 32
#define VNET_MOD_MAX 64
-/* Sysctl virtualization macros need these name mappings bellow */
+/* Major module IDs for vimage sysctl virtualization. */
+#define V_GLOBAL 0 /* global variable - no indirection */
+#define V_NET 1
+
+/* Name mappings for minor module IDs in vimage sysctl virtualization. */
#define V_MOD_vnet_net VNET_MOD_NET
#define V_MOD_vnet_netgraph VNET_MOD_NETGRAPH
#define V_MOD_vnet_inet VNET_MOD_INET
@@ -131,27 +133,82 @@ void vnet_mod_deregister_multi(const struct vnet_modinfo *, void *, char *);
#define VSYM(base, sym) (sym)
#else
#ifdef VIMAGE
-#error "No option VIMAGE yet!"
+#define VSYM(base, sym) ((base)->_ ## sym)
#else
#define VSYM(base, sym) (base ## _0._ ## sym)
#endif
#endif
+#ifndef VIMAGE_GLOBALS
+#ifdef VIMAGE
+/*
+ * Casting NULL to the container type lets sizeof() be applied to individual
+ * fields of the struct vnet_* containers at compile time.
+ */
+#define VNET_SYMMAP(mod, name) \
+ { #name, offsetof(struct vnet_ ## mod, _ ## name), \
+ sizeof(((struct vnet_ ## mod *) NULL)->_ ## name) }
+#else
+#define VNET_SYMMAP(mod, name) \
+ { #name, (size_t) &(vnet_ ## mod ## _0._ ## name), \
+ sizeof(vnet_ ## mod ## _0._ ## name) }
+#endif
+#define VNET_SYMMAP_END { NULL, 0 }
+#endif /* !VIMAGE_GLOBALS */
+
+#ifdef VIMAGE
+/* One network stack instance. */
+struct vnet {
+ void *mod_data[VNET_MOD_MAX]; /* per-module data, indexed by module id */
+ LIST_ENTRY(vnet) vnet_le; /* all vnets list */
+ u_int vnet_magic_n; /* NOTE(review): not referenced in this change - confirm purpose */
+};
+#endif
+
+#ifdef VIMAGE
+extern struct vnet *curvnet; /* XXX will become thread-local soon */
+#else
+#define curvnet NULL
+#endif
+
+/*
+ * Declare a local pointer "sym" of type "modtype *" initialized from the
+ * mod_data[modindex] slot of the given vnet.  With VNET_DEBUG the macro
+ * first panics if vnet is NULL or differs from curvnet.  It expands to a
+ * declaration that must stay visible in the caller's scope, so it is not
+ * wrapped in do { } while (0).  Without options VIMAGE it expands to
+ * nothing.
+ */
+#ifdef VIMAGE
+#ifdef VNET_DEBUG
+#define INIT_FROM_VNET(vnet, modindex, modtype, sym) \
+ if ((vnet) == NULL || (vnet) != curvnet) \
+ panic("in %s:%d %s()\n vnet=%p curvnet=%p", \
+ __FILE__, __LINE__, __FUNCTION__, \
+ (vnet), curvnet); \
+ modtype *sym = (vnet)->mod_data[modindex];
+#else /* !VNET_DEBUG */
+#define INIT_FROM_VNET(vnet, modindex, modtype, sym) \
+ modtype *sym = (vnet)->mod_data[modindex];
+#endif /* !VNET_DEBUG */
+#else /* !VIMAGE */
+#define INIT_FROM_VNET(vnet, modindex, modtype, sym)
+#endif
+
+#ifdef VIMAGE
+LIST_HEAD(vnet_list_head, vnet);
+extern struct vnet_list_head vnet_head;
+#define VNET_ITERATOR_DECL(arg) struct vnet *arg;
+#define VNET_FOREACH(arg) LIST_FOREACH(arg, &vnet_head, vnet_le)
+#else
+#define VNET_ITERATOR_DECL(arg)
+#define VNET_FOREACH(arg)
+#endif
+
+#define TD_TO_VNET(td) curvnet
+
/* Non-VIMAGE null-macros */
#define IS_DEFAULT_VNET(arg) 1
#define CURVNET_SET(arg)
#define CURVNET_SET_QUIET(arg)
#define CURVNET_RESTORE()
#define VNET_ASSERT(condition)
-#define INIT_FROM_VNET(vnet, modindex, modtype, sym)
-#define VNET_ITERATOR_DECL(arg)
-#define VNET_FOREACH(arg)
#define VNET_LIST_RLOCK()
#define VNET_LIST_RUNLOCK()
#define INIT_VPROCG(arg)
#define INIT_VCPU(arg)
#define TD_TO_VIMAGE(td)
-#define TD_TO_VNET(td)
#define TD_TO_VPROCG(td)
#define TD_TO_VCPU(td)
#define P_TO_VIMAGE(p)
OpenPOWER on IntegriCloud