summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authorrwatson <rwatson@FreeBSD.org>2009-07-19 14:20:53 +0000
committerrwatson <rwatson@FreeBSD.org>2009-07-19 14:20:53 +0000
commit69550679325e630b5720649bc0f8787e31161476 (patch)
tree3bd297fbee234a2e1c20d56c0d53370c6ab32793 /sys
parentc70ad2698e22375c9f47db2c3e6a5ee7f3fc937c (diff)
downloadFreeBSD-src-69550679325e630b5720649bc0f8787e31161476.zip
FreeBSD-src-69550679325e630b5720649bc0f8787e31161476.tar.gz
Reimplement and/or implement vnet list locking by replacing a mostly
unused custom mutex/condvar-based sleep locks with two locks: an rwlock (for non-sleeping use) and sxlock (for sleeping use). Either acquired for read is sufficient to stabilize the vnet list, but both must be acquired for write to modify the list. Replace previous no-op read locking macros, used in various places in the stack, with actual locking to prevent race conditions. Callers must declare when they may perform unbounded sleeps or not when selecting how to lock. Refactor vnet sysinits so that the vnet list and locks are initialized before kernel modules are linked, as the kernel linker will use them for modules loaded by the boot loader. Update various consumers of these KPIs based on whether they may sleep or not. Reviewed by: bz Approved by: re (kib)
Diffstat (limited to 'sys')
-rw-r--r--sys/kern/kern_vimage.c48
-rw-r--r--sys/net/if.c4
-rw-r--r--sys/netgraph/ng_gif.c4
-rw-r--r--sys/netinet/igmp.c8
-rw-r--r--sys/netinet/in_pcb.c4
-rw-r--r--sys/netinet/in_rmx.c4
-rw-r--r--sys/netinet/ip_input.c8
-rw-r--r--sys/netinet/tcp_subr.c8
-rw-r--r--sys/netinet/tcp_timer.c4
-rw-r--r--sys/netinet6/frag6.c12
-rw-r--r--sys/netinet6/mld6.c8
-rw-r--r--sys/netipsec/key.c4
-rw-r--r--sys/sys/kernel.h5
-rw-r--r--sys/sys/vimage.h42
14 files changed, 98 insertions, 65 deletions
diff --git a/sys/kern/kern_vimage.c b/sys/kern/kern_vimage.c
index 328b2ba..f3d4a86 100644
--- a/sys/kern/kern_vimage.c
+++ b/sys/kern/kern_vimage.c
@@ -58,23 +58,22 @@ static void vnet_mod_complete_registration(struct vnet_modlink *);
static int vnet_mod_constructor(struct vnet_modlink *);
static int vnet_mod_destructor(struct vnet_modlink *);
-#define VNET_LIST_WLOCK() \
- mtx_lock(&vnet_list_refc_mtx); \
- while (vnet_list_refc != 0) \
- cv_wait(&vnet_list_condvar, &vnet_list_refc_mtx);
+struct rwlock vnet_rwlock;
+struct sx vnet_sxlock;
-#define VNET_LIST_WUNLOCK() \
- mtx_unlock(&vnet_list_refc_mtx);
+#define VNET_LIST_WLOCK() do { \
+ sx_xlock(&vnet_sxlock); \
+ rw_wlock(&vnet_rwlock); \
+} while (0)
-struct vnet_list_head vnet_head;
-
-struct cv vnet_list_condvar;
-struct mtx vnet_list_refc_mtx;
-int vnet_list_refc = 0;
+#define VNET_LIST_WUNLOCK() do { \
+ rw_wunlock(&vnet_rwlock); \
+ sx_xunlock(&vnet_sxlock); \
+} while (0)
+struct vnet_list_head vnet_head;
struct vnet *vnet0;
-
/*
* Move an ifnet to or from another vnet, specified by the jail id.
*/
@@ -373,16 +372,22 @@ vnet_foreach(void (*vnet_foreach_fn)(struct vnet *, void *), void *arg)
}
static void
-vi_init(void *unused)
+vnet_init_prelink(void *arg)
{
- TAILQ_INIT(&vnet_modlink_head);
- TAILQ_INIT(&vnet_modpending_head);
-
+ rw_init(&vnet_rwlock, "vnet_rwlock");
+ sx_init(&vnet_sxlock, "vnet_sxlock");
LIST_INIT(&vnet_head);
+}
+SYSINIT(vnet_init_prelink, SI_SUB_VNET_PRELINK, SI_ORDER_FIRST,
+ vnet_init_prelink, NULL);
+
+static void
+vnet0_init(void *arg)
+{
- mtx_init(&vnet_list_refc_mtx, "vnet_list_refc_mtx", NULL, MTX_DEF);
- cv_init(&vnet_list_condvar, "vnet_list_condvar");
+ TAILQ_INIT(&vnet_modlink_head);
+ TAILQ_INIT(&vnet_modpending_head);
/*
* We MUST clear curvnet in vi_init_done() before going SMP,
@@ -391,9 +396,10 @@ vi_init(void *unused)
*/
curvnet = prison0.pr_vnet = vnet0 = vnet_alloc();
}
+SYSINIT(vnet0_init, SI_SUB_VNET, SI_ORDER_FIRST, vnet0_init, NULL);
static void
-vi_init_done(void *unused)
+vnet_init_done(void *unused)
{
struct vnet_modlink *vml_iter;
@@ -411,8 +417,8 @@ vi_init_done(void *unused)
panic("going nowhere without my vnet modules!");
}
-SYSINIT(vimage, SI_SUB_VIMAGE, SI_ORDER_FIRST, vi_init, NULL);
-SYSINIT(vimage_done, SI_SUB_VIMAGE_DONE, SI_ORDER_FIRST, vi_init_done, NULL);
+SYSINIT(vnet_init_done, SI_SUB_VNET_DONE, SI_ORDER_FIRST, vnet_init_done,
+ NULL);
#ifdef DDB
DB_SHOW_COMMAND(vnets, db_show_vnets)
diff --git a/sys/net/if.c b/sys/net/if.c
index e779e60..a267747 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -1793,8 +1793,8 @@ if_slowtimo(void *arg)
struct ifnet *ifp;
int s = splimp();
+ VNET_LIST_RLOCK_NOSLEEP();
IFNET_RLOCK();
- VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
@@ -1805,8 +1805,8 @@ if_slowtimo(void *arg)
}
CURVNET_RESTORE();
}
- VNET_LIST_RUNLOCK();
IFNET_RUNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
splx(s);
timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
}
diff --git a/sys/netgraph/ng_gif.c b/sys/netgraph/ng_gif.c
index f099d61..de2ded3 100644
--- a/sys/netgraph/ng_gif.c
+++ b/sys/netgraph/ng_gif.c
@@ -561,8 +561,8 @@ ng_gif_mod_event(module_t mod, int event, void *data)
ng_gif_input_orphan_p = ng_gif_input_orphan;
/* Create nodes for any already-existing gif interfaces */
+ VNET_LIST_RLOCK_NOSLEEP();
IFNET_RLOCK();
- VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET_QUIET(vnet_iter); /* XXX revisit quiet */
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
@@ -571,8 +571,8 @@ ng_gif_mod_event(module_t mod, int event, void *data)
}
CURVNET_RESTORE();
}
- VNET_LIST_RUNLOCK();
IFNET_RUNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
break;
case MOD_UNLOAD:
diff --git a/sys/netinet/igmp.c b/sys/netinet/igmp.c
index 37cc1b3..6a7cb83 100644
--- a/sys/netinet/igmp.c
+++ b/sys/netinet/igmp.c
@@ -1616,13 +1616,13 @@ igmp_fasttimo(void)
{
VNET_ITERATOR_DECL(vnet_iter);
- VNET_LIST_RLOCK();
+ VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
igmp_fasttimo_vnet();
CURVNET_RESTORE();
}
- VNET_LIST_RUNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
@@ -2159,13 +2159,13 @@ igmp_slowtimo(void)
{
VNET_ITERATOR_DECL(vnet_iter);
- VNET_LIST_RLOCK();
+ VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
igmp_slowtimo_vnet();
CURVNET_RESTORE();
}
- VNET_LIST_RUNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 94906ad..5d080ba 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -1570,7 +1570,7 @@ ipport_tick(void *xtp)
{
VNET_ITERATOR_DECL(vnet_iter);
- VNET_LIST_RLOCK();
+ VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS here */
if (V_ipport_tcpallocs <=
@@ -1582,7 +1582,7 @@ ipport_tick(void *xtp)
V_ipport_tcplastcount = V_ipport_tcpallocs;
CURVNET_RESTORE();
}
- VNET_LIST_RUNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
}
diff --git a/sys/netinet/in_rmx.c b/sys/netinet/in_rmx.c
index 2ef4ed6..d3f9563 100644
--- a/sys/netinet/in_rmx.c
+++ b/sys/netinet/in_rmx.c
@@ -319,7 +319,7 @@ in_rtqdrain(void)
struct rtqk_arg arg;
int fibnum;
- VNET_LIST_RLOCK();
+ VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
@@ -336,7 +336,7 @@ in_rtqdrain(void)
}
CURVNET_RESTORE();
}
- VNET_LIST_RUNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
}
static int _in_rt_was_here;
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index 8e84707..61fc835 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -1193,8 +1193,8 @@ ip_slowtimo(void)
struct ipq *fp;
int i;
+ VNET_LIST_RLOCK_NOSLEEP();
IPQ_LOCK();
- VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
for (i = 0; i < IPREASS_NHASH; i++) {
@@ -1228,8 +1228,8 @@ ip_slowtimo(void)
}
CURVNET_RESTORE();
}
- VNET_LIST_RUNLOCK();
IPQ_UNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
@@ -1241,8 +1241,8 @@ ip_drain(void)
VNET_ITERATOR_DECL(vnet_iter);
int i;
+ VNET_LIST_RLOCK_NOSLEEP();
IPQ_LOCK();
- VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
for (i = 0; i < IPREASS_NHASH; i++) {
@@ -1254,8 +1254,8 @@ ip_drain(void)
}
CURVNET_RESTORE();
}
- VNET_LIST_RUNLOCK();
IPQ_UNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
in_rtqdrain();
}
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index df61a00..8ebc889 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -940,7 +940,7 @@ tcp_drain(void)
if (!do_tcpdrain)
return;
- VNET_LIST_RLOCK();
+ VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
struct inpcb *inpb;
@@ -976,7 +976,7 @@ tcp_drain(void)
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
- VNET_LIST_RUNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
@@ -1576,7 +1576,7 @@ tcp_isn_tick(void *xtp)
VNET_ITERATOR_DECL(vnet_iter);
u_int32_t projected_offset;
- VNET_LIST_RLOCK();
+ VNET_LIST_RLOCK_NOSLEEP();
ISN_LOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS */
@@ -1590,7 +1590,7 @@ tcp_isn_tick(void *xtp)
CURVNET_RESTORE();
}
ISN_UNLOCK();
- VNET_LIST_RUNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
callout_reset(&isn_callout, hz/100, tcp_isn_tick, NULL);
}
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index 7006b70..3050324 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -127,7 +127,7 @@ tcp_slowtimo(void)
{
VNET_ITERATOR_DECL(vnet_iter);
- VNET_LIST_RLOCK();
+ VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
@@ -136,7 +136,7 @@ tcp_slowtimo(void)
INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
- VNET_LIST_RUNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
}
int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
diff --git a/sys/netinet6/frag6.c b/sys/netinet6/frag6.c
index 3fd8605..ee1f4a2 100644
--- a/sys/netinet6/frag6.c
+++ b/sys/netinet6/frag6.c
@@ -720,8 +720,8 @@ frag6_slowtimo(void)
VNET_ITERATOR_DECL(vnet_iter);
struct ip6q *q6;
+ VNET_LIST_RLOCK_NOSLEEP();
IP6Q_LOCK();
- VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
q6 = V_ip6q.ip6q_next;
@@ -748,8 +748,8 @@ frag6_slowtimo(void)
}
CURVNET_RESTORE();
}
- VNET_LIST_RUNLOCK();
IP6Q_UNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
@@ -760,9 +760,11 @@ frag6_drain(void)
{
VNET_ITERATOR_DECL(vnet_iter);
- if (IP6Q_TRYLOCK() == 0)
+ VNET_LIST_RLOCK_NOSLEEP();
+ if (IP6Q_TRYLOCK() == 0) {
+ VNET_LIST_RUNLOCK_NOSLEEP();
return;
- VNET_LIST_RLOCK();
+ }
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
while (V_ip6q.ip6q_next != &V_ip6q) {
@@ -772,6 +774,6 @@ frag6_drain(void)
}
CURVNET_RESTORE();
}
- VNET_LIST_RUNLOCK();
IP6Q_UNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
}
diff --git a/sys/netinet6/mld6.c b/sys/netinet6/mld6.c
index 9d69e11..4b69da2 100644
--- a/sys/netinet6/mld6.c
+++ b/sys/netinet6/mld6.c
@@ -1306,13 +1306,13 @@ mld_fasttimo(void)
{
VNET_ITERATOR_DECL(vnet_iter);
- VNET_LIST_RLOCK();
+ VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
mld_fasttimo_vnet();
CURVNET_RESTORE();
}
- VNET_LIST_RUNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
@@ -1721,13 +1721,13 @@ mld_slowtimo(void)
{
VNET_ITERATOR_DECL(vnet_iter);
- VNET_LIST_RLOCK();
+ VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
mld_slowtimo_vnet();
CURVNET_RESTORE();
}
- VNET_LIST_RUNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c
index 4f2d27e..e90396b 100644
--- a/sys/netipsec/key.c
+++ b/sys/netipsec/key.c
@@ -4537,7 +4537,7 @@ key_timehandler(void)
VNET_ITERATOR_DECL(vnet_iter);
time_t now = time_second;
- VNET_LIST_RLOCK();
+ VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
key_flush_spd(now);
@@ -4546,7 +4546,7 @@ key_timehandler(void)
key_flush_spacq(now);
CURVNET_RESTORE();
}
- VNET_LIST_RUNLOCK();
+ VNET_LIST_RUNLOCK_NOSLEEP();
#ifndef IPSEC_DEBUG2
/* do exchange to tick time !! */
diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h
index 478b0d8..87a4bfe 100644
--- a/sys/sys/kernel.h
+++ b/sys/sys/kernel.h
@@ -106,13 +106,14 @@ enum sysinit_sub_id {
SI_SUB_MTX_POOL_DYNAMIC = 0x1AC0000, /* dynamic mutex pool */
SI_SUB_LOCK = 0x1B00000, /* various locks */
SI_SUB_EVENTHANDLER = 0x1C00000, /* eventhandler init */
+ SI_SUB_VNET_PRELINK = 0x1E00000, /* vnet init before modules */
SI_SUB_KLD = 0x2000000, /* KLD and module setup */
SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/
SI_SUB_KDTRACE = 0x2140000, /* Kernel dtrace hooks */
SI_SUB_MAC = 0x2180000, /* TrustedBSD MAC subsystem */
SI_SUB_MAC_POLICY = 0x21C0000, /* TrustedBSD MAC policies */
SI_SUB_MAC_LATE = 0x21D0000, /* TrustedBSD MAC subsystem */
- SI_SUB_VIMAGE = 0x21E0000, /* vimage infrastructure */
+ SI_SUB_VNET = 0x21E0000, /* vnet 0 */
SI_SUB_INTRINSIC = 0x2200000, /* proc 0*/
SI_SUB_VM_CONF = 0x2300000, /* config VM, set limits*/
SI_SUB_DDB_SERVICES = 0x2380000, /* capture, scripting, etc. */
@@ -158,7 +159,7 @@ enum sysinit_sub_id {
SI_SUB_SWAP = 0xc000000, /* swap */
SI_SUB_INTRINSIC_POST = 0xd000000, /* proc 0 cleanup*/
SI_SUB_SYSCALLS = 0xd800000, /* register system calls */
- SI_SUB_VIMAGE_DONE = 0xdc00000, /* vnet registration complete */
+ SI_SUB_VNET_DONE = 0xdc00000, /* vnet registration complete */
SI_SUB_KTHREAD_INIT = 0xe000000, /* init process*/
SI_SUB_KTHREAD_PAGE = 0xe400000, /* pageout daemon*/
SI_SUB_KTHREAD_VM = 0xe800000, /* vm daemon*/
diff --git a/sys/sys/vimage.h b/sys/sys/vimage.h
index be83516..69ab3d6 100644
--- a/sys/sys/vimage.h
+++ b/sys/sys/vimage.h
@@ -38,6 +38,9 @@
#ifdef _KERNEL
+#include <sys/lock.h>
+#include <sys/sx.h>
+
#ifdef INVARIANTS
#define VNET_DEBUG
#endif
@@ -176,17 +179,42 @@ struct vnet {
#endif /* !VIMAGE */
#ifdef VIMAGE
+/*
+ * Global linked list of all virtual network stacks, along with read locks to
+ * access it. If a caller may sleep while accessing the list, it must use
+ * the sleepable lock macros.
+ */
LIST_HEAD(vnet_list_head, vnet);
extern struct vnet_list_head vnet_head;
-extern struct vnet *vnet0;
-#define VNET_ITERATOR_DECL(arg) struct vnet *arg;
-#define VNET_FOREACH(arg) LIST_FOREACH(arg, &vnet_head, vnet_le)
-#else
+extern struct rwlock vnet_rwlock;
+extern struct sx vnet_sxlock;
+
+#define VNET_LIST_RLOCK() sx_slock(&vnet_sxlock)
+#define VNET_LIST_RLOCK_NOSLEEP() rw_rlock(&vnet_rwlock)
+#define VNET_LIST_RUNLOCK() sx_sunlock(&vnet_sxlock)
+#define VNET_LIST_RUNLOCK_NOSLEEP() rw_runlock(&vnet_rwlock)
+
+/*
+ * Iteration macros to walk the global list of virtual network stacks.
+ */
+#define VNET_ITERATOR_DECL(arg) struct vnet *arg
+#define VNET_FOREACH(arg) LIST_FOREACH((arg), &vnet_head, vnet_le)
+
+#else /* !VIMAGE */
+/*
+ * No-op macros for the !VIMAGE case.
+ */
+#define VNET_LIST_RLOCK()
+#define VNET_LIST_RLOCK_NOSLEEP()
+#define VNET_LIST_RUNLOCK()
+#define VNET_LIST_RUNLOCK_NOSLEEP()
#define VNET_ITERATOR_DECL(arg)
#define VNET_FOREACH(arg)
-#endif
+
+#endif /* VIMAGE */
#ifdef VIMAGE
+extern struct vnet *vnet0;
#define IS_DEFAULT_VNET(arg) ((arg) == vnet0)
#else
#define IS_DEFAULT_VNET(arg) 1
@@ -202,10 +230,6 @@ extern struct vnet *vnet0;
#define P_TO_VNET(p) NULL
#endif /* VIMAGE */
-/* Non-VIMAGE null-macros */
-#define VNET_LIST_RLOCK()
-#define VNET_LIST_RUNLOCK()
-
#endif /* _KERNEL */
#endif /* !_SYS_VIMAGE_H_ */
OpenPOWER on IntegriCloud