summaryrefslogtreecommitdiffstats
path: root/sys/net
diff options
context:
space:
mode:
authorcsjp <csjp@FreeBSD.org>2006-06-02 19:59:33 +0000
committercsjp <csjp@FreeBSD.org>2006-06-02 19:59:33 +0000
commit2c4f67981e37d4914db61b39de9ce50520b8ab77 (patch)
tree91b5bc64ab856cef269d9fab6ff3feca3e06cf2c /sys/net
parent420f0a56b11b92d44992ae037cd8d5e18cc582f6 (diff)
downloadFreeBSD-src-2c4f67981e37d4914db61b39de9ce50520b8ab77.zip
FreeBSD-src-2c4f67981e37d4914db61b39de9ce50520b8ab77.tar.gz
Fix the following bpf(4) race condition which can result in a panic:
(1) bpf peer attaches to interface netif0 (2) Packet is received by netif0 (3) ifp->if_bpf pointer is checked and handed off to bpf (4) bpf peer detaches from netif0 resulting in ifp->if_bpf being reset to NULL. (5) ifp->if_bpf is dereferenced by bpf machinery (6) Kaboom This race condition likely explains the various different kernel panics reported around sending SIGINT to tcpdump or dhclient processes. But really this race can result in kernel panics anywhere you have frequent bpf attach and detach operations with a high packet-per-second load. Summary of changes: - Remove the bpf interface's "driverp" member - When we attach bpf interfaces, we now set the ifp->if_bpf member to the bpf interface structure. Once this is done, ifp->if_bpf should never be NULL. [1] - Introduce bpf_peers_present function, an inline operation which will do a lockless read of the bpf peer list associated with the interface. It should be noted that the bpf code will pick up the bpf interface lock before adding or removing bpf peers. This should serialize the access to the bpf descriptor list, removing the race. - Expose the bpf_if structure in bpf.h so that the bpf_peers_present function can use it. This also removes the struct bpf_if; hack that was there. - Adjust all consumers of the raw if_bpf structure to use bpf_peers_present Now what happens is: (1) Packet is received by netif0 (2) Check to see if bpf descriptor list is empty (3) Pick up the bpf interface lock (4) Hand packet off to process From the attach/detach side: (1) Pick up the bpf interface lock (2) Add/remove from bpf descriptor list Now that we are storing the bpf interface structure with the ifnet, there is no need to walk the bpf interface list to locate the correct bpf interface. We now simply look up the interface, and initialize the pointer. This has a nice side effect of changing a bpf interface attach operation from O(N) (where N is the number of bpf interfaces), to O(1).
[1] From now on, we can no longer check ifp->if_bpf to tell us whether or not we have any bpf peers that might be interested in receiving packets. In collaboration with: sam@ MFC after: 1 month
Diffstat (limited to 'sys/net')
-rw-r--r--sys/net/bpf.c95
-rw-r--r--sys/net/bpf.h26
-rw-r--r--sys/net/bpfdesc.h13
-rw-r--r--sys/net/if_disc.c2
-rw-r--r--sys/net/if_gif.c6
-rw-r--r--sys/net/if_gre.c2
-rw-r--r--sys/net/if_loop.c2
-rw-r--r--sys/net/if_sl.c8
-rw-r--r--sys/net/if_stf.c4
-rw-r--r--sys/net/if_tun.c2
10 files changed, 58 insertions, 102 deletions
diff --git a/sys/net/bpf.c b/sys/net/bpf.c
index 335bbb1..00a95c8 100644
--- a/sys/net/bpf.c
+++ b/sys/net/bpf.c
@@ -301,7 +301,6 @@ bpf_attachd(d, bp)
LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
bpf_bpfd_cnt++;
- *bp->bif_driverp = bp;
BPFIF_UNLOCK(bp);
}
@@ -327,12 +326,6 @@ bpf_detachd(d)
LIST_REMOVE(d, bd_next);
bpf_bpfd_cnt--;
- /*
- * Let the driver know that there are no more listeners.
- */
- if (LIST_EMPTY(&bp->bif_dlist))
- *bp->bif_driverp = NULL;
-
d->bd_bif = NULL;
BPFD_UNLOCK(d);
BPFIF_UNLOCK(bp);
@@ -1106,51 +1099,33 @@ bpf_setif(d, ifr)
struct ifnet *theywant;
theywant = ifunit(ifr->ifr_name);
- if (theywant == NULL)
- return ENXIO;
+ if (theywant == NULL || theywant->if_bpf == NULL)
+ return (ENXIO);
+ bp = theywant->if_bpf;
/*
- * Look through attached interfaces for the named one.
+ * Allocate the packet buffers if we need to.
+ * If we're already attached to requested interface,
+ * just flush the buffer.
*/
- mtx_lock(&bpf_mtx);
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
- struct ifnet *ifp = bp->bif_ifp;
-
- if (ifp == NULL || ifp != theywant)
- continue;
- /* skip additional entry */
- if (bp->bif_driverp != &ifp->if_bpf)
- continue;
-
- mtx_unlock(&bpf_mtx);
- /*
- * We found the requested interface.
- * Allocate the packet buffers if we need to.
- * If we're already attached to requested interface,
- * just flush the buffer.
- */
- if (d->bd_sbuf == NULL) {
- error = bpf_allocbufs(d);
- if (error != 0)
- return (error);
- }
- if (bp != d->bd_bif) {
- if (d->bd_bif)
- /*
- * Detach if attached to something else.
- */
- bpf_detachd(d);
+ if (d->bd_sbuf == NULL) {
+ error = bpf_allocbufs(d);
+ if (error != 0)
+ return (error);
+ }
+ if (bp != d->bd_bif) {
+ if (d->bd_bif)
+ /*
+ * Detach if attached to something else.
+ */
+ bpf_detachd(d);
- bpf_attachd(d, bp);
- }
- BPFD_LOCK(d);
- reset_d(d);
- BPFD_UNLOCK(d);
- return (0);
+ bpf_attachd(d, bp);
}
- mtx_unlock(&bpf_mtx);
- /* Not found. */
- return (ENXIO);
+ BPFD_LOCK(d);
+ reset_d(d);
+ BPFD_UNLOCK(d);
+ return (0);
}
/*
@@ -1272,13 +1247,6 @@ bpf_tap(bp, pkt, pktlen)
struct bpf_d *d;
u_int slen;
- /*
- * Lockless read to avoid cost of locking the interface if there are
- * no descriptors attached.
- */
- if (LIST_EMPTY(&bp->bif_dlist))
- return;
-
BPFIF_LOCK(bp);
LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
BPFD_LOCK(d);
@@ -1339,13 +1307,6 @@ bpf_mtap(bp, m)
struct bpf_d *d;
u_int pktlen, slen;
- /*
- * Lockless read to avoid cost of locking the interface if there are
- * no descriptors attached.
- */
- if (LIST_EMPTY(&bp->bif_dlist))
- return;
-
pktlen = m_length(m, NULL);
BPFIF_LOCK(bp);
@@ -1391,13 +1352,6 @@ bpf_mtap2(bp, data, dlen, m)
struct bpf_d *d;
u_int pktlen, slen;
- /*
- * Lockless read to avoid cost of locking the interface if there are
- * no descriptors attached.
- */
- if (LIST_EMPTY(&bp->bif_dlist))
- return;
-
pktlen = m_length(m, NULL);
/*
* Craft on-stack mbuf suitable for passing to bpf_filter.
@@ -1589,17 +1543,16 @@ bpfattach2(ifp, dlt, hdrlen, driverp)
panic("bpfattach");
LIST_INIT(&bp->bif_dlist);
- bp->bif_driverp = driverp;
bp->bif_ifp = ifp;
bp->bif_dlt = dlt;
mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
+ KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
+ *driverp = bp;
mtx_lock(&bpf_mtx);
LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
mtx_unlock(&bpf_mtx);
- *bp->bif_driverp = NULL;
-
/*
* Compute the length of the bpf header. This is not necessarily
* equal to SIZEOF_BPF_HDR because we want to insert spacing such
diff --git a/sys/net/bpf.h b/sys/net/bpf.h
index 189b30f..b2868c2 100644
--- a/sys/net/bpf.h
+++ b/sys/net/bpf.h
@@ -603,7 +603,18 @@ struct bpf_dltlist {
};
#ifdef _KERNEL
-struct bpf_if;
+/*
+ * Descriptor associated with each attached hardware interface.
+ */
+struct bpf_if {
+ LIST_ENTRY(bpf_if) bif_next; /* list of all interfaces */
+ LIST_HEAD(, bpf_d) bif_dlist; /* descriptor list */
+ u_int bif_dlt; /* link layer type */
+ u_int bif_hdrlen; /* length of header (with padding) */
+ struct ifnet *bif_ifp; /* corresponding interface */
+ struct mtx bif_mtx; /* mutex for interface */
+};
+
int bpf_validate(const struct bpf_insn *, int);
void bpf_tap(struct bpf_if *, u_char *, u_int);
void bpf_mtap(struct bpf_if *, struct mbuf *);
@@ -615,18 +626,25 @@ void bpfdetach(struct ifnet *);
void bpfilterattach(int);
u_int bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int);
+static __inline int
+bpf_peers_present(struct bpf_if *bpf)
+{
+
+ return !LIST_EMPTY(&bpf->bif_dlist);
+}
+
#define BPF_TAP(_ifp,_pkt,_pktlen) do { \
- if ((_ifp)->if_bpf) \
+ if (bpf_peers_present((_ifp)->if_bpf)) \
bpf_tap((_ifp)->if_bpf, (_pkt), (_pktlen)); \
} while (0)
#define BPF_MTAP(_ifp,_m) do { \
- if ((_ifp)->if_bpf) { \
+ if (bpf_peers_present((_ifp)->if_bpf)) { \
M_ASSERTVALID(_m); \
bpf_mtap((_ifp)->if_bpf, (_m)); \
} \
} while (0)
#define BPF_MTAP2(_ifp,_data,_dlen,_m) do { \
- if ((_ifp)->if_bpf) { \
+ if (bpf_peers_present((_ifp)->if_bpf)) { \
M_ASSERTVALID(_m); \
bpf_mtap2((_ifp)->if_bpf,(_data),(_dlen),(_m)); \
} \
diff --git a/sys/net/bpfdesc.h b/sys/net/bpfdesc.h
index 3de11d8..de1557a 100644
--- a/sys/net/bpfdesc.h
+++ b/sys/net/bpfdesc.h
@@ -120,19 +120,6 @@ struct bpf_d {
(bd)->bd_slen != 0))
/*
- * Descriptor associated with each attached hardware interface.
- */
-struct bpf_if {
- LIST_ENTRY(bpf_if) bif_next; /* list of all interfaces */
- LIST_HEAD(, bpf_d) bif_dlist; /* descriptor list */
- struct bpf_if **bif_driverp; /* pointer into softc */
- u_int bif_dlt; /* link layer type */
- u_int bif_hdrlen; /* length of header (with padding) */
- struct ifnet *bif_ifp; /* corresponding interface */
- struct mtx bif_mtx; /* mutex for interface */
-};
-
-/*
* External representation of the bpf descriptor
*/
struct xbpf_d {
diff --git a/sys/net/if_disc.c b/sys/net/if_disc.c
index 1d87c47..8fd6d9d 100644
--- a/sys/net/if_disc.c
+++ b/sys/net/if_disc.c
@@ -158,7 +158,7 @@ discoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
dst->sa_family = af;
}
- if (ifp->if_bpf) {
+ if (bpf_peers_present(ifp->if_bpf)) {
u_int af = dst->sa_family;
bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
}
diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c
index 9301e17..b602ce0 100644
--- a/sys/net/if_gif.c
+++ b/sys/net/if_gif.c
@@ -425,9 +425,7 @@ gif_output(ifp, m, dst, rt)
}
af = dst->sa_family;
- if (ifp->if_bpf) {
- bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
- }
+ BPF_MTAP2(ifp, &af, sizeof(af), m);
ifp->if_opackets++;
ifp->if_obytes += m->m_pkthdr.len;
@@ -484,7 +482,7 @@ gif_input(m, af, ifp)
mac_create_mbuf_from_ifnet(ifp, m);
#endif
- if (ifp->if_bpf) {
+ if (bpf_peers_present(ifp->if_bpf)) {
u_int32_t af1 = af;
bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
}
diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c
index a6e090a..6cf80ea 100644
--- a/sys/net/if_gre.c
+++ b/sys/net/if_gre.c
@@ -276,7 +276,7 @@ gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
dst->sa_family = af;
}
- if (ifp->if_bpf) {
+ if (bpf_peers_present(ifp->if_bpf)) {
af = dst->sa_family;
bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
}
diff --git a/sys/net/if_loop.c b/sys/net/if_loop.c
index 7698ffd..1b1ca0b 100644
--- a/sys/net/if_loop.c
+++ b/sys/net/if_loop.c
@@ -259,7 +259,7 @@ if_simloop(ifp, m, af, hlen)
m->m_pkthdr.rcvif = ifp;
/* Let BPF see incoming packet */
- if (ifp->if_bpf) {
+ if (bpf_peers_present(ifp->if_bpf)) {
if (ifp->if_bpf->bif_dlt == DLT_NULL) {
u_int32_t af1 = af; /* XXX beware sizeof(af) != 4 */
/*
diff --git a/sys/net/if_sl.c b/sys/net/if_sl.c
index fb39574..d33e081 100644
--- a/sys/net/if_sl.c
+++ b/sys/net/if_sl.c
@@ -662,7 +662,7 @@ sltstart(struct tty *tp)
* queueing, and the connection id compression will get
* munged when this happens.
*/
- if (SL2IFP(sc)->if_bpf) {
+ if (bpf_peers_present(SL2IFP(sc)->if_bpf)) {
/*
* We need to save the TCP/IP header before it's
* compressed. To avoid complicated code, we just
@@ -696,7 +696,7 @@ sltstart(struct tty *tp)
*mtod(m, u_char *) |= sl_compress_tcp(m, ip,
&sc->sc_comp, 1);
}
- if (SL2IFP(sc)->if_bpf && sc->bpfbuf) {
+ if (bpf_peers_present(SL2IFP(sc)->if_bpf) && sc->bpfbuf) {
/*
* Put the SLIP pseudo-"link header" in place. The
* compressed header is now at the beginning of the
@@ -922,7 +922,7 @@ slinput(int c, struct tty *tp)
/* less than min length packet - ignore */
goto newpack;
- if (SL2IFP(sc)->if_bpf) {
+ if (bpf_peers_present(SL2IFP(sc)->if_bpf)) {
/*
* Save the compressed header, so we
* can tack it on later. Note that we
@@ -961,7 +961,7 @@ slinput(int c, struct tty *tp)
} else
goto error;
}
- if (SL2IFP(sc)->if_bpf) {
+ if (bpf_peers_present(SL2IFP(sc)->if_bpf)) {
/*
* Put the SLIP pseudo-"link header" in place.
* We couldn't do this any earlier since
diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c
index 9b0bfac..e51dc2d 100644
--- a/sys/net/if_stf.c
+++ b/sys/net/if_stf.c
@@ -476,7 +476,7 @@ stf_output(ifp, m, dst, rt)
}
bcopy(ptr, &in4, sizeof(in4));
- if (ifp->if_bpf) {
+ if (bpf_peers_present(ifp->if_bpf)) {
/*
* We need to prepend the address family as
* a four byte field. Cons up a dummy header
@@ -723,7 +723,7 @@ in_stf_input(m, off)
m->m_pkthdr.rcvif = ifp;
- if (ifp->if_bpf) {
+ if (bpf_peers_present(ifp->if_bpf)) {
/*
* We need to prepend the address family as
* a four byte field. Cons up a dummy header
diff --git a/sys/net/if_tun.c b/sys/net/if_tun.c
index eafcdb5..f3b62d0 100644
--- a/sys/net/if_tun.c
+++ b/sys/net/if_tun.c
@@ -509,7 +509,7 @@ tunoutput(
dst->sa_family = af;
}
- if (ifp->if_bpf) {
+ if (bpf_peers_present(ifp->if_bpf)) {
af = dst->sa_family;
bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
}
OpenPOWER on IntegriCloud