summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/contrib/pf/net/pf.c31
-rw-r--r--sys/netinet/in_pcb.c109
-rw-r--r--sys/netinet/in_pcb.h50
-rw-r--r--sys/netinet/ip_divert.c6
-rw-r--r--sys/netinet/ipfw/ip_fw2.c50
-rw-r--r--sys/netinet/raw_ip.c29
-rw-r--r--sys/netinet/siftr.c22
-rw-r--r--sys/netinet/tcp_input.c238
-rw-r--r--sys/netinet/tcp_subr.c50
-rw-r--r--sys/netinet/tcp_syncache.c5
-rw-r--r--sys/netinet/tcp_timer.c2
-rw-r--r--sys/netinet/tcp_usrreq.c96
-rw-r--r--sys/netinet/udp_usrreq.c136
-rw-r--r--sys/netinet6/in6_pcb.c78
-rw-r--r--sys/netinet6/in6_pcb.h10
-rw-r--r--sys/netinet6/in6_src.c2
-rw-r--r--sys/netinet6/udp6_usrreq.c78
17 files changed, 557 insertions, 435 deletions
diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c
index d65ab8d..756ad3a 100644
--- a/sys/contrib/pf/net/pf.c
+++ b/sys/contrib/pf/net/pf.c
@@ -3034,16 +3034,14 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd)
#ifdef INET
case AF_INET:
#ifdef __FreeBSD__
- INP_INFO_RLOCK(pi); /* XXX LOR */
- inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
- dport, 0, NULL);
+ inp = in_pcblookup(pi, saddr->v4, sport, daddr->v4,
+ dport, INPLOOKUP_RLOCKPCB, NULL);
if (inp == NULL) {
- inp = in_pcblookup_hash(pi, saddr->v4, sport,
- daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
- if(inp == NULL) {
- INP_INFO_RUNLOCK(pi);
+ inp = in_pcblookup(pi, saddr->v4, sport,
+ daddr->v4, dport, INPLOOKUP_WILDCARD |
+ INPLOOKUP_RLOCKPCB, NULL);
+ if (inp == NULL)
return (-1);
- }
}
#else
inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
@@ -3058,16 +3056,14 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd)
#ifdef INET6
case AF_INET6:
#ifdef __FreeBSD__
- INP_INFO_RLOCK(pi);
- inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
- &daddr->v6, dport, 0, NULL);
+ inp = in6_pcblookup(pi, &saddr->v6, sport,
+ &daddr->v6, dport, INPLOOKUP_RLOCKPCB, NULL);
if (inp == NULL) {
- inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
- &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
- if (inp == NULL) {
- INP_INFO_RUNLOCK(pi);
+ inp = in6_pcblookup(pi, &saddr->v6, sport,
+ &daddr->v6, dport, INPLOOKUP_WILDCARD |
+ INPLOOKUP_RLOCKPCB, NULL);
+ if (inp == NULL)
return (-1);
- }
}
#else
inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
@@ -3085,9 +3081,10 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd)
return (-1);
}
#ifdef __FreeBSD__
+ INP_RLOCK_ASSERT(inp);
pd->lookup.uid = inp->inp_cred->cr_uid;
pd->lookup.gid = inp->inp_cred->cr_groups[0];
- INP_INFO_RUNLOCK(pi);
+ INP_RUNLOCK(inp);
#else
pd->lookup.uid = inp->inp_socket->so_euid;
pd->lookup.gid = inp->inp_socket->so_egid;
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 85e31dc..8ae4d6f 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -127,6 +127,10 @@ static VNET_DEFINE(int, ipport_tcplastcount);
#define V_ipport_tcplastcount VNET(ipport_tcplastcount)
+static struct inpcb *in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo,
+ struct in_addr faddr, u_int fport_arg,
+ struct in_addr laddr, u_int lport_arg,
+ int lookupflags, struct ifnet *ifp);
static void in_pcbremlists(struct inpcb *inp);
#ifdef INET
@@ -212,11 +216,13 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
{
INP_INFO_LOCK_INIT(pcbinfo, name);
+ INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash"); /* XXXRW: argument? */
#ifdef VIMAGE
pcbinfo->ipi_vnet = curvnet;
#endif
pcbinfo->ipi_listhead = listhead;
LIST_INIT(pcbinfo->ipi_listhead);
+ pcbinfo->ipi_count = 0;
pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB,
&pcbinfo->ipi_hashmask);
pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
@@ -234,10 +240,14 @@ void
in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
{
+ KASSERT(pcbinfo->ipi_count == 0,
+ ("%s: ipi_count = %u", __func__, pcbinfo->ipi_count));
+
hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
pcbinfo->ipi_porthashmask);
uma_zdestroy(pcbinfo->ipi_zone);
+ INP_HASH_LOCK_DESTROY(pcbinfo);
INP_INFO_LOCK_DESTROY(pcbinfo);
}
@@ -309,8 +319,8 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
{
int anonport, error;
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
return (EINVAL);
@@ -351,8 +361,8 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
* Because no actual state changes occur here, a global write lock on
* the pcbinfo isn't required.
*/
- INP_INFO_LOCK_ASSERT(pcbinfo);
INP_LOCK_ASSERT(inp);
+ INP_HASH_LOCK_ASSERT(pcbinfo);
if (inp->inp_flags & INP_HIGHPORT) {
first = V_ipport_hifirstauto; /* sysctl */
@@ -473,11 +483,10 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
int error;
/*
- * Because no actual state changes occur here, a global write lock on
- * the pcbinfo isn't required.
+ * No state changes, so read locks are sufficient here.
*/
- INP_INFO_LOCK_ASSERT(pcbinfo);
INP_LOCK_ASSERT(inp);
+ INP_HASH_LOCK_ASSERT(pcbinfo);
if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */
return (EADDRNOTAVAIL);
@@ -618,8 +627,8 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
in_addr_t laddr, faddr;
int anonport, error;
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
lport = inp->inp_lport;
laddr = inp->inp_laddr.s_addr;
@@ -907,8 +916,8 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
* Because a global state change doesn't actually occur here, a read
* lock is sufficient.
*/
- INP_INFO_LOCK_ASSERT(inp->inp_pcbinfo);
INP_LOCK_ASSERT(inp);
+ INP_HASH_LOCK_ASSERT(inp->inp_pcbinfo);
if (oinpp != NULL)
*oinpp = NULL;
@@ -983,8 +992,8 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
if (error)
return (error);
}
- oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport,
- 0, NULL);
+ oinp = in_pcblookup_hash_locked(inp->inp_pcbinfo, faddr, fport,
+ laddr, lport, 0, NULL);
if (oinp != NULL) {
if (oinpp != NULL)
*oinpp = oinp;
@@ -1007,8 +1016,8 @@ void
in_pcbdisconnect(struct inpcb *inp)
{
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
inp->inp_faddr.s_addr = INADDR_ANY;
inp->inp_fport = 0;
@@ -1187,19 +1196,24 @@ void
in_pcbdrop(struct inpcb *inp)
{
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ /*
+ * XXXRW: Possibly we should protect the setting of INP_DROPPED with
+ * the hash lock...?
+ */
inp->inp_flags |= INP_DROPPED;
if (inp->inp_flags & INP_INHASHLIST) {
struct inpcbport *phd = inp->inp_phd;
+ INP_HASH_WLOCK(inp->inp_pcbinfo);
LIST_REMOVE(inp, inp_hash);
LIST_REMOVE(inp, inp_portlist);
if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
LIST_REMOVE(phd, phd_hash);
free(phd, M_PCB);
}
+ INP_HASH_WUNLOCK(inp->inp_pcbinfo);
inp->inp_flags &= ~INP_INHASHLIST;
}
}
@@ -1328,7 +1342,8 @@ in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
}
/*
- * Lookup a PCB based on the local address and port.
+ * Lookup a PCB based on the local address and port. Caller must hold the
+ * hash lock. No inpcb locks or references are acquired.
*/
#define INP_LOOKUP_MAPPED_PCB_COST 3
struct inpcb *
@@ -1346,7 +1361,7 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
- INP_INFO_LOCK_ASSERT(pcbinfo);
+ INP_HASH_LOCK_ASSERT(pcbinfo);
if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
struct inpcbhead *head;
@@ -1450,10 +1465,12 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
#undef INP_LOOKUP_MAPPED_PCB_COST
/*
- * Lookup PCB in hash list.
+ * Lookup PCB in hash list, using pcbinfo tables. This variation assumes
+ * that the caller has locked the hash list, and will not perform any further
+ * locking or reference operations on either the hash list or the connection.
*/
-struct inpcb *
-in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
+static struct inpcb *
+in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags,
struct ifnet *ifp)
{
@@ -1464,7 +1481,7 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
- INP_INFO_LOCK_ASSERT(pcbinfo);
+ INP_HASH_LOCK_ASSERT(pcbinfo);
/*
* First look for an exact match.
@@ -1574,6 +1591,56 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
return (NULL);
}
+
+/*
+ * Lookup PCB in hash list, using pcbinfo tables. This variation locks the
+ * hash list lock, and will return the inpcb locked (i.e., requires
+ * INPLOOKUP_LOCKPCB).
+ */
+static struct inpcb *
+in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
+ u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
+ struct ifnet *ifp)
+{
+ struct inpcb *inp;
+
+ INP_HASH_RLOCK(pcbinfo);
+ inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
+ (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
+ if (inp != NULL) {
+ in_pcbref(inp);
+ INP_HASH_RUNLOCK(pcbinfo);
+ if (lookupflags & INPLOOKUP_WLOCKPCB) {
+ INP_WLOCK(inp);
+ if (in_pcbrele_wlocked(inp))
+ return (NULL);
+ } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+ INP_RLOCK(inp);
+ if (in_pcbrele_rlocked(inp))
+ return (NULL);
+ } else
+ panic("%s: locking bug", __func__);
+ } else
+ INP_HASH_RUNLOCK(pcbinfo);
+ return (inp);
+}
+
+/*
+ * Public inpcb lookup routines, accepting a 4-tuple.
+ */
+struct inpcb *
+in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport,
+ struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp)
+{
+
+ KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
+ ("%s: invalid lookup flags %d", __func__, lookupflags));
+ KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
+ ("%s: LOCKPCB not set", __func__));
+
+ return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
+ lookupflags, ifp));
+}
#endif /* INET */
/*
@@ -1588,8 +1655,9 @@ in_pcbinshash(struct inpcb *inp)
struct inpcbport *phd;
u_int32_t hashkey_faddr;
- INP_INFO_WLOCK_ASSERT(pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(pcbinfo);
+
KASSERT((inp->inp_flags & INP_INHASHLIST) == 0,
("in_pcbinshash: INP_INHASHLIST"));
@@ -1645,8 +1713,9 @@ in_pcbrehash(struct inpcb *inp)
struct inpcbhead *head;
u_int32_t hashkey_faddr;
- INP_INFO_WLOCK_ASSERT(pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(pcbinfo);
+
KASSERT(inp->inp_flags & INP_INHASHLIST,
("in_pcbrehash: !INP_INHASHLIST"));
@@ -1679,12 +1748,14 @@ in_pcbremlists(struct inpcb *inp)
if (inp->inp_flags & INP_INHASHLIST) {
struct inpcbport *phd = inp->inp_phd;
+ INP_HASH_WLOCK(pcbinfo);
LIST_REMOVE(inp, inp_hash);
LIST_REMOVE(inp, inp_portlist);
if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
LIST_REMOVE(phd, phd_hash);
free(phd, M_PCB);
}
+ INP_HASH_WUNLOCK(pcbinfo);
inp->inp_flags &= ~INP_INHASHLIST;
}
LIST_REMOVE(inp, inp_list);
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 14d4ea2..bde9e8d 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -268,22 +268,22 @@ struct inpcbport {
* Global data structure for each high-level protocol (UDP, TCP, ...) in both
* IPv4 and IPv6. Holds inpcb lists and information for managing them.
*
- * Each pcbinfo is protected by ipi_lock, covering mutable global fields (such
- * as the global pcb list) and hashed lookup tables. The lock order is:
+ * Each pcbinfo is protected by two locks: ipi_lock and ipi_hash_lock,
+ * the former covering mutable global fields (such as the global pcb list),
+ * and the latter covering the hashed lookup tables. The lock order is:
*
- * ipi_lock (before) inpcb locks
+ * ipi_lock (before) inpcb locks (before) ipi_hash_lock
*
* Locking key:
*
* (c) Constant or nearly constant after initialisation
* (g) Locked by ipi_lock
- * (h) Read using either ipi_lock or inpcb lock; write requires both.
+ * (h) Read using either ipi_hash_lock or inpcb lock; write requires both.
* (x) Synchronisation properties poorly defined
*/
struct inpcbinfo {
/*
- * Global lock protecting global inpcb list, inpcb count, hash tables,
- * etc.
+ * Global lock protecting global inpcb list, inpcb count, etc.
*/
struct rwlock ipi_lock;
@@ -312,17 +312,22 @@ struct inpcbinfo {
struct uma_zone *ipi_zone; /* (c) */
/*
+ * Global lock protecting hash lookup tables.
+ */
+ struct rwlock ipi_hash_lock;
+
+ /*
* Global hash of inpcbs, hashed by local and foreign addresses and
* port numbers.
*/
- struct inpcbhead *ipi_hashbase; /* (g) */
- u_long ipi_hashmask; /* (g) */
+ struct inpcbhead *ipi_hashbase; /* (h) */
+ u_long ipi_hashmask; /* (h) */
/*
* Global hash of inpcbs, hashed by only local port number.
*/
- struct inpcbporthead *ipi_porthashbase; /* (g) */
- u_long ipi_porthashmask; /* (g) */
+ struct inpcbporthead *ipi_porthashbase; /* (h) */
+ u_long ipi_porthashmask; /* (h) */
/*
* Pointer to network stack instance
@@ -406,6 +411,18 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
+#define INP_HASH_LOCK_INIT(ipi, d) \
+ rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0)
+#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock)
+#define INP_HASH_RLOCK(ipi) rw_rlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_WLOCK(ipi) rw_wlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
+ RA_LOCKED)
+#define INP_HASH_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
+ RA_WLOCKED)
+
#define INP_PCBHASH(faddr, lport, fport, mask) \
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
#define INP_PCBPORTHASH(lport, mask) \
@@ -466,7 +483,16 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
#define INP_LLE_VALID 0x00000001 /* cached lle is valid */
#define INP_RT_VALID 0x00000002 /* cached rtentry is valid */
-#define INPLOOKUP_WILDCARD 1
+/*
+ * Flags passed to in_pcblookup*() functions.
+ */
+#define INPLOOKUP_WILDCARD 0x00000001 /* Allow wildcard sockets. */
+#define INPLOOKUP_RLOCKPCB 0x00000002 /* Return inpcb read-locked. */
+#define INPLOOKUP_WLOCKPCB 0x00000004 /* Return inpcb write-locked. */
+
+#define INPLOOKUP_MASK (INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB | \
+ INPLOOKUP_WLOCKPCB)
+
#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb)
#define sotoin6pcb(so) sotoinpcb(so) /* for KAME src sync over BSD*'s */
@@ -527,7 +553,7 @@ struct inpcb *
in_pcblookup_local(struct inpcbinfo *,
struct in_addr, u_short, int, struct ucred *);
struct inpcb *
- in_pcblookup_hash(struct inpcbinfo *, struct in_addr, u_int,
+ in_pcblookup(struct inpcbinfo *, struct in_addr, u_int,
struct in_addr, u_int, int, struct ifnet *);
void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr,
int, struct inpcb *(*)(struct inpcb *, int));
diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c
index de88556..9f0e31c 100644
--- a/sys/netinet/ip_divert.c
+++ b/sys/netinet/ip_divert.c
@@ -659,9 +659,9 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
INP_INFO_WLOCK(&V_divcbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
- INP_WLOCK(inp);
- if (!in_pcbrele(inp))
- INP_WUNLOCK(inp);
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
}
INP_INFO_WUNLOCK(&V_divcbinfo);
diff --git a/sys/netinet/ipfw/ip_fw2.c b/sys/netinet/ipfw/ip_fw2.c
index 85ae1b2..6672084 100644
--- a/sys/netinet/ipfw/ip_fw2.c
+++ b/sys/netinet/ipfw/ip_fw2.c
@@ -657,7 +657,7 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
(struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb);
#else /* FreeBSD */
struct inpcbinfo *pi;
- int wildcard;
+ int lookupflags;
struct inpcb *pcb;
int match;
@@ -682,30 +682,31 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
if (*ugid_lookupp == -1)
return (0);
if (proto == IPPROTO_TCP) {
- wildcard = 0;
+ lookupflags = 0;
pi = &V_tcbinfo;
} else if (proto == IPPROTO_UDP) {
- wildcard = INPLOOKUP_WILDCARD;
+ lookupflags = INPLOOKUP_WILDCARD;
pi = &V_udbinfo;
} else
return 0;
+ lookupflags |= INPLOOKUP_RLOCKPCB;
match = 0;
if (*ugid_lookupp == 0) {
- INP_INFO_RLOCK(pi);
pcb = (oif) ?
- in_pcblookup_hash(pi,
+ in_pcblookup(pi,
dst_ip, htons(dst_port),
src_ip, htons(src_port),
- wildcard, oif) :
- in_pcblookup_hash(pi,
+ lookupflags, oif) :
+ in_pcblookup(pi,
src_ip, htons(src_port),
dst_ip, htons(dst_port),
- wildcard, NULL);
+ lookupflags, NULL);
if (pcb != NULL) {
+ INP_RLOCK_ASSERT(pcb);
*uc = crhold(pcb->inp_cred);
*ugid_lookupp = 1;
+ INP_RUNLOCK(pcb);
}
- INP_INFO_RUNLOCK(pi);
if (*ugid_lookupp == 0) {
/*
* We tried and failed, set the variable to -1
@@ -1827,21 +1828,32 @@ do { \
else
break;
+ /*
+ * XXXRW: so_user_cookie should almost
+ * certainly be inp_user_cookie?
+ */
+
/* For incomming packet, lookup up the
inpcb using the src/dest ip/port tuple */
if (inp == NULL) {
- INP_INFO_RLOCK(pi);
- inp = in_pcblookup_hash(pi,
+ inp = in_pcblookup(pi,
src_ip, htons(src_port),
dst_ip, htons(dst_port),
- 0, NULL);
- INP_INFO_RUNLOCK(pi);
- }
-
- if (inp && inp->inp_socket) {
- tablearg = inp->inp_socket->so_user_cookie;
- if (tablearg)
- match = 1;
+ INPLOOKUP_RLOCKPCB, NULL);
+ if (inp != NULL) {
+ tablearg =
+ inp->inp_socket->so_user_cookie;
+ if (tablearg)
+ match = 1;
+ INP_RUNLOCK(inp);
+ }
+ } else {
+ if (inp->inp_socket) {
+ tablearg =
+ inp->inp_socket->so_user_cookie;
+ if (tablearg)
+ match = 1;
+ }
}
break;
}
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index be099a8..635f08f 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -226,7 +226,7 @@ rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
{
int policyfail = 0;
- INP_RLOCK_ASSERT(last);
+ INP_LOCK_ASSERT(last);
#ifdef IPSEC
/* check AH/ESP integrity. */
@@ -834,16 +834,19 @@ rip_detach(struct socket *so)
static void
rip_dodisconnect(struct socket *so, struct inpcb *inp)
{
+ struct inpcbinfo *pcbinfo;
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
- INP_WLOCK_ASSERT(inp);
-
+ pcbinfo = inp->inp_pcbinfo;
+ INP_INFO_WLOCK(pcbinfo);
+ INP_WLOCK(inp);
rip_delhash(inp);
inp->inp_faddr.s_addr = INADDR_ANY;
rip_inshash(inp);
SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTED;
SOCK_UNLOCK(so);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(pcbinfo);
}
static void
@@ -854,11 +857,7 @@ rip_abort(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("rip_abort: inp == NULL"));
- INP_INFO_WLOCK(&V_ripcbinfo);
- INP_WLOCK(inp);
rip_dodisconnect(so, inp);
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_ripcbinfo);
}
static void
@@ -869,11 +868,7 @@ rip_close(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("rip_close: inp == NULL"));
- INP_INFO_WLOCK(&V_ripcbinfo);
- INP_WLOCK(inp);
rip_dodisconnect(so, inp);
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_ripcbinfo);
}
static int
@@ -887,11 +882,7 @@ rip_disconnect(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("rip_disconnect: inp == NULL"));
- INP_INFO_WLOCK(&V_ripcbinfo);
- INP_WLOCK(inp);
rip_dodisconnect(so, inp);
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_ripcbinfo);
return (0);
}
@@ -1077,9 +1068,9 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
INP_INFO_WLOCK(&V_ripcbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
- INP_WLOCK(inp);
- if (!in_pcbrele(inp))
- INP_WUNLOCK(inp);
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
}
INP_INFO_WUNLOCK(&V_ripcbinfo);
diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c
index 9d11262..6145a54 100644
--- a/sys/netinet/siftr.c
+++ b/sys/netinet/siftr.c
@@ -696,17 +696,16 @@ siftr_findinpcb(int ipver, struct ip *ip, struct mbuf *m, uint16_t sport,
/* We need the tcbinfo lock. */
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
- INP_INFO_RLOCK(&V_tcbinfo);
if (dir == PFIL_IN)
inp = (ipver == INP_IPV4 ?
- in_pcblookup_hash(&V_tcbinfo, ip->ip_src, sport, ip->ip_dst,
- dport, 0, m->m_pkthdr.rcvif)
+ in_pcblookup(&V_tcbinfo, ip->ip_src, sport, ip->ip_dst,
+ dport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif)
:
#ifdef SIFTR_IPV6
- in6_pcblookup_hash(&V_tcbinfo,
+ in6_pcblookup(&V_tcbinfo,
&((struct ip6_hdr *)ip)->ip6_src, sport,
- &((struct ip6_hdr *)ip)->ip6_dst, dport, 0,
+ &((struct ip6_hdr *)ip)->ip6_dst, dport, INPLOOKUP_RLOCKPCB,
m->m_pkthdr.rcvif)
#else
NULL
@@ -715,13 +714,13 @@ siftr_findinpcb(int ipver, struct ip *ip, struct mbuf *m, uint16_t sport,
else
inp = (ipver == INP_IPV4 ?
- in_pcblookup_hash(&V_tcbinfo, ip->ip_dst, dport, ip->ip_src,
- sport, 0, m->m_pkthdr.rcvif)
+ in_pcblookup(&V_tcbinfo, ip->ip_dst, dport, ip->ip_src,
+ sport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif)
:
#ifdef SIFTR_IPV6
- in6_pcblookup_hash(&V_tcbinfo,
+ in6_pcblookup(&V_tcbinfo,
&((struct ip6_hdr *)ip)->ip6_dst, dport,
- &((struct ip6_hdr *)ip)->ip6_src, sport, 0,
+ &((struct ip6_hdr *)ip)->ip6_src, sport, INPLOOKUP_RLOCKPCB,
m->m_pkthdr.rcvif)
#else
NULL
@@ -734,12 +733,7 @@ siftr_findinpcb(int ipver, struct ip *ip, struct mbuf *m, uint16_t sport,
ss->nskip_in_inpcb++;
else
ss->nskip_out_inpcb++;
- } else {
- /* Acquire the inpcb lock. */
- INP_UNLOCK_ASSERT(inp);
- INP_RLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
return (inp);
}
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 1a94d0a..080b4da 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -5,6 +5,7 @@
* Swinburne University of Technology, Melbourne, Australia.
* Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
* Copyright (c) 2010 The FreeBSD Foundation
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
* Portions of this software were developed at the Centre for Advanced Internet
@@ -16,6 +17,9 @@
* Internet Architectures, Swinburne University of Technology, Melbourne,
* Australia by David Hayes under sponsorship from the FreeBSD Foundation.
*
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -197,10 +201,6 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW,
&VNET_NAME(tcp_autorcvbuf_max), 0,
"Max size of automatic receive buffer");
-int tcp_read_locking = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, read_locking, CTLFLAG_RW,
- &tcp_read_locking, 0, "Enable read locking strategy");
-
VNET_DEFINE(struct inpcbhead, tcb);
#define tcb6 tcb /* for KAME src sync over BSD*'s */
VNET_DEFINE(struct inpcbinfo, tcbinfo);
@@ -591,8 +591,7 @@ tcp_input(struct mbuf *m, int off0)
char *s = NULL; /* address and port logging */
int ti_locked;
#define TI_UNLOCKED 1
-#define TI_RLOCKED 2
-#define TI_WLOCKED 3
+#define TI_WLOCKED 2
#ifdef TCPDEBUG
/*
@@ -756,30 +755,25 @@ tcp_input(struct mbuf *m, int off0)
drop_hdrlen = off0 + off;
/*
- * Locate pcb for segment, which requires a lock on tcbinfo.
- * Optimisticaly acquire a global read lock rather than a write lock
- * unless header flags necessarily imply a state change. There are
- * two cases where we might discover later we need a write lock
- * despite the flags: ACKs moving a connection out of the syncache,
- * and ACKs for a connection in TIMEWAIT.
+ * Locate pcb for segment; if we're likely to add or remove a
+ * connection then first acquire pcbinfo lock. There are two cases
+ * where we might discover later we need a write lock despite the
+ * flags: ACKs moving a connection out of the syncache, and ACKs for
+ * a connection in TIMEWAIT.
*/
- if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
- tcp_read_locking == 0) {
+ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) {
INP_INFO_WLOCK(&V_tcbinfo);
ti_locked = TI_WLOCKED;
- } else {
- INP_INFO_RLOCK(&V_tcbinfo);
- ti_locked = TI_RLOCKED;
- }
+ } else
+ ti_locked = TI_UNLOCKED;
findpcb:
#ifdef INVARIANTS
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED) {
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
- else
- panic("%s: findpcb ti_locked %d\n", __func__, ti_locked);
+ } else {
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ }
#endif
#ifdef INET
@@ -797,20 +791,18 @@ findpcb:
* Transparently forwarded. Pretend to be the destination.
* already got one like this?
*/
- inp = in_pcblookup_hash(&V_tcbinfo,
- ip->ip_src, th->th_sport,
- ip->ip_dst, th->th_dport,
- 0, m->m_pkthdr.rcvif);
+ inp = in_pcblookup(&V_tcbinfo, ip->ip_src, th->th_sport,
+ ip->ip_dst, th->th_dport, INPLOOKUP_WLOCKPCB,
+ m->m_pkthdr.rcvif);
if (!inp) {
- /* It's new. Try to find the ambushing socket. */
- inp = in_pcblookup_hash(&V_tcbinfo,
- ip->ip_src, th->th_sport,
- next_hop->sin_addr,
- next_hop->sin_port ?
- ntohs(next_hop->sin_port) :
- th->th_dport,
- INPLOOKUP_WILDCARD,
- m->m_pkthdr.rcvif);
+ /*
+ * It's new. Try to find the ambushing socket.
+ */
+ inp = in_pcblookup(&V_tcbinfo, ip->ip_src,
+ th->th_sport, next_hop->sin_addr,
+ next_hop->sin_port ? ntohs(next_hop->sin_port) :
+ th->th_dport, INPLOOKUP_WILDCARD |
+ INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif);
}
/* Remove the tag from the packet. We don't need it anymore. */
m_tag_delete(m, fwd_tag);
@@ -820,21 +812,19 @@ findpcb:
{
#ifdef INET6
if (isipv6)
- inp = in6_pcblookup_hash(&V_tcbinfo,
- &ip6->ip6_src, th->th_sport,
- &ip6->ip6_dst, th->th_dport,
- INPLOOKUP_WILDCARD,
- m->m_pkthdr.rcvif);
+ inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_src,
+ th->th_sport, &ip6->ip6_dst, th->th_dport,
+ INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB,
+ m->m_pkthdr.rcvif);
#endif
#if defined(INET) && defined(INET6)
else
#endif
#ifdef INET
- inp = in_pcblookup_hash(&V_tcbinfo,
- ip->ip_src, th->th_sport,
- ip->ip_dst, th->th_dport,
- INPLOOKUP_WILDCARD,
- m->m_pkthdr.rcvif);
+ inp = in_pcblookup(&V_tcbinfo, ip->ip_src,
+ th->th_sport, ip->ip_dst, th->th_dport,
+ INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB,
+ m->m_pkthdr.rcvif);
#endif
}
@@ -865,7 +855,7 @@ findpcb:
rstreason = BANDLIM_RST_CLOSEDPORT;
goto dropwithreset;
}
- INP_WLOCK(inp);
+ INP_WLOCK_ASSERT(inp);
if (!(inp->inp_flags & INP_HW_FLOWID)
&& (m->m_flags & M_FLOWID)
&& ((inp->inp_socket == NULL)
@@ -906,28 +896,26 @@ findpcb:
* legitimate new connection attempt the old INPCB gets removed and
* we can try again to find a listening socket.
*
- * At this point, due to earlier optimism, we may hold a read lock on
- * the inpcbinfo, rather than a write lock. If so, we need to
- * upgrade, or if that fails, acquire a reference on the inpcb, drop
- * all locks, acquire a global write lock, and then re-acquire the
- * inpcb lock. We may at that point discover that another thread has
- * tried to free the inpcb, in which case we need to loop back and
- * try to find a new inpcb to deliver to.
+ * At this point, due to earlier optimism, we may hold only an inpcb
+ * lock, and not the inpcbinfo write lock. If so, we need to try to
+ * acquire it, or if that fails, acquire a reference on the inpcb,
+ * drop all locks, acquire a global write lock, and then re-acquire
+ * the inpcb lock. We may at that point discover that another thread
+ * has tried to free the inpcb, in which case we need to loop back
+ * and try to find a new inpcb to deliver to.
+ *
+ * XXXRW: It may be time to rethink timewait locking.
*/
relocked:
if (inp->inp_flags & INP_TIMEWAIT) {
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("%s: INP_TIMEWAIT ti_locked %d", __func__, ti_locked));
-
- if (ti_locked == TI_RLOCKED) {
- if (INP_INFO_TRY_UPGRADE(&V_tcbinfo) == 0) {
+ if (ti_locked == TI_UNLOCKED) {
+ if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
INP_INFO_WLOCK(&V_tcbinfo);
ti_locked = TI_WLOCKED;
INP_WLOCK(inp);
- if (in_pcbrele(inp)) {
+ if (in_pcbrele_wlocked(inp)) {
inp = NULL;
goto findpcb;
}
@@ -975,26 +963,24 @@ relocked:
/*
* We've identified a valid inpcb, but it could be that we need an
- * inpcbinfo write lock and have only a read lock. In this case,
- * attempt to upgrade/relock using the same strategy as the TIMEWAIT
- * case above. If we relock, we have to jump back to 'relocked' as
- * the connection might now be in TIMEWAIT.
+ * inpcbinfo write lock but don't hold it. In this case, attempt to
+ * acquire using the same strategy as the TIMEWAIT case above. If we
+ * relock, we have to jump back to 'relocked' as the connection might
+ * now be in TIMEWAIT.
*/
- if (tp->t_state != TCPS_ESTABLISHED ||
- (thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
- tcp_read_locking == 0) {
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("%s: upgrade check ti_locked %d", __func__, ti_locked));
-
- if (ti_locked == TI_RLOCKED) {
- if (INP_INFO_TRY_UPGRADE(&V_tcbinfo) == 0) {
+#ifdef INVARIANTS
+ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0)
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+#endif
+ if (tp->t_state != TCPS_ESTABLISHED) {
+ if (ti_locked == TI_UNLOCKED) {
+ if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
INP_INFO_WLOCK(&V_tcbinfo);
ti_locked = TI_WLOCKED;
INP_WLOCK(inp);
- if (in_pcbrele(inp)) {
+ if (in_pcbrele_wlocked(inp)) {
inp = NULL;
goto findpcb;
}
@@ -1027,13 +1013,16 @@ relocked:
/*
* When the socket is accepting connections (the INPCB is in LISTEN
* state) we look into the SYN cache if this is a new connection
- * attempt or the completion of a previous one.
+ * attempt or the completion of a previous one. Because listen
+ * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be
+ * held in this case.
*/
if (so->so_options & SO_ACCEPTCONN) {
struct in_conninfo inc;
KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
"tp not listening", __func__));
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
bzero(&inc, sizeof(inc));
#ifdef INET6
@@ -1371,13 +1360,17 @@ relocked:
return;
dropwithreset:
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED) {
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: dropwithreset ti_locked %d", __func__, ti_locked);
- ti_locked = TI_UNLOCKED;
+ ti_locked = TI_UNLOCKED;
+ }
+#ifdef INVARIANTS
+ else {
+ KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropwithreset "
+ "ti_locked: %d", __func__, ti_locked));
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ }
+#endif
if (inp != NULL) {
tcp_dropwithreset(m, th, tp, tlen, rstreason);
@@ -1388,13 +1381,17 @@ dropwithreset:
goto drop;
dropunlock:
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED) {
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: dropunlock ti_locked %d", __func__, ti_locked);
- ti_locked = TI_UNLOCKED;
+ ti_locked = TI_UNLOCKED;
+ }
+#ifdef INVARIANTS
+ else {
+ KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropunlock "
+ "ti_locked: %d", __func__, ti_locked));
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ }
+#endif
if (inp != NULL)
INP_WUNLOCK(inp);
@@ -1449,13 +1446,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
} else {
#ifdef INVARIANTS
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
- else
- panic("%s: ti_locked %d for EST", __func__,
- ti_locked);
+ else {
+ KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
+ "ti_locked: %d", __func__, ti_locked));
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ }
#endif
}
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -1601,13 +1598,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
/*
* This is a pure ack for outstanding data.
*/
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: ti_locked %d on pure ACK",
- __func__, ti_locked);
ti_locked = TI_UNLOCKED;
TCPSTAT_INC(tcps_predack);
@@ -1708,13 +1700,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* nothing on the reassembly queue and we have enough
* buffer space to take it.
*/
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: ti_locked %d on pure data "
- "segment", __func__, ti_locked);
ti_locked = TI_UNLOCKED;
/* Clean receiver SACK report if present */
@@ -2550,9 +2537,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
}
process_ACK:
- INP_INFO_LOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("tcp_input: process_ACK ti_locked %d", ti_locked));
INP_WLOCK_ASSERT(tp->t_inpcb);
acked = BYTES_THIS_ACK(tp, th);
@@ -2716,9 +2700,6 @@ process_ACK:
}
step6:
- INP_INFO_LOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("tcp_do_segment: step6 ti_locked %d", ti_locked));
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
@@ -2804,9 +2785,6 @@ step6:
tp->rcv_up = tp->rcv_nxt;
}
dodata: /* XXX */
- INP_INFO_LOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("tcp_do_segment: dodata ti_locked %d", ti_locked));
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
@@ -2938,13 +2916,8 @@ dodata: /* XXX */
return;
}
}
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: dodata epilogue ti_locked %d", __func__,
- ti_locked);
ti_locked = TI_UNLOCKED;
#ifdef TCPDEBUG
@@ -2973,9 +2946,6 @@ check_delack:
return;
dropafterack:
- KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED,
- ("tcp_do_segment: dropafterack ti_locked %d", ti_locked));
-
/*
* Generate an ACK dropping incoming segment if it occupies
* sequence space, where the ACK reflects our state.
@@ -3002,13 +2972,8 @@ dropafterack:
tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
&tcp_savetcp, 0);
#endif
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: dropafterack epilogue ti_locked %d", __func__,
- ti_locked);
ti_locked = TI_UNLOCKED;
tp->t_flags |= TF_ACKNOW;
@@ -3018,12 +2983,8 @@ dropafterack:
return;
dropwithreset:
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED)
INP_INFO_WUNLOCK(&V_tcbinfo);
- else
- panic("%s: dropwithreset ti_locked %d", __func__, ti_locked);
ti_locked = TI_UNLOCKED;
if (tp != NULL) {
@@ -3034,15 +2995,14 @@ dropwithreset:
return;
drop:
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK(&V_tcbinfo);
- else if (ti_locked == TI_WLOCKED)
+ if (ti_locked == TI_WLOCKED) {
INP_INFO_WUNLOCK(&V_tcbinfo);
+ ti_locked = TI_UNLOCKED;
+ }
#ifdef INVARIANTS
else
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
#endif
- ti_locked = TI_UNLOCKED;
/*
* Drop space held by incoming segment and return.
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 2c013be..06854ec 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -1184,9 +1184,9 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
INP_INFO_WLOCK(&V_tcbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
- INP_WLOCK(inp);
- if (!in_pcbrele(inp))
- INP_WUNLOCK(inp);
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
}
INP_INFO_WUNLOCK(&V_tcbinfo);
@@ -1228,12 +1228,9 @@ tcp_getcred(SYSCTL_HANDLER_ARGS)
error = SYSCTL_IN(req, addrs, sizeof(addrs));
if (error)
return (error);
- INP_INFO_RLOCK(&V_tcbinfo);
- inp = in_pcblookup_hash(&V_tcbinfo, addrs[1].sin_addr,
- addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, 0, NULL);
+ inp = in_pcblookup(&V_tcbinfo, addrs[1].sin_addr, addrs[1].sin_port,
+ addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
- INP_RLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
if (inp->inp_socket == NULL)
error = ENOENT;
if (error == 0)
@@ -1241,10 +1238,8 @@ tcp_getcred(SYSCTL_HANDLER_ARGS)
if (error == 0)
cru2x(inp->inp_cred, &xuc);
INP_RUNLOCK(inp);
- } else {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ } else
error = ENOENT;
- }
if (error == 0)
error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
return (error);
@@ -1286,23 +1281,20 @@ tcp6_getcred(SYSCTL_HANDLER_ARGS)
return (EINVAL);
}
- INP_INFO_RLOCK(&V_tcbinfo);
#ifdef INET
if (mapped == 1)
- inp = in_pcblookup_hash(&V_tcbinfo,
+ inp = in_pcblookup(&V_tcbinfo,
*(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12],
addrs[1].sin6_port,
*(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12],
- addrs[0].sin6_port,
- 0, NULL);
+ addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL);
else
#endif
- inp = in6_pcblookup_hash(&V_tcbinfo,
+ inp = in6_pcblookup(&V_tcbinfo,
&addrs[1].sin6_addr, addrs[1].sin6_port,
- &addrs[0].sin6_addr, addrs[0].sin6_port, 0, NULL);
+ &addrs[0].sin6_addr, addrs[0].sin6_port,
+ INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
- INP_RLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
if (inp->inp_socket == NULL)
error = ENOENT;
if (error == 0)
@@ -1310,10 +1302,8 @@ tcp6_getcred(SYSCTL_HANDLER_ARGS)
if (error == 0)
cru2x(inp->inp_cred, &xuc);
INP_RUNLOCK(inp);
- } else {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ } else
error = ENOENT;
- }
if (error == 0)
error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
return (error);
@@ -1374,10 +1364,9 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
th = (struct tcphdr *)((caddr_t)ip
+ (ip->ip_hl << 2));
INP_INFO_WLOCK(&V_tcbinfo);
- inp = in_pcblookup_hash(&V_tcbinfo, faddr, th->th_dport,
- ip->ip_src, th->th_sport, 0, NULL);
+ inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport,
+ ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL) {
- INP_WLOCK(inp);
if (!(inp->inp_flags & INP_TIMEWAIT) &&
!(inp->inp_flags & INP_DROPPED) &&
!(inp->inp_socket == NULL)) {
@@ -2154,20 +2143,19 @@ sysctl_drop(SYSCTL_HANDLER_ARGS)
switch (addrs[0].ss_family) {
#ifdef INET6
case AF_INET6:
- inp = in6_pcblookup_hash(&V_tcbinfo, &fin6->sin6_addr,
- fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port, 0,
- NULL);
+ inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr,
+ fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port,
+ INPLOOKUP_WLOCKPCB, NULL);
break;
#endif
#ifdef INET
case AF_INET:
- inp = in_pcblookup_hash(&V_tcbinfo, fin->sin_addr,
- fin->sin_port, lin->sin_addr, lin->sin_port, 0, NULL);
+ inp = in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port,
+ lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL);
break;
#endif
}
if (inp != NULL) {
- INP_WLOCK(inp);
if (inp->inp_flags & INP_TIMEWAIT) {
/*
* XXXRW: There currently exists a state where an
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 8262f43..533de4b 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -661,6 +661,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
inp = sotoinpcb(so);
inp->inp_inc.inc_fibnum = so->so_fibnum;
INP_WLOCK(inp);
+ INP_HASH_WLOCK(&V_tcbinfo);
/* Insert new socket into PCB hash list. */
inp->inp_inc.inc_flags = sc->sc_inc.inc_flags;
@@ -694,6 +695,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
s, __func__, error);
free(s, M_TCPLOG);
}
+ INP_HASH_WUNLOCK(&V_tcbinfo);
goto abort;
}
#ifdef IPSEC
@@ -737,6 +739,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
s, __func__, error);
free(s, M_TCPLOG);
}
+ INP_HASH_WUNLOCK(&V_tcbinfo);
goto abort;
}
/* Override flowlabel from in6_pcbconnect. */
@@ -776,10 +779,12 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
s, __func__, error);
free(s, M_TCPLOG);
}
+ INP_HASH_WUNLOCK(&V_tcbinfo);
goto abort;
}
}
#endif /* INET */
+ INP_HASH_WUNLOCK(&V_tcbinfo);
tp = intotcpcb(inp);
tp->t_state = TCPS_SYN_RECEIVED;
tp->iss = sc->sc_iss;
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index 5c2c5c2..73984c7 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -490,7 +490,7 @@ tcp_timer_rexmt(void * xtp)
INP_WUNLOCK(inp);
INP_INFO_WLOCK(&V_tcbinfo);
INP_WLOCK(inp);
- if (in_pcbrele(inp)) {
+ if (in_pcbrele_wlocked(inp)) {
INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 318fe27..9bad970 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -2,8 +2,12 @@
* Copyright (c) 1982, 1986, 1988, 1993
* The Regents of the University of California.
* Copyright (c) 2006-2007 Robert N. M. Watson
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -251,7 +255,6 @@ tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
return (EAFNOSUPPORT);
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
INP_WLOCK(inp);
@@ -261,11 +264,12 @@ tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
}
tp = intotcpcb(inp);
TCPDEBUG1();
+ INP_HASH_WLOCK(&V_tcbinfo);
error = in_pcbbind(inp, nam, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
out:
TCPDEBUG2(PRU_BIND);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
@@ -292,7 +296,6 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
return (EAFNOSUPPORT);
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
INP_WLOCK(inp);
@@ -302,6 +305,7 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
}
tp = intotcpcb(inp);
TCPDEBUG1();
+ INP_HASH_WLOCK(&V_tcbinfo);
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
#ifdef INET
@@ -316,15 +320,16 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
inp->inp_vflag &= ~INP_IPV6;
error = in_pcbbind(inp, (struct sockaddr *)&sin,
td->td_ucred);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
goto out;
}
}
#endif
error = in6_pcbbind(inp, nam, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
out:
TCPDEBUG2(PRU_BIND);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
#endif /* INET6 */
@@ -341,7 +346,6 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
struct tcpcb *tp = NULL;
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
INP_WLOCK(inp);
@@ -353,8 +357,10 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
TCPDEBUG1();
SOCK_LOCK(so);
error = solisten_proto_check(so);
+ INP_HASH_WLOCK(&V_tcbinfo);
if (error == 0 && inp->inp_lport == 0)
error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
if (error == 0) {
tp->t_state = TCPS_LISTEN;
solisten_proto(so, backlog);
@@ -365,7 +371,6 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
out:
TCPDEBUG2(PRU_LISTEN);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
#endif /* INET */
@@ -379,7 +384,6 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
struct tcpcb *tp = NULL;
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
INP_WLOCK(inp);
@@ -391,12 +395,14 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
TCPDEBUG1();
SOCK_LOCK(so);
error = solisten_proto_check(so);
+ INP_HASH_WLOCK(&V_tcbinfo);
if (error == 0 && inp->inp_lport == 0) {
inp->inp_vflag &= ~INP_IPV4;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
inp->inp_vflag |= INP_IPV4;
error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
}
+ INP_HASH_WUNLOCK(&V_tcbinfo);
if (error == 0) {
tp->t_state = TCPS_LISTEN;
solisten_proto(so, backlog);
@@ -406,7 +412,6 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
out:
TCPDEBUG2(PRU_LISTEN);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
#endif /* INET6 */
@@ -440,7 +445,6 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
return (error);
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
INP_WLOCK(inp);
@@ -456,7 +460,6 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
out:
TCPDEBUG2(PRU_CONNECT);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
#endif /* INET */
@@ -482,7 +485,6 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
&& IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
return (EAFNOSUPPORT);
- INP_INFO_WLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
INP_WLOCK(inp);
@@ -493,6 +495,11 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
tp = intotcpcb(inp);
TCPDEBUG1();
#ifdef INET
+ /*
+ * XXXRW: Some confusion: V4/V6 flags relate to binding, and
+ * therefore probably require the hash lock, which isn't held here.
+ * Is this a significant problem?
+ */
if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
struct sockaddr_in sin;
@@ -525,7 +532,6 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
out:
TCPDEBUG2(PRU_CONNECT);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
#endif /* INET6 */
@@ -639,6 +645,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
error = ECONNABORTED;
@@ -664,6 +671,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
out:
TCPDEBUG2(PRU_ACCEPT);
INP_WUNLOCK(inp);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
if (error == 0) {
if (v4)
*nam = in6_v4mapsin6_sockaddr(port, &addr);
@@ -750,25 +758,17 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
int error = 0;
struct inpcb *inp;
struct tcpcb *tp = NULL;
- int headlocked = 0;
#ifdef INET6
int isipv6;
#endif
TCPDEBUG0;
/*
- * We require the pcbinfo lock in two cases:
- *
- * (1) An implied connect is taking place, which can result in
- * binding IPs and ports and hence modification of the pcb hash
- * chains.
- *
- * (2) PRUS_EOF is set, resulting in explicit close on the send.
+ * We require the pcbinfo lock if we will close the socket as part of
+ * this call.
*/
- if ((nam != NULL) || (flags & PRUS_EOF)) {
+ if (flags & PRUS_EOF)
INP_INFO_WLOCK(&V_tcbinfo);
- headlocked = 1;
- }
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
INP_WLOCK(inp);
@@ -805,7 +805,6 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
* initialize maxseg/maxopd using peer's cached
* MSS.
*/
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
#ifdef INET6
if (isipv6)
error = tcp6_connect(tp, nam, td);
@@ -830,10 +829,6 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
socantsendmore(so);
tcp_usrclosed(tp);
}
- if (headlocked) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
- headlocked = 0;
- }
if (!(inp->inp_flags & INP_DROPPED)) {
if (flags & PRUS_MORETOCOME)
tp->t_flags |= TF_MORETOCOME;
@@ -869,7 +864,6 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
* initialize maxseg/maxopd using peer's cached
* MSS.
*/
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
#ifdef INET6
if (isipv6)
error = tcp6_connect(tp, nam, td);
@@ -884,11 +878,6 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
goto out;
tp->snd_wnd = TTCP_CLIENT_SND_WND;
tcp_mss(tp, -1);
- INP_INFO_WUNLOCK(&V_tcbinfo);
- headlocked = 0;
- } else if (nam) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
- headlocked = 0;
}
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
tp->t_flags |= TF_FORCEDATA;
@@ -899,7 +888,7 @@ out:
TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
INP_WUNLOCK(inp);
- if (headlocked)
+ if (flags & PRUS_EOF)
INP_INFO_WUNLOCK(&V_tcbinfo);
return (error);
}
@@ -1087,13 +1076,13 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
u_short lport;
int error;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK(&V_tcbinfo);
if (inp->inp_lport == 0) {
error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
if (error)
- return error;
+ goto out;
}
/*
@@ -1106,11 +1095,14 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
&inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred);
if (error && oinp == NULL)
- return error;
- if (oinp)
- return EADDRINUSE;
+ goto out;
+ if (oinp) {
+ error = EADDRINUSE;
+ goto out;
+ }
inp->inp_laddr = laddr;
in_pcbrehash(inp);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
/*
* Compute window scaling to request:
@@ -1129,6 +1121,10 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
tcp_sendseqinit(tp);
return 0;
+
+out:
+ INP_HASH_WUNLOCK(&V_tcbinfo);
+ return (error);
}
#endif /* INET */
@@ -1142,13 +1138,13 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
struct in6_addr addr6;
int error;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK(&V_tcbinfo);
if (inp->inp_lport == 0) {
error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
if (error)
- return error;
+ goto out;
}
/*
@@ -1156,18 +1152,23 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
* earlier incarnation of this same connection still in
* TIME_WAIT state, creating an ADDRINUSE error.
* in6_pcbladdr() also handles scope zone IDs.
+ *
+ * XXXRW: We wouldn't need to expose in6_pcblookup_hash_locked()
+ * outside of in6_pcb.c if there were an in6_pcbconnect_setup().
*/
error = in6_pcbladdr(inp, nam, &addr6);
if (error)
return error;
- oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
+ oinp = in6_pcblookup_hash_locked(inp->inp_pcbinfo,
&sin6->sin6_addr, sin6->sin6_port,
IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
? &addr6
: &inp->in6p_laddr,
inp->inp_lport, 0, NULL);
- if (oinp)
- return EADDRINUSE;
+ if (oinp) {
+ error = EADDRINUSE;
+ goto out;
+ }
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
inp->in6p_laddr = addr6;
inp->in6p_faddr = sin6->sin6_addr;
@@ -1178,6 +1179,7 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
inp->inp_flow |=
(htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
in_pcbrehash(inp);
+ INP_HASH_WUNLOCK(&V_tcbinfo);
/* Compute window scaling to request. */
while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
@@ -1192,6 +1194,10 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
tcp_sendseqinit(tp);
return 0;
+
+out:
+ INP_HASH_WUNLOCK(&V_tcbinfo);
+ return error;
}
#endif /* INET6 */
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index c3503e6..5f2d7c3 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -2,8 +2,12 @@
* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
* The Regents of the University of California.
* Copyright (c) 2008 Robert N. M. Watson
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -253,7 +257,7 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
#endif
struct udpcb *up;
- INP_RLOCK_ASSERT(inp);
+ INP_LOCK_ASSERT(inp);
/*
* Engage the tunneling protocol.
@@ -458,12 +462,12 @@ udp_input(struct mbuf *m, int off)
}
#endif
- INP_INFO_RLOCK(&V_udbinfo);
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
in_broadcast(ip->ip_dst, ifp)) {
struct inpcb *last;
struct ip_moptions *imo;
+ INP_INFO_RLOCK(&V_udbinfo);
last = NULL;
LIST_FOREACH(inp, &V_udb, inp_list) {
if (inp->inp_lport != uh->uh_dport)
@@ -485,6 +489,13 @@ udp_input(struct mbuf *m, int off)
INP_RLOCK(inp);
/*
+ * XXXRW: Because we weren't holding either the inpcb
+ * or the hash lock when we checked for a match
+ * before, we should probably recheck now that the
+ * inpcb lock is held.
+ */
+
+ /*
* Handle socket delivery policy for any-source
* and source-specific multicast. [RFC3678]
*/
@@ -542,7 +553,10 @@ udp_input(struct mbuf *m, int off)
* or multicast datgram.)
*/
UDPSTAT_INC(udps_noportbcast);
- goto badheadlocked;
+ if (inp)
+ INP_RUNLOCK(inp);
+ INP_INFO_RUNLOCK(&V_udbinfo);
+ goto badunlocked;
}
udp_append(last, ip, m, iphlen, &udp_in);
INP_RUNLOCK(last);
@@ -553,8 +567,9 @@ udp_input(struct mbuf *m, int off)
/*
* Locate pcb for datagram.
*/
- inp = in_pcblookup_hash(&V_udbinfo, ip->ip_src, uh->uh_sport,
- ip->ip_dst, uh->uh_dport, 1, ifp);
+ inp = in_pcblookup(&V_udbinfo, ip->ip_src, uh->uh_sport,
+ ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
+ ifp);
if (inp == NULL) {
if (udp_log_in_vain) {
char buf[4*sizeof "123"];
@@ -568,36 +583,31 @@ udp_input(struct mbuf *m, int off)
UDPSTAT_INC(udps_noport);
if (m->m_flags & (M_BCAST | M_MCAST)) {
UDPSTAT_INC(udps_noportbcast);
- goto badheadlocked;
+ goto badunlocked;
}
if (V_udp_blackhole)
- goto badheadlocked;
+ goto badunlocked;
if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
- goto badheadlocked;
+ goto badunlocked;
*ip = save_ip;
ip->ip_len += iphlen;
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
- INP_INFO_RUNLOCK(&V_udbinfo);
return;
}
/*
* Check the minimum TTL for socket.
*/
- INP_RLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_RLOCK_ASSERT(inp);
if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) {
INP_RUNLOCK(inp);
- goto badunlocked;
+ m_freem(m);
+ return;
}
udp_append(inp, ip, m, iphlen, &udp_in);
INP_RUNLOCK(inp);
return;
-badheadlocked:
- if (inp)
- INP_RUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
badunlocked:
m_freem(m);
}
@@ -656,17 +666,15 @@ udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
return;
if (ip != NULL) {
uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
- INP_INFO_RLOCK(&V_udbinfo);
- inp = in_pcblookup_hash(&V_udbinfo, faddr, uh->uh_dport,
- ip->ip_src, uh->uh_sport, 0, NULL);
+ inp = in_pcblookup(&V_udbinfo, faddr, uh->uh_dport,
+ ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
- INP_RLOCK(inp);
+ INP_RLOCK_ASSERT(inp);
if (inp->inp_socket != NULL) {
udp_notify(inp, inetctlerrmap[cmd]);
}
INP_RUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_udbinfo);
} else
in_pcbnotifyall(&V_udbinfo, faddr, inetctlerrmap[cmd],
udp_notify);
@@ -756,9 +764,9 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
INP_INFO_WLOCK(&V_udbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
- INP_WLOCK(inp);
- if (!in_pcbrele(inp))
- INP_WUNLOCK(inp);
+ INP_RLOCK(inp);
+ if (!in_pcbrele_rlocked(inp))
+ INP_RUNLOCK(inp);
}
INP_INFO_WUNLOCK(&V_udbinfo);
@@ -799,12 +807,11 @@ udp_getcred(SYSCTL_HANDLER_ARGS)
error = SYSCTL_IN(req, addrs, sizeof(addrs));
if (error)
return (error);
- INP_INFO_RLOCK(&V_udbinfo);
- inp = in_pcblookup_hash(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
- addrs[0].sin_addr, addrs[0].sin_port, 1, NULL);
+ inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
+ addrs[0].sin_addr, addrs[0].sin_port,
+ INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
- INP_RLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_RLOCK_ASSERT(inp);
if (inp->inp_socket == NULL)
error = ENOENT;
if (error == 0)
@@ -812,10 +819,8 @@ udp_getcred(SYSCTL_HANDLER_ARGS)
if (error == 0)
cru2x(inp->inp_cred, &xuc);
INP_RUNLOCK(inp);
- } else {
- INP_INFO_RUNLOCK(&V_udbinfo);
+ } else
error = ENOENT;
- }
if (error == 0)
error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
return (error);
@@ -924,6 +929,9 @@ udp_ctloutput(struct socket *so, struct sockopt *sopt)
}
#ifdef INET
+#define UH_WLOCKED 2
+#define UH_RLOCKED 1
+#define UH_UNLOCKED 0
static int
udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
struct mbuf *control, struct thread *td)
@@ -1016,29 +1024,27 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
* conservative locks than required the second time around, so later
* assertions have to accept that. Further analysis of the number of
* misses under contention is required.
+ *
+ * XXXRW: Check that hash locking update here is correct.
*/
sin = (struct sockaddr_in *)addr;
INP_RLOCK(inp);
if (sin != NULL &&
(inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
INP_RUNLOCK(inp);
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
- unlock_udbinfo = 2;
+ INP_HASH_WLOCK(&V_udbinfo);
+ unlock_udbinfo = UH_WLOCKED;
} else if ((sin != NULL && (
(sin->sin_addr.s_addr == INADDR_ANY) ||
(sin->sin_addr.s_addr == INADDR_BROADCAST) ||
(inp->inp_laddr.s_addr == INADDR_ANY) ||
(inp->inp_lport == 0))) ||
(src.sin_family == AF_INET)) {
- if (!INP_INFO_TRY_RLOCK(&V_udbinfo)) {
- INP_RUNLOCK(inp);
- INP_INFO_RLOCK(&V_udbinfo);
- INP_RLOCK(inp);
- }
- unlock_udbinfo = 1;
+ INP_HASH_RLOCK(&V_udbinfo);
+ unlock_udbinfo = UH_RLOCKED;
} else
- unlock_udbinfo = 0;
+ unlock_udbinfo = UH_UNLOCKED;
/*
* If the IP_SENDSRCADDR control message was specified, override the
@@ -1048,7 +1054,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
laddr = inp->inp_laddr;
lport = inp->inp_lport;
if (src.sin_family == AF_INET) {
- INP_INFO_LOCK_ASSERT(&V_udbinfo);
+ INP_HASH_LOCK_ASSERT(&V_udbinfo);
if ((lport == 0) ||
(laddr.s_addr == INADDR_ANY &&
src.sin_addr.s_addr == INADDR_ANY)) {
@@ -1099,7 +1105,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
inp->inp_lport == 0 ||
sin->sin_addr.s_addr == INADDR_ANY ||
sin->sin_addr.s_addr == INADDR_BROADCAST) {
- INP_INFO_LOCK_ASSERT(&V_udbinfo);
+ INP_HASH_LOCK_ASSERT(&V_udbinfo);
error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
&lport, &faddr.s_addr, &fport, NULL,
td->td_ucred);
@@ -1113,8 +1119,8 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
/* Commit the local port if newly assigned. */
if (inp->inp_laddr.s_addr == INADDR_ANY &&
inp->inp_lport == 0) {
- INP_INFO_WLOCK_ASSERT(&V_udbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(&V_udbinfo);
/*
* Remember addr if jailed, to prevent
* rebinding.
@@ -1209,25 +1215,25 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */
UDPSTAT_INC(udps_opackets);
- if (unlock_udbinfo == 2)
- INP_INFO_WUNLOCK(&V_udbinfo);
- else if (unlock_udbinfo == 1)
- INP_INFO_RUNLOCK(&V_udbinfo);
+ if (unlock_udbinfo == UH_WLOCKED)
+ INP_HASH_WUNLOCK(&V_udbinfo);
+ else if (unlock_udbinfo == UH_RLOCKED)
+ INP_HASH_RUNLOCK(&V_udbinfo);
error = ip_output(m, inp->inp_options, NULL, ipflags,
inp->inp_moptions, inp);
- if (unlock_udbinfo == 2)
+ if (unlock_udbinfo == UH_WLOCKED)
INP_WUNLOCK(inp);
else
INP_RUNLOCK(inp);
return (error);
release:
- if (unlock_udbinfo == 2) {
+ if (unlock_udbinfo == UH_WLOCKED) {
+ INP_HASH_WUNLOCK(&V_udbinfo);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
- } else if (unlock_udbinfo == 1) {
+ } else if (unlock_udbinfo == UH_RLOCKED) {
+ INP_HASH_RUNLOCK(&V_udbinfo);
INP_RUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
} else
INP_RUNLOCK(inp);
m_freem(m);
@@ -1376,15 +1382,15 @@ udp_abort(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ INP_HASH_WLOCK(&V_udbinfo);
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
+ INP_HASH_WUNLOCK(&V_udbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
}
static int
@@ -1453,11 +1459,11 @@ udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
+ INP_HASH_WLOCK(&V_udbinfo);
error = in_pcbbind(inp, nam, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_udbinfo);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (error);
}
@@ -1468,15 +1474,15 @@ udp_close(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_close: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ INP_HASH_WLOCK(&V_udbinfo);
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
+ INP_HASH_WUNLOCK(&V_udbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
}
static int
@@ -1488,25 +1494,23 @@ udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (EISCONN);
}
sin = (struct sockaddr_in *)nam;
error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
if (error != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (error);
}
+ INP_HASH_WLOCK(&V_udbinfo);
error = in_pcbconnect(inp, nam, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_udbinfo);
if (error == 0)
soisconnected(so);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (error);
}
@@ -1538,21 +1542,19 @@ udp_disconnect(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (inp->inp_faddr.s_addr == INADDR_ANY) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (ENOTCONN);
}
-
+ INP_HASH_WLOCK(&V_udbinfo);
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
+ INP_HASH_WUNLOCK(&V_udbinfo);
SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTED; /* XXX */
SOCK_UNLOCK(so);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (0);
}
diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c
index eacce8c..17fb953 100644
--- a/sys/netinet6/in6_pcb.c
+++ b/sys/netinet6/in6_pcb.c
@@ -1,7 +1,11 @@
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -114,8 +118,8 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
int error, lookupflags = 0;
int reuseport = (so->so_options & SO_REUSEPORT);
- INP_INFO_WLOCK_ASSERT(pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(pcbinfo);
if (TAILQ_EMPTY(&V_in6_ifaddrhead)) /* XXX broken! */
return (EADDRNOTAVAIL);
@@ -298,8 +302,8 @@ in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam,
int scope_ambiguous = 0;
struct in6_addr in6a;
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); /* XXXRW: why? */
if (nam->sa_len != sizeof (*sin6))
return (EINVAL);
@@ -363,12 +367,13 @@ int
in6_pcbconnect(register struct inpcb *inp, struct sockaddr *nam,
struct ucred *cred)
{
+ struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
struct in6_addr addr6;
int error;
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(pcbinfo);
/*
* Call inner routine, to assign local interface address.
@@ -377,7 +382,7 @@ in6_pcbconnect(register struct inpcb *inp, struct sockaddr *nam,
if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0)
return (error);
- if (in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr,
+ if (in6_pcblookup_hash_locked(pcbinfo, &sin6->sin6_addr,
sin6->sin6_port,
IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
? &addr6 : &inp->in6p_laddr,
@@ -409,8 +414,8 @@ void
in6_pcbdisconnect(struct inpcb *inp)
{
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr));
inp->inp_fport = 0;
@@ -649,7 +654,8 @@ in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst,
}
/*
- * Lookup a PCB based on the local address and port.
+ * Lookup a PCB based on the local address and port. Caller must hold the
+ * hash lock. No inpcb locks or references are acquired.
*/
struct inpcb *
in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
@@ -661,7 +667,7 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
- INP_INFO_WLOCK_ASSERT(pcbinfo);
+ INP_HASH_WLOCK_ASSERT(pcbinfo);
if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
struct inpcbhead *head;
@@ -818,9 +824,9 @@ in6_rtchange(struct inpcb *inp, int errno)
* Lookup PCB in hash list.
*/
struct inpcb *
-in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
- u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int lookupflags,
- struct ifnet *ifp)
+in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
+ u_int fport_arg, struct in6_addr *laddr, u_int lport_arg,
+ int lookupflags, struct ifnet *ifp)
{
struct inpcbhead *head;
struct inpcb *inp, *tmpinp;
@@ -830,7 +836,7 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
- INP_INFO_LOCK_ASSERT(pcbinfo);
+ INP_HASH_LOCK_ASSERT(pcbinfo);
if (faithprefix_p != NULL)
faith = (*faithprefix_p)(laddr);
@@ -934,6 +940,56 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
return (NULL);
}
+/*
+ * Lookup PCB in hash list, using pcbinfo tables. This variation locks the
+ * hash list lock, and will return the inpcb locked (i.e., requires
+ * INPLOOKUP_LOCKPCB).
+ */
+static struct inpcb *
+in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
+ u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags,
+ struct ifnet *ifp)
+{
+ struct inpcb *inp;
+
+ INP_HASH_RLOCK(pcbinfo);
+ inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
+ (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
+ if (inp != NULL) {
+ in_pcbref(inp);
+ INP_HASH_RUNLOCK(pcbinfo);
+ if (lookupflags & INPLOOKUP_WLOCKPCB) {
+ INP_WLOCK(inp);
+ if (in_pcbrele_wlocked(inp))
+ return (NULL);
+ } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+ INP_RLOCK(inp);
+ if (in_pcbrele_rlocked(inp))
+ return (NULL);
+ } else
+ panic("%s: locking bug", __func__);
+ } else
+ INP_HASH_RUNLOCK(pcbinfo);
+ return (inp);
+}
+
+/*
+ * Public inpcb lookup routines, accepting a 4-tuple.
+ */
+struct inpcb *
+in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport,
+ struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp)
+{
+
+ KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
+ ("%s: invalid lookup flags %d", __func__, lookupflags));
+ KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
+ ("%s: LOCKPCB not set", __func__));
+
+ return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
+ lookupflags, ifp));
+}
+
void
init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m)
{
diff --git a/sys/netinet6/in6_pcb.h b/sys/netinet6/in6_pcb.h
index c54a8cf..bce5ee3 100644
--- a/sys/netinet6/in6_pcb.h
+++ b/sys/netinet6/in6_pcb.h
@@ -80,9 +80,13 @@ struct inpcb *
struct in6_addr *, u_short, int,
struct ucred *));
struct inpcb *
- in6_pcblookup_hash __P((struct inpcbinfo *,
- struct in6_addr *, u_int, struct in6_addr *,
- u_int, int, struct ifnet *));
+ in6_pcblookup __P((struct inpcbinfo *, struct in6_addr *,
+ u_int, struct in6_addr *, u_int, int,
+ struct ifnet *));
+struct inpcb *
+ in6_pcblookup_hash_locked __P((struct inpcbinfo *, struct in6_addr *,
+ u_int, struct in6_addr *, u_int, int,
+ struct ifnet *));
void in6_pcbnotify __P((struct inpcbinfo *, struct sockaddr *,
u_int, const struct sockaddr *, u_int, int, void *,
struct inpcb *(*)(struct inpcb *, int)));
diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c
index 5202e09..c802bfc 100644
--- a/sys/netinet6/in6_src.c
+++ b/sys/netinet6/in6_src.c
@@ -856,8 +856,8 @@ in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred)
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
#endif
- INP_INFO_WLOCK_ASSERT(pcbinfo);
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(pcbinfo);
error = prison_local_ip6(cred, laddr,
((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0));
diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c
index d574c19..75145d7 100644
--- a/sys/netinet6/udp6_usrreq.c
+++ b/sys/netinet6/udp6_usrreq.c
@@ -1,7 +1,11 @@
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -231,11 +235,11 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
init_sin6(&fromsa, m);
fromsa.sin6_port = uh->uh_sport;
- INP_INFO_RLOCK(&V_udbinfo);
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
struct inpcb *last;
struct ip6_moptions *imo;
+ INP_INFO_RLOCK(&V_udbinfo);
/*
* In the event that laddr should be set to the link-local
* address (this happens in RIPng), the multicast address
@@ -273,6 +277,13 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
}
/*
+ * XXXRW: Because we weren't holding either the inpcb
+ * or the hash lock when we checked for a match
+ * before, we should probably recheck now that the
+ * inpcb lock is (supposed to be) held.
+ */
+
+ /*
* Handle socket delivery policy for any-source
* and source-specific multicast. [RFC3678]
*/
@@ -366,8 +377,9 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
/*
* Locate pcb for datagram.
*/
- inp = in6_pcblookup_hash(&V_udbinfo, &ip6->ip6_src, uh->uh_sport,
- &ip6->ip6_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif);
+ inp = in6_pcblookup(&V_udbinfo, &ip6->ip6_src, uh->uh_sport,
+ &ip6->ip6_dst, uh->uh_dport, INPLOOKUP_WILDCARD |
+ INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif);
if (inp == NULL) {
if (udp_log_in_vain) {
char ip6bufs[INET6_ADDRSTRLEN];
@@ -384,9 +396,8 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
if (m->m_flags & M_MCAST) {
printf("UDP6: M_MCAST is set in a unicast packet.\n");
UDPSTAT_INC(udps_noportmcast);
- goto badheadlocked;
+ goto badunlocked;
}
- INP_INFO_RUNLOCK(&V_udbinfo);
if (V_udp_blackhole)
goto badunlocked;
if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0)
@@ -394,8 +405,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0);
return (IPPROTO_DONE);
}
- INP_RLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_RLOCK_ASSERT(inp);
up = intoudpcb(inp);
if (up->u_tun_func == NULL) {
udp6_append(inp, m, off, &fromsa);
@@ -505,13 +515,11 @@ udp6_getcred(SYSCTL_HANDLER_ARGS)
(error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) {
return (error);
}
- INP_INFO_RLOCK(&V_udbinfo);
- inp = in6_pcblookup_hash(&V_udbinfo, &addrs[1].sin6_addr,
- addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, 1,
- NULL);
+ inp = in6_pcblookup(&V_udbinfo, &addrs[1].sin6_addr,
+ addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port,
+ INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
- INP_RLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_RLOCK_ASSERT(inp);
if (inp->inp_socket == NULL)
error = ENOENT;
if (error == 0)
@@ -520,10 +528,8 @@ udp6_getcred(SYSCTL_HANDLER_ARGS)
if (error == 0)
cru2x(inp->inp_cred, &xuc);
INP_RUNLOCK(inp);
- } else {
- INP_INFO_RUNLOCK(&V_udbinfo);
+ } else
error = ENOENT;
- }
if (error == 0)
error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
return (error);
@@ -552,6 +558,7 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
struct sockaddr_in6 tmp;
INP_WLOCK_ASSERT(inp);
+ INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
if (addr6) {
/* addr6 has been validated in udp6_send(). */
@@ -772,15 +779,15 @@ udp6_abort(struct socket *so)
}
#endif
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
+ INP_HASH_WLOCK(&V_udbinfo);
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
+ INP_HASH_WUNLOCK(&V_udbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
}
static int
@@ -838,8 +845,8 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_bind: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
+ INP_HASH_WLOCK(&V_udbinfo);
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
@@ -867,8 +874,8 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
#ifdef INET
out:
#endif
+ INP_HASH_WUNLOCK(&V_udbinfo);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (error);
}
@@ -889,15 +896,15 @@ udp6_close(struct socket *so)
return;
}
#endif
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
+ INP_HASH_WLOCK(&V_udbinfo);
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
+ INP_HASH_WUNLOCK(&V_udbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
}
static int
@@ -911,7 +918,9 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
sin6 = (struct sockaddr_in6 *)nam;
KASSERT(inp != NULL, ("udp6_connect: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
+ /*
+ * XXXRW: Need to clarify locking of v4/v6 flags.
+ */
INP_WLOCK(inp);
#ifdef INET
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
@@ -931,8 +940,10 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
error = prison_remote_ip4(td->td_ucred, &sin.sin_addr);
if (error != 0)
goto out;
+ INP_HASH_WLOCK(&V_udbinfo);
error = in_pcbconnect(inp, (struct sockaddr *)&sin,
td->td_ucred);
+ INP_HASH_WUNLOCK(&V_udbinfo);
if (error == 0)
soisconnected(so);
goto out;
@@ -947,12 +958,13 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr);
if (error != 0)
goto out;
+ INP_HASH_WLOCK(&V_udbinfo);
error = in6_pcbconnect(inp, nam, td->td_ucred);
+ INP_HASH_WUNLOCK(&V_udbinfo);
if (error == 0)
soisconnected(so);
out:
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (error);
}
@@ -984,32 +996,32 @@ udp6_disconnect(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
- INP_WLOCK(inp);
-
#ifdef INET
if (inp->inp_vflag & INP_IPV4) {
struct pr_usrreqs *pru;
pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
- error = (*pru->pru_disconnect)(so);
- goto out;
+ (void)(*pru->pru_disconnect)(so);
+ return (0);
}
#endif
+ INP_WLOCK(inp);
+
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
error = ENOTCONN;
goto out;
}
+ INP_HASH_WLOCK(&V_udbinfo);
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
+ INP_HASH_WUNLOCK(&V_udbinfo);
SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTED; /* XXX */
SOCK_UNLOCK(so);
out:
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (0);
}
@@ -1023,7 +1035,6 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_send: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
INP_WLOCK(inp);
if (addr) {
if (addr->sa_len != sizeof(struct sockaddr_in6)) {
@@ -1060,7 +1071,6 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
* select the UDPv4 output routine are invalidated?
*/
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
if (sin6)
in6_sin6_2_sin_in_sock(addr);
pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
@@ -1073,16 +1083,16 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
#ifdef MAC
mac_inpcb_create_mbuf(inp, m);
#endif
+ INP_HASH_WLOCK(&V_udbinfo);
error = udp6_output(inp, m, addr, control, td);
+ INP_HASH_WUNLOCK(&V_udbinfo);
#ifdef INET
#endif
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
return (error);
bad:
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
m_freem(m);
return (error);
}
OpenPOWER on IntegriCloud