summaryrefslogtreecommitdiffstats
path: root/sys/netinet/in_pcb.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/netinet/in_pcb.c')
-rw-r--r--sys/netinet/in_pcb.c229
1 files changed, 225 insertions, 4 deletions
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 4aa998f..4eb309a 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include "opt_ipsec.h"
#include "opt_inet.h"
#include "opt_inet6.h"
+#include "opt_pcbgroup.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -212,7 +213,7 @@ void
in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
struct inpcbhead *listhead, int hash_nelements, int porthash_nelements,
char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini,
- uint32_t inpcbzone_flags)
+ uint32_t inpcbzone_flags, u_int hashfields)
{
INP_INFO_LOCK_INIT(pcbinfo, name);
@@ -227,6 +228,9 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
&pcbinfo->ipi_hashmask);
pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
&pcbinfo->ipi_porthashmask);
+#ifdef PCBGROUP
+ in_pcbgroup_init(pcbinfo, hashfields, hash_nelements);
+#endif
pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb),
NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR,
inpcbzone_flags);
@@ -246,6 +250,9 @@ in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
pcbinfo->ipi_porthashmask);
+#ifdef PCBGROUP
+ in_pcbgroup_destroy(pcbinfo);
+#endif
uma_zdestroy(pcbinfo->ipi_zone);
INP_HASH_LOCK_DESTROY(pcbinfo);
INP_INFO_LOCK_DESTROY(pcbinfo);
@@ -1053,7 +1060,8 @@ in_pcbdetach(struct inpcb *inp)
* in_pcbref() bumps the reference count on an inpcb in order to maintain
* stability of an inpcb pointer despite the inpcb lock being released. This
* is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
- * but where the inpcb lock is already held.
+ * but where the inpcb lock may already held, or when acquiring a reference
+ * via a pcbgroup.
*
* in_pcbref() should be used only to provide brief memory stability, and
* must always be followed by a call to INP_WLOCK() and in_pcbrele() to
@@ -1223,6 +1231,9 @@ in_pcbdrop(struct inpcb *inp)
}
INP_HASH_WUNLOCK(inp->inp_pcbinfo);
inp->inp_flags &= ~INP_INHASHLIST;
+#ifdef PCBGROUP
+ in_pcbgroup_remove(inp);
+#endif
}
}
@@ -1472,6 +1483,148 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
}
#undef INP_LOOKUP_MAPPED_PCB_COST
+#ifdef PCBGROUP
+/*
+ * Lookup PCB in hash list, using pcbgroup tables.
+ */
+static struct inpcb *
+in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
+ struct in_addr faddr, u_int fport_arg, struct in_addr laddr,
+ u_int lport_arg, int lookupflags, struct ifnet *ifp)
+{
+ struct inpcbhead *head;
+ struct inpcb *inp, *tmpinp;
+ u_short fport = fport_arg, lport = lport_arg;
+
+ /*
+ * First look for an exact match.
+ */
+ tmpinp = NULL;
+ INP_GROUP_LOCK(pcbgroup);
+ head = &pcbgroup->ipg_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
+ pcbgroup->ipg_hashmask)];
+ LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (inp->inp_faddr.s_addr == faddr.s_addr &&
+ inp->inp_laddr.s_addr == laddr.s_addr &&
+ inp->inp_fport == fport &&
+ inp->inp_lport == lport) {
+ /*
+ * XXX We should be able to directly return
+ * the inp here, without any checks.
+ * Well unless both bound with SO_REUSEPORT?
+ */
+ if (prison_flag(inp->inp_cred, PR_IP4))
+ goto found;
+ if (tmpinp == NULL)
+ tmpinp = inp;
+ }
+ }
+ if (tmpinp != NULL) {
+ inp = tmpinp;
+ goto found;
+ }
+
+ /*
+ * Then look for a wildcard match, if requested.
+ */
+ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
+ struct inpcb *local_wild = NULL, *local_exact = NULL;
+#ifdef INET6
+ struct inpcb *local_wild_mapped = NULL;
+#endif
+ struct inpcb *jail_wild = NULL;
+ struct inpcbhead *head;
+ int injail;
+
+ /*
+ * Order of socket selection - we always prefer jails.
+ * 1. jailed, non-wild.
+ * 2. jailed, wild.
+ * 3. non-jailed, non-wild.
+ * 4. non-jailed, wild.
+ */
+ head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_wildmask)];
+ LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (inp->inp_faddr.s_addr != INADDR_ANY ||
+ inp->inp_lport != lport)
+ continue;
+
+ /* XXX inp locking */
+ if (ifp && ifp->if_type == IFT_FAITH &&
+ (inp->inp_flags & INP_FAITH) == 0)
+ continue;
+
+ injail = prison_flag(inp->inp_cred, PR_IP4);
+ if (injail) {
+ if (prison_check_ip4(inp->inp_cred,
+ &laddr) != 0)
+ continue;
+ } else {
+ if (local_exact != NULL)
+ continue;
+ }
+
+ if (inp->inp_laddr.s_addr == laddr.s_addr) {
+ if (injail)
+ goto found;
+ else
+ local_exact = inp;
+ } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
+#ifdef INET6
+ /* XXX inp locking, NULL check */
+ if (inp->inp_vflag & INP_IPV6PROTO)
+ local_wild_mapped = inp;
+ else
+#endif /* INET6 */
+ if (injail)
+ jail_wild = inp;
+ else
+ local_wild = inp;
+ }
+ } /* LIST_FOREACH */
+ inp = jail_wild;
+ if (inp == NULL)
+ inp = local_exact;
+ if (inp == NULL)
+ inp = local_wild;
+#ifdef INET6
+ if (inp == NULL)
+ inp = local_wild_mapped;
+#endif /* defined(INET6) */
+ if (inp != NULL)
+ goto found;
+ } /* if (lookupflags & INPLOOKUP_WILDCARD) */
+ INP_GROUP_UNLOCK(pcbgroup);
+ return (NULL);
+
+found:
+ in_pcbref(inp);
+ INP_GROUP_UNLOCK(pcbgroup);
+ if (lookupflags & INPLOOKUP_WLOCKPCB) {
+ INP_WLOCK(inp);
+ if (in_pcbrele_wlocked(inp))
+ return (NULL);
+ } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+ INP_RLOCK(inp);
+ if (in_pcbrele_rlocked(inp))
+ return (NULL);
+ } else
+ panic("%s: locking bug", __func__);
+ return (inp);
+}
+#endif /* PCBGROUP */
+
/*
* Lookup PCB in hash list, using pcbinfo tables. This variation assumes
* that the caller has locked the hash list, and will not perform any further
@@ -1636,17 +1789,30 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
/*
* Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
* from which a pre-calculated hash value may be extracted.
+ *
+ * Possibly more of this logic should be in in_pcbgroup.c.
*/
struct inpcb *
in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport,
struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp)
{
+#if defined(PCBGROUP)
+ struct inpcbgroup *pcbgroup;
+#endif
KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
("%s: LOCKPCB not set", __func__));
+#if defined(PCBGROUP)
+ if (in_pcbgroup_enabled(pcbinfo)) {
+ pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+ fport);
+ return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+ laddr, lport, lookupflags, ifp));
+ }
+#endif
return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
lookupflags, ifp));
}
@@ -1656,12 +1822,28 @@ in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
struct ifnet *ifp, struct mbuf *m)
{
+#ifdef PCBGROUP
+ struct inpcbgroup *pcbgroup;
+#endif
KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
("%s: LOCKPCB not set", __func__));
+#ifdef PCBGROUP
+ if (in_pcbgroup_enabled(pcbinfo)) {
+ pcbgroup = in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
+ m->m_pkthdr.flowid);
+ if (pcbgroup != NULL)
+ return (in_pcblookup_group(pcbinfo, pcbgroup, faddr,
+ fport, laddr, lport, lookupflags, ifp));
+ pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+ fport);
+ return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+ laddr, lport, lookupflags, ifp));
+ }
+#endif
return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
lookupflags, ifp));
}
@@ -1670,8 +1852,8 @@ in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
/*
* Insert PCB onto various hash lists.
*/
-int
-in_pcbinshash(struct inpcb *inp)
+static int
+in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
{
struct inpcbhead *pcbhash;
struct inpcbporthead *pcbporthash;
@@ -1721,10 +1903,39 @@ in_pcbinshash(struct inpcb *inp)
LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
inp->inp_flags |= INP_INHASHLIST;
+#ifdef PCBGROUP
+ if (do_pcbgroup_update)
+ in_pcbgroup_update(inp);
+#endif
return (0);
}
/*
+ * For now, there are two public interfaces to insert an inpcb into the hash
+ * lists -- one that does update pcbgroups, and one that doesn't. The latter
+ * is used only in the TCP syncache, where in_pcbinshash is called before the
+ * full 4-tuple is set for the inpcb, and we don't want to install in the
+ * pcbgroup until later.
+ *
+ * XXXRW: This seems like a misfeature. in_pcbinshash should always update
+ * connection groups, and partially initialised inpcbs should not be exposed
+ * to either reservation hash tables or pcbgroups.
+ */
+int
+in_pcbinshash(struct inpcb *inp)
+{
+
+ return (in_pcbinshash_internal(inp, 1));
+}
+
+int
+in_pcbinshash_nopcbgroup(struct inpcb *inp)
+{
+
+ return (in_pcbinshash_internal(inp, 0));
+}
+
+/*
* Move PCB to the proper hash bucket when { faddr, fport } have been
* changed. NOTE: This does not handle the case of the lport changing (the
* hashed port list would have to be updated as well), so the lport must
@@ -1755,6 +1966,13 @@ in_pcbrehash_mbuf(struct inpcb *inp, struct mbuf *m)
LIST_REMOVE(inp, inp_hash);
LIST_INSERT_HEAD(head, inp, inp_hash);
+
+#ifdef PCBGROUP
+ if (m != NULL)
+ in_pcbgroup_update_mbuf(inp, m);
+ else
+ in_pcbgroup_update(inp);
+#endif
}
void
@@ -1791,6 +2009,9 @@ in_pcbremlists(struct inpcb *inp)
}
LIST_REMOVE(inp, inp_list);
pcbinfo->ipi_count--;
+#ifdef PCBGROUP
+ in_pcbgroup_remove(inp);
+#endif
}
/*
OpenPOWER on IntegriCloud