summary refs log tree commit diff stats
path: root/sys/netinet/in_pcb.h
diff options
context:
space:
mode:
Diffstat (limited to 'sys/netinet/in_pcb.h')
-rw-r--r-- sys/netinet/in_pcb.h 137
1 files changed, 124 insertions, 13 deletions
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 14d4ea2..dfef963 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -44,6 +44,7 @@
#include <sys/_rwlock.h>
#ifdef _KERNEL
+#include <sys/lock.h>
#include <sys/rwlock.h>
#include <net/vnet.h>
#include <vm/uma.h>
@@ -141,6 +142,7 @@ struct icmp6_filter;
*
* Key:
* (c) - Constant after initialization
+ * (g) - Protected by the pcbgroup lock
* (i) - Protected by the inpcb lock
* (p) - Protected by the pcbinfo lock for the inpcb
* (s) - Protected by another subsystem's locks
@@ -160,9 +162,12 @@ struct icmp6_filter;
*/
struct inpcb {
LIST_ENTRY(inpcb) inp_hash; /* (i/p) hash list */
+ LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
LIST_ENTRY(inpcb) inp_list; /* (i/p) list for all PCBs for proto */
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
+ struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
+ LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/p) group wildcard entry */
struct socket *inp_socket; /* (i) back pointer to socket */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */
@@ -268,22 +273,23 @@ struct inpcbport {
* Global data structure for each high-level protocol (UDP, TCP, ...) in both
* IPv4 and IPv6. Holds inpcb lists and information for managing them.
*
- * Each pcbinfo is protected by ipi_lock, covering mutable global fields (such
- * as the global pcb list) and hashed lookup tables. The lock order is:
+ * Each pcbinfo is protected by two locks: ipi_lock and ipi_hash_lock,
+ * the former covering mutable global fields (such as the global pcb list),
+ * and the latter covering the hashed lookup tables. The lock order is:
*
- * ipi_lock (before) inpcb locks
+ * ipi_lock (before) inpcb locks (before) {ipi_hash_lock, pcbgroup locks}
*
* Locking key:
*
* (c) Constant or nearly constant after initialisation
* (g) Locked by ipi_lock
- * (h) Read using either ipi_lock or inpcb lock; write requires both.
+ * (h) Read using either ipi_hash_lock or inpcb lock; write requires both
+ * (p) Protected by one or more pcbgroup locks
* (x) Synchronisation properties poorly defined
*/
struct inpcbinfo {
/*
- * Global lock protecting global inpcb list, inpcb count, hash tables,
- * etc.
+ * Global lock protecting global inpcb list, inpcb count, etc.
*/
struct rwlock ipi_lock;
@@ -312,17 +318,39 @@ struct inpcbinfo {
struct uma_zone *ipi_zone; /* (c) */
/*
+ * Connection groups associated with this protocol. These fields are
+ * constant, but pcbgroup structures themselves are protected by
+ * per-pcbgroup locks.
+ */
+ struct inpcbgroup *ipi_pcbgroups; /* (c) */
+ u_int ipi_npcbgroups; /* (c) */
+ u_int ipi_hashfields; /* (c) */
+
+ /*
+ * Global lock protecting non-pcbgroup hash lookup tables.
+ */
+ struct rwlock ipi_hash_lock;
+
+ /*
* Global hash of inpcbs, hashed by local and foreign addresses and
* port numbers.
*/
- struct inpcbhead *ipi_hashbase; /* (g) */
- u_long ipi_hashmask; /* (g) */
+ struct inpcbhead *ipi_hashbase; /* (h) */
+ u_long ipi_hashmask; /* (h) */
/*
* Global hash of inpcbs, hashed by only local port number.
*/
- struct inpcbporthead *ipi_porthashbase; /* (g) */
- u_long ipi_porthashmask; /* (g) */
+ struct inpcbporthead *ipi_porthashbase; /* (h) */
+ u_long ipi_porthashmask; /* (h) */
+
+ /*
+ * List of wildcard inpcbs for use with pcbgroups. This list used to be
+ * per-pcbgroup but is now global. All pcbgroup locks must be held to
+ * modify the list, so holding any one of them suffices to read it.
+ */
+ struct inpcbhead *ipi_wildbase; /* (p) */
+ u_long ipi_wildmask; /* (p) */
/*
* Pointer to network stack instance
@@ -335,6 +363,31 @@ struct inpcbinfo {
void *ipi_pspare[2];
};
+/*
+ * Connection groups hold sets of connections that have similar CPU/thread
+ * affinity. Each connection belongs to exactly one connection group.
+ */
+struct inpcbgroup {
+ /*
+ * Per-connection group hash of inpcbs, hashed by local and foreign
+ * addresses and port numbers.
+ */
+ struct inpcbhead *ipg_hashbase; /* (c) */
+ u_long ipg_hashmask; /* (c) */
+
+ /*
+ * Notional affinity of this pcbgroup.
+ */
+ u_int ipg_cpu; /* (p) */
+
+ /*
+ * Per-connection group lock, not to be confused with ipi_lock.
+ * Protects the hash table hung off the group, but also the global
+ * wildcard list in inpcbinfo.
+ */
+ struct mtx ipg_lock;
+} __aligned(CACHE_LINE_SIZE);
+
#define INP_LOCK_INIT(inp, d, t) \
rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK)
#define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock)
@@ -406,6 +459,26 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
+#define INP_HASH_LOCK_INIT(ipi, d) \
+ rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0)
+#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock)
+#define INP_HASH_RLOCK(ipi) rw_rlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_WLOCK(ipi) rw_wlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_hash_lock)
+#define INP_HASH_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
+ RA_LOCKED)
+#define INP_HASH_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
+ RA_WLOCKED)
+
+#define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \
+ MTX_DEF | MTX_DUPOK)
+#define INP_GROUP_LOCK_DESTROY(ipg) mtx_destroy(&(ipg)->ipg_lock)
+
+#define INP_GROUP_LOCK(ipg) mtx_lock(&(ipg)->ipg_lock)
+#define INP_GROUP_LOCK_ASSERT(ipg) mtx_assert(&(ipg)->ipg_lock, MA_OWNED)
+#define INP_GROUP_UNLOCK(ipg) mtx_unlock(&(ipg)->ipg_lock)
+
#define INP_PCBHASH(faddr, lport, fport, mask) \
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
#define INP_PCBPORTHASH(lport, mask) \
@@ -465,8 +538,18 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
*/
#define INP_LLE_VALID 0x00000001 /* cached lle is valid */
#define INP_RT_VALID 0x00000002 /* cached rtentry is valid */
+#define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */
+
+/*
+ * Flags passed to in_pcblookup*() functions.
+ */
+#define INPLOOKUP_WILDCARD 0x00000001 /* Allow wildcard sockets. */
+#define INPLOOKUP_RLOCKPCB 0x00000002 /* Return inpcb read-locked. */
+#define INPLOOKUP_WLOCKPCB 0x00000004 /* Return inpcb write-locked. */
+
+#define INPLOOKUP_MASK (INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB | \
+ INPLOOKUP_WLOCKPCB)
-#define INPLOOKUP_WILDCARD 1
#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb)
#define sotoin6pcb(so) sotoinpcb(so) /* for KAME src sync over BSD*'s */
@@ -474,6 +557,13 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
#define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af)
+/*
+ * Constants for pcbinfo.ipi_hashfields.
+ */
+#define IPI_HASHFIELDS_NONE 0
+#define IPI_HASHFIELDS_2TUPLE 1
+#define IPI_HASHFIELDS_4TUPLE 2
+
#ifdef _KERNEL
VNET_DECLARE(int, ipport_reservedhigh);
VNET_DECLARE(int, ipport_reservedlow);
@@ -505,7 +595,21 @@ VNET_DECLARE(int, ipport_tcpallocs);
void in_pcbinfo_destroy(struct inpcbinfo *);
void in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *,
- int, int, char *, uma_init, uma_fini, uint32_t);
+ int, int, char *, uma_init, uma_fini, uint32_t, u_int);
+
+struct inpcbgroup *
+ in_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t);
+struct inpcbgroup *
+ in_pcbgroup_byinpcb(struct inpcb *);
+struct inpcbgroup *
+ in_pcbgroup_bytuple(struct inpcbinfo *, struct in_addr, u_short,
+ struct in_addr, u_short);
+void in_pcbgroup_destroy(struct inpcbinfo *);
+int in_pcbgroup_enabled(struct inpcbinfo *);
+void in_pcbgroup_init(struct inpcbinfo *, u_int, int);
+void in_pcbgroup_remove(struct inpcb *);
+void in_pcbgroup_update(struct inpcb *);
+void in_pcbgroup_update_mbuf(struct inpcb *, struct mbuf *);
void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
int in_pcballoc(struct socket *, struct inpcbinfo *);
@@ -515,6 +619,8 @@ int in_pcb_lport(struct inpcb *, struct in_addr *, u_short *,
int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
u_short *, struct ucred *);
int in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *);
+int in_pcbconnect_mbuf(struct inpcb *, struct sockaddr *, struct ucred *,
+ struct mbuf *);
int in_pcbconnect_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
u_short *, in_addr_t *, u_short *, struct inpcb **,
struct ucred *);
@@ -523,16 +629,21 @@ void in_pcbdisconnect(struct inpcb *);
void in_pcbdrop(struct inpcb *);
void in_pcbfree(struct inpcb *);
int in_pcbinshash(struct inpcb *);
+int in_pcbinshash_nopcbgroup(struct inpcb *);
struct inpcb *
in_pcblookup_local(struct inpcbinfo *,
struct in_addr, u_short, int, struct ucred *);
struct inpcb *
- in_pcblookup_hash(struct inpcbinfo *, struct in_addr, u_int,
+ in_pcblookup(struct inpcbinfo *, struct in_addr, u_int,
struct in_addr, u_int, int, struct ifnet *);
+struct inpcb *
+ in_pcblookup_mbuf(struct inpcbinfo *, struct in_addr, u_int,
+ struct in_addr, u_int, int, struct ifnet *, struct mbuf *);
void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr,
int, struct inpcb *(*)(struct inpcb *, int));
void in_pcbref(struct inpcb *);
void in_pcbrehash(struct inpcb *);
+void in_pcbrehash_mbuf(struct inpcb *, struct mbuf *);
int in_pcbrele(struct inpcb *);
int in_pcbrele_rlocked(struct inpcb *);
int in_pcbrele_wlocked(struct inpcb *);
OpenPOWER on IntegriCloud