diff options
Diffstat (limited to 'sys/netinet/in_pcb.h')
-rw-r--r-- | sys/netinet/in_pcb.h | 137 |
1 files changed, 124 insertions, 13 deletions
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 14d4ea2..dfef963 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -44,6 +44,7 @@ #include <sys/_rwlock.h> #ifdef _KERNEL +#include <sys/lock.h> #include <sys/rwlock.h> #include <net/vnet.h> #include <vm/uma.h> @@ -141,6 +142,7 @@ struct icmp6_filter; * * Key: * (c) - Constant after initialization + * (g) - Protected by the pcbgroup lock * (i) - Protected by the inpcb lock * (p) - Protected by the pcbinfo lock for the inpcb * (s) - Protected by another subsystem's locks @@ -160,9 +162,12 @@ struct icmp6_filter; */ struct inpcb { LIST_ENTRY(inpcb) inp_hash; /* (i/p) hash list */ + LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */ LIST_ENTRY(inpcb) inp_list; /* (i/p) list for all PCBs for proto */ void *inp_ppcb; /* (i) pointer to per-protocol pcb */ struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */ + struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */ + LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/p) group wildcard entry */ struct socket *inp_socket; /* (i) back pointer to socket */ struct ucred *inp_cred; /* (c) cache of socket cred */ u_int32_t inp_flow; /* (i) IPv6 flow information */ @@ -268,22 +273,23 @@ struct inpcbport { * Global data structure for each high-level protocol (UDP, TCP, ...) in both * IPv4 and IPv6. Holds inpcb lists and information for managing them. * - * Each pcbinfo is protected by ipi_lock, covering mutable global fields (such - * as the global pcb list) and hashed lookup tables. The lock order is: + * Each pcbinfo is protected by two locks: ipi_lock and ipi_hash_lock, + * the former covering mutable global fields (such as the global pcb list), + * and the latter covering the hashed lookup tables. The lock order is: * - * ipi_lock (before) inpcb locks + * ipi_lock (before) inpcb locks (before) {ipi_hash_lock, pcbgroup locks} * * Locking key: * * (c) Constant or nearly constant after initialisation * (g) Locked by ipi_lock - * (h) Read using either ipi_lock or inpcb lock; write requires both. + * (h) Read using either ipi_hash_lock or inpcb lock; write requires both + * (p) Protected by one or more pcbgroup locks * (x) Synchronisation properties poorly defined */ struct inpcbinfo { /* - * Global lock protecting global inpcb list, inpcb count, hash tables, - * etc. + * Global lock protecting global inpcb list, inpcb count, etc. */ struct rwlock ipi_lock; @@ -312,17 +318,39 @@ struct inpcbinfo { struct uma_zone *ipi_zone; /* (c) */ /* + * Connection groups associated with this protocol. These fields are + * constant, but pcbgroup structures themselves are protected by + * per-pcbgroup locks. + */ + struct inpcbgroup *ipi_pcbgroups; /* (c) */ + u_int ipi_npcbgroups; /* (c) */ + u_int ipi_hashfields; /* (c) */ + + /* + * Global lock protecting non-pcbgroup hash lookup tables. + */ + struct rwlock ipi_hash_lock; + + /* * Global hash of inpcbs, hashed by local and foreign addresses and * port numbers. */ - struct inpcbhead *ipi_hashbase; /* (g) */ - u_long ipi_hashmask; /* (g) */ + struct inpcbhead *ipi_hashbase; /* (h) */ + u_long ipi_hashmask; /* (h) */ /* * Global hash of inpcbs, hashed by only local port number. */ - struct inpcbporthead *ipi_porthashbase; /* (g) */ - u_long ipi_porthashmask; /* (g) */ + struct inpcbporthead *ipi_porthashbase; /* (h) */ + u_long ipi_porthashmask; /* (h) */ + + /* + * List of wildcard inpcbs for use with pcbgroups. In the past, was + * per-pcbgroup but is now global. All pcbgroup locks must be held + * to modify the list, so any is sufficient to read it. + */ + struct inpcbhead *ipi_wildbase; /* (p) */ + u_long ipi_wildmask; /* (p) */ /* * Pointer to network stack instance @@ -335,6 +363,31 @@ struct inpcbinfo { void *ipi_pspare[2]; }; +/* + * Connection groups hold sets of connections that have similar CPU/thread + * affinity. Each connection belongs to exactly one connection group. + */ +struct inpcbgroup { + /* + * Per-connection group hash of inpcbs, hashed by local and foreign + * addresses and port numbers. + */ + struct inpcbhead *ipg_hashbase; /* (c) */ + u_long ipg_hashmask; /* (c) */ + + /* + * Notional affinity of this pcbgroup. + */ + u_int ipg_cpu; /* (p) */ + + /* + * Per-connection group lock, not to be confused with ipi_lock. + * Protects the hash table hung off the group, but also the global + * wildcard list in inpcbinfo. + */ + struct mtx ipg_lock; +} __aligned(CACHE_LINE_SIZE); + #define INP_LOCK_INIT(inp, d, t) \ rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK) #define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock) @@ -406,6 +459,26 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, #define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED) #define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED) +#define INP_HASH_LOCK_INIT(ipi, d) \ + rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0) +#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock) +#define INP_HASH_RLOCK(ipi) rw_rlock(&(ipi)->ipi_hash_lock) +#define INP_HASH_WLOCK(ipi) rw_wlock(&(ipi)->ipi_hash_lock) +#define INP_HASH_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_hash_lock) +#define INP_HASH_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_hash_lock) +#define INP_HASH_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \ + RA_LOCKED) +#define INP_HASH_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \ + RA_WLOCKED) + +#define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \ + MTX_DEF | MTX_DUPOK) +#define INP_GROUP_LOCK_DESTROY(ipg) mtx_destroy(&(ipg)->ipg_lock) + +#define INP_GROUP_LOCK(ipg) mtx_lock(&(ipg)->ipg_lock) +#define INP_GROUP_LOCK_ASSERT(ipg) mtx_assert(&(ipg)->ipg_lock, MA_OWNED) +#define INP_GROUP_UNLOCK(ipg) mtx_unlock(&(ipg)->ipg_lock) + #define INP_PCBHASH(faddr, lport, fport, mask) \ (((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask)) #define INP_PCBPORTHASH(lport, mask) \ @@ -465,8 +538,18 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, */ #define INP_LLE_VALID 0x00000001 /* cached lle is valid */ #define INP_RT_VALID 0x00000002 /* cached rtentry is valid */ +#define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */ + +/* + * Flags passed to in_pcblookup*() functions. + */ +#define INPLOOKUP_WILDCARD 0x00000001 /* Allow wildcard sockets. */ +#define INPLOOKUP_RLOCKPCB 0x00000002 /* Return inpcb read-locked. */ +#define INPLOOKUP_WLOCKPCB 0x00000004 /* Return inpcb write-locked. */ + +#define INPLOOKUP_MASK (INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB | \ + INPLOOKUP_WLOCKPCB) -#define INPLOOKUP_WILDCARD 1 #define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb) #define sotoin6pcb(so) sotoinpcb(so) /* for KAME src sync over BSD*'s */ @@ -474,6 +557,13 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, #define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af) +/* + * Constants for pcbinfo.ipi_hashfields. + */ +#define IPI_HASHFIELDS_NONE 0 +#define IPI_HASHFIELDS_2TUPLE 1 +#define IPI_HASHFIELDS_4TUPLE 2 + #ifdef _KERNEL VNET_DECLARE(int, ipport_reservedhigh); VNET_DECLARE(int, ipport_reservedlow); @@ -505,7 +595,21 @@ VNET_DECLARE(int, ipport_tcpallocs); void in_pcbinfo_destroy(struct inpcbinfo *); void in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *, - int, int, char *, uma_init, uma_fini, uint32_t); + int, int, char *, uma_init, uma_fini, uint32_t, u_int); + +struct inpcbgroup * + in_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t); +struct inpcbgroup * + in_pcbgroup_byinpcb(struct inpcb *); +struct inpcbgroup * + in_pcbgroup_bytuple(struct inpcbinfo *, struct in_addr, u_short, + struct in_addr, u_short); +void in_pcbgroup_destroy(struct inpcbinfo *); +int in_pcbgroup_enabled(struct inpcbinfo *); +void in_pcbgroup_init(struct inpcbinfo *, u_int, int); +void in_pcbgroup_remove(struct inpcb *); +void in_pcbgroup_update(struct inpcb *); +void in_pcbgroup_update_mbuf(struct inpcb *, struct mbuf *); void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); int in_pcballoc(struct socket *, struct inpcbinfo *); @@ -515,6 +619,8 @@ int in_pcb_lport(struct inpcb *, struct in_addr *, u_short *, int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *, u_short *, struct ucred *); int in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *); +int in_pcbconnect_mbuf(struct inpcb *, struct sockaddr *, struct ucred *, + struct mbuf *); int in_pcbconnect_setup(struct inpcb *, struct sockaddr *, in_addr_t *, u_short *, in_addr_t *, u_short *, struct inpcb **, struct ucred *); @@ -523,16 +629,21 @@ void in_pcbdisconnect(struct inpcb *); void in_pcbdrop(struct inpcb *); void in_pcbfree(struct inpcb *); int in_pcbinshash(struct inpcb *); +int in_pcbinshash_nopcbgroup(struct inpcb *); struct inpcb * in_pcblookup_local(struct inpcbinfo *, struct in_addr, u_short, int, struct ucred *); struct inpcb * - in_pcblookup_hash(struct inpcbinfo *, struct in_addr, u_int, + in_pcblookup(struct inpcbinfo *, struct in_addr, u_int, struct in_addr, u_int, int, struct ifnet *); +struct inpcb * + in_pcblookup_mbuf(struct inpcbinfo *, struct in_addr, u_int, + struct in_addr, u_int, int, struct ifnet *, struct mbuf *); void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr, int, struct inpcb *(*)(struct inpcb *, int)); void in_pcbref(struct inpcb *); void in_pcbrehash(struct inpcb *); +void in_pcbrehash_mbuf(struct inpcb *, struct mbuf *); int in_pcbrele(struct inpcb *); int in_pcbrele_rlocked(struct inpcb *); int in_pcbrele_wlocked(struct inpcb *); |