From d43e6115b673fe03cf9272a6c74af0c41177fef9 Mon Sep 17 00:00:00 2001 From: wollman Date: Tue, 24 Mar 1998 18:06:34 +0000 Subject: Use the zone allocator to allocate inpcbs and tcpcbs. Each protocol creates its own zone; this is used particularly by TCP which allocates both inpcb and tcpcb in a single allocation. (Some hackery ensures that the tcpcb is reasonably aligned.) Also keep track of the number of pcbs of each type allocated, and keep a generation count (instance version number) for future use. --- sys/netinet/in_pcb.c | 13 +++++++---- sys/netinet/in_pcb.h | 37 ++++++++++++++++++++++--------- sys/netinet/ip_divert.c | 5 ++++- sys/netinet/raw_ip.c | 5 ++++- sys/netinet/tcp_subr.c | 54 +++++++++++++++++++++++++++++++++++++++------- sys/netinet/tcp_timewait.c | 54 +++++++++++++++++++++++++++++++++++++++------- sys/netinet/udp_usrreq.c | 5 ++++- 7 files changed, 140 insertions(+), 33 deletions(-) (limited to 'sys/netinet') diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index ede159d..59bb5d9 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 - * $Id: in_pcb.c,v 1.38 1998/01/27 09:15:03 davidg Exp $ + * $Id: in_pcb.c,v 1.39 1998/03/01 19:39:26 guido Exp $ */ #include @@ -44,6 +44,7 @@ #include #include #include +#include /* for zalloci, zfreei prototypes */ #include #include @@ -125,14 +126,15 @@ in_pcballoc(so, pcbinfo, p) { register struct inpcb *inp; - MALLOC(inp, struct inpcb *, sizeof(*inp), M_PCB, - p ? 
M_WAITOK : M_NOWAIT); + inp = zalloci(pcbinfo->ipi_zone); if (inp == NULL) return (ENOBUFS); bzero((caddr_t)inp, sizeof(*inp)); + inp->inp_gencnt = ++pcbinfo->ipi_gencnt; inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); + pcbinfo->ipi_count++; so->so_pcb = (caddr_t)inp; return (0); } @@ -464,7 +466,9 @@ in_pcbdetach(inp) struct inpcb *inp; { struct socket *so = inp->inp_socket; + struct inpcbinfo *ipi = inp->inp_pcbinfo; + inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); so->so_pcb = 0; sofree(so); @@ -473,7 +477,7 @@ in_pcbdetach(inp) if (inp->inp_route.ro_rt) rtfree(inp->inp_route.ro_rt); ip_freemoptions(inp->inp_moptions); - FREE(inp, M_PCB); + zfreei(ipi->ipi_zone, inp); } /* @@ -879,4 +883,5 @@ in_pcbremlists(inp) } } LIST_REMOVE(inp, inp_list); + inp->inp_pcbinfo->ipi_count--; } diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 27f3586..6b446ad 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)in_pcb.h 8.1 (Berkeley) 6/10/93 - * $Id: in_pcb.h,v 1.22 1997/08/16 19:15:36 wollman Exp $ + * $Id: in_pcb.h,v 1.23 1998/01/27 09:15:04 davidg Exp $ */ #ifndef _NETINET_IN_PCB_H_ @@ -49,6 +49,12 @@ LIST_HEAD(inpcbhead, inpcb); LIST_HEAD(inpcbporthead, inpcbport); +/* + * NB: the zone allocator is type-stable EXCEPT FOR THE FIRST TWO LONGS + * of the structure. Therefore, it is important that the members in + * that position not contain any information which is required to be + * stable. 
+ */ struct inpcb { LIST_ENTRY(inpcb) inp_hash; /* hash list */ struct in_addr inp_faddr; /* foreign host table entry */ @@ -69,7 +75,14 @@ struct inpcb { struct ip_moptions *inp_moptions; /* IP multicast options */ LIST_ENTRY(inpcb) inp_portlist; /* list for this PCB's local port */ struct inpcbport *inp_phd; /* head of list for this PCB's local port */ + u_quad_t inp_gencnt; /* generation count of this instance */ }; +/* + * The range of the generation count, as used in this implementation, + * is about 1.8e19 (2^64). We would have to create nearly 600 billion connections per + * second for this number to roll over in a year. This seems sufficiently + * unlikely that we simply don't concern ourselves with that possibility. + */ struct inpcbport { LIST_ENTRY(inpcbport) phd_hash; @@ -77,15 +90,18 @@ struct inpcbport { u_short phd_port; }; -struct inpcbinfo { - struct inpcbhead *hashbase; - unsigned long hashmask; - struct inpcbporthead *porthashbase; - unsigned long porthashmask; - struct inpcbhead *listhead; - unsigned short lastport; - unsigned short lastlow; - unsigned short lasthi; +struct inpcbinfo { /* XXX documentation, prefixes */ + struct inpcbhead *hashbase; + u_long hashmask; + struct inpcbporthead *porthashbase; + u_long porthashmask; + struct inpcbhead *listhead; + u_short lastport; + u_short lastlow; + u_short lasthi; + struct vm_zone *ipi_zone; /* zone to allocate pcbs from */ + u_int ipi_count; /* number of pcbs in this list */ + u_quad_t ipi_gencnt; /* current generation count */ }; #define INP_PCBHASH(faddr, lport, fport, mask) \ @@ -131,6 +147,7 @@ void in_pcbnotify __P((struct inpcbhead *, struct sockaddr *, void in_pcbrehash __P((struct inpcb *)); int in_setpeeraddr __P((struct socket *so, struct sockaddr **nam)); int in_setsockaddr __P((struct socket *so, struct sockaddr **nam)); + #endif #endif diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index dc9b049..4eddf3a 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -30,7 +30,7 @@ * 
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: ip_divert.c,v 1.19 1998/02/04 22:33:06 eivind Exp $ + * $Id: ip_divert.c,v 1.20 1998/02/06 12:13:50 eivind Exp $ */ #include "opt_inet.h" @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -116,6 +117,8 @@ div_init(void) */ divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask); divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask); + divcbinfo.ipi_zone = zinit("divcb", sizeof(struct inpcb), + nmbclusters/4, ZONE_INTERRUPT, 0); } /* diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 95f364e..a8103ff 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 - * $Id: raw_ip.c,v 1.50 1997/12/18 09:13:39 davidg Exp $ + * $Id: raw_ip.c,v 1.51 1998/01/27 09:15:07 davidg Exp $ */ #include @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -94,6 +95,8 @@ rip_init() */ ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask); ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask); + ripcbinfo.ipi_zone = zinit("ripcb", sizeof(struct inpcb), + nmbclusters/4, ZONE_INTERRUPT, 0); } static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index b0bdac2..bf424d5 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 - * $Id: tcp_subr.c,v 1.41 1998/01/25 04:23:32 eivind Exp $ + * $Id: tcp_subr.c,v 1.42 1998/01/27 09:15:10 davidg Exp $ */ #include "opt_compat.h" @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -94,6 +95,26 @@ static void tcp_notify __P((struct inpcb *, int)); #endif /* + * This is the actual shape of what we allocate using the zone + * allocator. 
Doing it this way allows us to protect both structures + * using the same generation count, and also eliminates the overhead + * of allocating tcpcbs separately. By hiding the structure here, + * we avoid changing most of the rest of the code (although it needs + * to be changed, eventually, for greater efficiency). + */ +#define ALIGNMENT 32 +#define ALIGNM1 (ALIGNMENT-1) +struct inp_tp { + union { + struct inpcb inp; + char align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1]; + } inp_tp_u; + struct tcpcb tcb; +}; +#undef ALIGNMENT +#undef ALIGNM1 + +/* * Tcp initialization */ void @@ -107,6 +128,23 @@ tcp_init() tcbinfo.listhead = &tcb; tcbinfo.hashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashmask); tcbinfo.porthashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.porthashmask); + /* For the moment, we just worry about putting inpcbs here. */ + /* + * Rationale for a maximum of `nmbclusters': + * 1) It's a convenient value, sized by config, based on + * parameters already known to be tweakable as needed + * for network-intensive systems. + * 2) Under the Old World Order, when pcbs were stored in + * mbufs, it was of course impossible to have more + * pcbs than mbufs. + * 3) The zone allocator doesn't allocate physical memory + * for this many pcbs; it just sizes the virtual + * address space appropriately. Thus, even for very large + * values of nmbclusters, we don't actually take up much + * memory unless required. + */ + tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), nmbclusters, + ZONE_INTERRUPT, 0); if (max_protohdr < sizeof(struct tcpiphdr)) max_protohdr = sizeof(struct tcpiphdr); if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN) @@ -246,17 +284,18 @@ tcp_respond(tp, ti, m, ack, seq, flags) /* * Create a new TCP control block, making an * empty reassembly queue and hooking it to the argument - * protocol control block. + * protocol control block. The `inp' parameter must have + * come from the zone allocator set up in tcp_init(). 
*/ struct tcpcb * tcp_newtcpcb(inp) struct inpcb *inp; { + struct inp_tp *it; register struct tcpcb *tp; - tp = malloc(sizeof(*tp), M_PCB, M_NOWAIT); - if (tp == NULL) - return ((struct tcpcb *)0); + it = (struct inp_tp *)inp; + tp = &it->tcb; bzero((char *) tp, sizeof(struct tcpcb)); tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; tp->t_maxseg = tp->t_maxopd = tcp_mssdflt; @@ -265,7 +304,7 @@ tcp_newtcpcb(inp) tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); if (tcp_do_rfc1644) tp->t_flags |= TF_REQ_CC; - tp->t_inpcb = inp; + tp->t_inpcb = inp; /* XXX */ /* * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives @@ -279,7 +318,7 @@ tcp_newtcpcb(inp) tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; inp->inp_ip_ttl = ip_defttl; inp->inp_ppcb = (caddr_t)tp; - return (tp); + return (tp); /* XXX */ } /* @@ -418,7 +457,6 @@ tcp_close(tp) if (tp->t_template) (void) m_free(dtom(tp->t_template)); inp->inp_ppcb = NULL; - free(tp, M_PCB); soisdisconnected(so); in_pcbdetach(inp); tcpstat.tcps_closed++; diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index b0bdac2..bf424d5 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 - * $Id: tcp_subr.c,v 1.41 1998/01/25 04:23:32 eivind Exp $ + * $Id: tcp_subr.c,v 1.42 1998/01/27 09:15:10 davidg Exp $ */ #include "opt_compat.h" @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -94,6 +95,26 @@ static void tcp_notify __P((struct inpcb *, int)); #endif /* + * This is the actual shape of what we allocate using the zone + * allocator. Doing it this way allows us to protect both structures + * using the same generation count, and also eliminates the overhead + * of allocating tcpcbs separately. 
By hiding the structure here, + * we avoid changing most of the rest of the code (although it needs + * to be changed, eventually, for greater efficiency). + */ +#define ALIGNMENT 32 +#define ALIGNM1 (ALIGNMENT-1) +struct inp_tp { + union { + struct inpcb inp; + char align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1]; + } inp_tp_u; + struct tcpcb tcb; +}; +#undef ALIGNMENT +#undef ALIGNM1 + +/* * Tcp initialization */ void @@ -107,6 +128,23 @@ tcp_init() tcbinfo.listhead = &tcb; tcbinfo.hashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashmask); tcbinfo.porthashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.porthashmask); + /* For the moment, we just worry about putting inpcbs here. */ + /* + * Rationale for a maximum of `nmbclusters': + * 1) It's a convenient value, sized by config, based on + * parameters already known to be tweakable as needed + * for network-intensive systems. + * 2) Under the Old World Order, when pcbs were stored in + * mbufs, it was of course impossible to have more + * pcbs than mbufs. + * 3) The zone allocator doesn't allocate physical memory + * for this many pcbs; it just sizes the virtual + * address space appropriately. Thus, even for very large + * values of nmbclusters, we don't actually take up much + * memory unless required. + */ + tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), nmbclusters, + ZONE_INTERRUPT, 0); if (max_protohdr < sizeof(struct tcpiphdr)) max_protohdr = sizeof(struct tcpiphdr); if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN) @@ -246,17 +284,18 @@ tcp_respond(tp, ti, m, ack, seq, flags) /* * Create a new TCP control block, making an * empty reassembly queue and hooking it to the argument - * protocol control block. + * protocol control block. The `inp' parameter must have + * come from the zone allocator set up in tcp_init(). 
*/ struct tcpcb * tcp_newtcpcb(inp) struct inpcb *inp; { + struct inp_tp *it; register struct tcpcb *tp; - tp = malloc(sizeof(*tp), M_PCB, M_NOWAIT); - if (tp == NULL) - return ((struct tcpcb *)0); + it = (struct inp_tp *)inp; + tp = &it->tcb; bzero((char *) tp, sizeof(struct tcpcb)); tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; tp->t_maxseg = tp->t_maxopd = tcp_mssdflt; @@ -265,7 +304,7 @@ tcp_newtcpcb(inp) tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); if (tcp_do_rfc1644) tp->t_flags |= TF_REQ_CC; - tp->t_inpcb = inp; + tp->t_inpcb = inp; /* XXX */ /* * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives @@ -279,7 +318,7 @@ tcp_newtcpcb(inp) tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; inp->inp_ip_ttl = ip_defttl; inp->inp_ppcb = (caddr_t)tp; - return (tp); + return (tp); /* XXX */ } /* @@ -418,7 +457,6 @@ tcp_close(tp) if (tp->t_template) (void) m_free(dtom(tp->t_template)); inp->inp_ppcb = NULL; - free(tp, M_PCB); soisdisconnected(so); in_pcbdetach(inp); tcpstat.tcps_closed++; diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 773e42d..f9dd1a7 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 - * $Id: udp_usrreq.c,v 1.43 1998/01/25 17:25:41 steve Exp $ + * $Id: udp_usrreq.c,v 1.44 1998/01/27 09:15:13 davidg Exp $ */ #include @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -98,6 +99,8 @@ udp_init() udbinfo.listhead = &udb; udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask); udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.porthashmask); + udbinfo.ipi_zone = zinit("udpcb", sizeof(struct inpcb), nmbclusters, + ZONE_INTERRUPT, 0); } void -- cgit v1.1