diff options
author | dg <dg@FreeBSD.org> | 1998-01-27 09:15:13 +0000 |
---|---|---|
committer | dg <dg@FreeBSD.org> | 1998-01-27 09:15:13 +0000 |
commit | 7262ff6e58b1d30213c744786d6687611d4695c7 (patch) | |
tree | ec76082ebe6779d7a4cff2c107e7727a3e838a26 /sys | |
parent | 7a6d3914c386d52c97a9c61f28c4e28f617860d4 (diff) | |
download | FreeBSD-src-7262ff6e58b1d30213c744786d6687611d4695c7.zip FreeBSD-src-7262ff6e58b1d30213c744786d6687611d4695c7.tar.gz |
Improved connection establishment performance by doing local port lookups via
a hashed port list. In the new scheme, in_pcblookup() goes away and is
replaced by a new routine, in_pcblookup_local() for doing the local port
check. Note that this implementation is space inefficient in that the PCB
struct is now too large to fit into 128 bytes. I might deal with this in the
future by using the new zone allocator, but I wanted these changes to be
extensively tested in their current form first.
Also:
1) Fixed off-by-one errors in the port lookup loops in in_pcbbind().
2) Got rid of some unneeded rehashing. Added a new routine, in_pcbinshash(),
to do the initial hash insertion.
3) Renamed in_pcblookuphash() to in_pcblookup_hash() for easier readability.
4) Added a new routine, in_pcbremlists() to remove the PCB from the various
hash lists.
5) Added/deleted comments where appropriate.
6) Removed unnecessary splnet() locking. In general, the PCB functions should
be called at splnet()...there are unfortunately a few exceptions, however.
7) Reorganized a few structs for better cache line behavior.
8) Killed my TCP_ACK_HACK kludge. It may come back in a different form in
the future, however.
These changes have been tested on wcarchive for more than a month. In tests
done here, connection establishment overhead is reduced by a factor of more
than 50, thus getting rid of one of the major networking scalability problems.
Still to do: make tcp_fasttimo/tcp_slowtimo scale well for systems with a
large number of connections. tcp_fasttimo is easy; tcp_slowtimo is difficult.
WARNING: Anything that knows about inpcb and tcpcb structs will have to be
recompiled; at the very least, this includes netstat(1).
Diffstat (limited to 'sys')
-rw-r--r-- | sys/netinet/in_pcb.c | 303 | ||||
-rw-r--r-- | sys/netinet/in_pcb.h | 57 | ||||
-rw-r--r-- | sys/netinet/ip_divert.c | 3 | ||||
-rw-r--r-- | sys/netinet/raw_ip.c | 3 | ||||
-rw-r--r-- | sys/netinet/tcp_input.c | 38 | ||||
-rw-r--r-- | sys/netinet/tcp_reass.c | 38 | ||||
-rw-r--r-- | sys/netinet/tcp_subr.c | 10 | ||||
-rw-r--r-- | sys/netinet/tcp_timewait.c | 10 | ||||
-rw-r--r-- | sys/netinet/tcp_usrreq.c | 6 | ||||
-rw-r--r-- | sys/netinet/tcp_var.h | 68 | ||||
-rw-r--r-- | sys/netinet/udp_usrreq.c | 7 |
11 files changed, 305 insertions, 238 deletions
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index e9a83d3..afa6fbd 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 - * $Id: in_pcb.c,v 1.36 1997/12/23 01:40:40 alex Exp $ + * $Id: in_pcb.c,v 1.37 1997/12/25 06:57:36 davidg Exp $ */ #include <sys/param.h> @@ -55,8 +55,8 @@ struct in_addr zeroin_addr; -static void in_pcbinshash __P((struct inpcb *)); -static void in_rtchange __P((struct inpcb *, int)); +static void in_pcbremlists __P((struct inpcb *)); +static void in_rtchange __P((struct inpcb *, int)); /* * These configure the range of local port addresses assigned to @@ -106,6 +106,17 @@ SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); +/* + * in_pcb.c: manage the Protocol Control Blocks. + * + * NOTE: It is assumed that most of these functions will be called at + * splnet(). XXX - There are, unfortunately, a few exceptions to this + * rule that should be fixed. + */ + +/* + * Allocate a PCB and associate it with the socket. + */ int in_pcballoc(so, pcbinfo, p) struct socket *so; @@ -113,7 +124,6 @@ in_pcballoc(so, pcbinfo, p) struct proc *p; { register struct inpcb *inp; - int s; MALLOC(inp, struct inpcb *, sizeof(*inp), M_PCB, p ? 
M_WAITOK : M_NOWAIT); @@ -122,10 +132,7 @@ in_pcballoc(so, pcbinfo, p) bzero((caddr_t)inp, sizeof(*inp)); inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; - s = splnet(); LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); - in_pcbinshash(inp); - splx(s); so->so_pcb = (caddr_t)inp; return (0); } @@ -139,6 +146,7 @@ in_pcbbind(inp, nam, p) register struct socket *so = inp->inp_socket; unsigned short *lastport; struct sockaddr_in *sin; + struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); int error; @@ -147,9 +155,7 @@ in_pcbbind(inp, nam, p) return (EADDRNOTAVAIL); if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY) return (EINVAL); - if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 && - ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 || - (so->so_options & SO_ACCEPTCONN) == 0)) + if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) wild = 1; if (nam) { sin = (struct sockaddr_in *)nam; @@ -186,8 +192,8 @@ in_pcbbind(inp, nam, p) if (ntohs(lport) < IPPORT_RESERVED && p && suser(p->p_ucred, &p->p_acflag)) return (EACCES); - t = in_pcblookup(inp->inp_pcbinfo, zeroin_addr, 0, - sin->sin_addr, lport, wild); + t = in_pcblookup_local(pcbinfo, sin->sin_addr, + lport, wild); if (t && (reuseport & t->inp_socket->so_options) == 0) return (EADDRINUSE); } @@ -202,17 +208,17 @@ in_pcbbind(inp, nam, p) if (inp->inp_flags & INP_HIGHPORT) { first = ipport_hifirstauto; /* sysctl */ last = ipport_hilastauto; - lastport = &inp->inp_pcbinfo->lasthi; + lastport = &pcbinfo->lasthi; } else if (inp->inp_flags & INP_LOWPORT) { if (p && (error = suser(p->p_ucred, &p->p_acflag))) return error; first = ipport_lowfirstauto; /* 1023 */ last = ipport_lowlastauto; /* 600 */ - lastport = &inp->inp_pcbinfo->lastlow; + lastport = &pcbinfo->lastlow; } else { first = ipport_firstauto; /* sysctl */ last = ipport_lastauto; - lastport = &inp->inp_pcbinfo->lastport; + lastport = &pcbinfo->lastport; } /* * Simple 
check to ensure all ports are not used up causing @@ -228,14 +234,20 @@ in_pcbbind(inp, nam, p) count = first - last; do { - if (count-- <= 0) /* completely used? */ - return (EADDRNOTAVAIL); + if (count-- < 0) { /* completely used? */ + /* + * Undo any address bind that may have + * occurred above. + */ + inp->inp_laddr.s_addr = INADDR_ANY; + return (EAGAIN); + } --*lastport; if (*lastport > first || *lastport < last) *lastport = first; lport = htons(*lastport); - } while (in_pcblookup(inp->inp_pcbinfo, - zeroin_addr, 0, inp->inp_laddr, lport, wild)); + } while (in_pcblookup_local(pcbinfo, + inp->inp_laddr, lport, wild)); } else { /* * counting up @@ -243,18 +255,28 @@ in_pcbbind(inp, nam, p) count = last - first; do { - if (count-- <= 0) /* completely used? */ - return (EADDRNOTAVAIL); + if (count-- < 0) { /* completely used? */ + /* + * Undo any address bind that may have + * occurred above. + */ + inp->inp_laddr.s_addr = INADDR_ANY; + return (EAGAIN); + } ++*lastport; if (*lastport < first || *lastport > last) *lastport = first; lport = htons(*lastport); - } while (in_pcblookup(inp->inp_pcbinfo, - zeroin_addr, 0, inp->inp_laddr, lport, wild)); + } while (in_pcblookup_local(pcbinfo, + inp->inp_laddr, lport, wild)); } } inp->inp_lport = lport; - in_pcbrehash(inp); + if (in_pcbinshash(inp) != 0) { + inp->inp_laddr.s_addr = INADDR_ANY; + inp->inp_lport = 0; + return (EAGAIN); + } return (0); } @@ -403,10 +425,11 @@ in_pcbconnect(inp, nam, p) if (error = in_pcbladdr(inp, nam, &ifaddr)) return(error); - if (in_pcblookuphash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, + if (in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, inp->inp_laddr.s_addr ? 
inp->inp_laddr : ifaddr->sin_addr, - inp->inp_lport, 0) != NULL) + inp->inp_lport, 0) != NULL) { return (EADDRINUSE); + } if (inp->inp_laddr.s_addr == INADDR_ANY) { if (inp->inp_lport == 0) (void)in_pcbbind(inp, (struct sockaddr *)0, p); @@ -435,8 +458,8 @@ in_pcbdetach(inp) struct inpcb *inp; { struct socket *so = inp->inp_socket; - int s; + in_pcbremlists(inp); so->so_pcb = 0; sofree(so); if (inp->inp_options) @@ -444,10 +467,6 @@ in_pcbdetach(inp) if (inp->inp_route.ro_rt) rtfree(inp->inp_route.ro_rt); ip_freemoptions(inp->inp_moptions); - s = splnet(); - LIST_REMOVE(inp, inp_hash); - LIST_REMOVE(inp, inp_list); - splx(s); FREE(inp, M_PCB); } @@ -470,6 +489,9 @@ in_setsockaddr(so, nam) register struct inpcb *inp; register struct sockaddr_in *sin; + /* + * Do the malloc first in case it blocks. + */ MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK); bzero(sin, sizeof *sin); sin->sin_family = AF_INET; @@ -499,6 +521,9 @@ in_setpeeraddr(so, nam) struct inpcb *inp; register struct sockaddr_in *sin; + /* + * Do the malloc first in case it blocks. + */ MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK); bzero((caddr_t)sin, sizeof (*sin)); sin->sin_family = AF_INET; @@ -527,8 +552,6 @@ in_setpeeraddr(so, nam) * cmds that are uninteresting (e.g., no error in the map). * Call the protocol specific routine (if any) to report * any errors for each matching socket. - * - * Must be called at splnet. */ void in_pcbnotify(head, dst, fport_arg, laddr, lport_arg, cmd, notify) @@ -636,62 +659,94 @@ in_rtchange(inp, errno) } } +/* + * Lookup a PCB based on the local address and port. 
+ */ struct inpcb * -in_pcblookup(pcbinfo, faddr, fport_arg, laddr, lport_arg, wild_okay) +in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) struct inpcbinfo *pcbinfo; - struct in_addr faddr, laddr; - u_int fport_arg, lport_arg; + struct in_addr laddr; + u_int lport_arg; int wild_okay; { register struct inpcb *inp, *match = NULL; int matchwild = 3, wildcard; - u_short fport = fport_arg, lport = lport_arg; - int s; + u_short lport = lport_arg; - s = splnet(); - - for (inp = pcbinfo->listhead->lh_first; inp != NULL; inp = inp->inp_list.le_next) { - if (inp->inp_lport != lport) - continue; - wildcard = 0; - if (inp->inp_faddr.s_addr != INADDR_ANY) { - if (faddr.s_addr == INADDR_ANY) - wildcard++; - else if (inp->inp_faddr.s_addr != faddr.s_addr || - inp->inp_fport != fport) - continue; - } else { - if (faddr.s_addr != INADDR_ANY) - wildcard++; - } - if (inp->inp_laddr.s_addr != INADDR_ANY) { - if (laddr.s_addr == INADDR_ANY) - wildcard++; - else if (inp->inp_laddr.s_addr != laddr.s_addr) - continue; - } else { - if (laddr.s_addr != INADDR_ANY) - wildcard++; + if (!wild_okay) { + struct inpcbhead *head; + /* + * Look for an unconnected (wildcard foreign addr) PCB that + * matches the local address and port we're looking for. + */ + head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; + for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { + if (inp->inp_faddr.s_addr == INADDR_ANY && + inp->inp_laddr.s_addr == laddr.s_addr && + inp->inp_lport == lport) { + /* + * Found. + */ + return (inp); + } } - if (wildcard && wild_okay == 0) - continue; - if (wildcard < matchwild) { - match = inp; - matchwild = wildcard; - if (matchwild == 0) { + /* + * Not found. + */ + return (NULL); + } else { + struct inpcbporthead *porthash; + struct inpcbport *phd; + struct inpcb *match = NULL; + /* + * Best fit PCB lookup. + * + * First see if this local port is in use by looking on the + * port hash list. 
+ */ + porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, + pcbinfo->porthashmask)]; + for (phd = porthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) { + if (phd->phd_port == lport) break; + } + if (phd != NULL) { + /* + * Port is in use by one or more PCBs. Look for best + * fit. + */ + for (inp = phd->phd_pcblist.lh_first; inp != NULL; + inp = inp->inp_portlist.le_next) { + wildcard = 0; + if (inp->inp_faddr.s_addr != INADDR_ANY) + wildcard++; + if (inp->inp_laddr.s_addr != INADDR_ANY) { + if (laddr.s_addr == INADDR_ANY) + wildcard++; + else if (inp->inp_laddr.s_addr != laddr.s_addr) + continue; + } else { + if (laddr.s_addr != INADDR_ANY) + wildcard++; + } + if (wildcard < matchwild) { + match = inp; + matchwild = wildcard; + if (matchwild == 0) { + break; + } + } } } + return (match); } - splx(s); - return (match); } /* * Lookup PCB in hash list. */ struct inpcb * -in_pcblookuphash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard) +in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard) struct inpcbinfo *pcbinfo; struct in_addr faddr, laddr; u_int fport_arg, lport_arg; @@ -700,9 +755,7 @@ in_pcblookuphash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard) struct inpcbhead *head; register struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; - int s; - s = splnet(); /* * First look for an exact match. */ @@ -711,8 +764,12 @@ in_pcblookuphash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard) if (inp->inp_faddr.s_addr == faddr.s_addr && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_fport == fport && - inp->inp_lport == lport) - goto found; + inp->inp_lport == lport) { + /* + * Found. 
+ */ + return (inp); + } } if (wildcard) { struct inpcb *local_wild = NULL; @@ -720,64 +777,100 @@ in_pcblookuphash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard) head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { if (inp->inp_faddr.s_addr == INADDR_ANY && - inp->inp_fport == 0 && inp->inp_lport == lport) { + inp->inp_lport == lport) { if (inp->inp_laddr.s_addr == laddr.s_addr) - goto found; + return (inp); else if (inp->inp_laddr.s_addr == INADDR_ANY) local_wild = inp; } } - if (local_wild != NULL) { - inp = local_wild; - goto found; - } + return (local_wild); } - splx(s); - return (NULL); -found: /* - * Move PCB to head of this hash chain so that it can be - * found more quickly in the future. - * XXX - this is a pessimization on machines with few - * concurrent connections. + * Not found. */ - if (inp != head->lh_first) { - LIST_REMOVE(inp, inp_hash); - LIST_INSERT_HEAD(head, inp, inp_hash); - } - splx(s); - return (inp); + return (NULL); } /* - * Insert PCB into hash chain. Must be called at splnet. + * Insert PCB onto various hash lists. */ -static void +int in_pcbinshash(inp) struct inpcb *inp; { - struct inpcbhead *head; + struct inpcbhead *pcbhash; + struct inpcbporthead *pcbporthash; + struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; + struct inpcbport *phd; - head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(inp->inp_faddr.s_addr, - inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)]; + pcbhash = &pcbinfo->hashbase[INP_PCBHASH(inp->inp_faddr.s_addr, + inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; - LIST_INSERT_HEAD(head, inp, inp_hash); + pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, + pcbinfo->porthashmask)]; + + /* + * Go through port list and look for a head for this lport. 
+ */ + for (phd = pcbporthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) { + if (phd->phd_port == inp->inp_lport) + break; + } + /* + * If none exists, malloc one and tack it on. + */ + if (phd == NULL) { + MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT); + if (phd == NULL) { + return (ENOBUFS); /* XXX */ + } + phd->phd_port = inp->inp_lport; + LIST_INIT(&phd->phd_pcblist); + LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); + } + inp->inp_phd = phd; + LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); + LIST_INSERT_HEAD(pcbhash, inp, inp_hash); + return (0); } +/* + * Move PCB to the proper hash bucket when { faddr, fport } have been + * changed. NOTE: This does not handle the case of the lport changing (the + * hashed port list would have to be updated as well), so the lport must + * not change after in_pcbinshash() has been called. + */ void in_pcbrehash(inp) struct inpcb *inp; { struct inpcbhead *head; - int s; - - s = splnet(); - LIST_REMOVE(inp, inp_hash); head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(inp->inp_faddr.s_addr, inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)]; + LIST_REMOVE(inp, inp_hash); LIST_INSERT_HEAD(head, inp, inp_hash); - splx(s); +} + +/* + * Remove PCB from various lists. + */ +static void +in_pcbremlists(inp) + struct inpcb *inp; +{ + if (inp->inp_lport) { + struct inpcbport *phd = inp->inp_phd; + + LIST_REMOVE(inp, inp_hash); + LIST_REMOVE(inp, inp_portlist); + if (phd->phd_pcblist.lh_first == NULL) { + LIST_REMOVE(phd, phd_hash); + free(phd, M_PCB); + } + } + LIST_REMOVE(inp, inp_list); } diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 18e40e9..27f3586 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)in_pcb.h 8.1 (Berkeley) 6/10/93 - * $Id: in_pcb.h,v 1.21 1997/04/27 20:01:04 wollman Exp $ + * $Id: in_pcb.h,v 1.22 1997/08/16 19:15:36 wollman Exp $ */ #ifndef _NETINET_IN_PCB_H_ @@ -47,16 +47,17 @@ * control block. 
*/ LIST_HEAD(inpcbhead, inpcb); +LIST_HEAD(inpcbporthead, inpcbport); struct inpcb { - LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */ - LIST_ENTRY(inpcb) inp_hash; /* hash list */ - struct inpcbinfo *inp_pcbinfo; /* PCB list info */ + LIST_ENTRY(inpcb) inp_hash; /* hash list */ struct in_addr inp_faddr; /* foreign host table entry */ struct in_addr inp_laddr; /* local host table entry */ u_short inp_fport; /* foreign port */ u_short inp_lport; /* local port */ + LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */ caddr_t inp_ppcb; /* pointer to per-protocol pcb */ + struct inpcbinfo *inp_pcbinfo; /* PCB list info */ struct socket *inp_socket; /* back pointer to socket */ struct mbuf *inp_options; /* IP options */ struct route inp_route; /* placeholder for routing entry */ @@ -66,22 +67,31 @@ struct inpcb { u_char inp_ip_p; /* protocol proto */ u_char pad[1]; /* alignment */ struct ip_moptions *inp_moptions; /* IP multicast options */ -#if 0 /* Someday, perhaps... 
*/ - struct ip inp_ip; /* header prototype; should have more */ -#endif + LIST_ENTRY(inpcb) inp_portlist; /* list for this PCB's local port */ + struct inpcbport *inp_phd; /* head of list for this PCB's local port */ +}; + +struct inpcbport { + LIST_ENTRY(inpcbport) phd_hash; + struct inpcbhead phd_pcblist; + u_short phd_port; }; struct inpcbinfo { - struct inpcbhead *listhead; struct inpcbhead *hashbase; unsigned long hashmask; + struct inpcbporthead *porthashbase; + unsigned long porthashmask; + struct inpcbhead *listhead; unsigned short lastport; unsigned short lastlow; unsigned short lasthi; }; #define INP_PCBHASH(faddr, lport, fport, mask) \ - (((faddr) ^ ((faddr) >> 16) ^ (lport) ^ (fport)) & (mask)) + (((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask)) +#define INP_PCBPORTHASH(lport, mask) \ + (ntohs((lport)) & (mask)) /* flags in inp_flags: */ #define INP_RECVOPTS 0x01 /* receive incoming IP options */ @@ -101,25 +111,26 @@ struct inpcbinfo { #define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb) #ifdef KERNEL -void in_losing __P((struct inpcb *)); -int in_pcballoc __P((struct socket *, struct inpcbinfo *, struct proc *)); -int in_pcbbind __P((struct inpcb *, struct sockaddr *, struct proc *)); -int in_pcbconnect __P((struct inpcb *, struct sockaddr *, struct proc *)); -void in_pcbdetach __P((struct inpcb *)); -void in_pcbdisconnect __P((struct inpcb *)); -int in_pcbladdr __P((struct inpcb *, struct sockaddr *, +void in_losing __P((struct inpcb *)); +int in_pcballoc __P((struct socket *, struct inpcbinfo *, struct proc *)); +int in_pcbbind __P((struct inpcb *, struct sockaddr *, struct proc *)); +int in_pcbconnect __P((struct inpcb *, struct sockaddr *, struct proc *)); +void in_pcbdetach __P((struct inpcb *)); +void in_pcbdisconnect __P((struct inpcb *)); +int in_pcbinshash __P((struct inpcb *)); +int in_pcbladdr __P((struct inpcb *, struct sockaddr *, struct sockaddr_in **)); struct inpcb * - in_pcblookup __P((struct inpcbinfo *, - struct 
in_addr, u_int, struct in_addr, u_int, int)); + in_pcblookup_local __P((struct inpcbinfo *, + struct in_addr, u_int, int)); struct inpcb * - in_pcblookuphash __P((struct inpcbinfo *, + in_pcblookup_hash __P((struct inpcbinfo *, struct in_addr, u_int, struct in_addr, u_int, int)); -void in_pcbnotify __P((struct inpcbhead *, struct sockaddr *, +void in_pcbnotify __P((struct inpcbhead *, struct sockaddr *, u_int, struct in_addr, u_int, int, void (*)(struct inpcb *, int))); -void in_pcbrehash __P((struct inpcb *)); -int in_setpeeraddr __P((struct socket *so, struct sockaddr **nam)); -int in_setsockaddr __P((struct socket *so, struct sockaddr **nam)); +void in_pcbrehash __P((struct inpcb *)); +int in_setpeeraddr __P((struct socket *so, struct sockaddr **nam)); +int in_setsockaddr __P((struct socket *so, struct sockaddr **nam)); #endif #endif diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index 7834c0a..66ef08b 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: ip_divert.c,v 1.16 1997/12/18 09:13:34 davidg Exp $ + * $Id: ip_divert.c,v 1.17 1998/01/08 23:41:50 eivind Exp $ */ #include "opt_inet.h" @@ -115,6 +115,7 @@ div_init(void) * over the place for hashbase == NULL. */ divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask); + divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask); } /* diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 416cda1..95f364e 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 - * $Id: raw_ip.c,v 1.49 1997/09/14 03:10:40 peter Exp $ + * $Id: raw_ip.c,v 1.50 1997/12/18 09:13:39 davidg Exp $ */ #include <sys/param.h> @@ -93,6 +93,7 @@ rip_init() * over the place for hashbase == NULL. 
*/ ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask); + ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask); } static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 0fe6d37..a8c15cd 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 - * $Id: tcp_input.c,v 1.67 1997/12/19 23:46:15 bde Exp $ + * $Id: tcp_input.c,v 1.68 1998/01/21 02:05:59 fenner Exp $ */ #include "opt_tcpdebug.h" @@ -105,27 +105,6 @@ static void tcp_xmit_timer __P((struct tcpcb *, int)); * Set DELACK for segments received in order, but ack immediately * when segments are out of order (so fast retransmit can work). */ -#ifdef TCP_ACK_HACK -#define TCP_REASS(tp, ti, m, so, flags) { \ - if ((ti)->ti_seq == (tp)->rcv_nxt && \ - (tp)->seg_next == (struct tcpiphdr *)(tp) && \ - (tp)->t_state == TCPS_ESTABLISHED) { \ - if (ti->ti_flags & TH_PUSH) \ - tp->t_flags |= TF_ACKNOW; \ - else \ - tp->t_flags |= TF_DELACK; \ - (tp)->rcv_nxt += (ti)->ti_len; \ - flags = (ti)->ti_flags & TH_FIN; \ - tcpstat.tcps_rcvpack++;\ - tcpstat.tcps_rcvbyte += (ti)->ti_len;\ - sbappend(&(so)->so_rcv, (m)); \ - sorwakeup(so); \ - } else { \ - (flags) = tcp_reass((tp), (ti), (m)); \ - tp->t_flags |= TF_ACKNOW; \ - } \ -} -#else #define TCP_REASS(tp, ti, m, so, flags) { \ if ((ti)->ti_seq == (tp)->rcv_nxt && \ (tp)->seg_next == (struct tcpiphdr *)(tp) && \ @@ -142,7 +121,6 @@ static void tcp_xmit_timer __P((struct tcpcb *, int)); tp->t_flags |= TF_ACKNOW; \ } \ } -#endif #ifndef TUBA_INCLUDE static int @@ -358,7 +336,7 @@ tcp_input(m, iphlen) * Locate pcb for segment. 
*/ findpcb: - inp = in_pcblookuphash(&tcbinfo, ti->ti_src, ti->ti_sport, + inp = in_pcblookup_hash(&tcbinfo, ti->ti_src, ti->ti_sport, ti->ti_dst, ti->ti_dport, 1); /* @@ -440,10 +418,16 @@ findpcb: inp = (struct inpcb *)so->so_pcb; inp->inp_laddr = ti->ti_dst; inp->inp_lport = ti->ti_dport; - in_pcbrehash(inp); -#if BSD>=43 + if (in_pcbinshash(inp) != 0) { + /* + * Undo the assignments above if we failed to put + * the PCB on the hash lists. + */ + inp->inp_laddr.s_addr = INADDR_ANY; + inp->inp_lport = 0; + goto drop; + } inp->inp_options = ip_srcroute(); -#endif tp = intotcpcb(inp); tp->t_state = TCPS_LISTEN; tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT); diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c index 0fe6d37..a8c15cd 100644 --- a/sys/netinet/tcp_reass.c +++ b/sys/netinet/tcp_reass.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 - * $Id: tcp_input.c,v 1.67 1997/12/19 23:46:15 bde Exp $ + * $Id: tcp_input.c,v 1.68 1998/01/21 02:05:59 fenner Exp $ */ #include "opt_tcpdebug.h" @@ -105,27 +105,6 @@ static void tcp_xmit_timer __P((struct tcpcb *, int)); * Set DELACK for segments received in order, but ack immediately * when segments are out of order (so fast retransmit can work). 
*/ -#ifdef TCP_ACK_HACK -#define TCP_REASS(tp, ti, m, so, flags) { \ - if ((ti)->ti_seq == (tp)->rcv_nxt && \ - (tp)->seg_next == (struct tcpiphdr *)(tp) && \ - (tp)->t_state == TCPS_ESTABLISHED) { \ - if (ti->ti_flags & TH_PUSH) \ - tp->t_flags |= TF_ACKNOW; \ - else \ - tp->t_flags |= TF_DELACK; \ - (tp)->rcv_nxt += (ti)->ti_len; \ - flags = (ti)->ti_flags & TH_FIN; \ - tcpstat.tcps_rcvpack++;\ - tcpstat.tcps_rcvbyte += (ti)->ti_len;\ - sbappend(&(so)->so_rcv, (m)); \ - sorwakeup(so); \ - } else { \ - (flags) = tcp_reass((tp), (ti), (m)); \ - tp->t_flags |= TF_ACKNOW; \ - } \ -} -#else #define TCP_REASS(tp, ti, m, so, flags) { \ if ((ti)->ti_seq == (tp)->rcv_nxt && \ (tp)->seg_next == (struct tcpiphdr *)(tp) && \ @@ -142,7 +121,6 @@ static void tcp_xmit_timer __P((struct tcpcb *, int)); tp->t_flags |= TF_ACKNOW; \ } \ } -#endif #ifndef TUBA_INCLUDE static int @@ -358,7 +336,7 @@ tcp_input(m, iphlen) * Locate pcb for segment. */ findpcb: - inp = in_pcblookuphash(&tcbinfo, ti->ti_src, ti->ti_sport, + inp = in_pcblookup_hash(&tcbinfo, ti->ti_src, ti->ti_sport, ti->ti_dst, ti->ti_dport, 1); /* @@ -440,10 +418,16 @@ findpcb: inp = (struct inpcb *)so->so_pcb; inp->inp_laddr = ti->ti_dst; inp->inp_lport = ti->ti_dport; - in_pcbrehash(inp); -#if BSD>=43 + if (in_pcbinshash(inp) != 0) { + /* + * Undo the assignments above if we failed to put + * the PCB on the hash lists. + */ + inp->inp_laddr.s_addr = INADDR_ANY; + inp->inp_lport = 0; + goto drop; + } inp->inp_options = ip_srcroute(); -#endif tp = intotcpcb(inp); tp->t_state = TCPS_LISTEN; tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 962dc47..b0bdac2 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 - * $Id: tcp_subr.c,v 1.40 1997/12/19 03:36:14 julian Exp $ + * $Id: tcp_subr.c,v 1.41 1998/01/25 04:23:32 eivind Exp $ */ #include "opt_compat.h" @@ -87,11 +87,10 @@ static void tcp_cleartaocache __P((void)); static void tcp_notify __P((struct inpcb *, int)); /* - * Target size of TCP PCB hash table. Will be rounded down to a prime - * number. + * Target size of TCP PCB hash tables. Must be a power of two. */ #ifndef TCBHASHSIZE -#define TCBHASHSIZE 128 +#define TCBHASHSIZE 512 #endif /* @@ -107,6 +106,7 @@ tcp_init() LIST_INIT(&tcb); tcbinfo.listhead = &tcb; tcbinfo.hashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashmask); + tcbinfo.porthashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.porthashmask); if (max_protohdr < sizeof(struct tcpiphdr)) max_protohdr = sizeof(struct tcpiphdr); if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN) @@ -417,8 +417,8 @@ tcp_close(tp) } if (tp->t_template) (void) m_free(dtom(tp->t_template)); + inp->inp_ppcb = NULL; free(tp, M_PCB); - inp->inp_ppcb = 0; soisdisconnected(so); in_pcbdetach(inp); tcpstat.tcps_closed++; diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index 962dc47..b0bdac2 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 - * $Id: tcp_subr.c,v 1.40 1997/12/19 03:36:14 julian Exp $ + * $Id: tcp_subr.c,v 1.41 1998/01/25 04:23:32 eivind Exp $ */ #include "opt_compat.h" @@ -87,11 +87,10 @@ static void tcp_cleartaocache __P((void)); static void tcp_notify __P((struct inpcb *, int)); /* - * Target size of TCP PCB hash table. Will be rounded down to a prime - * number. + * Target size of TCP PCB hash tables. Must be a power of two. 
*/ #ifndef TCBHASHSIZE -#define TCBHASHSIZE 128 +#define TCBHASHSIZE 512 #endif /* @@ -107,6 +106,7 @@ tcp_init() LIST_INIT(&tcb); tcbinfo.listhead = &tcb; tcbinfo.hashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashmask); + tcbinfo.porthashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.porthashmask); if (max_protohdr < sizeof(struct tcpiphdr)) max_protohdr = sizeof(struct tcpiphdr); if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN) @@ -417,8 +417,8 @@ tcp_close(tp) } if (tp->t_template) (void) m_free(dtom(tp->t_template)); + inp->inp_ppcb = NULL; free(tp, M_PCB); - inp->inp_ppcb = 0; soisdisconnected(so); in_pcbdetach(inp); tcpstat.tcps_closed++; diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 7f33c00..cc94bbb 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 - * $Id: tcp_usrreq.c,v 1.35 1997/09/16 18:36:06 joerg Exp $ + * $Id: tcp_usrreq.c,v 1.36 1997/12/18 09:50:38 davidg Exp $ */ #include "opt_tcpdebug.h" @@ -483,9 +483,9 @@ tcp_connect(tp, nam, p) struct tcpcb *otp; struct sockaddr_in *sin = (struct sockaddr_in *)nam; struct sockaddr_in *ifaddr; - int error; struct rmxp_tao *taop; struct rmxp_tao tao_noncached; + int error; if (inp->inp_lport == 0) { error = in_pcbbind(inp, (struct sockaddr *)0, p); @@ -501,7 +501,7 @@ tcp_connect(tp, nam, p) error = in_pcbladdr(inp, nam, &ifaddr); if (error) return error; - oinp = in_pcblookuphash(inp->inp_pcbinfo, + oinp = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr : ifaddr->sin_addr, diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index a9a9b36..e1bc97f 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 - * $Id: tcp_var.h,v 1.38 1997/02/22 09:41:43 peter Exp $ + * $Id: tcp_var.h,v 1.39 1997/04/27 20:01:15 wollman Exp $ */ #ifndef _NETINET_TCP_VAR_H_ @@ -42,18 +42,18 @@ /* * Tcp control block, one per tcp; fields: + * Organized for 16 byte cacheline efficiency. */ struct tcpcb { struct tcpiphdr *seg_next; /* sequencing queue */ struct tcpiphdr *seg_prev; - int t_state; /* state of this connection */ - int t_timer[TCPT_NTIMERS]; /* tcp timers */ - int t_rxtshift; /* log(2) of rexmt exp. backoff */ - int t_rxtcur; /* current retransmit value */ int t_dupacks; /* consecutive dup acks recd */ - u_int t_maxseg; /* maximum segment size */ - u_int t_maxopd; /* mss plus options */ - int t_force; /* 1 if forcing out a byte */ + struct tcpiphdr *t_template; /* skeletal packet for transmit */ + + int t_timer[TCPT_NTIMERS]; /* tcp timers */ + + struct inpcb *t_inpcb; /* back pointer to internet pcb */ + int t_state; /* state of this connection */ u_int t_flags; #define TF_ACKNOW 0x0001 /* ack peer immediately */ #define TF_DELACK 0x0002 /* ack, but try to delay it */ @@ -71,77 +71,69 @@ struct tcpcb { #define TF_REQ_CC 0x2000 /* have/will request CC */ #define TF_RCVD_CC 0x4000 /* a CC was received in SYN */ #define TF_SENDCCNEW 0x8000 /* send CCnew instead of CC in SYN */ + int t_force; /* 1 if forcing out a byte */ - struct tcpiphdr *t_template; /* skeletal packet for transmit */ - struct inpcb *t_inpcb; /* back pointer to internet pcb */ -/* - * The following fields are used as in the protocol specification. - * See RFC783, Dec. 1981, page 21. 
- */ -/* send sequence variables */ tcp_seq snd_una; /* send unacknowledged */ + tcp_seq snd_max; /* highest sequence number sent; + * used to recognize retransmits + */ tcp_seq snd_nxt; /* send next */ tcp_seq snd_up; /* send urgent pointer */ + tcp_seq snd_wl1; /* window update seg seq number */ tcp_seq snd_wl2; /* window update seg ack number */ tcp_seq iss; /* initial send sequence number */ - u_long snd_wnd; /* send window */ -/* receive sequence variables */ - u_long rcv_wnd; /* receive window */ - tcp_seq rcv_nxt; /* receive next */ - tcp_seq rcv_up; /* receive urgent pointer */ tcp_seq irs; /* initial receive sequence number */ -/* - * Additional variables for this implementation. - */ -/* receive variables */ + + tcp_seq rcv_nxt; /* receive next */ tcp_seq rcv_adv; /* advertised window */ -/* retransmit variables */ - tcp_seq snd_max; /* highest sequence number sent; - * used to recognize retransmits - */ -/* congestion control (for slow start, source quench, retransmit after loss) */ + u_long rcv_wnd; /* receive window */ + tcp_seq rcv_up; /* receive urgent pointer */ + + u_long snd_wnd; /* send window */ u_long snd_cwnd; /* congestion-controlled window */ u_long snd_ssthresh; /* snd_cwnd size threshold for * for slow start exponential to * linear switch */ -/* - * transmit timing stuff. See below for scale of srtt and rttvar. - * "Variance" is actually smoothed difference. - */ + u_int t_maxopd; /* mss plus options */ + u_int t_idle; /* inactivity time */ + u_long t_duration; /* connection duration */ int t_rtt; /* round trip time */ tcp_seq t_rtseq; /* sequence number being timed */ + + int t_rxtcur; /* current retransmit value */ + u_int t_maxseg; /* maximum segment size */ int t_srtt; /* smoothed round-trip time */ int t_rttvar; /* variance in round-trip time */ + + int t_rxtshift; /* log(2) of rexmt exp. 
backoff */ u_int t_rttmin; /* minimum rtt allowed */ + u_long t_rttupdated; /* number of times rtt sampled */ u_long max_sndwnd; /* largest window peer has offered */ + int t_softerror; /* possible error not yet reported */ /* out-of-band data */ char t_oobflags; /* have some */ char t_iobc; /* input character */ #define TCPOOB_HAVEDATA 0x01 #define TCPOOB_HADDATA 0x02 - int t_softerror; /* possible error not yet reported */ - /* RFC 1323 variables */ u_char snd_scale; /* window scaling for send window */ u_char rcv_scale; /* window scaling for recv window */ u_char request_r_scale; /* pending window scaling */ u_char requested_s_scale; u_long ts_recent; /* timestamp echo data */ + u_long ts_recent_age; /* when last updated */ tcp_seq last_ack_sent; /* RFC 1644 variables */ tcp_cc cc_send; /* send connection count */ tcp_cc cc_recv; /* receive connection count */ - u_long t_duration; /* connection duration */ /* TUBA stuff */ caddr_t t_tuba_pcb; /* next level down pcb for TCP over z */ -/* More RTT stuff */ - u_long t_rttupdated; /* number of times rtt sampled */ }; /* diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 91981c1..773e42d 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 - * $Id: udp_usrreq.c,v 1.42 1997/12/19 23:46:21 bde Exp $ + * $Id: udp_usrreq.c,v 1.43 1998/01/25 17:25:41 steve Exp $ */ #include <sys/param.h> @@ -78,7 +78,7 @@ static struct inpcbhead udb; /* from udp_var.h */ static struct inpcbinfo udbinfo; #ifndef UDBHASHSIZE -#define UDBHASHSIZE 64 +#define UDBHASHSIZE 16 #endif static struct udpstat udpstat; /* from udp_var.h */ @@ -97,6 +97,7 @@ udp_init() LIST_INIT(&udb); udbinfo.listhead = &udb; udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask); + udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.porthashmask); } void @@ -274,7 +275,7 @@ udp_input(m, iphlen) /* * Locate pcb for datagram. 
*/ - inp = in_pcblookuphash(&udbinfo, ip->ip_src, uh->uh_sport, + inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport, ip->ip_dst, uh->uh_dport, 1); if (inp == NULL) { if (log_in_vain) { |