diff options
-rw-r--r-- | sys/kern/uipc_usrreq.c | 62 | ||||
-rw-r--r-- | sys/sys/sockbuf.h | 17 | ||||
-rw-r--r-- | sys/sys/unpcb.h | 4 | ||||
-rw-r--r-- | tests/sys/kern/unix_seqpacket_test.c | 17 |
4 files changed, 40 insertions, 60 deletions
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 676029a..87322da 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -51,7 +51,6 @@ * * TODO: * RDM - * distinguish datagram size limits from flow control limits in SEQPACKET * rethink name space problems * need a proper out-of-band */ @@ -789,7 +788,6 @@ uipc_rcvd(struct socket *so, int flags) struct unpcb *unp, *unp2; struct socket *so2; u_int mbcnt, sbcc; - u_long newhiwat; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_rcvd: unp == NULL")); @@ -811,6 +809,15 @@ uipc_rcvd(struct socket *so, int flags) mbcnt = so->so_rcv.sb_mbcnt; sbcc = so->so_rcv.sb_cc; SOCKBUF_UNLOCK(&so->so_rcv); + /* + * There is a benign race condition at this point. If we're planning to + * clear SB_STOP, but uipc_send is called on the connected socket at + * this instant, it might add data to the sockbuf and set SB_STOP. Then + * we would erroneously clear SB_STOP below, even though the sockbuf is + * full. The race is benign because the only ill effect is to allow the + * sockbuf to exceed its size limit, and the size limits are not + * strictly guaranteed anyway. + */ UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 == NULL) { @@ -819,13 +826,9 @@ uipc_rcvd(struct socket *so, int flags) } so2 = unp2->unp_socket; SOCKBUF_LOCK(&so2->so_snd); - so2->so_snd.sb_mbmax += unp->unp_mbcnt - mbcnt; - newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - sbcc; - (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat, - newhiwat, RLIM_INFINITY); + if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax) + so2->so_snd.sb_flags &= ~SB_STOP; sowwakeup_locked(so2); - unp->unp_mbcnt = mbcnt; - unp->unp_cc = sbcc; UNP_PCB_UNLOCK(unp); return (0); } @@ -836,8 +839,7 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, { struct unpcb *unp, *unp2; struct socket *so2; - u_int mbcnt_delta, sbcc; - u_int newhiwat; + u_int mbcnt, sbcc; int error = 0; unp = sotounpcb(so); @@ -991,27 +993,21 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, } } - /* - * XXXRW: While fine for SOCK_STREAM, this conflates maximum - * datagram size and back-pressure for SOCK_SEQPACKET, which - * can lead to undesired return of EMSGSIZE on send instead - * of more desirable blocking. - */ - mbcnt_delta = so2->so_rcv.sb_mbcnt - unp2->unp_mbcnt; - unp2->unp_mbcnt = so2->so_rcv.sb_mbcnt; + mbcnt = so2->so_rcv.sb_mbcnt; sbcc = so2->so_rcv.sb_cc; sorwakeup_locked(so2); + /* + * The PCB lock on unp2 protects the SB_STOP flag. Without it, + * it would be possible for uipc_rcvd to be called at this + * point, drain the receiving sockbuf, clear SB_STOP, and then + * we would set SB_STOP below. That could lead to an empty + * sockbuf having SB_STOP set + */ SOCKBUF_LOCK(&so->so_snd); - if ((int)so->so_snd.sb_hiwat >= (int)(sbcc - unp2->unp_cc)) - newhiwat = so->so_snd.sb_hiwat - (sbcc - unp2->unp_cc); - else - newhiwat = 0; - (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, - newhiwat, RLIM_INFINITY); - so->so_snd.sb_mbmax -= mbcnt_delta; + if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax) + so->so_snd.sb_flags |= SB_STOP; SOCKBUF_UNLOCK(&so->so_snd); - unp2->unp_cc = sbcc; UNP_PCB_UNLOCK(unp2); m = NULL; break; @@ -1049,27 +1045,18 @@ release: static int uipc_sense(struct socket *so, struct stat *sb) { - struct unpcb *unp, *unp2; - struct socket *so2; + struct unpcb *unp; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_sense: unp == NULL")); sb->st_blksize = so->so_snd.sb_hiwat; - UNP_LINK_RLOCK(); UNP_PCB_LOCK(unp); - unp2 = unp->unp_conn; - if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) && - unp2 != NULL) { - so2 = unp2->unp_socket; - sb->st_blksize += so2->so_rcv.sb_cc; - } sb->st_dev = NODEV; if (unp->unp_ino == 0) unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino; sb->st_ino = unp->unp_ino; UNP_PCB_UNLOCK(unp); - UNP_LINK_RUNLOCK(); return (0); } @@ -2497,8 +2484,7 @@ DB_SHOW_COMMAND(unpcb, db_show_unpcb) /* XXXRW: Would be nice to print the full address, if any. */ db_printf("unp_addr: %p\n", unp->unp_addr); - db_printf("unp_cc: %d unp_mbcnt: %d unp_gencnt: %llu\n", - unp->unp_cc, unp->unp_mbcnt, + db_printf("unp_gencnt: %llu\n", (unsigned long long)unp->unp_gencnt); db_printf("unp_flags: %x (", unp->unp_flags); diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h index 6994fd3..877b530 100644 --- a/sys/sys/sockbuf.h +++ b/sys/sys/sockbuf.h @@ -52,6 +52,7 @@ #define SB_NOCOALESCE 0x200 /* don't coalesce new data into existing mbufs */ #define SB_IN_TOE 0x400 /* socket buffer is in the middle of an operation */ #define SB_AUTOSIZE 0x800 /* automatically size socket buffer */ +#define SB_STOP 0x1000 /* backpressure indicator */ #define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */ #define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */ @@ -168,9 +169,19 @@ void sbunlock(struct sockbuf *sb); * still be negative (cc > hiwat or mbcnt > mbmax). Should detect * overflow and return 0. Should use "lmin" but it doesn't exist now. */ -#define sbspace(sb) \ - ((long) imin((int)((sb)->sb_hiwat - (sb)->sb_cc), \ - (int)((sb)->sb_mbmax - (sb)->sb_mbcnt))) +static __inline +long +sbspace(struct sockbuf *sb) +{ + long bleft; + long mleft; + + if (sb->sb_flags & SB_STOP) + return(0); + bleft = sb->sb_hiwat - sb->sb_cc; + mleft = sb->sb_mbmax - sb->sb_mbcnt; + return((bleft < mleft) ? bleft : mleft); +} /* adjust counters in sb reflecting allocation of m */ #define sballoc(sb, m) { \ diff --git a/sys/sys/unpcb.h b/sys/sys/unpcb.h index 4d69f3e..ba63f30 100644 --- a/sys/sys/unpcb.h +++ b/sys/sys/unpcb.h @@ -74,8 +74,8 @@ struct unpcb { struct unp_head unp_refs; /* referencing socket linked list */ LIST_ENTRY(unpcb) unp_reflink; /* link in unp_refs list */ struct sockaddr_un *unp_addr; /* bound address of socket */ - int unp_cc; /* copy of rcv.sb_cc */ - int unp_mbcnt; /* copy of rcv.sb_mbcnt */ + int reserved1; + int reserved2; unp_gen_t unp_gencnt; /* generation count of this instance */ short unp_flags; /* flags */ short unp_gcflag; /* Garbage collector flags. */ diff --git a/tests/sys/kern/unix_seqpacket_test.c b/tests/sys/kern/unix_seqpacket_test.c index 102535b7..0ccd639 100644 --- a/tests/sys/kern/unix_seqpacket_test.c +++ b/tests/sys/kern/unix_seqpacket_test.c @@ -844,25 +844,21 @@ ATF_TC_BODY(emsgsize_nonblocking, tc) ATF_TC_WITHOUT_HEAD(eagain_8k_8k); ATF_TC_BODY(eagain_8k_8k, tc) { - atf_tc_expect_fail("PR kern/185812 send(2) on a UNIX domain SEQPACKET socket returns EMSGSIZE instead of EAGAIN"); test_eagain(8192, 8192); } ATF_TC_WITHOUT_HEAD(eagain_8k_128k); ATF_TC_BODY(eagain_8k_128k, tc) { - atf_tc_expect_fail("PR kern/185812 send(2) on a UNIX domain SEQPACKET socket returns EMSGSIZE instead of EAGAIN"); test_eagain(8192, 131072); } ATF_TC_WITHOUT_HEAD(eagain_128k_8k); ATF_TC_BODY(eagain_128k_8k, tc) { - atf_tc_expect_fail("PR kern/185812 send(2) on a UNIX domain SEQPACKET socket returns EMSGSIZE instead of EAGAIN"); test_eagain(131072, 8192); } ATF_TC_WITHOUT_HEAD(eagain_128k_128k); ATF_TC_BODY(eagain_128k_128k, tc) { - atf_tc_expect_fail("PR kern/185812 send(2) on a UNIX domain SEQPACKET socket returns EMSGSIZE instead of EAGAIN"); test_eagain(131072, 131072); } @@ -969,37 +965,24 @@ ATF_TC_BODY(pipe_simulator_128k_128k, tc) ATF_TC_WITHOUT_HEAD(pipe_8k_8k); ATF_TC_BODY(pipe_8k_8k, tc) { - atf_tc_expect_fail("PR kern/185812 send(2) on a UNIX domain SEQPACKET socket returns EMSGSIZE instead of EAGAIN"); test_pipe(8192, 8192); } ATF_TC_WITHOUT_HEAD(pipe_8k_128k); ATF_TC_BODY(pipe_8k_128k, tc) { - atf_tc_expect_fail("PR kern/185812 send(2) on a UNIX domain SEQPACKET socket returns EMSGSIZE instead of EAGAIN"); test_pipe(8192, 131072); } ATF_TC_WITHOUT_HEAD(pipe_128k_8k); ATF_TC_BODY(pipe_128k_8k, tc) { - /* - * kern/185812 causes this test case to both fail and timeout. The - * atf-c-api(3) doesn't have a way to set such an expectation. - * If you use atf_tc_expect_fail, then it will timeout. If you use - * atf_tc_expect_timeout, then it will fail. If you use both, then it - * will show up as an unexpected pass, which is much worse - * - * https://code.google.com/p/kyua/issues/detail?id=76 - */ - atf_tc_expect_fail("PR kern/185812 send(2) on a UNIX domain SEQPACKET socket returns EMSGSIZE instead of EAGAIN"); test_pipe(131072, 8192); } ATF_TC_WITHOUT_HEAD(pipe_128k_128k); ATF_TC_BODY(pipe_128k_128k, tc) { - atf_tc_expect_fail("PR kern/185812 send(2) on a UNIX domain SEQPACKET socket returns EMSGSIZE instead of EAGAIN"); test_pipe(131072, 131072); } |