summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/kern/uipc_usrreq.c62
-rw-r--r--sys/sys/sockbuf.h17
-rw-r--r--sys/sys/unpcb.h4
-rw-r--r--tests/sys/kern/unix_seqpacket_test.c17
4 files changed, 40 insertions, 60 deletions
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 676029a..87322da 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -51,7 +51,6 @@
*
* TODO:
* RDM
- * distinguish datagram size limits from flow control limits in SEQPACKET
* rethink name space problems
* need a proper out-of-band
*/
@@ -789,7 +788,6 @@ uipc_rcvd(struct socket *so, int flags)
struct unpcb *unp, *unp2;
struct socket *so2;
u_int mbcnt, sbcc;
- u_long newhiwat;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_rcvd: unp == NULL"));
@@ -811,6 +809,15 @@ uipc_rcvd(struct socket *so, int flags)
mbcnt = so->so_rcv.sb_mbcnt;
sbcc = so->so_rcv.sb_cc;
SOCKBUF_UNLOCK(&so->so_rcv);
+ /*
+ * There is a benign race condition at this point. If we're planning to
+ * clear SB_STOP, but uipc_send is called on the connected socket at
+ * this instant, it might add data to the sockbuf and set SB_STOP. Then
+ * we would erroneously clear SB_STOP below, even though the sockbuf is
+ * full. The race is benign because the only ill effect is to allow the
+ * sockbuf to exceed its size limit, and the size limits are not
+ * strictly guaranteed anyway.
+ */
UNP_PCB_LOCK(unp);
unp2 = unp->unp_conn;
if (unp2 == NULL) {
@@ -819,13 +826,9 @@ uipc_rcvd(struct socket *so, int flags)
}
so2 = unp2->unp_socket;
SOCKBUF_LOCK(&so2->so_snd);
- so2->so_snd.sb_mbmax += unp->unp_mbcnt - mbcnt;
- newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - sbcc;
- (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
- newhiwat, RLIM_INFINITY);
+ if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax)
+ so2->so_snd.sb_flags &= ~SB_STOP;
sowwakeup_locked(so2);
- unp->unp_mbcnt = mbcnt;
- unp->unp_cc = sbcc;
UNP_PCB_UNLOCK(unp);
return (0);
}
@@ -836,8 +839,7 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
{
struct unpcb *unp, *unp2;
struct socket *so2;
- u_int mbcnt_delta, sbcc;
- u_int newhiwat;
+ u_int mbcnt, sbcc;
int error = 0;
unp = sotounpcb(so);
@@ -991,27 +993,21 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
}
}
- /*
- * XXXRW: While fine for SOCK_STREAM, this conflates maximum
- * datagram size and back-pressure for SOCK_SEQPACKET, which
- * can lead to undesired return of EMSGSIZE on send instead
- * of more desirable blocking.
- */
- mbcnt_delta = so2->so_rcv.sb_mbcnt - unp2->unp_mbcnt;
- unp2->unp_mbcnt = so2->so_rcv.sb_mbcnt;
+ mbcnt = so2->so_rcv.sb_mbcnt;
sbcc = so2->so_rcv.sb_cc;
sorwakeup_locked(so2);
+ /*
+ * The PCB lock on unp2 protects the SB_STOP flag. Without it,
+ * it would be possible for uipc_rcvd to be called at this
+ * point, drain the receiving sockbuf, clear SB_STOP, and then
+ * we would set SB_STOP below. That could lead to an empty
+ * sockbuf having SB_STOP set
+ */
SOCKBUF_LOCK(&so->so_snd);
- if ((int)so->so_snd.sb_hiwat >= (int)(sbcc - unp2->unp_cc))
- newhiwat = so->so_snd.sb_hiwat - (sbcc - unp2->unp_cc);
- else
- newhiwat = 0;
- (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
- newhiwat, RLIM_INFINITY);
- so->so_snd.sb_mbmax -= mbcnt_delta;
+ if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax)
+ so->so_snd.sb_flags |= SB_STOP;
SOCKBUF_UNLOCK(&so->so_snd);
- unp2->unp_cc = sbcc;
UNP_PCB_UNLOCK(unp2);
m = NULL;
break;
@@ -1049,27 +1045,18 @@ release:
static int
uipc_sense(struct socket *so, struct stat *sb)
{
- struct unpcb *unp, *unp2;
- struct socket *so2;
+ struct unpcb *unp;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_sense: unp == NULL"));
sb->st_blksize = so->so_snd.sb_hiwat;
- UNP_LINK_RLOCK();
UNP_PCB_LOCK(unp);
- unp2 = unp->unp_conn;
- if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) &&
- unp2 != NULL) {
- so2 = unp2->unp_socket;
- sb->st_blksize += so2->so_rcv.sb_cc;
- }
sb->st_dev = NODEV;
if (unp->unp_ino == 0)
unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
sb->st_ino = unp->unp_ino;
UNP_PCB_UNLOCK(unp);
- UNP_LINK_RUNLOCK();
return (0);
}
@@ -2497,8 +2484,7 @@ DB_SHOW_COMMAND(unpcb, db_show_unpcb)
/* XXXRW: Would be nice to print the full address, if any. */
db_printf("unp_addr: %p\n", unp->unp_addr);
- db_printf("unp_cc: %d unp_mbcnt: %d unp_gencnt: %llu\n",
- unp->unp_cc, unp->unp_mbcnt,
+ db_printf("unp_gencnt: %llu\n",
(unsigned long long)unp->unp_gencnt);
db_printf("unp_flags: %x (", unp->unp_flags);
diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h
index 6994fd3..877b530 100644
--- a/sys/sys/sockbuf.h
+++ b/sys/sys/sockbuf.h
@@ -52,6 +52,7 @@
#define SB_NOCOALESCE 0x200 /* don't coalesce new data into existing mbufs */
#define SB_IN_TOE 0x400 /* socket buffer is in the middle of an operation */
#define SB_AUTOSIZE 0x800 /* automatically size socket buffer */
+#define SB_STOP 0x1000 /* backpressure indicator */
#define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */
#define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */
@@ -168,9 +169,19 @@ void sbunlock(struct sockbuf *sb);
* still be negative (cc > hiwat or mbcnt > mbmax). Should detect
* overflow and return 0. Should use "lmin" but it doesn't exist now.
*/
-#define sbspace(sb) \
- ((long) imin((int)((sb)->sb_hiwat - (sb)->sb_cc), \
- (int)((sb)->sb_mbmax - (sb)->sb_mbcnt)))
+static __inline
+long
+sbspace(struct sockbuf *sb)
+{
+ long bleft;
+ long mleft;
+
+ if (sb->sb_flags & SB_STOP)
+ return(0);
+ bleft = sb->sb_hiwat - sb->sb_cc;
+ mleft = sb->sb_mbmax - sb->sb_mbcnt;
+ return((bleft < mleft) ? bleft : mleft);
+}
/* adjust counters in sb reflecting allocation of m */
#define sballoc(sb, m) { \
diff --git a/sys/sys/unpcb.h b/sys/sys/unpcb.h
index 4d69f3e..ba63f30 100644
--- a/sys/sys/unpcb.h
+++ b/sys/sys/unpcb.h
@@ -74,8 +74,8 @@ struct unpcb {
struct unp_head unp_refs; /* referencing socket linked list */
LIST_ENTRY(unpcb) unp_reflink; /* link in unp_refs list */
struct sockaddr_un *unp_addr; /* bound address of socket */
- int unp_cc; /* copy of rcv.sb_cc */
- int unp_mbcnt; /* copy of rcv.sb_mbcnt */
+ int reserved1;
+ int reserved2;
unp_gen_t unp_gencnt; /* generation count of this instance */
short unp_flags; /* flags */
short unp_gcflag; /* Garbage collector flags. */
diff --git a/tests/sys/kern/unix_seqpacket_test.c b/tests/sys/kern/unix_seqpacket_test.c
index 102535b7..0ccd639 100644
--- a/tests/sys/kern/unix_seqpacket_test.c
+++ b/tests/sys/kern/unix_seqpacket_test.c
@@ -844,25 +844,21 @@ ATF_TC_BODY(emsgsize_nonblocking, tc)
ATF_TC_WITHOUT_HEAD(eagain_8k_8k);
ATF_TC_BODY(eagain_8k_8k, tc)
{
- atf_tc_expect_fail("PR kern/185812 send(2) on a UNIX domain SEQPACKET socket returns EMSGSIZE instead of EAGAIN");
test_eagain(8192, 8192);
}
ATF_TC_WITHOUT_HEAD(eagain_8k_128k);
ATF_TC_BODY(eagain_8k_128k, tc)
{
- atf_tc_expect_fail("PR kern/185812 send(2) on a UNIX domain SEQPACKET socket returns EMSGSIZE instead of EAGAIN");
test_eagain(8192, 131072);
}
ATF_TC_WITHOUT_HEAD(eagain_128k_8k);
ATF_TC_BODY(eagain_128k_8k, tc)
{
- atf_tc_expect_fail("PR kern/185812 send(2) on a UNIX domain SEQPACKET socket returns EMSGSIZE instead of EAGAIN");
test_eagain(131072, 8192);
}
ATF_TC_WITHOUT_HEAD(eagain_128k_128k);
ATF_TC_BODY(eagain_128k_128k, tc)
{
- atf_tc_expect_fail("PR kern/185812 send(2) on a UNIX domain SEQPACKET socket returns EMSGSIZE instead of EAGAIN");
test_eagain(131072, 131072);
}
@@ -969,37 +965,24 @@ ATF_TC_BODY(pipe_simulator_128k_128k, tc)
ATF_TC_WITHOUT_HEAD(pipe_8k_8k);
ATF_TC_BODY(pipe_8k_8k, tc)
{
- atf_tc_expect_fail("PR kern/185812 send(2) on a UNIX domain SEQPACKET socket returns EMSGSIZE instead of EAGAIN");
test_pipe(8192, 8192);
}
ATF_TC_WITHOUT_HEAD(pipe_8k_128k);
ATF_TC_BODY(pipe_8k_128k, tc)
{
- atf_tc_expect_fail("PR kern/185812 send(2) on a UNIX domain SEQPACKET socket returns EMSGSIZE instead of EAGAIN");
test_pipe(8192, 131072);
}
ATF_TC_WITHOUT_HEAD(pipe_128k_8k);
ATF_TC_BODY(pipe_128k_8k, tc)
{
- /*
- * kern/185812 causes this test case to both fail and timeout. The
- * atf-c-api(3) doesn't have a way to set such an expectation.
- * If you use atf_tc_expect_fail, then it will timeout. If you use
- * atf_tc_expect_timeout, then it will fail. If you use both, then it
- * will show up as an unexpected pass, which is much worse
- *
- * https://code.google.com/p/kyua/issues/detail?id=76
- */
- atf_tc_expect_fail("PR kern/185812 send(2) on a UNIX domain SEQPACKET socket returns EMSGSIZE instead of EAGAIN");
test_pipe(131072, 8192);
}
ATF_TC_WITHOUT_HEAD(pipe_128k_128k);
ATF_TC_BODY(pipe_128k_128k, tc)
{
- atf_tc_expect_fail("PR kern/185812 send(2) on a UNIX domain SEQPACKET socket returns EMSGSIZE instead of EAGAIN");
test_pipe(131072, 131072);
}
OpenPOWER on IntegriCloud