From 1b1f693d7ad6d193862dcb1118540a030c5e761f Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Thu, 28 Oct 2010 15:40:55 +0000
Subject: net: fix rds_iovec page count overflow

As reported by Thomas Pollet, the rdma page counting can overflow. We
get the rdma sizes in 64-bit unsigned entities, but then limit it to
UINT_MAX bytes and shift them down to pages (so with a possible "+1" for
an unaligned address).

So each individual page count fits comfortably in an 'unsigned int' (not
even close to overflowing into signed), but as they are added up, they
might end up resulting in a signed return value. Which would be wrong.

Catch the case of tot_pages turning negative, and return the appropriate
error code.

Reported-by: Thomas Pollet
Signed-off-by: Linus Torvalds
Signed-off-by: Andy Grover
Signed-off-by: David S. Miller
---
 net/rds/rdma.c | 7 +++++++
 1 file changed, 7 insertions(+)
(limited to 'net/rds')

diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 1a41deb..0df02c8 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -502,6 +502,13 @@ static int rds_rdma_pages(struct rds_rdma_args *args)
             return -EINVAL;
 
         tot_pages += nr_pages;
+
+        /*
+         * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
+         * so tot_pages cannot overflow without first going negative.
+         */
+        if ((int)tot_pages < 0)
+            return -EINVAL;
     }
 
     return tot_pages;
--
cgit v1.1

From a09f69c49b84b161ebd4dd09d3cce1b68297f1d3 Mon Sep 17 00:00:00 2001
From: Andy Grover
Date: Thu, 28 Oct 2010 15:40:56 +0000
Subject: RDS: Return -EINVAL if rds_rdma_pages returns an error

rds_cmsg_rdma_args would still return success even if rds_rdma_pages
returned an error (or overflowed).

Signed-off-by: Andy Grover
Signed-off-by: David S. Miller
---
 net/rds/rdma.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
(limited to 'net/rds')

diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 0df02c8..d0ba2ca 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -554,8 +554,10 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
     }
 
     nr_pages = rds_rdma_pages(args);
-    if (nr_pages < 0)
+    if (nr_pages < 0) {
+        ret = -EINVAL;
         goto out;
+    }
 
     pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
     if (!pages) {
--
cgit v1.1

From f4a3fc03c1d73753879fb655b8cd628b29f6706b Mon Sep 17 00:00:00 2001
From: Andy Grover
Date: Thu, 28 Oct 2010 15:40:57 +0000
Subject: RDS: Clean up error handling in rds_cmsg_rdma_args

We don't need to set ret = 0 at the end -- it's initialized to 0.

Also, don't increment s_send_rdma stat if we're exiting with an error.

Signed-off-by: Andy Grover
Signed-off-by: David S. Miller
---
 net/rds/rdma.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)
(limited to 'net/rds')

diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index d0ba2ca..334acdd 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -664,13 +664,12 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
     }
     op->op_bytes = nr_bytes;
 
-    ret = 0;
 out:
     kfree(pages);
     if (ret)
         rds_rdma_free_op(op);
-
-    rds_stats_inc(s_send_rdma);
+    else
+        rds_stats_inc(s_send_rdma);
 
     return ret;
 }
--
cgit v1.1
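A standalone illustration of the arithmetic behind the overflow check added in the first patch above (userspace C, not kernel code; PAGE_SHIFT is assumed to be 12, i.e. 4 KiB pages). Because each per-iovec page count is capped at (UINT_MAX >> PAGE_SHIFT) + 1, a 32-bit running total must pass through the negative range before it could ever wrap back to a small positive value, so the "(int)tot_pages < 0" test is sufficient:

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12                                       /* assumption: 4 KiB pages */
#define MAX_PAGES_PER_VEC ((UINT_MAX >> PAGE_SHIFT) + 1u)   /* 2^20 with 4 KiB pages */

int main(void)
{
    uint32_t tot_pages = 0;    /* kept unsigned so the wraparound below is well defined */
    unsigned long iovecs = 0;

    /* Feed the counter the largest per-iovec page count the code allows. */
    for (;;) {
        tot_pages += MAX_PAGES_PER_VEC;
        iovecs++;

        /* Portable spelling of the kernel's "(int)tot_pages < 0" test. */
        if (tot_pages > (uint32_t)INT_MAX) {
            printf("caught after %lu iovecs: tot_pages = %u (0x%x)\n",
                   iovecs, tot_pages, tot_pages);
            break;
        }
    }
    return 0;
}

With 4 KiB pages the test fires after 2048 maximal iovecs, long before the total could wrap past UINT_MAX and look valid again.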
From fc8162e3c034af743d8def435fda6396603d321f Mon Sep 17 00:00:00 2001
From: Andy Grover
Date: Thu, 28 Oct 2010 15:40:58 +0000
Subject: RDS: Copy rds_iovecs into kernel memory instead of rereading from userspace

Change rds_rdma_pages to take a passed-in rds_iovec array instead
of doing copy_from_user itself.

Change rds_cmsg_rdma_args to copy rds_iovec array once only. This
eliminates the possibility of userspace changing it after our
sanity checks.

Implement stack-based storage for small numbers of iovecs, based
on net/socket.c, to save an alloc in the extremely common case.

Although this patch reduces iovec copies in cmsg_rdma_args to 1,
we still do another one in rds_rdma_extra_size. Getting rid of
that one will be trickier, so it'll be a separate patch.

Signed-off-by: Andy Grover
Signed-off-by: David S. Miller
---
 net/rds/rdma.c | 104 +++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 65 insertions(+), 39 deletions(-)
(limited to 'net/rds')

diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 334acdd..caa4d98 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -479,13 +479,38 @@ void rds_atomic_free_op(struct rm_atomic_op *ao)
 
 /*
- * Count the number of pages needed to describe an incoming iovec.
+ * Count the number of pages needed to describe an incoming iovec array.
  */
-static int rds_rdma_pages(struct rds_rdma_args *args)
+static int rds_rdma_pages(struct rds_iovec iov[], int nr_iovecs)
+{
+    int tot_pages = 0;
+    unsigned int nr_pages;
+    unsigned int i;
+
+    /* figure out the number of pages in the vector */
+    for (i = 0; i < nr_iovecs; i++) {
+        nr_pages = rds_pages_in_vec(&iov[i]);
+        if (nr_pages == 0)
+            return -EINVAL;
+
+        tot_pages += nr_pages;
+
+        /*
+         * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
+         * so tot_pages cannot overflow without first going negative.
+         */
+        if (tot_pages < 0)
+            return -EINVAL;
+    }
+
+    return tot_pages;
+}
+
+int rds_rdma_extra_size(struct rds_rdma_args *args)
 {
     struct rds_iovec vec;
     struct rds_iovec __user *local_vec;
-    unsigned int tot_pages = 0;
+    int tot_pages = 0;
     unsigned int nr_pages;
     unsigned int i;
@@ -507,16 +532,11 @@ static int rds_rdma_pages(struct rds_rdma_args *args)
          * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
          * so tot_pages cannot overflow without first going negative.
          */
-        if ((int)tot_pages < 0)
+        if (tot_pages < 0)
             return -EINVAL;
     }
 
-    return tot_pages;
-}
-
-int rds_rdma_extra_size(struct rds_rdma_args *args)
-{
-    return rds_rdma_pages(args) * sizeof(struct scatterlist);
+    return tot_pages * sizeof(struct scatterlist);
 }
@@ -527,13 +547,12 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
               struct cmsghdr *cmsg)
 {
     struct rds_rdma_args *args;
-    struct rds_iovec vec;
     struct rm_rdma_op *op = &rm->rdma;
     int nr_pages;
     unsigned int nr_bytes;
     struct page **pages = NULL;
-    struct rds_iovec __user *local_vec;
-    unsigned int nr;
+    struct rds_iovec iovstack[UIO_FASTIOV], *iovs = iovstack;
+    int iov_size;
     unsigned int i, j;
     int ret = 0;
@@ -553,7 +572,22 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
         goto out;
     }
 
-    nr_pages = rds_rdma_pages(args);
+    /* Check whether to allocate the iovec area */
+    iov_size = args->nr_local * sizeof(struct rds_iovec);
+    if (args->nr_local > UIO_FASTIOV) {
+        iovs = sock_kmalloc(rds_rs_to_sk(rs), iov_size, GFP_KERNEL);
+        if (!iovs) {
+            ret = -ENOMEM;
+            goto out;
+        }
+    }
+
+    if (copy_from_user(iovs, (struct rds_iovec __user *)(unsigned long) args->local_vec_addr, iov_size)) {
+        ret = -EFAULT;
+        goto out;
+    }
+
+    nr_pages = rds_rdma_pages(iovs, args->nr_local);
     if (nr_pages < 0) {
         ret = -EINVAL;
         goto out;
@@ -606,50 +640,40 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
            (unsigned long long)args->remote_vec.addr,
            op->op_rkey);
 
-    local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
-
     for (i = 0; i < args->nr_local; i++) {
-        if (copy_from_user(&vec, &local_vec[i],
-                   sizeof(struct rds_iovec))) {
-            ret = -EFAULT;
-            goto out;
-        }
-
-        nr = rds_pages_in_vec(&vec);
-        if (nr == 0) {
-            ret = -EINVAL;
-            goto out;
-        }
+        struct rds_iovec *iov = &iovs[i];
+        /* don't need to check, rds_rdma_pages() verified nr will be +nonzero */
+        unsigned int nr = rds_pages_in_vec(iov);
 
-        rs->rs_user_addr = vec.addr;
-        rs->rs_user_bytes = vec.bytes;
+        rs->rs_user_addr = iov->addr;
+        rs->rs_user_bytes = iov->bytes;
 
         /* If it's a WRITE operation, we want to pin the pages for reading.
          * If it's a READ operation, we need to pin the pages for writing.
          */
-        ret = rds_pin_pages(vec.addr, nr, pages, !op->op_write);
+        ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write);
         if (ret < 0)
             goto out;
 
-        rdsdebug("RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx\n",
-             nr_bytes, nr, vec.bytes, vec.addr);
+        rdsdebug("RDS: nr_bytes %u nr %u iov->bytes %llu iov->addr %llx\n",
+             nr_bytes, nr, iov->bytes, iov->addr);
 
-        nr_bytes += vec.bytes;
+        nr_bytes += iov->bytes;
 
         for (j = 0; j < nr; j++) {
-            unsigned int offset = vec.addr & ~PAGE_MASK;
+            unsigned int offset = iov->addr & ~PAGE_MASK;
             struct scatterlist *sg;
 
             sg = &op->op_sg[op->op_nents + j];
             sg_set_page(sg, pages[j],
-                    min_t(unsigned int, vec.bytes, PAGE_SIZE - offset),
+                    min_t(unsigned int, iov->bytes, PAGE_SIZE - offset),
                     offset);
 
-            rdsdebug("RDS: sg->offset %x sg->len %x vec.addr %llx vec.bytes %llu\n",
-                sg->offset, sg->length, vec.addr, vec.bytes);
+            rdsdebug("RDS: sg->offset %x sg->len %x iov->addr %llx iov->bytes %llu\n",
+                sg->offset, sg->length, iov->addr, iov->bytes);
 
-            vec.addr += sg->length;
-            vec.bytes -= sg->length;
+            iov->addr += sg->length;
+            iov->bytes -= sg->length;
         }
 
         op->op_nents += nr;
@@ -665,6 +689,8 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
     op->op_bytes = nr_bytes;
 
 out:
+    if (iovs != iovstack)
+        sock_kfree_s(rds_rs_to_sk(rs), iovs, iov_size);
     kfree(pages);
     if (ret)
         rds_rdma_free_op(op);
--
cgit v1.1
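The iovstack change in the patch above uses a common pattern: a small fixed-size array on the stack for the typical request, a heap allocation only for larger ones (borrowed from net/socket.c, per the commit message), and a single up-front copy so userspace can no longer rewrite the array between validation and use. A minimal userspace sketch of that pattern follows; struct demo_iovec and process_iovecs() are invented for the example, and UIO_FASTIOV is assumed to be 8 as in the kernel's uio.h:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define UIO_FASTIOV 8    /* assumption: matches the kernel's uio.h value */

struct demo_iovec {
    unsigned long long addr;
    unsigned long long bytes;
};

static int process_iovecs(const struct demo_iovec *src, size_t nr)
{
    struct demo_iovec iovstack[UIO_FASTIOV], *iovs = iovstack;
    size_t iov_size = nr * sizeof(struct demo_iovec);
    size_t i;

    /* Allocate only when the request does not fit in the stack array. */
    if (nr > UIO_FASTIOV) {
        iovs = malloc(iov_size);
        if (!iovs)
            return -1;
    }

    /* One copy up front; every later pass rereads this private copy,
     * so the caller cannot change it between the checks and the use. */
    memcpy(iovs, src, iov_size);

    for (i = 0; i < nr; i++)
        printf("iovec %zu: addr=0x%llx bytes=%llu\n",
               i, iovs[i].addr, iovs[i].bytes);

    /* Free only what was actually allocated. */
    if (iovs != iovstack)
        free(iovs);
    return 0;
}

int main(void)
{
    struct demo_iovec v[2] = { { 0x1000, 4096 }, { 0x3000, 512 } };

    return process_iovecs(v, 2) ? 1 : 0;
}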
From d139ff0907dac9ef72fb2cf301e345bac3aec42f Mon Sep 17 00:00:00 2001
From: Andy Grover
Date: Thu, 28 Oct 2010 15:40:59 +0000
Subject: RDS: Let rds_message_alloc_sgs() return NULL

Even with the previous fix, we still are reading the iovecs once to
determine SGs needed, and then again later on. Preallocating space for
sg lists as part of rds_message seemed like a good idea but it might be
better to not do this. While working to redo that code, this patch
attempts to protect against userspace rewriting the rds_iovec array
between the first and second accesses.

The consequences of this would be either a too-small or too-large sg
list array. Too large is not an issue. This patch changes all callers
of message_alloc_sgs to handle running out of preallocated sgs, and
fail gracefully.

Signed-off-by: Andy Grover
Signed-off-by: David S. Miller
---
 net/rds/message.c | 5 +++++
 net/rds/rdma.c    | 8 ++++++++
 net/rds/send.c    | 4 ++++
 3 files changed, 17 insertions(+)
(limited to 'net/rds')

diff --git a/net/rds/message.c b/net/rds/message.c
index a84545d..848cff4 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -224,6 +224,9 @@ struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
     WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs);
     WARN_ON(!nents);
 
+    if (rm->m_used_sgs + nents > rm->m_total_sgs)
+        return NULL;
+
     sg_ret = &sg_first[rm->m_used_sgs];
     sg_init_table(sg_ret, nents);
     rm->m_used_sgs += nents;
@@ -246,6 +249,8 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
     rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
     rm->data.op_nents = ceil(total_len, PAGE_SIZE);
     rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
+    if (!rm->data.op_sg)
+        return ERR_PTR(-ENOMEM);
 
     for (i = 0; i < rm->data.op_nents; ++i) {
         sg_set_page(&rm->data.op_sg[i],
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index caa4d98..8920f2a 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -607,6 +607,10 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
     op->op_recverr = rs->rs_recverr;
     WARN_ON(!nr_pages);
     op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
+    if (!op->op_sg) {
+        ret = -ENOMEM;
+        goto out;
+    }
 
     if (op->op_notify || op->op_recverr) {
         /* We allocate an uninitialized notifier here, because
@@ -807,6 +811,10 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
     rm->atomic.op_active = 1;
     rm->atomic.op_recverr = rs->rs_recverr;
     rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
+    if (!rm->atomic.op_sg) {
+        ret = -ENOMEM;
+        goto err;
+    }
 
     /* verify 8 byte-aligned */
     if (args->local_addr & 0x7) {
diff --git a/net/rds/send.c b/net/rds/send.c
index 0bc9db1..35b9c2e 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -973,6 +973,10 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
     /* Attach data to the rm */
     if (payload_len) {
         rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
+        if (!rm->data.op_sg) {
+            ret = -ENOMEM;
+            goto out;
+        }
         ret = rds_message_copy_from_user(rm, msg->msg_iov, payload_len);
         if (ret)
             goto out;
--
cgit v1.1

From 58c490babd4b425310363cbd1f406d7e508f77a5 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov
Date: Tue, 2 Nov 2010 01:52:05 +0000
Subject: rds: Lost locking in loop connection freeing

The conn is removed from the list in there and this requires proper
lock protection.

Signed-off-by: Pavel Emelyanov
Signed-off-by: David S. Miller
---
 net/rds/loop.c | 4 ++++
 1 file changed, 4 insertions(+)
(limited to 'net/rds')

diff --git a/net/rds/loop.c b/net/rds/loop.c
index c390156..aeec1d4 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -134,8 +134,12 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 static void rds_loop_conn_free(void *arg)
 {
     struct rds_loop_connection *lc = arg;
+    unsigned long flags;
+
     rdsdebug("lc %p\n", lc);
+    spin_lock_irqsave(&loop_conns_lock, flags);
     list_del(&lc->loop_node);
+    spin_unlock_irqrestore(&loop_conns_lock, flags);
     kfree(lc);
 }
--
cgit v1.1

From 8200a59f24aeca379660f80658a8c0c343ca5c31 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov
Date: Tue, 2 Nov 2010 01:54:01 +0000
Subject: rds: Remove kfreed tcp conn from list

All the rds_tcp_connection objects are stored in a list, but when one
is freed it should be removed from there.

Signed-off-by: Pavel Emelyanov
Signed-off-by: David S. Miller
---
 net/rds/tcp.c | 6 ++++++
 1 file changed, 6 insertions(+)
(limited to 'net/rds')

diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 08a8c6c..8e0a320 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -221,7 +221,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 static void rds_tcp_conn_free(void *arg)
 {
     struct rds_tcp_connection *tc = arg;
+    unsigned long flags;
     rdsdebug("freeing tc %p\n", tc);
+
+    spin_lock_irqsave(&rds_tcp_conn_lock, flags);
+    list_del(&tc->t_tcp_node);
+    spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
+
     kmem_cache_free(rds_tcp_conn_slab, tc);
 }
--
cgit v1.1

From aa58163a76a3aef33c7220931543d45d0fe43753 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov
Date: Mon, 8 Nov 2010 06:20:50 +0000
Subject: rds: Fix rds message leak in rds_message_map_pages

The sgs allocation error path leaks the allocated message.

Signed-off-by: Pavel Emelyanov
Acked-by: Andy Grover
Signed-off-by: David S. Miller
---
 net/rds/message.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
(limited to 'net/rds')

diff --git a/net/rds/message.c b/net/rds/message.c
index 848cff4..1fd3d29 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -249,8 +249,10 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
     rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
     rm->data.op_nents = ceil(total_len, PAGE_SIZE);
     rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
-    if (!rm->data.op_sg)
+    if (!rm->data.op_sg) {
+        rds_message_put(rm);
         return ERR_PTR(-ENOMEM);
+    }
 
     for (i = 0; i < rm->data.op_nents; ++i) {
         sg_set_page(&rm->data.op_sg[i],
--
cgit v1.1

From 218854af84038d828a32f061858b1902ed2beec6 Mon Sep 17 00:00:00 2001
From: Dan Rosenberg
Date: Wed, 17 Nov 2010 06:37:16 +0000
Subject: rds: Integer overflow in RDS cmsg handling

In rds_cmsg_rdma_args(), the user-provided args->nr_local value is
restricted to less than UINT_MAX. This seems to need a tighter upper
bound, since the calculation of total iov_size can overflow, resulting
in a small sock_kmalloc() allocation. This would probably just result
in walking off the heap and crashing when calling rds_rdma_pages()
with a high count value. If it somehow doesn't crash here, then memory
corruption could occur soon after.

Signed-off-by: Dan Rosenberg
Signed-off-by: David S. Miller
---
 net/rds/rdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'net/rds')

diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 8920f2a..4e37c1c 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -567,7 +567,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
         goto out;
     }
 
-    if (args->nr_local > (u64)UINT_MAX) {
+    if (args->nr_local > UIO_MAXIOV) {
         ret = -EMSGSIZE;
         goto out;
     }
--
cgit v1.1
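To make the final fix concrete, the standalone sketch below (userspace C, not kernel code) reruns the size calculation it closes off. struct rds_iovec is re-declared locally as the two 64-bit fields (addr, bytes) used throughout this series, UIO_MAXIOV is assumed to be 1024 as in the kernel's uio.h, and the nr_local value is an arbitrary example that passes the old "less than UINT_MAX" check while overflowing a 32-bit size:

#include <stdint.h>
#include <stdio.h>

struct rds_iovec {
    uint64_t addr;
    uint64_t bytes;
};

#define UIO_MAXIOV 1024    /* assumption: the bound the final patch switches to */

int main(void)
{
    /* A 64-bit count that still passes the old "> (u64)UINT_MAX" rejection. */
    uint64_t nr_local = 0x10000001ULL;

    /* The product needs 64 bits... */
    uint64_t needed = nr_local * sizeof(struct rds_iovec);

    /* ...but squeezed into a 32-bit value (the kernel used a plain int for
     * iov_size) it wraps to something tiny: the undersized sock_kmalloc()
     * described in the commit message. */
    unsigned int iov_size = (unsigned int)needed;

    printf("nr_local            : %llu\n", (unsigned long long)nr_local);
    printf("bytes really needed : %llu\n", (unsigned long long)needed);
    printf("bytes allocated     : %u\n", iov_size);

    /* The tightened check rejects the request outright. */
    if (nr_local > UIO_MAXIOV)
        printf("tightened check: request rejected with -EMSGSIZE\n");

    return 0;
}

On a typical build this prints an allocation of 16 bytes against roughly 4 GiB actually needed; with the UIO_MAXIOV cap, the oversized request is rejected before any allocation happens.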