From a8866ff6a5bce7d0ec465a63bc482a85c09b0d39 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 12 Dec 2014 23:02:36 -0500 Subject: netlink: make the check for "send from tx_ring" deterministic As it is, zero msg_iovlen means that the first iovec in the kernel array of iovecs is left uninitialized, so checking if its ->iov_base is NULL is random. Since the real users of that thing are doing sendto(fd, NULL, 0, ...), they are getting msg_iovlen = 1 and msg_iov[0] = {NULL, 0}, which is what this test is trying to catch. As suggested by davem, let's just check that msg_iovlen was 1 and msg_iov[0].iov_base was NULL - _that_ is well-defined and it catches what we want to catch. Signed-off-by: Al Viro --- net/netlink/af_netlink.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a36777b..4fd38a6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2298,7 +2298,12 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; } + /* It's a really convoluted way for userland to ask for mmaped + * sendmsg(), but that's what we've got... 
+ */ if (netlink_tx_is_mmaped(sk) && + msg->msg_iter.type == ITER_IOVEC && + msg->msg_iter.nr_segs == 1 && msg->msg_iter.iov->iov_base == NULL) { err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, &scm); -- cgit v1.1 From 7ae9abfd9d6f3216500fc2874254e726cc30ca01 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 19:30:51 -0500 Subject: ipv4: raw_send_hdrinc(): pass msghdr Switch from passing msg->iov_iter.iov to passing msg itself Signed-off-by: Al Viro --- net/ipv4/raw.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 0bb68df..2c9d2520 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -337,7 +337,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) } static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, - void *from, size_t length, + struct msghdr *msg, size_t length, struct rtable **rtp, unsigned int flags) { @@ -382,7 +382,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->transport_header = skb->network_header; err = -EFAULT; - if (memcpy_fromiovecend((void *)iph, from, 0, length)) + if (memcpy_fromiovecend((void *)iph, msg->msg_iter.iov, 0, length)) goto error_free; iphlen = iph->ihl * 4; @@ -625,8 +625,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, back_from_confirm: if (inet->hdrincl) - /* XXX: stripping const */ - err = raw_send_hdrinc(sk, &fl4, (struct iovec *)msg->msg_iter.iov, len, + err = raw_send_hdrinc(sk, &fl4, msg, len, &rt, msg->msg_flags); else { -- cgit v1.1 From c3c1a7dbe24282ab8999cb8c964dc6371cde3ea3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 19:36:28 -0500 Subject: ipv6: rawv6_send_hdrinc(): pass msghdr Switch from passing msg->iov_iter.iov to passing msg itself Signed-off-by: Al Viro --- net/ipv6/raw.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ee25631..0dbb328 100644 --- a/net/ipv6/raw.c +++ 
b/net/ipv6/raw.c @@ -609,7 +609,7 @@ out: return err; } -static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, +static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, struct flowi6 *fl6, struct dst_entry **dstp, unsigned int flags) { @@ -648,7 +648,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, skb->ip_summed = CHECKSUM_NONE; skb->transport_header = skb->network_header; - err = memcpy_fromiovecend((void *)iph, from, 0, length); + err = memcpy_fromiovecend((void *)iph, msg->msg_iter.iov, 0, length); if (err) goto error_fault; @@ -886,8 +886,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, back_from_confirm: if (inet->hdrincl) - /* XXX: stripping const */ - err = rawv6_send_hdrinc(sk, (struct iovec *)msg->msg_iter.iov, len, &fl6, &dst, msg->msg_flags); + err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags); else { lock_sock(sk); err = ip6_append_data(sk, raw6_getfrag, &rfv, -- cgit v1.1 From 4c946d9c11d173c2ea6b9081b248f8072e6b46f1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 19:52:04 -0500 Subject: vmci: propagate msghdr all way down to __qp_memcpy_to_queue() Switch from passing msg->iov_iter.iov to passing msg itself Signed-off-by: Al Viro --- drivers/misc/vmw_vmci/vmci_queue_pair.c | 16 ++++++++-------- include/linux/vmw_vmci_api.h | 2 +- net/vmw_vsock/vmci_transport.c | 3 +-- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c index 7aaaf51..35f19a6 100644 --- a/drivers/misc/vmw_vmci/vmci_queue_pair.c +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -370,12 +370,12 @@ static int __qp_memcpy_to_queue(struct vmci_queue *queue, to_copy = size - bytes_copied; if (is_iovec) { - struct iovec *iov = (struct iovec *)src; + struct msghdr *msg = (struct msghdr *)src; int err; /* The iovec will track bytes_copied internally. 
*/ - err = memcpy_fromiovec((u8 *)va + page_offset, - iov, to_copy); + err = memcpy_from_msg((u8 *)va + page_offset, + msg, to_copy); if (err != 0) { if (kernel_if->host) kunmap(kernel_if->u.h.page[page_index]); @@ -580,7 +580,7 @@ static int qp_memcpy_from_queue(void *dest, */ static int qp_memcpy_to_queue_iov(struct vmci_queue *queue, u64 queue_offset, - const void *src, + const void *msg, size_t src_offset, size_t size) { @@ -588,7 +588,7 @@ static int qp_memcpy_to_queue_iov(struct vmci_queue *queue, * We ignore src_offset because src is really a struct iovec * and will * maintain offset internally. */ - return __qp_memcpy_to_queue(queue, queue_offset, src, size, true); + return __qp_memcpy_to_queue(queue, queue_offset, msg, size, true); } /* @@ -3223,13 +3223,13 @@ EXPORT_SYMBOL_GPL(vmci_qpair_peek); * of bytes enqueued or < 0 on error. */ ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, - void *iov, + struct msghdr *msg, size_t iov_size, int buf_type) { ssize_t result; - if (!qpair || !iov) + if (!qpair) return VMCI_ERROR_INVALID_ARGS; qp_lock(qpair); @@ -3238,7 +3238,7 @@ ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, result = qp_enqueue_locked(qpair->produce_q, qpair->consume_q, qpair->produce_q_size, - iov, iov_size, + msg, iov_size, qp_memcpy_to_queue_iov); if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && diff --git a/include/linux/vmw_vmci_api.h b/include/linux/vmw_vmci_api.h index 5691f75..63df3a2a 100644 --- a/include/linux/vmw_vmci_api.h +++ b/include/linux/vmw_vmci_api.h @@ -74,7 +74,7 @@ ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair, ssize_t vmci_qpair_peek(struct vmci_qp *qpair, void *buf, size_t buf_size, int mode); ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, - void *iov, size_t iov_size, int mode); + struct msghdr *msg, size_t iov_size, int mode); ssize_t vmci_qpair_dequev(struct vmci_qp *qpair, struct msghdr *msg, size_t iov_size, int mode); ssize_t vmci_qpair_peekv(struct vmci_qp *qpair, struct msghdr *msg, size_t iov_size, diff --git 
a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 02d2e52..7f32550 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1850,8 +1850,7 @@ static ssize_t vmci_transport_stream_enqueue( struct msghdr *msg, size_t len) { - /* XXX: stripping const */ - return vmci_qpair_enquev(vmci_trans(vsk)->qpair, (struct iovec *)msg->msg_iter.iov, len, 0); + return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0); } static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) -- cgit v1.1 From af2b040e470b470bfc881981db3c796072853eae Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 21:44:24 -0500 Subject: rxrpc: switch rxrpc_send_data() to iov_iter primitives Convert skb_add_data() to iov_iter; allows to get rid of the explicit messing with iovec in its only caller - skb_add_data() will keep advancing ->msg_iter for us, so there's no need to simulate that manually. Signed-off-by: Al Viro --- include/linux/skbuff.h | 11 +++++------ net/rxrpc/ar-output.c | 43 ++++++++++--------------------------------- 2 files changed, 15 insertions(+), 39 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 85ab7d7..9a8bafe 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2484,19 +2484,18 @@ static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) } static inline int skb_add_data(struct sk_buff *skb, - char __user *from, int copy) + struct iov_iter *from, int copy) { const int off = skb->len; if (skb->ip_summed == CHECKSUM_NONE) { - int err = 0; - __wsum csum = csum_and_copy_from_user(from, skb_put(skb, copy), - copy, 0, &err); - if (!err) { + __wsum csum = 0; + if (csum_and_copy_from_iter(skb_put(skb, copy), copy, + &csum, from) == copy) { skb->csum = csum_block_add(skb->csum, csum, off); return 0; } - } else if (!copy_from_user(skb_put(skb, copy), from, copy)) + } else if (copy_from_iter(skb_put(skb, copy), copy, from) == copy) return 0; __skb_trim(skb, off); 
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index e1a9373e..963a5b9 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -529,13 +529,11 @@ static int rxrpc_send_data(struct kiocb *iocb, struct msghdr *msg, size_t len) { struct rxrpc_skb_priv *sp; - unsigned char __user *from; struct sk_buff *skb; - const struct iovec *iov; struct sock *sk = &rx->sk; long timeo; bool more; - int ret, ioc, segment, copied; + int ret, copied; timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); @@ -545,25 +543,17 @@ static int rxrpc_send_data(struct kiocb *iocb, if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) return -EPIPE; - iov = msg->msg_iter.iov; - ioc = msg->msg_iter.nr_segs - 1; - from = iov->iov_base; - segment = iov->iov_len; - iov++; more = msg->msg_flags & MSG_MORE; skb = call->tx_pending; call->tx_pending = NULL; copied = 0; - do { + if (len > iov_iter_count(&msg->msg_iter)) + len = iov_iter_count(&msg->msg_iter); + while (len) { int copy; - if (segment > len) - segment = len; - - _debug("SEGMENT %d @%p", segment, from); - if (!skb) { size_t size, chunk, max, space; @@ -631,13 +621,13 @@ static int rxrpc_send_data(struct kiocb *iocb, /* append next segment of data to the current buffer */ copy = skb_tailroom(skb); ASSERTCMP(copy, >, 0); - if (copy > segment) - copy = segment; + if (copy > len) + copy = len; if (copy > sp->remain) copy = sp->remain; _debug("add"); - ret = skb_add_data(skb, from, copy); + ret = skb_add_data(skb, &msg->msg_iter, copy); _debug("added"); if (ret < 0) goto efault; @@ -646,18 +636,6 @@ static int rxrpc_send_data(struct kiocb *iocb, copied += copy; len -= copy; - segment -= copy; - from += copy; - while (segment == 0 && ioc > 0) { - from = iov->iov_base; - segment = iov->iov_len; - iov++; - ioc--; - } - if (len == 0) { - segment = 0; - ioc = 0; - } /* check for the far side aborting the call or a network error * occurring */ @@ -665,7 +643,7 @@ static int rxrpc_send_data(struct kiocb *iocb, goto call_aborted; 
/* add the packet to the send queue if it's now full */ - if (sp->remain <= 0 || (segment == 0 && !more)) { + if (sp->remain <= 0 || (!len && !more)) { struct rxrpc_connection *conn = call->conn; uint32_t seq; size_t pad; @@ -711,11 +689,10 @@ static int rxrpc_send_data(struct kiocb *iocb, memcpy(skb->head, &sp->hdr, sizeof(struct rxrpc_header)); - rxrpc_queue_packet(call, skb, segment == 0 && !more); + rxrpc_queue_packet(call, skb, !iov_iter_count(&msg->msg_iter) && !more); skb = NULL; } - - } while (segment > 0); + } success: ret = copied; -- cgit v1.1 From 2e90b1c45e34240eeeacab0b37d5f8f739462bdc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 21:50:31 -0500 Subject: rxrpc: make the users of rxrpc_kernel_send_data() set kvec-backed msg_iter properly Use iov_iter_kvec() there, get rid of set_fs() games - now that rxrpc_send_data() uses iov_iter primitives, it'll handle ITER_KVEC just fine. Signed-off-by: Al Viro --- fs/afs/rxrpc.c | 14 +++++++------- net/rxrpc/ar-output.c | 3 --- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 06e14bf..dbc732e 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -306,8 +306,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg, _debug("- range %u-%u%s", offset, to, msg->msg_flags ? 
" [more]" : ""); - iov_iter_init(&msg->msg_iter, WRITE, - (struct iovec *) iov, 1, to - offset); + iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, + iov, 1, to - offset); /* have to change the state *before* sending the last * packet as RxRPC might give us the reply before it @@ -384,7 +384,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, msg.msg_name = NULL; msg.msg_namelen = 0; - iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *)iov, 1, + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, call->request_size); msg.msg_control = NULL; msg.msg_controllen = 0; @@ -770,7 +770,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, void afs_send_empty_reply(struct afs_call *call) { struct msghdr msg; - struct iovec iov[1]; + struct kvec iov[1]; _enter(""); @@ -778,7 +778,7 @@ void afs_send_empty_reply(struct afs_call *call) iov[0].iov_len = 0; msg.msg_name = NULL; msg.msg_namelen = 0; - iov_iter_init(&msg.msg_iter, WRITE, iov, 0, 0); /* WTF? */ + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 0, 0); /* WTF? 
*/ msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -805,7 +805,7 @@ void afs_send_empty_reply(struct afs_call *call) void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) { struct msghdr msg; - struct iovec iov[1]; + struct kvec iov[1]; int n; _enter(""); @@ -814,7 +814,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) iov[0].iov_len = len; msg.msg_name = NULL; msg.msg_namelen = 0; - iov_iter_init(&msg.msg_iter, WRITE, iov, 1, len); + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, len); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index 963a5b9..8331c95 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -232,10 +232,7 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, call->state != RXRPC_CALL_SERVER_SEND_REPLY) { ret = -EPROTO; /* request phase complete for this client call */ } else { - mm_segment_t oldfs = get_fs(); - set_fs(KERNEL_DS); ret = rxrpc_send_data(NULL, call->socket, call, msg, len); - set_fs(oldfs); } release_sock(&call->socket->sk); -- cgit v1.1 From cacdc7d2f9fa42e29b650e2879df42ea7d7833c1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 20:34:16 -0500 Subject: ip: stash a pointer to msghdr in struct ping_fakehdr ... 
instead of storing its ->msg_iter.iov there Signed-off-by: Al Viro --- include/net/ping.h | 2 +- net/ipv4/ping.c | 7 +++---- net/ipv6/ping.c | 3 +-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/include/net/ping.h b/include/net/ping.h index f074060..cc16d41 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -59,7 +59,7 @@ extern struct pingv6_ops pingv6_ops; struct pingfakehdr { struct icmphdr icmph; - struct iovec *iov; + struct msghdr *msg; sa_family_t family; __wsum wcheck; }; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2a3720f..9e15ba7 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -602,14 +602,14 @@ int ping_getfrag(void *from, char *to, if (fraglen < sizeof(struct icmphdr)) BUG(); if (csum_partial_copy_fromiovecend(to + sizeof(struct icmphdr), - pfh->iov, 0, fraglen - sizeof(struct icmphdr), + pfh->msg->msg_iter.iov, 0, fraglen - sizeof(struct icmphdr), &pfh->wcheck)) return -EFAULT; } else if (offset < sizeof(struct icmphdr)) { BUG(); } else { if (csum_partial_copy_fromiovecend - (to, pfh->iov, offset - sizeof(struct icmphdr), + (to, pfh->msg->msg_iter.iov, offset - sizeof(struct icmphdr), fraglen, &pfh->wcheck)) return -EFAULT; } @@ -811,8 +811,7 @@ back_from_confirm: pfh.icmph.checksum = 0; pfh.icmph.un.echo.id = inet->inet_sport; pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence; - /* XXX: stripping const */ - pfh.iov = (struct iovec *)msg->msg_iter.iov; + pfh.msg = msg; pfh.wcheck = 0; pfh.family = AF_INET; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 2d31483..bd46f73 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -163,8 +163,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pfh.icmph.checksum = 0; pfh.icmph.un.echo.id = inet->inet_sport; pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence; - /* XXX: stripping const */ - pfh.iov = (struct iovec *)msg->msg_iter.iov; + pfh.msg = msg; pfh.wcheck = 0; pfh.family = AF_INET6; -- cgit v1.1 From 
57be5bdad759b9dde8b0d0cc630782a1a4ac4b9f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 13:40:20 -0500 Subject: ip: convert tcp_sendmsg() to iov_iter primitives patch is actually smaller than it seems to be - most of it is unindenting the inner loop body in tcp_sendmsg() itself... the bit in tcp_input.c is going to get reverted very soon - that's what memcpy_from_msg() will become, but not in this commit; let's keep it reasonably contained... There's one potentially subtle change here: in case of short copy from userland, mainline tcp_send_syn_data() discards the skb it has allocated and falls back to normal path, where we'll send as much as possible after rereading the same data again. This patch trims SYN+data skb instead - that way we don't need to copy from the same place twice. Signed-off-by: Al Viro --- include/net/sock.h | 18 ++-- net/ipv4/tcp.c | 233 +++++++++++++++++++++++--------------------------- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_output.c | 11 ++- 4 files changed, 123 insertions(+), 141 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 1534149..1e45e59 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1803,27 +1803,25 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags) } static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, - char __user *from, char *to, + struct iov_iter *from, char *to, int copy, int offset) { if (skb->ip_summed == CHECKSUM_NONE) { - int err = 0; - __wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err); - if (err) - return err; + __wsum csum = 0; + if (csum_and_copy_from_iter(to, copy, &csum, from) != copy) + return -EFAULT; skb->csum = csum_block_add(skb->csum, csum, offset); } else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) { - if (!access_ok(VERIFY_READ, from, copy) || - __copy_from_user_nocache(to, from, copy)) + if (copy_from_iter_nocache(to, copy, from) != copy) return -EFAULT; - } else if (copy_from_user(to, 
from, copy)) + } else if (copy_from_iter(to, copy, from) != copy) return -EFAULT; return 0; } static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb, - char __user *from, int copy) + struct iov_iter *from, int copy) { int err, offset = skb->len; @@ -1835,7 +1833,7 @@ static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb, return err; } -static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from, +static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *from, struct sk_buff *skb, struct page *page, int off, int copy) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3075723..9d72a0f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1067,11 +1067,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size) { - const struct iovec *iov; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int iovlen, flags, err, copied = 0; - int mss_now = 0, size_goal, copied_syn = 0, offset = 0; + int flags, err, copied = 0; + int mss_now = 0, size_goal, copied_syn = 0; bool sg; long timeo; @@ -1084,7 +1083,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out; else if (err) goto out_err; - offset = copied_syn; } timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); @@ -1118,8 +1116,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, mss_now = tcp_send_mss(sk, &size_goal, flags); /* Ok commence sending. 
*/ - iovlen = msg->msg_iter.nr_segs; - iov = msg->msg_iter.iov; copied = 0; err = -EPIPE; @@ -1128,151 +1124,134 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sg = !!(sk->sk_route_caps & NETIF_F_SG); - while (--iovlen >= 0) { - size_t seglen = iov->iov_len; - unsigned char __user *from = iov->iov_base; + while (iov_iter_count(&msg->msg_iter)) { + int copy = 0; + int max = size_goal; - iov++; - if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */ - if (offset >= seglen) { - offset -= seglen; - continue; - } - seglen -= offset; - from += offset; - offset = 0; + skb = tcp_write_queue_tail(sk); + if (tcp_send_head(sk)) { + if (skb->ip_summed == CHECKSUM_NONE) + max = mss_now; + copy = max - skb->len; } - while (seglen > 0) { - int copy = 0; - int max = size_goal; - - skb = tcp_write_queue_tail(sk); - if (tcp_send_head(sk)) { - if (skb->ip_summed == CHECKSUM_NONE) - max = mss_now; - copy = max - skb->len; - } - - if (copy <= 0) { + if (copy <= 0) { new_segment: - /* Allocate new segment. If the interface is SG, - * allocate skb fitting to single page. - */ - if (!sk_stream_memory_free(sk)) - goto wait_for_sndbuf; + /* Allocate new segment. If the interface is SG, + * allocate skb fitting to single page. + */ + if (!sk_stream_memory_free(sk)) + goto wait_for_sndbuf; - skb = sk_stream_alloc_skb(sk, - select_size(sk, sg), - sk->sk_allocation); - if (!skb) - goto wait_for_memory; + skb = sk_stream_alloc_skb(sk, + select_size(sk, sg), + sk->sk_allocation); + if (!skb) + goto wait_for_memory; - /* - * Check whether we can use HW checksum. - */ - if (sk->sk_route_caps & NETIF_F_ALL_CSUM) - skb->ip_summed = CHECKSUM_PARTIAL; + /* + * Check whether we can use HW checksum. 
+ */ + if (sk->sk_route_caps & NETIF_F_ALL_CSUM) + skb->ip_summed = CHECKSUM_PARTIAL; - skb_entail(sk, skb); - copy = size_goal; - max = size_goal; + skb_entail(sk, skb); + copy = size_goal; + max = size_goal; - /* All packets are restored as if they have - * already been sent. skb_mstamp isn't set to - * avoid wrong rtt estimation. - */ - if (tp->repair) - TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED; - } + /* All packets are restored as if they have + * already been sent. skb_mstamp isn't set to + * avoid wrong rtt estimation. + */ + if (tp->repair) + TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED; + } - /* Try to append data to the end of skb. */ - if (copy > seglen) - copy = seglen; - - /* Where to copy to? */ - if (skb_availroom(skb) > 0) { - /* We have some space in skb head. Superb! */ - copy = min_t(int, copy, skb_availroom(skb)); - err = skb_add_data_nocache(sk, skb, from, copy); - if (err) - goto do_fault; - } else { - bool merge = true; - int i = skb_shinfo(skb)->nr_frags; - struct page_frag *pfrag = sk_page_frag(sk); - - if (!sk_page_frag_refill(sk, pfrag)) - goto wait_for_memory; - - if (!skb_can_coalesce(skb, i, pfrag->page, - pfrag->offset)) { - if (i == MAX_SKB_FRAGS || !sg) { - tcp_mark_push(tp, skb); - goto new_segment; - } - merge = false; - } + /* Try to append data to the end of skb. */ + if (copy > iov_iter_count(&msg->msg_iter)) + copy = iov_iter_count(&msg->msg_iter); + + /* Where to copy to? */ + if (skb_availroom(skb) > 0) { + /* We have some space in skb head. Superb! 
*/ + copy = min_t(int, copy, skb_availroom(skb)); + err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); + if (err) + goto do_fault; + } else { + bool merge = true; + int i = skb_shinfo(skb)->nr_frags; + struct page_frag *pfrag = sk_page_frag(sk); + + if (!sk_page_frag_refill(sk, pfrag)) + goto wait_for_memory; - copy = min_t(int, copy, pfrag->size - pfrag->offset); - - if (!sk_wmem_schedule(sk, copy)) - goto wait_for_memory; - - err = skb_copy_to_page_nocache(sk, from, skb, - pfrag->page, - pfrag->offset, - copy); - if (err) - goto do_error; - - /* Update the skb. */ - if (merge) { - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); - } else { - skb_fill_page_desc(skb, i, pfrag->page, - pfrag->offset, copy); - get_page(pfrag->page); + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { + if (i == MAX_SKB_FRAGS || !sg) { + tcp_mark_push(tp, skb); + goto new_segment; } - pfrag->offset += copy; + merge = false; } - if (!copied) - TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; + copy = min_t(int, copy, pfrag->size - pfrag->offset); - tp->write_seq += copy; - TCP_SKB_CB(skb)->end_seq += copy; - tcp_skb_pcount_set(skb, 0); + if (!sk_wmem_schedule(sk, copy)) + goto wait_for_memory; - from += copy; - copied += copy; - if ((seglen -= copy) == 0 && iovlen == 0) { - tcp_tx_timestamp(sk, skb); - goto out; + err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, + pfrag->page, + pfrag->offset, + copy); + if (err) + goto do_error; + + /* Update the skb. 
*/ + if (merge) { + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); + } else { + skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, copy); + get_page(pfrag->page); } + pfrag->offset += copy; + } - if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) - continue; + if (!copied) + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; + + tp->write_seq += copy; + TCP_SKB_CB(skb)->end_seq += copy; + tcp_skb_pcount_set(skb, 0); + + copied += copy; + if (!iov_iter_count(&msg->msg_iter)) { + tcp_tx_timestamp(sk, skb); + goto out; + } - if (forced_push(tp)) { - tcp_mark_push(tp, skb); - __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); - } else if (skb == tcp_send_head(sk)) - tcp_push_one(sk, mss_now); + if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) continue; + if (forced_push(tp)) { + tcp_mark_push(tp, skb); + __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); + } else if (skb == tcp_send_head(sk)) + tcp_push_one(sk, mss_now); + continue; + wait_for_sndbuf: - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: - if (copied) - tcp_push(sk, flags & ~MSG_MORE, mss_now, - TCP_NAGLE_PUSH, size_goal); + if (copied) + tcp_push(sk, flags & ~MSG_MORE, mss_now, + TCP_NAGLE_PUSH, size_goal); - if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) - goto do_error; + if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) + goto do_error; - mss_now = tcp_send_mss(sk, &size_goal, flags); - } + mss_now = tcp_send_mss(sk, &size_goal, flags); } out: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 71fb37c..93c7482 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4368,7 +4368,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) goto err_free; - if (memcpy_from_msg(skb_put(skb, size), msg, size)) + if (copy_from_iter(skb_put(skb, size), size, &msg->msg_iter) != size) goto err_free; 
TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 20ab06b..722c8bc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3055,7 +3055,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; - int syn_loss = 0, space, err = 0; + int syn_loss = 0, space, err = 0, copied; unsigned long last_syn_loss = 0; struct sk_buff *syn_data; @@ -3093,11 +3093,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) goto fallback; syn_data->ip_summed = CHECKSUM_PARTIAL; memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); - if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space), - fo->data->msg_iter.iov, 0, space))) { + copied = copy_from_iter(skb_put(syn_data, space), space, + &fo->data->msg_iter); + if (unlikely(!copied)) { kfree_skb(syn_data); goto fallback; } + if (copied != space) { + skb_trim(syn_data, copied); + space = copied; + } /* No more data pending in inet_wait_for_connect() */ if (space == fo->size) -- cgit v1.1 From 21226abb4e9f14d88238964d89b279e461ddc30c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 15:48:29 -0500 Subject: net: switch memcpy_fromiovec()/memcpy_fromiovecend() users to copy_from_iter() That takes care of the majority of ->sendmsg() instances - most of them via memcpy_from_msg() or assorted getfrag() callbacks. One place where we still keep memcpy_fromiovecend() is tipc - there we potentially read the same data over and over; separate patch, that... 
Signed-off-by: Al Viro --- include/linux/skbuff.h | 3 +-- include/net/udplite.h | 3 +-- net/ipv4/ip_output.c | 6 ++---- net/ipv4/ping.c | 14 +++++++------- net/ipv4/raw.c | 2 +- net/ipv4/tcp_input.c | 2 +- net/ipv6/raw.c | 2 +- 7 files changed, 14 insertions(+), 18 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9a8bafe..b349c96 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2692,8 +2692,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) { - /* XXX: stripping const */ - return memcpy_fromiovec(data, (struct iovec *)msg->msg_iter.iov, len); + return copy_from_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT; } static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) diff --git a/include/net/udplite.h b/include/net/udplite.h index ae7c8d1..8076193 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -20,8 +20,7 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) { struct msghdr *msg = from; - /* XXX: stripping const */ - return memcpy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len); + return copy_from_iter(to, len, &msg->msg_iter) != len ? 
-EFAULT : 0; } /* Designate sk as UDP-Lite socket */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b50861b..f998bc8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -755,13 +755,11 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk struct msghdr *msg = from; if (skb->ip_summed == CHECKSUM_PARTIAL) { - /* XXX: stripping const */ - if (memcpy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len) < 0) + if (copy_from_iter(to, len, &msg->msg_iter) != len) return -EFAULT; } else { __wsum csum = 0; - /* XXX: stripping const */ - if (csum_partial_copy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len, &csum) < 0) + if (csum_and_copy_from_iter(to, len, &csum, &msg->msg_iter) != len) return -EFAULT; skb->csum = csum_block_add(skb->csum, csum, odd); } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 9e15ba7..e9f66e1 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -599,18 +599,18 @@ int ping_getfrag(void *from, char *to, struct pingfakehdr *pfh = (struct pingfakehdr *)from; if (offset == 0) { - if (fraglen < sizeof(struct icmphdr)) + fraglen -= sizeof(struct icmphdr); + if (fraglen < 0) BUG(); - if (csum_partial_copy_fromiovecend(to + sizeof(struct icmphdr), - pfh->msg->msg_iter.iov, 0, fraglen - sizeof(struct icmphdr), - &pfh->wcheck)) + if (csum_and_copy_from_iter(to + sizeof(struct icmphdr), + fraglen, &pfh->wcheck, + &pfh->msg->msg_iter) != fraglen) return -EFAULT; } else if (offset < sizeof(struct icmphdr)) { BUG(); } else { - if (csum_partial_copy_fromiovecend - (to, pfh->msg->msg_iter.iov, offset - sizeof(struct icmphdr), - fraglen, &pfh->wcheck)) + if (csum_and_copy_from_iter(to, fraglen, &pfh->wcheck, + &pfh->msg->msg_iter) != fraglen) return -EFAULT; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2c9d2520..f027a70 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -382,7 +382,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, 
skb->transport_header = skb->network_header; err = -EFAULT; - if (memcpy_fromiovecend((void *)iph, msg->msg_iter.iov, 0, length)) + if (memcpy_from_msg(iph, msg, length)) goto error_free; iphlen = iph->ihl * 4; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 93c7482..71fb37c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4368,7 +4368,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) goto err_free; - if (copy_from_iter(skb_put(skb, size), size, &msg->msg_iter) != size) + if (memcpy_from_msg(skb_put(skb, size), msg, size)) goto err_free; TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0dbb328..dae7f1a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -648,7 +648,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->ip_summed = CHECKSUM_NONE; skb->transport_header = skb->network_header; - err = memcpy_fromiovecend((void *)iph, msg->msg_iter.iov, 0, length); + err = memcpy_from_msg(iph, msg, length); if (err) goto error_fault; -- cgit v1.1 From f25dcc7687d42a72de18aa41b04990a24c9e77c7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 15:52:29 -0500 Subject: tipc: tipc ->sendmsg() conversion This one needs to copy the same data from user potentially more than once. 
Sadly, MTU changes can trigger that ;-/ Cc: Jon Maloy Signed-off-by: Al Viro --- net/tipc/msg.c | 7 ++----- net/tipc/socket.c | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 18aba9e..da67c8d 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -189,7 +189,6 @@ err: * tipc_msg_build - create buffer chain containing specified header and data * @mhdr: Message header, to be prepended to data * @m: User message - * @offset: Posision in iov to start copying from * @dsz: Total length of user data * @pktmax: Max packet size that can be used * @list: Buffer or chain of buffers to be returned to caller @@ -221,8 +220,7 @@ int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m, __skb_queue_tail(list, skb); skb_copy_to_linear_data(skb, mhdr, mhsz); pktpos = skb->data + mhsz; - if (!dsz || !memcpy_fromiovecend(pktpos, m->msg_iter.iov, offset, - dsz)) + if (copy_from_iter(pktpos, dsz, &m->msg_iter) == dsz) return dsz; rc = -EFAULT; goto error; @@ -252,12 +250,11 @@ int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m, if (drem < pktrem) pktrem = drem; - if (memcpy_fromiovecend(pktpos, m->msg_iter.iov, offset, pktrem)) { + if (copy_from_iter(pktpos, pktrem, &m->msg_iter) != pktrem) { rc = -EFAULT; goto error; } drem -= pktrem; - offset += pktrem; if (!drem) break; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 679a220..caa4d66 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -733,6 +733,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tipc_sk(sk)->phdr; struct sk_buff_head head; + struct iov_iter save = msg->msg_iter; uint mtu; int rc; @@ -758,8 +759,10 @@ new_mtu: rc = dsz; break; } - if (rc == -EMSGSIZE) + if (rc == -EMSGSIZE) { + msg->msg_iter = save; goto new_mtu; + } if (rc != -ELINKCONG) break; tipc_sk(sk)->link_cong = 1; @@ -895,6 +898,7 @@ static int 
tipc_sendmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff_head head; struct sk_buff *skb; struct tipc_name_seq *seq = &dest->addr.nameseq; + struct iov_iter save; u32 mtu; long timeo; int rc; @@ -963,6 +967,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, msg_set_hdr_sz(mhdr, BASIC_H_SIZE); } + save = m->msg_iter; new_mtu: mtu = tipc_node_get_mtu(net, dnode, tsk->portid); __skb_queue_head_init(&head); @@ -980,8 +985,10 @@ new_mtu: rc = dsz; break; } - if (rc == -EMSGSIZE) + if (rc == -EMSGSIZE) { + m->msg_iter = save; goto new_mtu; + } if (rc != -ELINKCONG) break; tsk->link_cong = 1; @@ -1052,6 +1059,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, long timeo; u32 dnode; uint mtu, send, sent = 0; + struct iov_iter save; /* Handle implied connection establishment */ if (unlikely(dest)) { @@ -1078,6 +1086,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, dnode = tsk_peer_node(tsk); next: + save = m->msg_iter; mtu = tsk->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); __skb_queue_head_init(&head); @@ -1097,6 +1106,7 @@ next: if (rc == -EMSGSIZE) { tsk->max_pkt = tipc_node_get_mtu(net, dnode, portid); + m->msg_iter = save; goto next; } if (rc != -ELINKCONG) -- cgit v1.1 From 31a25fae85956e3a9c778141d29e5e803fb0b124 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 15:53:57 -0500 Subject: net: bury net/core/iovec.c - nothing in there is used anymore Signed-off-by: Al Viro --- include/linux/socket.h | 7 --- net/core/Makefile | 2 +- net/core/iovec.c | 137 ------------------------------------------------- 3 files changed, 1 insertion(+), 145 deletions(-) delete mode 100644 net/core/iovec.c diff --git a/include/linux/socket.h b/include/linux/socket.h index 6e49a14..5c19cba 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -318,13 +318,6 @@ struct ucred { /* IPX options */ #define IPX_TYPE 1 -extern int csum_partial_copy_fromiovecend(unsigned char 
*kdata, - struct iovec *iov, - int offset, - unsigned int len, __wsum *csump); -extern unsigned long iov_pages(const struct iovec *iov, int offset, - unsigned long nr_segs); - extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); diff --git a/net/core/Makefile b/net/core/Makefile index 235e6c5..fec0856 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -2,7 +2,7 @@ # Makefile for the Linux networking core. # -obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ +obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff --git a/net/core/iovec.c b/net/core/iovec.c deleted file mode 100644 index dcbe98b..0000000 --- a/net/core/iovec.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * iovec manipulation routines. - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Fixes: - * Andrew Lunn : Errors in iovec copying. - * Pedro Roque : Added memcpy_fromiovecend and - * csum_..._fromiovecend. - * Andi Kleen : fixed error handling for 2.1 - * Alexey Kuznetsov: 2.1 optimisations - * Andi Kleen : Fix csum*fromiovecend for IPv6. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * And now for the all-in-one: copy and checksum from a user iovec - * directly to a datagram - * Calls to csum_partial but the last must be in 32 bit chunks - * - * ip_build_xmit must ensure that when fragmenting only the last - * call to this function will be unaligned also. 
- */ -int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov, - int offset, unsigned int len, __wsum *csump) -{ - __wsum csum = *csump; - int partial_cnt = 0, err = 0; - - /* Skip over the finished iovecs */ - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - } - - while (len > 0) { - u8 __user *base = iov->iov_base + offset; - int copy = min_t(unsigned int, len, iov->iov_len - offset); - - offset = 0; - - /* There is a remnant from previous iov. */ - if (partial_cnt) { - int par_len = 4 - partial_cnt; - - /* iov component is too short ... */ - if (par_len > copy) { - if (copy_from_user(kdata, base, copy)) - goto out_fault; - kdata += copy; - base += copy; - partial_cnt += copy; - len -= copy; - iov++; - if (len) - continue; - *csump = csum_partial(kdata - partial_cnt, - partial_cnt, csum); - goto out; - } - if (copy_from_user(kdata, base, par_len)) - goto out_fault; - csum = csum_partial(kdata - partial_cnt, 4, csum); - kdata += par_len; - base += par_len; - copy -= par_len; - len -= par_len; - partial_cnt = 0; - } - - if (len > copy) { - partial_cnt = copy % 4; - if (partial_cnt) { - copy -= partial_cnt; - if (copy_from_user(kdata + copy, base + copy, - partial_cnt)) - goto out_fault; - } - } - - if (copy) { - csum = csum_and_copy_from_user(base, kdata, copy, - csum, &err); - if (err) - goto out; - } - len -= copy + partial_cnt; - kdata += copy + partial_cnt; - iov++; - } - *csump = csum; -out: - return err; - -out_fault: - err = -EFAULT; - goto out; -} -EXPORT_SYMBOL(csum_partial_copy_fromiovecend); - -unsigned long iov_pages(const struct iovec *iov, int offset, - unsigned long nr_segs) -{ - unsigned long seg, base; - int pages = 0, len, size; - - while (nr_segs && (offset >= iov->iov_len)) { - offset -= iov->iov_len; - ++iov; - --nr_segs; - } - - for (seg = 0; seg < nr_segs; seg++) { - base = (unsigned long)iov[seg].iov_base + offset; - len = iov[seg].iov_len - offset; - size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> 
PAGE_SHIFT; - pages += size; - offset = 0; - } - - return pages; -} -EXPORT_SYMBOL(iov_pages); -- cgit v1.1 From 1d10eb2f156f5fc83cf6c7ce60441592e66eadb3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 16:39:25 -0500 Subject: crypto: switch af_alg_make_sg() to iov_iter With that, all ->sendmsg() instances are converted to iov_iter primitives and are agnostic wrt the kind of iov_iter they are working with. So's the last remaining ->recvmsg() instance that wasn't kind-agnostic yet. All ->sendmsg() and ->recvmsg() advance ->msg_iter by the amount actually copied and none of them modifies the underlying iovec, etc. Cc: linux-crypto@vger.kernel.org Signed-off-by: Al Viro --- crypto/af_alg.c | 40 ++++++++------------------ crypto/algif_hash.c | 45 ++++++++++++------------------ crypto/algif_skcipher.c | 74 ++++++++++++++++++++++--------------------------- include/crypto/if_alg.h | 3 +- 4 files changed, 62 insertions(+), 100 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 4665b79..eb78fe8 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -338,49 +338,31 @@ static const struct net_proto_family alg_family = { .owner = THIS_MODULE, }; -int af_alg_make_sg(struct af_alg_sgl *sgl, void __user *addr, int len, - int write) +int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len) { - unsigned long from = (unsigned long)addr; - unsigned long npages; - unsigned off; - int err; - int i; - - err = -EFAULT; - if (!access_ok(write ? 
VERIFY_READ : VERIFY_WRITE, addr, len)) - goto out; - - off = from & ~PAGE_MASK; - npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (npages > ALG_MAX_PAGES) - npages = ALG_MAX_PAGES; + size_t off; + ssize_t n; + int npages, i; - err = get_user_pages_fast(from, npages, write, sgl->pages); - if (err < 0) - goto out; + n = iov_iter_get_pages(iter, sgl->pages, len, ALG_MAX_PAGES, &off); + if (n < 0) + return n; - npages = err; - err = -EINVAL; + npages = PAGE_ALIGN(off + n); if (WARN_ON(npages == 0)) - goto out; - - err = 0; + return -EINVAL; sg_init_table(sgl->sg, npages); - for (i = 0; i < npages; i++) { + for (i = 0, len = n; i < npages; i++) { int plen = min_t(int, len, PAGE_SIZE - off); sg_set_page(sgl->sg + i, sgl->pages[i], plen, off); off = 0; len -= plen; - err += plen; } - -out: - return err; + return n; } EXPORT_SYMBOL_GPL(af_alg_make_sg); diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 01f56eb..01da360 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -41,8 +41,6 @@ static int hash_sendmsg(struct kiocb *unused, struct socket *sock, struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); struct hash_ctx *ctx = ask->private; - unsigned long iovlen; - const struct iovec *iov; long copied = 0; int err; @@ -58,37 +56,28 @@ static int hash_sendmsg(struct kiocb *unused, struct socket *sock, ctx->more = 0; - for (iov = msg->msg_iter.iov, iovlen = msg->msg_iter.nr_segs; iovlen > 0; - iovlen--, iov++) { - unsigned long seglen = iov->iov_len; - char __user *from = iov->iov_base; + while (iov_iter_count(&msg->msg_iter)) { + int len = iov_iter_count(&msg->msg_iter); - while (seglen) { - int len = min_t(unsigned long, seglen, limit); - int newlen; + if (len > limit) + len = limit; - newlen = af_alg_make_sg(&ctx->sgl, from, len, 0); - if (newlen < 0) { - err = copied ? 
0 : newlen; - goto unlock; - } - - ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, NULL, - newlen); - - err = af_alg_wait_for_completion( - crypto_ahash_update(&ctx->req), - &ctx->completion); + len = af_alg_make_sg(&ctx->sgl, &msg->msg_iter, len); + if (len < 0) { + err = copied ? 0 : len; + goto unlock; + } - af_alg_free_sg(&ctx->sgl); + ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, NULL, len); - if (err) - goto unlock; + err = af_alg_wait_for_completion(crypto_ahash_update(&ctx->req), + &ctx->completion); + af_alg_free_sg(&ctx->sgl); + if (err) + goto unlock; - seglen -= newlen; - from += newlen; - copied += newlen; - } + copied += len; + iov_iter_advance(&msg->msg_iter, len); } err = 0; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index c12207c..37110fd 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -426,67 +426,59 @@ static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock, &ctx->req)); struct skcipher_sg_list *sgl; struct scatterlist *sg; - unsigned long iovlen; - const struct iovec *iov; int err = -EAGAIN; int used; long copied = 0; lock_sock(sk); - for (iov = msg->msg_iter.iov, iovlen = msg->msg_iter.nr_segs; iovlen > 0; - iovlen--, iov++) { - unsigned long seglen = iov->iov_len; - char __user *from = iov->iov_base; - - while (seglen) { - sgl = list_first_entry(&ctx->tsgl, - struct skcipher_sg_list, list); - sg = sgl->sg; - - while (!sg->length) - sg++; - - if (!ctx->used) { - err = skcipher_wait_for_data(sk, flags); - if (err) - goto unlock; - } + while (iov_iter_count(&msg->msg_iter)) { + sgl = list_first_entry(&ctx->tsgl, + struct skcipher_sg_list, list); + sg = sgl->sg; - used = min_t(unsigned long, ctx->used, seglen); + while (!sg->length) + sg++; - used = af_alg_make_sg(&ctx->rsgl, from, used, 1); - err = used; - if (err < 0) + used = ctx->used; + if (!used) { + err = skcipher_wait_for_data(sk, flags); + if (err) goto unlock; + } + + used = min_t(unsigned long, used, 
iov_iter_count(&msg->msg_iter)); + + used = af_alg_make_sg(&ctx->rsgl, &msg->msg_iter, used); + err = used; + if (err < 0) + goto unlock; - if (ctx->more || used < ctx->used) - used -= used % bs; + if (ctx->more || used < ctx->used) + used -= used % bs; - err = -EINVAL; - if (!used) - goto free; + err = -EINVAL; + if (!used) + goto free; - ablkcipher_request_set_crypt(&ctx->req, sg, - ctx->rsgl.sg, used, - ctx->iv); + ablkcipher_request_set_crypt(&ctx->req, sg, + ctx->rsgl.sg, used, + ctx->iv); - err = af_alg_wait_for_completion( + err = af_alg_wait_for_completion( ctx->enc ? crypto_ablkcipher_encrypt(&ctx->req) : crypto_ablkcipher_decrypt(&ctx->req), &ctx->completion); free: - af_alg_free_sg(&ctx->rsgl); + af_alg_free_sg(&ctx->rsgl); - if (err) - goto unlock; + if (err) + goto unlock; - copied += used; - from += used; - seglen -= used; - skcipher_pull_sgl(sk, used); - } + copied += used; + skcipher_pull_sgl(sk, used); + iov_iter_advance(&msg->msg_iter, used); } err = 0; diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index cd62bf4..88ea64e 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -67,8 +67,7 @@ int af_alg_unregister_type(const struct af_alg_type *type); int af_alg_release(struct socket *sock); int af_alg_accept(struct sock *sk, struct socket *newsock); -int af_alg_make_sg(struct af_alg_sgl *sgl, void __user *addr, int len, - int write); +int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len); void af_alg_free_sg(struct af_alg_sgl *sgl); int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con); -- cgit v1.1 From 6d65233020765ea25541952276217d49e3ecbf9e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 30 Jan 2015 16:12:56 -0500 Subject: net/socket.c: fold do_sock_{read,write} into callers Signed-off-by: Al Viro --- net/socket.c | 56 +++++++++++++++++++++----------------------------------- 1 file changed, 21 insertions(+), 35 deletions(-) diff --git a/net/socket.c b/net/socket.c index 
3326d67..4d08b50 100644 --- a/net/socket.c +++ b/net/socket.c @@ -845,25 +845,11 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, return sock->ops->splice_read(sock, ppos, pipe, len, flags); } -static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, - struct file *file, const struct iovec *iov, - unsigned long nr_segs) -{ - struct socket *sock = file->private_data; - - msg->msg_name = NULL; - msg->msg_namelen = 0; - msg->msg_control = NULL; - msg->msg_controllen = 0; - iov_iter_init(&msg->msg_iter, READ, iov, nr_segs, iocb->ki_nbytes); - msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; - - return __sock_recvmsg(iocb, sock, msg, iocb->ki_nbytes, msg->msg_flags); -} - static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { + struct file *file = iocb->ki_filp; + struct socket *sock = file->private_data; struct msghdr msg; if (pos != 0) @@ -872,36 +858,36 @@ static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ return 0; - return do_sock_read(&msg, iocb, iocb->ki_filp, iov, nr_segs); -} - -static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, - struct file *file, const struct iovec *iov, - unsigned long nr_segs) -{ - struct socket *sock = file->private_data; - - msg->msg_name = NULL; - msg->msg_namelen = 0; - msg->msg_control = NULL; - msg->msg_controllen = 0; - iov_iter_init(&msg->msg_iter, WRITE, iov, nr_segs, iocb->ki_nbytes); - msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; - if (sock->type == SOCK_SEQPACKET) - msg->msg_flags |= MSG_EOR; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = NULL; + msg.msg_controllen = 0; + iov_iter_init(&msg.msg_iter, READ, iov, nr_segs, iocb->ki_nbytes); + msg.msg_flags = (file->f_flags & O_NONBLOCK) ? 
MSG_DONTWAIT : 0; - return __sock_sendmsg(iocb, sock, msg, iocb->ki_nbytes); + return __sock_recvmsg(iocb, sock, &msg, iocb->ki_nbytes, msg.msg_flags); } static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { + struct file *file = iocb->ki_filp; + struct socket *sock = file->private_data; struct msghdr msg; if (pos != 0) return -ESPIPE; - return do_sock_write(&msg, iocb, iocb->ki_filp, iov, nr_segs); + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = NULL; + msg.msg_controllen = 0; + iov_iter_init(&msg.msg_iter, WRITE, iov, nr_segs, iocb->ki_nbytes); + msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; + if (sock->type == SOCK_SEQPACKET) + msg.msg_flags |= MSG_EOR; + + return __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes); } /* -- cgit v1.1 From 8ae5e030f30e50a81df1b269d5a5c32d023aa66d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 19:40:50 -0500 Subject: net: switch sockets to ->read_iter/->write_iter Signed-off-by: Al Viro --- net/socket.c | 56 +++++++++++++++++++++++++++----------------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/net/socket.c b/net/socket.c index 4d08b50..bbedbfc 100644 --- a/net/socket.c +++ b/net/socket.c @@ -113,10 +113,8 @@ unsigned int sysctl_net_busy_read __read_mostly; unsigned int sysctl_net_busy_poll __read_mostly; #endif -static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); -static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); +static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to); +static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from); static int sock_mmap(struct file *file, struct vm_area_struct *vma); static int sock_close(struct inode *inode, struct file *file); @@ -142,8 +140,10 @@ static ssize_t sock_splice_read(struct file *file, loff_t 
*ppos, static const struct file_operations socket_file_ops = { .owner = THIS_MODULE, .llseek = no_llseek, - .aio_read = sock_aio_read, - .aio_write = sock_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = sock_read_iter, + .write_iter = sock_write_iter, .poll = sock_poll, .unlocked_ioctl = sock_ioctl, #ifdef CONFIG_COMPAT @@ -845,49 +845,47 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, return sock->ops->splice_read(sock, ppos, pipe, len, flags); } -static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - struct msghdr msg; + struct msghdr msg = {.msg_iter = *to}; + ssize_t res; + + if (file->f_flags & O_NONBLOCK) + msg.msg_flags = MSG_DONTWAIT; - if (pos != 0) + if (iocb->ki_pos != 0) return -ESPIPE; if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ return 0; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_controllen = 0; - iov_iter_init(&msg.msg_iter, READ, iov, nr_segs, iocb->ki_nbytes); - msg.msg_flags = (file->f_flags & O_NONBLOCK) ? 
MSG_DONTWAIT : 0; - - return __sock_recvmsg(iocb, sock, &msg, iocb->ki_nbytes, msg.msg_flags); + res = __sock_recvmsg(iocb, sock, &msg, + iocb->ki_nbytes, msg.msg_flags); + *to = msg.msg_iter; + return res; } -static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - struct msghdr msg; + struct msghdr msg = {.msg_iter = *from}; + ssize_t res; - if (pos != 0) + if (iocb->ki_pos != 0) return -ESPIPE; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_controllen = 0; - iov_iter_init(&msg.msg_iter, WRITE, iov, nr_segs, iocb->ki_nbytes); - msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; + if (file->f_flags & O_NONBLOCK) + msg.msg_flags = MSG_DONTWAIT; + if (sock->type == SOCK_SEQPACKET) msg.msg_flags |= MSG_EOR; - return __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes); + res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes); + *from = msg.msg_iter; + return res; } /* -- cgit v1.1 From aad9a1cec7dcd1d45809b64643fce37061b17788 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2014 14:49:01 -0500 Subject: vhost: switch vhost get_indirect() to iov_iter, kill memcpy_fromiovec() Cc: Michael S. 
Tsirkin Cc: kvm@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Signed-off-by: Al Viro --- drivers/vhost/vhost.c | 6 ++++-- include/linux/uio.h | 1 - lib/iovec.c | 25 ------------------------- 3 files changed, 4 insertions(+), 28 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index cb807d0..2ee2826 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1125,6 +1125,7 @@ static int get_indirect(struct vhost_virtqueue *vq, struct vring_desc desc; unsigned int i = 0, count, found = 0; u32 len = vhost32_to_cpu(vq, indirect->len); + struct iov_iter from; int ret; /* Sanity check */ @@ -1142,6 +1143,7 @@ static int get_indirect(struct vhost_virtqueue *vq, vq_err(vq, "Translation failure %d in indirect.\n", ret); return ret; } + iov_iter_init(&from, READ, vq->indirect, ret, len); /* We will use the result as an address to read from, so most * architectures only need a compiler barrier here. */ @@ -1164,8 +1166,8 @@ static int get_indirect(struct vhost_virtqueue *vq, i, count); return -EINVAL; } - if (unlikely(memcpy_fromiovec((unsigned char *)&desc, - vq->indirect, sizeof desc))) { + if (unlikely(copy_from_iter(&desc, sizeof(desc), &from) != + sizeof(desc))) { vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc); return -EINVAL; diff --git a/include/linux/uio.h b/include/linux/uio.h index 1c5e453..af3439f 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -135,7 +135,6 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); -int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len); int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, int offset, int len); int memcpy_toiovecend(const struct iovec *v, unsigned char 
*kdata, diff --git a/lib/iovec.c b/lib/iovec.c index 2d99cb4..4a90875 100644 --- a/lib/iovec.c +++ b/lib/iovec.c @@ -3,31 +3,6 @@ #include /* - * Copy iovec to kernel. Returns -EFAULT on error. - * - * Note: this modifies the original iovec. - */ - -int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) -{ - while (len > 0) { - if (iov->iov_len) { - int copy = min_t(unsigned int, len, iov->iov_len); - if (copy_from_user(kdata, iov->iov_base, copy)) - return -EFAULT; - len -= copy; - kdata += copy; - iov->iov_base += copy; - iov->iov_len -= copy; - } - iov++; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_fromiovec); - -/* * Copy kernel to iovec. Returns -EFAULT on error. */ -- cgit v1.1 From 98a527aac1eb198dbc4405b800e102563ed8e4dd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2014 15:00:58 -0500 Subject: vhost: don't bother with copying iovec in handle_tx() just advance the msg.msg_iter and be done with that. Cc: Michael S. Tsirkin Cc: kvm@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Signed-off-by: Al Viro --- drivers/vhost/net.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 6906f76..d86cc9b 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -336,7 +336,7 @@ static void handle_tx(struct vhost_net *net) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *vq = &nvq->vq; - unsigned out, in, s; + unsigned out, in; int head; struct msghdr msg = { .msg_name = NULL, @@ -395,16 +395,17 @@ static void handle_tx(struct vhost_net *net) break; } /* Skip header. TODO: support TSO. 
*/ - s = move_iovec_hdr(vq->iov, nvq->hdr, hdr_size, out); len = iov_length(vq->iov, out); iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len); + iov_iter_advance(&msg.msg_iter, hdr_size); /* Sanity check */ - if (!len) { + if (!iov_iter_count(&msg.msg_iter)) { vq_err(vq, "Unexpected header len for TX: " "%zd expected %zd\n", - iov_length(nvq->hdr, s), hdr_size); + len, hdr_size); break; } + len = iov_iter_count(&msg.msg_iter); zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN && (nvq->upend_idx + 1) % UIO_MAXIOV != -- cgit v1.1 From ba7438aed924133df54a60e4cd5499d359bcf2a8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2014 15:51:28 -0500 Subject: vhost: don't bother copying iovecs in handle_rx(), kill memcpy_toiovecend() Cc: Michael S. Tsirkin Cc: kvm@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Signed-off-by: Al Viro --- drivers/vhost/net.c | 82 +++++++++++++++-------------------------------------- include/linux/uio.h | 3 -- lib/iovec.c | 26 ----------------- 3 files changed, 23 insertions(+), 88 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d86cc9b..e022cc4 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -84,10 +84,6 @@ struct vhost_net_ubuf_ref { struct vhost_net_virtqueue { struct vhost_virtqueue vq; - /* hdr is used to store the virtio header. - * Since each iovec has >= 1 byte length, we never need more than - * header length entries to store the header. */ - struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)]; size_t vhost_hlen; size_t sock_hlen; /* vhost zerocopy support fields below: */ @@ -235,44 +231,6 @@ static bool vhost_sock_zcopy(struct socket *sock) sock_flag(sock->sk, SOCK_ZEROCOPY); } -/* Pop first len bytes from iovec. Return number of segments used. 
*/ -static int move_iovec_hdr(struct iovec *from, struct iovec *to, - size_t len, int iov_count) -{ - int seg = 0; - size_t size; - - while (len && seg < iov_count) { - size = min(from->iov_len, len); - to->iov_base = from->iov_base; - to->iov_len = size; - from->iov_len -= size; - from->iov_base += size; - len -= size; - ++from; - ++to; - ++seg; - } - return seg; -} -/* Copy iovec entries for len bytes from iovec. */ -static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, - size_t len, int iovcount) -{ - int seg = 0; - size_t size; - - while (len && seg < iovcount) { - size = min(from->iov_len, len); - to->iov_base = from->iov_base; - to->iov_len = size; - len -= size; - ++from; - ++to; - ++seg; - } -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM @@ -570,9 +528,9 @@ static void handle_rx(struct vhost_net *net) .msg_controllen = 0, .msg_flags = MSG_DONTWAIT, }; - struct virtio_net_hdr_mrg_rxbuf hdr = { - .hdr.flags = 0, - .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE + struct virtio_net_hdr hdr = { + .flags = 0, + .gso_type = VIRTIO_NET_HDR_GSO_NONE }; size_t total_len = 0; int err, mergeable; @@ -580,6 +538,7 @@ static void handle_rx(struct vhost_net *net) size_t vhost_hlen, sock_hlen; size_t vhost_len, sock_len; struct socket *sock; + struct iov_iter fixup; mutex_lock(&vq->mutex); sock = vq->private_data; @@ -624,14 +583,19 @@ static void handle_rx(struct vhost_net *net) break; } /* We don't need to be notified again. */ - if (unlikely((vhost_hlen))) - /* Skip header. TODO: support TSO. */ - move_iovec_hdr(vq->iov, nvq->hdr, vhost_hlen, in); - else - /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF: - * needed because recvmsg can modify msg_iov. 
*/ - copy_iovec_hdr(vq->iov, nvq->hdr, sock_hlen, in); - iov_iter_init(&msg.msg_iter, READ, vq->iov, in, sock_len); + iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len); + fixup = msg.msg_iter; + if (unlikely((vhost_hlen))) { + /* We will supply the header ourselves + * TODO: support TSO. + */ + iov_iter_advance(&msg.msg_iter, vhost_hlen); + } else { + /* It'll come from socket; we'll need to patch + * ->num_buffers over if VIRTIO_NET_F_MRG_RXBUF + */ + iov_iter_advance(&fixup, sizeof(hdr)); + } err = sock->ops->recvmsg(NULL, sock, &msg, sock_len, MSG_DONTWAIT | MSG_TRUNC); /* Userspace might have consumed the packet meanwhile: @@ -643,18 +607,18 @@ static void handle_rx(struct vhost_net *net) vhost_discard_vq_desc(vq, headcount); continue; } + /* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */ if (unlikely(vhost_hlen) && - memcpy_toiovecend(nvq->hdr, (unsigned char *)&hdr, 0, - vhost_hlen)) { + copy_to_iter(&hdr, sizeof(hdr), &fixup) != sizeof(hdr)) { vq_err(vq, "Unable to write vnet_hdr at addr %p\n", vq->iov->iov_base); break; } - /* TODO: Should check and handle checksum. */ + /* Supply (or replace) ->num_buffers if VIRTIO_NET_F_MRG_RXBUF + * TODO: Should check and handle checksum. 
+ */ if (likely(mergeable) && - memcpy_toiovecend(nvq->hdr, (unsigned char *)&headcount, - offsetof(typeof(hdr), num_buffers), - sizeof hdr.num_buffers)) { + copy_to_iter(&headcount, 2, &fixup) != 2) { vq_err(vq, "Failed num_buffers write"); vhost_discard_vq_desc(vq, headcount); break; diff --git a/include/linux/uio.h b/include/linux/uio.h index af3439f..02bd8a9 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -137,7 +137,4 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct io int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, int offset, int len); -int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata, - int offset, int len); - #endif diff --git a/lib/iovec.c b/lib/iovec.c index 4a90875..d8f17a9 100644 --- a/lib/iovec.c +++ b/lib/iovec.c @@ -3,32 +3,6 @@ #include /* - * Copy kernel to iovec. Returns -EFAULT on error. - */ - -int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata, - int offset, int len) -{ - int copy; - for (; len > 0; ++iov) { - /* Skip over the finished iovecs */ - if (unlikely(offset >= iov->iov_len)) { - offset -= iov->iov_len; - continue; - } - copy = min_t(unsigned int, iov->iov_len - offset, len); - if (copy_to_user(iov->iov_base + offset, kdata, copy)) - return -EFAULT; - offset = 0; - kdata += copy; - len -= copy; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_toiovecend); - -/* * Copy iovec to kernel. Returns -EFAULT on error. */ -- cgit v1.1 From 57dd8a0735aabff4862025cf64ad94da3d80e620 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2014 16:03:43 -0500 Subject: vhost: vhost_scsi_handle_vq() should just use copy_from_user() it has just verified that it asks no more than the length of the first segment of iovec. And with that the last user of stuff in lib/iovec.c is gone. RIP. Cc: Michael S. Tsirkin Cc: Nicholas A. 
Bellinger Cc: kvm@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Signed-off-by: Al Viro --- drivers/vhost/scsi.c | 2 +- include/linux/uio.h | 2 -- lib/Makefile | 2 +- lib/iovec.c | 36 ------------------------------------ 4 files changed, 2 insertions(+), 40 deletions(-) delete mode 100644 lib/iovec.c diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index d695b16..dc78d87 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1079,7 +1079,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) req_size, vq->iov[0].iov_len); break; } - ret = memcpy_fromiovecend(req, &vq->iov[0], 0, req_size); + ret = copy_from_user(req, vq->iov[0].iov_base, req_size); if (unlikely(ret)) { vq_err(vq, "Faulted on virtio_scsi_cmd_req\n"); break; diff --git a/include/linux/uio.h b/include/linux/uio.h index 02bd8a9..3e0cb4e 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -135,6 +135,4 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); -int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, - int offset, int len); #endif diff --git a/lib/Makefile b/lib/Makefile index 3c3b30b..1071d06 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -24,7 +24,7 @@ obj-y += lockref.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ + gcd.o lcm.o list_sort.o uuid.o flex_array.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o obj-y += string_helpers.o diff --git a/lib/iovec.c b/lib/iovec.c deleted file mode 100644 index d8f17a9..0000000 --- a/lib/iovec.c +++ /dev/null @@ -1,36 
+0,0 @@ -#include <linux/uaccess.h> -#include <linux/export.h> -#include <linux/uio.h> - -/* - * Copy iovec to kernel. Returns -EFAULT on error. - */ - -int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, - int offset, int len) -{ - /* No data? Done! */ - if (len == 0) - return 0; - - /* Skip over the finished iovecs */ - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - } - - while (len > 0) { - u8 __user *base = iov->iov_base + offset; - int copy = min_t(unsigned int, len, iov->iov_len - offset); - - offset = 0; - if (copy_from_user(kdata, base, copy)) - return -EFAULT; - len -= copy; - kdata += copy; - iov++; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_fromiovecend); -- cgit v1.1