From d47effe1be0c4fc983306a9c704632e3a087eed8 Mon Sep 17 00:00:00 2001
From: Krishna Kumar
Date: Tue, 1 Mar 2011 17:06:37 +0530
Subject: vhost: Cleanup vhost.c and net.c

Minor cleanup of vhost.c and net.c to match coding style.

Signed-off-by: Krishna Kumar
Signed-off-by: Michael S. Tsirkin
---
 drivers/vhost/net.c   | 19 +++++++++++-------
 drivers/vhost/vhost.c | 53 +++++++++++++++++++++++++++++++++++----------------
 2 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f616cef..59dad9f 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -60,6 +60,7 @@ static int move_iovec_hdr(struct iovec *from, struct iovec *to,
 {
 	int seg = 0;
 	size_t size;
+
 	while (len && seg < iov_count) {
 		size = min(from->iov_len, len);
 		to->iov_base = from->iov_base;
@@ -79,6 +80,7 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
 {
 	int seg = 0;
 	size_t size;
+
 	while (len && seg < iovcount) {
 		size = min(from->iov_len, len);
 		to->iov_base = from->iov_base;
@@ -296,17 +298,16 @@ static void handle_rx_big(struct vhost_net *net)
 		.msg_iov = vq->iov,
 		.msg_flags = MSG_DONTWAIT,
 	};
-
 	struct virtio_net_hdr hdr = {
 		.flags = 0,
 		.gso_type = VIRTIO_NET_HDR_GSO_NONE
 	};
-
 	size_t len, total_len = 0;
 	int err;
 	size_t hdr_size;
 	/* TODO: check that we are running from vhost_worker? */
 	struct socket *sock = rcu_dereference_check(vq->private_data, 1);
+
 	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
 		return;
 
@@ -405,18 +406,17 @@ static void handle_rx_mergeable(struct vhost_net *net)
 		.msg_iov = vq->iov,
 		.msg_flags = MSG_DONTWAIT,
 	};
-
 	struct virtio_net_hdr_mrg_rxbuf hdr = {
 		.hdr.flags = 0,
 		.hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
 	};
-
 	size_t total_len = 0;
 	int err, headcount;
 	size_t vhost_hlen, sock_hlen;
 	size_t vhost_len, sock_len;
 	/* TODO: check that we are running from vhost_worker? */
 	struct socket *sock = rcu_dereference_check(vq->private_data, 1);
+
 	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
 		return;
 
@@ -654,6 +654,7 @@ static struct socket *get_raw_socket(int fd)
 	} uaddr;
 	int uaddr_len = sizeof uaddr, r;
 	struct socket *sock = sockfd_lookup(fd, &r);
+
 	if (!sock)
 		return ERR_PTR(-ENOTSOCK);
 
@@ -682,6 +683,7 @@ static struct socket *get_tap_socket(int fd)
 {
 	struct file *file = fget(fd);
 	struct socket *sock;
+
 	if (!file)
 		return ERR_PTR(-EBADF);
 	sock = tun_get_socket(file);
@@ -696,6 +698,7 @@ static struct socket *get_tap_socket(int fd)
 static struct socket *get_socket(int fd)
 {
 	struct socket *sock;
+
 	/* special case to disable backend */
 	if (fd == -1)
 		return NULL;
@@ -741,9 +744,9 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 	oldsock = rcu_dereference_protected(vq->private_data,
 					    lockdep_is_held(&vq->mutex));
 	if (sock != oldsock) {
-                vhost_net_disable_vq(n, vq);
-                rcu_assign_pointer(vq->private_data, sock);
-                vhost_net_enable_vq(n, vq);
+		vhost_net_disable_vq(n, vq);
+		rcu_assign_pointer(vq->private_data, sock);
+		vhost_net_enable_vq(n, vq);
 	}
 
 	mutex_unlock(&vq->mutex);
@@ -768,6 +771,7 @@ static long vhost_net_reset_owner(struct vhost_net *n)
 	struct socket *tx_sock = NULL;
 	struct socket *rx_sock = NULL;
 	long err;
+
 	mutex_lock(&n->dev.mutex);
 	err = vhost_dev_check_owner(&n->dev);
 	if (err)
@@ -829,6 +833,7 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
 	struct vhost_vring_file backend;
 	u64 features;
 	int r;
+
 	switch (ioctl) {
 	case VHOST_NET_SET_BACKEND:
 		if (copy_from_user(&backend, argp, sizeof backend))

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index ade0568..b0cc7f8 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -41,8 +41,8 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 			    poll_table *pt)
 {
 	struct vhost_poll *poll;
-	poll = container_of(pt, struct vhost_poll, table);
 
+	poll = container_of(pt, struct vhost_poll, table);
 	poll->wqh = wqh;
 	add_wait_queue(wqh, &poll->wait);
 }
@@ -85,6 +85,7 @@ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
 void vhost_poll_start(struct vhost_poll *poll, struct file *file)
 {
 	unsigned long mask;
+
 	mask = file->f_op->poll(file, &poll->table);
 	if (mask)
 		vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
@@ -101,6 +102,7 @@ static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
 			       unsigned seq)
 {
 	int left;
+
 	spin_lock_irq(&dev->work_lock);
 	left = seq - work->done_seq;
 	spin_unlock_irq(&dev->work_lock);
@@ -222,6 +224,7 @@ static int vhost_worker(void *data)
 static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
 {
 	int i;
+
 	for (i = 0; i < dev->nvqs; ++i) {
 		dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect *
 					       UIO_MAXIOV, GFP_KERNEL);
@@ -235,6 +238,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
 			goto err_nomem;
 	}
 	return 0;
+
 err_nomem:
 	for (; i >= 0; --i) {
 		kfree(dev->vqs[i].indirect);
@@ -247,6 +251,7 @@ err_nomem:
 static void vhost_dev_free_iovecs(struct vhost_dev *dev)
 {
 	int i;
+
 	for (i = 0; i < dev->nvqs; ++i) {
 		kfree(dev->vqs[i].indirect);
 		dev->vqs[i].indirect = NULL;
@@ -296,26 +301,28 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
 }
 
 struct vhost_attach_cgroups_struct {
-        struct vhost_work work;
-        struct task_struct *owner;
-        int ret;
+	struct vhost_work work;
+	struct task_struct *owner;
+	int ret;
 };
 
 static void vhost_attach_cgroups_work(struct vhost_work *work)
 {
-        struct vhost_attach_cgroups_struct *s;
-        s = container_of(work, struct vhost_attach_cgroups_struct, work);
-        s->ret = cgroup_attach_task_all(s->owner, current);
+	struct vhost_attach_cgroups_struct *s;
+
+	s = container_of(work, struct vhost_attach_cgroups_struct, work);
+	s->ret = cgroup_attach_task_all(s->owner, current);
 }
 
 static int vhost_attach_cgroups(struct vhost_dev *dev)
 {
-        struct vhost_attach_cgroups_struct attach;
-        attach.owner = current;
-        vhost_work_init(&attach.work, vhost_attach_cgroups_work);
-        vhost_work_queue(dev, &attach.work);
-        vhost_work_flush(dev, &attach.work);
-        return attach.ret;
+	struct vhost_attach_cgroups_struct attach;
+
+	attach.owner = current;
+	vhost_work_init(&attach.work, vhost_attach_cgroups_work);
+	vhost_work_queue(dev, &attach.work);
+	vhost_work_flush(dev, &attach.work);
+	return attach.ret;
 }
 
 /* Caller should have device mutex */
@@ -323,11 +330,13 @@ static long vhost_dev_set_owner(struct vhost_dev *dev)
 {
 	struct task_struct *worker;
 	int err;
+
 	/* Is there an owner already? */
 	if (dev->mm) {
 		err = -EBUSY;
 		goto err_mm;
 	}
+
 	/* No owner, become one */
 	dev->mm = get_task_mm(current);
 	worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
@@ -380,6 +389,7 @@ long vhost_dev_reset_owner(struct vhost_dev *dev)
 void vhost_dev_cleanup(struct vhost_dev *dev)
 {
 	int i;
+
 	for (i = 0; i < dev->nvqs; ++i) {
 		if (dev->vqs[i].kick && dev->vqs[i].handle_kick) {
 			vhost_poll_stop(&dev->vqs[i].poll);
@@ -421,6 +431,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
 {
 	u64 a = addr / VHOST_PAGE_SIZE / 8;
+
 	/* Make sure 64 bit math will not overflow. */
 	if (a > ULONG_MAX - (unsigned long)log_base ||
 	    a + (unsigned long)log_base > ULONG_MAX)
@@ -461,6 +472,7 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
 			    int log_all)
 {
 	int i;
+
 	for (i = 0; i < d->nvqs; ++i) {
 		int ok;
 		mutex_lock(&d->vqs[i].mutex);
@@ -527,6 +539,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
 {
 	struct vhost_memory mem, *newmem, *oldmem;
 	unsigned long size = offsetof(struct vhost_memory, regions);
+
 	if (copy_from_user(&mem, m, size))
 		return -EFAULT;
 	if (mem.padding)
@@ -544,7 +557,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
 		return -EFAULT;
 	}
 
-	if (!memory_access_ok(d, newmem, vhost_has_feature(d, VHOST_F_LOG_ALL))) {
+	if (!memory_access_ok(d, newmem,
+			      vhost_has_feature(d, VHOST_F_LOG_ALL))) {
 		kfree(newmem);
 		return -EFAULT;
 	}
@@ -560,6 +574,7 @@ static int init_used(struct vhost_virtqueue *vq,
 		     struct vring_used __user *used)
 {
 	int r = put_user(vq->used_flags, &used->flags);
+
 	if (r)
 		return r;
 	return get_user(vq->last_used_idx, &used->idx);
@@ -849,6 +864,7 @@ static const struct vhost_memory_region *find_region(struct vhost_memory *mem,
 {
 	struct vhost_memory_region *reg;
 	int i;
+
 	/* linear search is not brilliant, but we really have on the order of 6
 	 * regions in practice */
 	for (i = 0; i < mem->nregions; ++i) {
@@ -871,6 +887,7 @@ static int set_bit_to_user(int nr, void __user *addr)
 	void *base;
 	int bit = nr + (log % PAGE_SIZE) * 8;
 	int r;
+
 	r = get_user_pages_fast(log, 1, 1, &page);
 	if (r < 0)
 		return r;
@@ -888,6 +905,7 @@ static int log_write(void __user *log_base,
 {
 	u64 write_page = write_address / VHOST_PAGE_SIZE;
 	int r;
+
 	if (!write_length)
 		return 0;
 	write_length += write_address % VHOST_PAGE_SIZE;
@@ -1037,8 +1055,8 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 			       i, count);
 			return -EINVAL;
 		}
-		if (unlikely(memcpy_fromiovec((unsigned char *)&desc, vq->indirect,
-					      sizeof desc))) {
+		if (unlikely(memcpy_fromiovec((unsigned char *)&desc,
+					      vq->indirect, sizeof desc))) {
 			vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
 			       i, (size_t)indirect->addr + i * sizeof desc);
 			return -EINVAL;
@@ -1317,6 +1335,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
 void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
 	__u16 flags;
+
 	/* Flush out used index updates. This is paired
 	 * with the barrier that the Guest executes when enabling
 	 * interrupts. */
@@ -1361,6 +1380,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
 {
 	u16 avail_idx;
 	int r;
+
 	if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
 		return false;
 	vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
@@ -1387,6 +1407,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
 void vhost_disable_notify(struct vhost_virtqueue *vq)
 {
 	int r;
+
 	if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
 		return;
 	vq->used_flags |= VRING_USED_F_NO_NOTIFY;
-- 
cgit v1.1

From fcc042a2806064ffcaed7a0c5cb710eca0e99108 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin"
Date: Sun, 6 Mar 2011 13:33:49 +0200
Subject: vhost: copy_from_user -> __copy_from_user

copy_from_user is pretty high on the perf top profile; replacing it
with __copy_from_user helps. It's also safe because we do access_ok
checks during setup.

Signed-off-by: Michael S. Tsirkin
---
 drivers/vhost/vhost.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index b0cc7f8..2ab2912 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1171,7 +1171,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 			       i, vq->num, head);
 			return -EINVAL;
 		}
-		ret = copy_from_user(&desc, vq->desc + i, sizeof desc);
+		ret = __copy_from_user(&desc, vq->desc + i, sizeof desc);
 		if (unlikely(ret)) {
 			vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
 			       i, vq->desc + i);
-- 
cgit v1.1
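
The log above rests on a general kernel pattern: copy_from_user() runs
an access_ok() range check on every call, while __copy_from_user()
assumes the caller already validated the range. vhost validates the
ring addresses once, when they are configured, so the per-descriptor
recheck on the hot path is redundant. Below is a minimal sketch of the
check-once, copy-many pattern; the helper names are hypothetical (not
from the patch) and it uses the three-argument access_ok() of this era:

    #include <linux/uaccess.h>
    #include <linux/virtio_ring.h>

    /* Hypothetical helpers illustrating the pattern. */
    static struct vring_desc __user *checked_desc;

    static int ring_setup(struct vring_desc __user *desc, unsigned int num)
    {
    	/* Validate the whole descriptor array once, at setup time. */
    	if (!access_ok(VERIFY_READ, desc, num * sizeof *desc))
    		return -EFAULT;
    	checked_desc = desc;
    	return 0;
    }

    static int ring_fetch(unsigned int i, struct vring_desc *desc)
    {
    	/* Hot path: the range check was already done in ring_setup(). */
    	return __copy_from_user(desc, checked_desc + i, sizeof *desc) ?
    	       -EFAULT : 0;
    }
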
From cfbdab951369f15de890597530076bf0119361be Mon Sep 17 00:00:00 2001
From: Jason Wang
Date: Mon, 17 Jan 2011 16:10:59 +0800
Subject: vhost-net: check the support of mergeable buffer outside the receive loop

No need to check for mergeable buffer support inside the receive loop,
as the whole of handle_rx() runs in the read-side critical region. So
this patch moves the check ahead of the receive loop.

Signed-off-by: Jason Wang
Signed-off-by: Michael S. Tsirkin
---
 drivers/vhost/net.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 59dad9f..9f57cd4 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -411,7 +411,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
 		.hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
 	};
 	size_t total_len = 0;
-	int err, headcount;
+	int err, headcount, mergeable;
 	size_t vhost_hlen, sock_hlen;
 	size_t vhost_len, sock_len;
 	/* TODO: check that we are running from vhost_worker? */
@@ -427,6 +427,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
 
 	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
 		vq->log : NULL;
+	mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF);
 
 	while ((sock_len = peek_head_len(sock->sk))) {
 		sock_len += sock_hlen;
@@ -476,7 +477,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
 			break;
 		}
 		/* TODO: Should check and handle checksum. */
-		if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) &&
+		if (likely(mergeable) &&
 		    memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount,
 				      offsetof(typeof(hdr), num_buffers),
 				      sizeof hdr.num_buffers)) {
-- 
cgit v1.1
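
The change above is a textbook loop-invariant hoist: the negotiated
feature bits cannot change while handle_rx() runs, because the whole
handler executes from the vhost worker, which is treated as a single
read-side critical section. A toy, self-contained illustration of the
shape of the transformation; every name here belongs to the sketch,
not to vhost:

    #include <stdbool.h>
    #include <stddef.h>

    #define likely(x) __builtin_expect(!!(x), 1)

    /* The flag is constant for the duration of the loop, so sample it
     * once and let likely() mark the expected fast path. */
    static size_t count_mergeable(const size_t *pkt_len, size_t n,
    			      bool feature_bit)
    {
    	const bool mergeable = feature_bit;	/* hoisted invariant */
    	size_t i, hits = 0;

    	for (i = 0; i < n; i++)
    		if (likely(mergeable) && pkt_len[i])
    			hits++;
    	return hits;
    }
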
*/ - break; - } - /* We don't need to be notified again. */ - if (out) { - vq_err(vq, "Unexpected descriptor format for RX: " - "out %d, int %d\n", - out, in); - break; - } - /* Skip header. TODO: support TSO/mergeable rx buffers. */ - s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in); - msg.msg_iovlen = in; - len = iov_length(vq->iov, in); - /* Sanity check */ - if (!len) { - vq_err(vq, "Unexpected header len for RX: " - "%zd expected %zd\n", - iov_length(vq->hdr, s), hdr_size); - break; - } - err = sock->ops->recvmsg(NULL, sock, &msg, - len, MSG_DONTWAIT | MSG_TRUNC); - /* TODO: Check specific error and bomb out unless EAGAIN? */ - if (err < 0) { - vhost_discard_vq_desc(vq, 1); - break; - } - /* TODO: Should check and handle checksum. */ - if (err > len) { - pr_debug("Discarded truncated rx packet: " - " len %d > %zd\n", err, len); - vhost_discard_vq_desc(vq, 1); - continue; - } - len = err; - err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size); - if (err) { - vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n", - vq->iov->iov_base, err); - break; - } - len += hdr_size; - vhost_add_used_and_signal(&net->dev, vq, head, len); - if (unlikely(vq_log)) - vhost_log_write(vq, vq_log, log, len); - total_len += len; - if (unlikely(total_len >= VHOST_NET_WEIGHT)) { - vhost_poll_queue(&vq->poll); - break; - } - } - - mutex_unlock(&vq->mutex); -} - -/* Expects to be always run from workqueue - which acts as - * read-size critical section for our kind of RCU. */ -static void handle_rx_mergeable(struct vhost_net *net) +static void handle_rx(struct vhost_net *net) { struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; unsigned uninitialized_var(in), log; @@ -433,7 +326,8 @@ static void handle_rx_mergeable(struct vhost_net *net) sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; headcount = get_rx_bufs(vq, vq->heads, vhost_len, - &in, vq_log, &log); + &in, vq_log, &log, + likely(mergeable) ? UIO_MAXIOV : 1); /* On error, stop handling until the next kick. */ if (unlikely(headcount < 0)) break; @@ -499,14 +393,6 @@ static void handle_rx_mergeable(struct vhost_net *net) mutex_unlock(&vq->mutex); } -static void handle_rx(struct vhost_net *net) -{ - if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF)) - handle_rx_mergeable(net); - else - handle_rx_big(net); -} - static void handle_tx_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, -- cgit v1.1 From 783e3988544b94ff3918666b9f36866ac547fba1 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 17 Jan 2011 16:11:17 +0800 Subject: vhost: lock receive queue, not the socket vhost takes a sock lock to try and prevent the skb from being pulled from the receive queue after skb_peek. However this is not the right lock to use for that, sk_receive_queue.lock is. Fix that up. Signed-off-by: Michael S. 
From 783e3988544b94ff3918666b9f36866ac547fba1 Mon Sep 17 00:00:00 2001
From: Jason Wang
Date: Mon, 17 Jan 2011 16:11:17 +0800
Subject: vhost: lock receive queue, not the socket

vhost takes a sock lock to try to prevent the skb from being pulled
from the receive queue after skb_peek. However, that is not the right
lock to use for that; sk_receive_queue.lock is. Fix that up.

Signed-off-by: Michael S. Tsirkin
---
 drivers/vhost/net.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 0329c41..5720301 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -213,12 +213,13 @@ static int peek_head_len(struct sock *sk)
 {
 	struct sk_buff *head;
 	int len = 0;
+	unsigned long flags;
 
-	lock_sock(sk);
+	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
 	head = skb_peek(&sk->sk_receive_queue);
-	if (head)
+	if (likely(head))
 		len = head->len;
-	release_sock(sk);
+	spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags);
 	return len;
 }
-- 
cgit v1.1

From de4d768a428d9de943dd6dc82bcd61742955cb6e Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin"
Date: Sun, 13 Mar 2011 23:00:52 +0200
Subject: vhost-net: remove unlocked use of receive_queue

Use of skb_queue_empty(&sock->sk->sk_receive_queue) without taking the
sk_receive_queue.lock is unsafe or useless. Take it out.

Reported-by: Eric Dumazet
Signed-off-by: Michael S. Tsirkin
---
 drivers/vhost/net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 5720301..2f7c76a 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -311,7 +311,7 @@ static void handle_rx(struct vhost_net *net)
 	/* TODO: check that we are running from vhost_worker? */
 	struct socket *sock = rcu_dereference_check(vq->private_data, 1);
 
-	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
+	if (!sock)
 		return;
 
 	mutex_lock(&vq->mutex);
-- 
cgit v1.1
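
Read together, the last two patches converge on one rule: the lock
that makes peeking at sk_receive_queue reliable is the queue's own
spinlock, sk_receive_queue.lock, since that is what the networking
core holds while queueing and dequeueing skbs; lock_sock() does not
serialize against that, and an unlocked skb_queue_empty() is at best a
stale hint. A sketch of the resulting idiom, essentially what
peek_head_len() does after these patches:

    #include <linux/skbuff.h>
    #include <net/sock.h>

    /* Return the length of the first queued skb, or 0 if none. The
     * queue lock is taken irqsave because the network stack can touch
     * the queue from softirq context. */
    static int first_pkt_len(struct sock *sk)
    {
    	struct sk_buff *head;
    	unsigned long flags;
    	int len = 0;

    	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
    	head = skb_peek(&sk->sk_receive_queue);
    	if (head)
    		len = head->len;
    	spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags);
    	return len;
    }
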