diff options
author | Tejun Heo <tj@kernel.org> | 2010-06-02 20:40:00 +0200 |
---|---|---|
committer | Michael S. Tsirkin <mst@redhat.com> | 2010-07-28 15:44:53 +0300 |
commit | c23f3445e68e1db0e74099f264bc5ff5d55ebdeb (patch) | |
tree | 0a8e22e9a10c2978777954a022d721eb02e622be /drivers/vhost/vhost.c | |
parent | 4cfa580e7eebb8694b875d2caff3b989ada2efac (diff) | |
download | op-kernel-dev-c23f3445e68e1db0e74099f264bc5ff5d55ebdeb.zip op-kernel-dev-c23f3445e68e1db0e74099f264bc5ff5d55ebdeb.tar.gz |
vhost: replace vhost_workqueue with per-vhost kthread
Replace vhost_workqueue with per-vhost kthread. Other than callback
argument change from struct work_struct * to struct vhost_work *,
there's no visible change to vhost_poll_*() interface.
This conversion is to make each vhost use a dedicated kthread so that
resource control via cgroup can be applied.
Partially based on Sridhar Samudrala's patch.
* Updated to use sub structure vhost_work instead of directly using
vhost_poll at Michael's suggestion.
* Added flusher wake_up() optimization at Michael's suggestion.
Changes by MST:
* Converted atomics/barrier use to a spinlock.
* Create thread on SET_OWNER
* Fix flushing
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Cc: Sridhar Samudrala <samudrala.sridhar@gmail.com>
Diffstat (limited to 'drivers/vhost/vhost.c')
-rw-r--r-- | drivers/vhost/vhost.c | 143 |
1 files changed, 115 insertions, 28 deletions
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 248ed2d..30d93c2 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -17,12 +17,12 @@ #include <linux/mm.h> #include <linux/miscdevice.h> #include <linux/mutex.h> -#include <linux/workqueue.h> #include <linux/rcupdate.h> #include <linux/poll.h> #include <linux/file.h> #include <linux/highmem.h> #include <linux/slab.h> +#include <linux/kthread.h> #include <linux/net.h> #include <linux/if_packet.h> @@ -37,8 +37,6 @@ enum { VHOST_MEMORY_F_LOG = 0x1, }; -static struct workqueue_struct *vhost_workqueue; - static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { @@ -52,23 +50,31 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) { - struct vhost_poll *poll; - poll = container_of(wait, struct vhost_poll, wait); + struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait); + if (!((unsigned long)key & poll->mask)) return 0; - queue_work(vhost_workqueue, &poll->work); + vhost_poll_queue(poll); return 0; } /* Init poll structure */ -void vhost_poll_init(struct vhost_poll *poll, work_func_t func, - unsigned long mask) +void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, + unsigned long mask, struct vhost_dev *dev) { - INIT_WORK(&poll->work, func); + struct vhost_work *work = &poll->work; + init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup); init_poll_funcptr(&poll->table, vhost_poll_func); poll->mask = mask; + poll->dev = dev; + + INIT_LIST_HEAD(&work->node); + work->fn = fn; + init_waitqueue_head(&work->done); + work->flushing = 0; + work->queue_seq = work->done_seq = 0; } /* Start polling a file. We add ourselves to file's wait queue. The caller must @@ -92,12 +98,40 @@ void vhost_poll_stop(struct vhost_poll *poll) * locks that are also used by the callback. */ void vhost_poll_flush(struct vhost_poll *poll) { - flush_work(&poll->work); + struct vhost_work *work = &poll->work; + unsigned seq; + int left; + int flushing; + + spin_lock_irq(&poll->dev->work_lock); + seq = work->queue_seq; + work->flushing++; + spin_unlock_irq(&poll->dev->work_lock); + wait_event(work->done, ({ + spin_lock_irq(&poll->dev->work_lock); + left = seq - work->done_seq <= 0; + spin_unlock_irq(&poll->dev->work_lock); + left; + })); + spin_lock_irq(&poll->dev->work_lock); + flushing = --work->flushing; + spin_unlock_irq(&poll->dev->work_lock); + BUG_ON(flushing < 0); } void vhost_poll_queue(struct vhost_poll *poll) { - queue_work(vhost_workqueue, &poll->work); + struct vhost_dev *dev = poll->dev; + struct vhost_work *work = &poll->work; + unsigned long flags; + + spin_lock_irqsave(&dev->work_lock, flags); + if (list_empty(&work->node)) { + list_add_tail(&work->node, &dev->work_list); + work->queue_seq++; + wake_up_process(dev->worker); + } + spin_unlock_irqrestore(&dev->work_lock, flags); } static void vhost_vq_reset(struct vhost_dev *dev, @@ -125,10 +159,51 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->log_ctx = NULL; } +static int vhost_worker(void *data) +{ + struct vhost_dev *dev = data; + struct vhost_work *work = NULL; + unsigned uninitialized_var(seq); + + for (;;) { + /* mb paired w/ kthread_stop */ + set_current_state(TASK_INTERRUPTIBLE); + + spin_lock_irq(&dev->work_lock); + if (work) { + work->done_seq = seq; + if (work->flushing) + wake_up_all(&work->done); + } + + if (kthread_should_stop()) { + spin_unlock_irq(&dev->work_lock); + __set_current_state(TASK_RUNNING); + return 0; + } + if (!list_empty(&dev->work_list)) { + work = list_first_entry(&dev->work_list, + struct vhost_work, node); + list_del_init(&work->node); + seq = work->queue_seq; + } else + work = NULL; + spin_unlock_irq(&dev->work_lock); + + if (work) { + __set_current_state(TASK_RUNNING); + work->fn(work); + } else + schedule(); + + } +} + long vhost_dev_init(struct vhost_dev *dev, struct vhost_virtqueue *vqs, int nvqs) { int i; + dev->vqs = vqs; dev->nvqs = nvqs; mutex_init(&dev->mutex); @@ -136,6 +211,9 @@ long vhost_dev_init(struct vhost_dev *dev, dev->log_file = NULL; dev->memory = NULL; dev->mm = NULL; + spin_lock_init(&dev->work_lock); + INIT_LIST_HEAD(&dev->work_list); + dev->worker = NULL; for (i = 0; i < dev->nvqs; ++i) { dev->vqs[i].dev = dev; @@ -143,9 +221,9 @@ long vhost_dev_init(struct vhost_dev *dev, vhost_vq_reset(dev, dev->vqs + i); if (dev->vqs[i].handle_kick) vhost_poll_init(&dev->vqs[i].poll, - dev->vqs[i].handle_kick, - POLLIN); + dev->vqs[i].handle_kick, POLLIN, dev); } + return 0; } @@ -159,12 +237,31 @@ long vhost_dev_check_owner(struct vhost_dev *dev) /* Caller should have device mutex */ static long vhost_dev_set_owner(struct vhost_dev *dev) { + struct task_struct *worker; + int err; /* Is there an owner already? */ - if (dev->mm) - return -EBUSY; + if (dev->mm) { + err = -EBUSY; + goto err_mm; + } /* No owner, become one */ dev->mm = get_task_mm(current); + worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid); + if (IS_ERR(worker)) { + err = PTR_ERR(worker); + goto err_worker; + } + + dev->worker = worker; + wake_up_process(worker); /* avoid contributing to loadavg */ + return 0; +err_worker: + if (dev->mm) + mmput(dev->mm); + dev->mm = NULL; +err_mm: + return err; } /* Caller should have device mutex */ @@ -217,6 +314,9 @@ void vhost_dev_cleanup(struct vhost_dev *dev) if (dev->mm) mmput(dev->mm); dev->mm = NULL; + + WARN_ON(!list_empty(&dev->work_list)); + kthread_stop(dev->worker); } static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) @@ -1115,16 +1215,3 @@ void vhost_disable_notify(struct vhost_virtqueue *vq) vq_err(vq, "Failed to enable notification at %p: %d\n", &vq->used->flags, r); } - -int vhost_init(void) -{ - vhost_workqueue = create_singlethread_workqueue("vhost"); - if (!vhost_workqueue) - return -ENOMEM; - return 0; -} - -void vhost_cleanup(void) -{ - destroy_workqueue(vhost_workqueue); -} |