diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-07 21:04:56 +0900 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-07 21:04:56 +0900 |
commit | dc92b1f9ab1e1665dbbc56911782358e7f9a49f9 (patch) | |
tree | 965ccb4a0f2c24a8b24adce415f6506246d07a90 /drivers | |
parent | 5e090ed7af10729a396a25df43d69a236e789736 (diff) | |
parent | ca16f580a5db7e60bfafe59a50bb133bd3347491 (diff) | |
download | op-kernel-dev-dc92b1f9ab1e1665dbbc56911782358e7f9a49f9.zip op-kernel-dev-dc92b1f9ab1e1665dbbc56911782358e7f9a49f9.tar.gz |
Merge branch 'virtio-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux
Pull virtio changes from Rusty Russell:
"New workflow: same git trees pulled by linux-next get sent straight to
Linus. Git is awkward at shuffling patches compared with quilt or mq,
but that doesn't happen often once things get into my -next branch."
* 'virtio-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux: (24 commits)
lguest: fix occasional crash in example launcher.
virtio-blk: Disable callback in virtblk_done()
virtio_mmio: Don't attempt to create empty virtqueues
virtio_mmio: fix off by one error allocating queue
drivers/virtio/virtio_pci.c: fix error return code
virtio: don't crash when device is buggy
virtio: remove CONFIG_VIRTIO_RING
virtio: add help to CONFIG_VIRTIO option.
virtio: support reserved vqs
virtio: introduce an API to set affinity for a virtqueue
virtio-ring: move queue_index to vring_virtqueue
virtio_balloon: not EXPERIMENTAL any more.
virtio-balloon: dependency fix
virtio-blk: fix NULL checking in virtblk_alloc_req()
virtio-blk: Add REQ_FLUSH and REQ_FUA support to bio path
virtio-blk: Add bio-based IO path for virtio-blk
virtio: console: fix error handling in init() function
tools: Fix pthread flag for Makefile of trace-agent used by virtio-trace
tools: Add guest trace agent as a user tool
virtio/console: Allocate scatterlist according to the current pipe size
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/block/virtio_blk.c | 306 | ||||
-rw-r--r-- | drivers/char/virtio_console.c | 198 | ||||
-rw-r--r-- | drivers/lguest/lguest_device.c | 5 | ||||
-rw-r--r-- | drivers/remoteproc/remoteproc_virtio.c | 5 | ||||
-rw-r--r-- | drivers/rpmsg/Kconfig | 1 | ||||
-rw-r--r-- | drivers/s390/kvm/kvm_virtio.c | 5 | ||||
-rw-r--r-- | drivers/virtio/Kconfig | 17 | ||||
-rw-r--r-- | drivers/virtio/Makefile | 3 | ||||
-rw-r--r-- | drivers/virtio/virtio.c | 2 | ||||
-rw-r--r-- | drivers/virtio/virtio_mmio.c | 29 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci.c | 68 | ||||
-rw-r--r-- | drivers/virtio/virtio_ring.c | 14 |
12 files changed, 557 insertions, 96 deletions
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index c0bbeb4..0bdde8f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -14,6 +14,9 @@ #define PART_BITS 4 +static bool use_bio; +module_param(use_bio, bool, S_IRUGO); + static int major; static DEFINE_IDA(vd_index_ida); @@ -23,6 +26,7 @@ struct virtio_blk { struct virtio_device *vdev; struct virtqueue *vq; + wait_queue_head_t queue_wait; /* The disk structure for the kernel. */ struct gendisk *disk; @@ -51,53 +55,244 @@ struct virtio_blk struct virtblk_req { struct request *req; + struct bio *bio; struct virtio_blk_outhdr out_hdr; struct virtio_scsi_inhdr in_hdr; + struct work_struct work; + struct virtio_blk *vblk; + int flags; u8 status; + struct scatterlist sg[]; +}; + +enum { + VBLK_IS_FLUSH = 1, + VBLK_REQ_FLUSH = 2, + VBLK_REQ_DATA = 4, + VBLK_REQ_FUA = 8, }; -static void blk_done(struct virtqueue *vq) +static inline int virtblk_result(struct virtblk_req *vbr) +{ + switch (vbr->status) { + case VIRTIO_BLK_S_OK: + return 0; + case VIRTIO_BLK_S_UNSUPP: + return -ENOTTY; + default: + return -EIO; + } +} + +static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk, + gfp_t gfp_mask) { - struct virtio_blk *vblk = vq->vdev->priv; struct virtblk_req *vbr; - unsigned int len; - unsigned long flags; - spin_lock_irqsave(vblk->disk->queue->queue_lock, flags); - while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { - int error; + vbr = mempool_alloc(vblk->pool, gfp_mask); + if (!vbr) + return NULL; - switch (vbr->status) { - case VIRTIO_BLK_S_OK: - error = 0; - break; - case VIRTIO_BLK_S_UNSUPP: - error = -ENOTTY; - break; - default: - error = -EIO; + vbr->vblk = vblk; + if (use_bio) + sg_init_table(vbr->sg, vblk->sg_elems); + + return vbr; +} + +static void virtblk_add_buf_wait(struct virtio_blk *vblk, + struct virtblk_req *vbr, + unsigned long out, + unsigned long in) +{ + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait_exclusive(&vblk->queue_wait, &wait, + TASK_UNINTERRUPTIBLE); + + spin_lock_irq(vblk->disk->queue->queue_lock); + if (virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr, + GFP_ATOMIC) < 0) { + spin_unlock_irq(vblk->disk->queue->queue_lock); + io_schedule(); + } else { + virtqueue_kick(vblk->vq); + spin_unlock_irq(vblk->disk->queue->queue_lock); break; } - switch (vbr->req->cmd_type) { - case REQ_TYPE_BLOCK_PC: - vbr->req->resid_len = vbr->in_hdr.residual; - vbr->req->sense_len = vbr->in_hdr.sense_len; - vbr->req->errors = vbr->in_hdr.errors; - break; - case REQ_TYPE_SPECIAL: - vbr->req->errors = (error != 0); - break; - default: - break; + } + + finish_wait(&vblk->queue_wait, &wait); +} + +static inline void virtblk_add_req(struct virtblk_req *vbr, + unsigned int out, unsigned int in) +{ + struct virtio_blk *vblk = vbr->vblk; + + spin_lock_irq(vblk->disk->queue->queue_lock); + if (unlikely(virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr, + GFP_ATOMIC) < 0)) { + spin_unlock_irq(vblk->disk->queue->queue_lock); + virtblk_add_buf_wait(vblk, vbr, out, in); + return; + } + virtqueue_kick(vblk->vq); + spin_unlock_irq(vblk->disk->queue->queue_lock); +} + +static int virtblk_bio_send_flush(struct virtblk_req *vbr) +{ + unsigned int out = 0, in = 0; + + vbr->flags |= VBLK_IS_FLUSH; + vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; + vbr->out_hdr.sector = 0; + vbr->out_hdr.ioprio = 0; + sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr)); + sg_set_buf(&vbr->sg[out + in++], &vbr->status, sizeof(vbr->status)); + + virtblk_add_req(vbr, out, in); + + return 0; +} + +static int virtblk_bio_send_data(struct virtblk_req *vbr) +{ + struct virtio_blk *vblk = vbr->vblk; + unsigned int num, out = 0, in = 0; + struct bio *bio = vbr->bio; + + vbr->flags &= ~VBLK_IS_FLUSH; + vbr->out_hdr.type = 0; + vbr->out_hdr.sector = bio->bi_sector; + vbr->out_hdr.ioprio = bio_prio(bio); + + sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr)); + + num = blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg + out); + + sg_set_buf(&vbr->sg[num + out + in++], &vbr->status, + sizeof(vbr->status)); + + if (num) { + if (bio->bi_rw & REQ_WRITE) { + vbr->out_hdr.type |= VIRTIO_BLK_T_OUT; + out += num; + } else { + vbr->out_hdr.type |= VIRTIO_BLK_T_IN; + in += num; } + } + + virtblk_add_req(vbr, out, in); + + return 0; +} + +static void virtblk_bio_send_data_work(struct work_struct *work) +{ + struct virtblk_req *vbr; + + vbr = container_of(work, struct virtblk_req, work); + + virtblk_bio_send_data(vbr); +} + +static void virtblk_bio_send_flush_work(struct work_struct *work) +{ + struct virtblk_req *vbr; + + vbr = container_of(work, struct virtblk_req, work); + + virtblk_bio_send_flush(vbr); +} + +static inline void virtblk_request_done(struct virtblk_req *vbr) +{ + struct virtio_blk *vblk = vbr->vblk; + struct request *req = vbr->req; + int error = virtblk_result(vbr); + + if (req->cmd_type == REQ_TYPE_BLOCK_PC) { + req->resid_len = vbr->in_hdr.residual; + req->sense_len = vbr->in_hdr.sense_len; + req->errors = vbr->in_hdr.errors; + } else if (req->cmd_type == REQ_TYPE_SPECIAL) { + req->errors = (error != 0); + } + + __blk_end_request_all(req, error); + mempool_free(vbr, vblk->pool); +} + +static inline void virtblk_bio_flush_done(struct virtblk_req *vbr) +{ + struct virtio_blk *vblk = vbr->vblk; + + if (vbr->flags & VBLK_REQ_DATA) { + /* Send out the actual write data */ + INIT_WORK(&vbr->work, virtblk_bio_send_data_work); + queue_work(virtblk_wq, &vbr->work); + } else { + bio_endio(vbr->bio, virtblk_result(vbr)); + mempool_free(vbr, vblk->pool); + } +} + +static inline void virtblk_bio_data_done(struct virtblk_req *vbr) +{ + struct virtio_blk *vblk = vbr->vblk; - __blk_end_request_all(vbr->req, error); + if (unlikely(vbr->flags & VBLK_REQ_FUA)) { + /* Send out a flush before end the bio */ + vbr->flags &= ~VBLK_REQ_DATA; + INIT_WORK(&vbr->work, virtblk_bio_send_flush_work); + queue_work(virtblk_wq, &vbr->work); + } else { + bio_endio(vbr->bio, virtblk_result(vbr)); mempool_free(vbr, vblk->pool); } +} + +static inline void virtblk_bio_done(struct virtblk_req *vbr) +{ + if (unlikely(vbr->flags & VBLK_IS_FLUSH)) + virtblk_bio_flush_done(vbr); + else + virtblk_bio_data_done(vbr); +} + +static void virtblk_done(struct virtqueue *vq) +{ + struct virtio_blk *vblk = vq->vdev->priv; + bool bio_done = false, req_done = false; + struct virtblk_req *vbr; + unsigned long flags; + unsigned int len; + + spin_lock_irqsave(vblk->disk->queue->queue_lock, flags); + do { + virtqueue_disable_cb(vq); + while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { + if (vbr->bio) { + virtblk_bio_done(vbr); + bio_done = true; + } else { + virtblk_request_done(vbr); + req_done = true; + } + } + } while (!virtqueue_enable_cb(vq)); /* In case queue is stopped waiting for more buffers. */ - blk_start_queue(vblk->disk->queue); + if (req_done) + blk_start_queue(vblk->disk->queue); spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags); + + if (bio_done) + wake_up(&vblk->queue_wait); } static bool do_req(struct request_queue *q, struct virtio_blk *vblk, @@ -106,13 +301,13 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, unsigned long num, out = 0, in = 0; struct virtblk_req *vbr; - vbr = mempool_alloc(vblk->pool, GFP_ATOMIC); + vbr = virtblk_alloc_req(vblk, GFP_ATOMIC); if (!vbr) /* When another request finishes we'll try again. */ return false; vbr->req = req; - + vbr->bio = NULL; if (req->cmd_flags & REQ_FLUSH) { vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; vbr->out_hdr.sector = 0; @@ -172,7 +367,8 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, } } - if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) { + if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, + GFP_ATOMIC) < 0) { mempool_free(vbr, vblk->pool); return false; } @@ -180,7 +376,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, return true; } -static void do_virtblk_request(struct request_queue *q) +static void virtblk_request(struct request_queue *q) { struct virtio_blk *vblk = q->queuedata; struct request *req; @@ -203,6 +399,34 @@ static void do_virtblk_request(struct request_queue *q) virtqueue_kick(vblk->vq); } +static void virtblk_make_request(struct request_queue *q, struct bio *bio) +{ + struct virtio_blk *vblk = q->queuedata; + struct virtblk_req *vbr; + + BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems); + + vbr = virtblk_alloc_req(vblk, GFP_NOIO); + if (!vbr) { + bio_endio(bio, -ENOMEM); + return; + } + + vbr->bio = bio; + vbr->flags = 0; + if (bio->bi_rw & REQ_FLUSH) + vbr->flags |= VBLK_REQ_FLUSH; + if (bio->bi_rw & REQ_FUA) + vbr->flags |= VBLK_REQ_FUA; + if (bio->bi_size) + vbr->flags |= VBLK_REQ_DATA; + + if (unlikely(vbr->flags & VBLK_REQ_FLUSH)) + virtblk_bio_send_flush(vbr); + else + virtblk_bio_send_data(vbr); +} + /* return id (s/n) string for *disk to *id_str */ static int virtblk_get_id(struct gendisk *disk, char *id_str) @@ -360,7 +584,7 @@ static int init_vq(struct virtio_blk *vblk) int err = 0; /* We expect one virtqueue, for output. */ - vblk->vq = virtio_find_single_vq(vblk->vdev, blk_done, "requests"); + vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests"); if (IS_ERR(vblk->vq)) err = PTR_ERR(vblk->vq); @@ -477,6 +701,8 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) struct virtio_blk *vblk; struct request_queue *q; int err, index; + int pool_size; + u64 cap; u32 v, blk_size, sg_elems, opt_io_size; u16 min_io_size; @@ -506,10 +732,12 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) goto out_free_index; } + init_waitqueue_head(&vblk->queue_wait); vblk->vdev = vdev; vblk->sg_elems = sg_elems; sg_init_table(vblk->sg, vblk->sg_elems); mutex_init(&vblk->config_lock); + INIT_WORK(&vblk->config_work, virtblk_config_changed_work); vblk->config_enable = true; @@ -517,7 +745,10 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) if (err) goto out_free_vblk; - vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req)); + pool_size = sizeof(struct virtblk_req); + if (use_bio) + pool_size += sizeof(struct scatterlist) * sg_elems; + vblk->pool = mempool_create_kmalloc_pool(1, pool_size); if (!vblk->pool) { err = -ENOMEM; goto out_free_vq; @@ -530,12 +761,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) goto out_mempool; } - q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL); + q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL); if (!q) { err = -ENOMEM; goto out_put_disk; } + if (use_bio) + blk_queue_make_request(q, virtblk_make_request); q->queuedata = vblk; virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); @@ -620,7 +853,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) if (!err && opt_io_size) blk_queue_io_opt(q, blk_size * opt_io_size); - add_disk(vblk->disk); err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); if (err) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 060a672..8ab9c3d 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -24,6 +24,8 @@ #include <linux/err.h> #include <linux/freezer.h> #include <linux/fs.h> +#include <linux/splice.h> +#include <linux/pagemap.h> #include <linux/init.h> #include <linux/list.h> #include <linux/poll.h> @@ -474,26 +476,53 @@ static ssize_t send_control_msg(struct port *port, unsigned int event, return 0; } +struct buffer_token { + union { + void *buf; + struct scatterlist *sg; + } u; + /* If sgpages == 0 then buf is used, else sg is used */ + unsigned int sgpages; +}; + +static void reclaim_sg_pages(struct scatterlist *sg, unsigned int nrpages) +{ + int i; + struct page *page; + + for (i = 0; i < nrpages; i++) { + page = sg_page(&sg[i]); + if (!page) + break; + put_page(page); + } + kfree(sg); +} + /* Callers must take the port->outvq_lock */ static void reclaim_consumed_buffers(struct port *port) { - void *buf; + struct buffer_token *tok; unsigned int len; if (!port->portdev) { /* Device has been unplugged. vqs are already gone. */ return; } - while ((buf = virtqueue_get_buf(port->out_vq, &len))) { - kfree(buf); + while ((tok = virtqueue_get_buf(port->out_vq, &len))) { + if (tok->sgpages) + reclaim_sg_pages(tok->u.sg, tok->sgpages); + else + kfree(tok->u.buf); + kfree(tok); port->outvq_full = false; } } -static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count, - bool nonblock) +static ssize_t __send_to_port(struct port *port, struct scatterlist *sg, + int nents, size_t in_count, + struct buffer_token *tok, bool nonblock) { - struct scatterlist sg[1]; struct virtqueue *out_vq; ssize_t ret; unsigned long flags; @@ -505,8 +534,7 @@ static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count, reclaim_consumed_buffers(port); - sg_init_one(sg, in_buf, in_count); - ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf, GFP_ATOMIC); + ret = virtqueue_add_buf(out_vq, sg, nents, 0, tok, GFP_ATOMIC); /* Tell Host to go! */ virtqueue_kick(out_vq); @@ -544,6 +572,37 @@ done: return in_count; } +static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count, + bool nonblock) +{ + struct scatterlist sg[1]; + struct buffer_token *tok; + + tok = kmalloc(sizeof(*tok), GFP_ATOMIC); + if (!tok) + return -ENOMEM; + tok->sgpages = 0; + tok->u.buf = in_buf; + + sg_init_one(sg, in_buf, in_count); + + return __send_to_port(port, sg, 1, in_count, tok, nonblock); +} + +static ssize_t send_pages(struct port *port, struct scatterlist *sg, int nents, + size_t in_count, bool nonblock) +{ + struct buffer_token *tok; + + tok = kmalloc(sizeof(*tok), GFP_ATOMIC); + if (!tok) + return -ENOMEM; + tok->sgpages = nents; + tok->u.sg = sg; + + return __send_to_port(port, sg, nents, in_count, tok, nonblock); +} + /* * Give out the data that's requested from the buffer that we have * queued up. @@ -665,6 +724,26 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf, return fill_readbuf(port, ubuf, count, true); } +static int wait_port_writable(struct port *port, bool nonblock) +{ + int ret; + + if (will_write_block(port)) { + if (nonblock) + return -EAGAIN; + + ret = wait_event_freezable(port->waitqueue, + !will_write_block(port)); + if (ret < 0) + return ret; + } + /* Port got hot-unplugged. */ + if (!port->guest_connected) + return -ENODEV; + + return 0; +} + static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, size_t count, loff_t *offp) { @@ -681,18 +760,9 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, nonblock = filp->f_flags & O_NONBLOCK; - if (will_write_block(port)) { - if (nonblock) - return -EAGAIN; - - ret = wait_event_freezable(port->waitqueue, - !will_write_block(port)); - if (ret < 0) - return ret; - } - /* Port got hot-unplugged. */ - if (!port->guest_connected) - return -ENODEV; + ret = wait_port_writable(port, nonblock); + if (ret < 0) + return ret; count = min((size_t)(32 * 1024), count); @@ -725,6 +795,93 @@ out: return ret; } +struct sg_list { + unsigned int n; + unsigned int size; + size_t len; + struct scatterlist *sg; +}; + +static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf, + struct splice_desc *sd) +{ + struct sg_list *sgl = sd->u.data; + unsigned int offset, len; + + if (sgl->n == sgl->size) + return 0; + + /* Try lock this page */ + if (buf->ops->steal(pipe, buf) == 0) { + /* Get reference and unlock page for moving */ + get_page(buf->page); + unlock_page(buf->page); + + len = min(buf->len, sd->len); + sg_set_page(&(sgl->sg[sgl->n]), buf->page, len, buf->offset); + } else { + /* Failback to copying a page */ + struct page *page = alloc_page(GFP_KERNEL); + char *src = buf->ops->map(pipe, buf, 1); + char *dst; + + if (!page) + return -ENOMEM; + dst = kmap(page); + + offset = sd->pos & ~PAGE_MASK; + + len = sd->len; + if (len + offset > PAGE_SIZE) + len = PAGE_SIZE - offset; + + memcpy(dst + offset, src + buf->offset, len); + + kunmap(page); + buf->ops->unmap(pipe, buf, src); + + sg_set_page(&(sgl->sg[sgl->n]), page, len, offset); + } + sgl->n++; + sgl->len += len; + + return len; +} + +/* Faster zero-copy write by splicing */ +static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, + struct file *filp, loff_t *ppos, + size_t len, unsigned int flags) +{ + struct port *port = filp->private_data; + struct sg_list sgl; + ssize_t ret; + struct splice_desc sd = { + .total_len = len, + .flags = flags, + .pos = *ppos, + .u.data = &sgl, + }; + + ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK); + if (ret < 0) + return ret; + + sgl.n = 0; + sgl.len = 0; + sgl.size = pipe->nrbufs; + sgl.sg = kmalloc(sizeof(struct scatterlist) * sgl.size, GFP_KERNEL); + if (unlikely(!sgl.sg)) + return -ENOMEM; + + sg_init_table(sgl.sg, sgl.size); + ret = __splice_from_pipe(pipe, &sd, pipe_to_sg); + if (likely(ret > 0)) + ret = send_pages(port, sgl.sg, sgl.n, sgl.len, true); + + return ret; +} + static unsigned int port_fops_poll(struct file *filp, poll_table *wait) { struct port *port; @@ -856,6 +1013,7 @@ static const struct file_operations port_fops = { .open = port_fops_open, .read = port_fops_read, .write = port_fops_write, + .splice_write = port_fops_splice_write, .poll = port_fops_poll, .release = port_fops_release, .fasync = port_fops_fasync, diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 9e8388e..fc92ccb 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -263,6 +263,9 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, struct virtqueue *vq; int err; + if (!name) + return NULL; + /* We must have this many virtqueues. */ if (index >= ldev->desc->num_vq) return ERR_PTR(-ENOENT); @@ -296,7 +299,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, * to 'true': the host just a(nother) SMP CPU, so we only need inter-cpu * barriers. */ - vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, vdev, + vq = vring_new_virtqueue(index, lvq->config.num, LGUEST_VRING_ALIGN, vdev, true, lvq->pages, lg_notify, callback, name); if (!vq) { err = -ENOMEM; diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index 3541b44..e7a4780 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -84,6 +84,9 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev, if (id >= ARRAY_SIZE(rvdev->vring)) return ERR_PTR(-EINVAL); + if (!name) + return NULL; + ret = rproc_alloc_vring(rvdev, id); if (ret) return ERR_PTR(ret); @@ -103,7 +106,7 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev, * Create the new vq, and tell virtio we're not interested in * the 'weak' smp barriers, since we're talking with a real device. */ - vq = vring_new_virtqueue(len, rvring->align, vdev, false, addr, + vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, addr, rproc_virtio_notify, callback, name); if (!vq) { dev_err(dev, "vring_new_virtqueue %s failed\n", name); diff --git a/drivers/rpmsg/Kconfig b/drivers/rpmsg/Kconfig index 32aead6..2bd911f 100644 --- a/drivers/rpmsg/Kconfig +++ b/drivers/rpmsg/Kconfig @@ -4,7 +4,6 @@ menu "Rpmsg drivers (EXPERIMENTAL)" config RPMSG tristate select VIRTIO - select VIRTIO_RING depends on EXPERIMENTAL endmenu diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 47cccd5..7dabef6 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -190,6 +190,9 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev, if (index >= kdev->desc->num_vq) return ERR_PTR(-ENOENT); + if (!name) + return NULL; + config = kvm_vq_config(kdev->desc)+index; err = vmem_add_mapping(config->address, @@ -198,7 +201,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev, if (err) goto out; - vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN, + vq = vring_new_virtqueue(index, config->num, KVM_S390_VIRTIO_RING_ALIGN, vdev, true, (void *) config->address, kvm_notify, callback, name); if (!vq) { diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index f38b17a..8d5bddb 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -1,11 +1,9 @@ -# Virtio always gets selected by whoever wants it. config VIRTIO tristate - -# Similarly the virtio ring implementation. -config VIRTIO_RING - tristate - depends on VIRTIO + ---help--- + This option is selected by any driver which implements the virtio + bus, such as CONFIG_VIRTIO_PCI, CONFIG_VIRTIO_MMIO, CONFIG_LGUEST, + CONFIG_RPMSG or CONFIG_S390_GUEST. menu "Virtio drivers" @@ -13,7 +11,6 @@ config VIRTIO_PCI tristate "PCI driver for virtio devices (EXPERIMENTAL)" depends on PCI && EXPERIMENTAL select VIRTIO - select VIRTIO_RING ---help--- This drivers provides support for virtio based paravirtual device drivers over PCI. This requires that your VMM has appropriate PCI @@ -26,9 +23,8 @@ config VIRTIO_PCI If unsure, say M. config VIRTIO_BALLOON - tristate "Virtio balloon driver (EXPERIMENTAL)" - select VIRTIO - select VIRTIO_RING + tristate "Virtio balloon driver" + depends on VIRTIO ---help--- This driver supports increasing and decreasing the amount of memory within a KVM guest. @@ -39,7 +35,6 @@ config VIRTIO_BALLOON tristate "Platform bus driver for memory mapped virtio devices (EXPERIMENTAL)" depends on HAS_IOMEM && EXPERIMENTAL select VIRTIO - select VIRTIO_RING ---help--- This drivers provides support for memory mapped virtio platform device driver. diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index 5a4c63c..9076635 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,5 +1,4 @@ -obj-$(CONFIG_VIRTIO) += virtio.o -obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index c3b3f7f..1e8659c 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -159,7 +159,7 @@ static int virtio_dev_remove(struct device *_d) drv->remove(dev); /* Driver should have reset device. */ - BUG_ON(dev->config->get_status(dev)); + WARN_ON_ONCE(dev->config->get_status(dev)); /* Acknowledge the device's existence again. */ add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 453db0c..6b1b7e1 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -131,9 +131,6 @@ struct virtio_mmio_vq_info { /* the number of entries in the queue */ unsigned int num; - /* the index of the queue */ - int queue_index; - /* the virtual address of the ring queue */ void *queue; @@ -225,11 +222,10 @@ static void vm_reset(struct virtio_device *vdev) static void vm_notify(struct virtqueue *vq) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); - struct virtio_mmio_vq_info *info = vq->priv; /* We write the queue's selector into the notification register to * signal the other end */ - writel(info->queue_index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY); + writel(virtqueue_get_queue_index(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY); } /* Notify all virtqueues on an interrupt. */ @@ -270,6 +266,7 @@ static void vm_del_vq(struct virtqueue *vq) struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); struct virtio_mmio_vq_info *info = vq->priv; unsigned long flags, size; + unsigned int index = virtqueue_get_queue_index(vq); spin_lock_irqsave(&vm_dev->lock, flags); list_del(&info->node); @@ -278,7 +275,7 @@ static void vm_del_vq(struct virtqueue *vq) vring_del_virtqueue(vq); /* Select and deactivate the queue */ - writel(info->queue_index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL); + writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL); writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); size = PAGE_ALIGN(vring_size(info->num, VIRTIO_MMIO_VRING_ALIGN)); @@ -309,6 +306,9 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, unsigned long flags, size; int err; + if (!name) + return NULL; + /* Select the queue we're interested in */ writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL); @@ -324,7 +324,6 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, err = -ENOMEM; goto error_kmalloc; } - info->queue_index = index; /* Allocate pages for the queue - start with a queue as big as * possible (limited by maximum size allowed by device), drop down @@ -332,11 +331,21 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, * and two rings (which makes it "alignment_size * 2") */ info->num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX); + + /* If the device reports a 0 entry queue, we won't be able to + * use it to perform I/O, and vring_new_virtqueue() can't create + * empty queues anyway, so don't bother to set up the device. + */ + if (info->num == 0) { + err = -ENOENT; + goto error_alloc_pages; + } + while (1) { size = PAGE_ALIGN(vring_size(info->num, VIRTIO_MMIO_VRING_ALIGN)); - /* Already smallest possible allocation? */ - if (size <= VIRTIO_MMIO_VRING_ALIGN * 2) { + /* Did the last iter shrink the queue below minimum size? */ + if (size < VIRTIO_MMIO_VRING_ALIGN * 2) { err = -ENOMEM; goto error_alloc_pages; } @@ -356,7 +365,7 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); /* Create the vring */ - vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN, vdev, + vq = vring_new_virtqueue(index, info->num, VIRTIO_MMIO_VRING_ALIGN, vdev, true, info->queue, vm_notify, callback, name); if (!vq) { err = -ENOMEM; diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 2e03d41..c33aea3 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -48,6 +48,7 @@ struct virtio_pci_device int msix_enabled; int intx_enabled; struct msix_entry *msix_entries; + cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. This size should be enough, * and I'm too lazy to allocate each name separately. */ char (*msix_names)[256]; @@ -79,9 +80,6 @@ struct virtio_pci_vq_info /* the number of entries in the queue */ int num; - /* the index of the queue */ - int queue_index; - /* the virtual address of the ring queue */ void *queue; @@ -202,11 +200,11 @@ static void vp_reset(struct virtio_device *vdev) static void vp_notify(struct virtqueue *vq) { struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); - struct virtio_pci_vq_info *info = vq->priv; /* we write the queue's selector into the notification register to * signal the other end */ - iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); + iowrite16(virtqueue_get_queue_index(vq), + vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); } /* Handle a configuration change: Tell driver if it wants to know. */ @@ -279,6 +277,10 @@ static void vp_free_vectors(struct virtio_device *vdev) for (i = 0; i < vp_dev->msix_used_vectors; ++i) free_irq(vp_dev->msix_entries[i].vector, vp_dev); + for (i = 0; i < vp_dev->msix_vectors; i++) + if (vp_dev->msix_affinity_masks[i]) + free_cpumask_var(vp_dev->msix_affinity_masks[i]); + if (vp_dev->msix_enabled) { /* Disable the vector used for configuration */ iowrite16(VIRTIO_MSI_NO_VECTOR, @@ -296,6 +298,8 @@ static void vp_free_vectors(struct virtio_device *vdev) vp_dev->msix_names = NULL; kfree(vp_dev->msix_entries); vp_dev->msix_entries = NULL; + kfree(vp_dev->msix_affinity_masks); + vp_dev->msix_affinity_masks = NULL; } static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, @@ -314,6 +318,15 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, GFP_KERNEL); if (!vp_dev->msix_names) goto error; + vp_dev->msix_affinity_masks + = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks, + GFP_KERNEL); + if (!vp_dev->msix_affinity_masks) + goto error; + for (i = 0; i < nvectors; ++i) + if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], + GFP_KERNEL)) + goto error; for (i = 0; i < nvectors; ++i) vp_dev->msix_entries[i].entry = i; @@ -402,7 +415,6 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index, if (!info) return ERR_PTR(-ENOMEM); - info->queue_index = index; info->num = num; info->msix_vector = msix_vec; @@ -418,7 +430,7 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); /* create the vring */ - vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, vdev, + vq = vring_new_virtqueue(index, info->num, VIRTIO_PCI_VRING_ALIGN, vdev, true, info->queue, vp_notify, callback, name); if (!vq) { err = -ENOMEM; @@ -467,7 +479,8 @@ static void vp_del_vq(struct virtqueue *vq) list_del(&info->node); spin_unlock_irqrestore(&vp_dev->lock, flags); - iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + iowrite16(virtqueue_get_queue_index(vq), + vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); if (vp_dev->msix_enabled) { iowrite16(VIRTIO_MSI_NO_VECTOR, @@ -542,7 +555,10 @@ static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, vp_dev->per_vq_vectors = per_vq_vectors; allocated_vectors = vp_dev->msix_used_vectors; for (i = 0; i < nvqs; ++i) { - if (!callbacks[i] || !vp_dev->msix_enabled) + if (!names[i]) { + vqs[i] = NULL; + continue; + } else if (!callbacks[i] || !vp_dev->msix_enabled) msix_vec = VIRTIO_MSI_NO_VECTOR; else if (vp_dev->per_vq_vectors) msix_vec = allocated_vectors++; @@ -609,6 +625,35 @@ static const char *vp_bus_name(struct virtio_device *vdev) return pci_name(vp_dev->pci_dev); } +/* Setup the affinity for a virtqueue: + * - force the affinity for per vq vector + * - OR over all affinities for shared MSI + * - ignore the affinity request if we're using INTX + */ +static int vp_set_vq_affinity(struct virtqueue *vq, int cpu) +{ + struct virtio_device *vdev = vq->vdev; + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_vq_info *info = vq->priv; + struct cpumask *mask; + unsigned int irq; + + if (!vq->callback) + return -EINVAL; + + if (vp_dev->msix_enabled) { + mask = vp_dev->msix_affinity_masks[info->msix_vector]; + irq = vp_dev->msix_entries[info->msix_vector].vector; + if (cpu == -1) + irq_set_affinity_hint(irq, NULL); + else { + cpumask_set_cpu(cpu, mask); + irq_set_affinity_hint(irq, mask); + } + } + return 0; +} + static struct virtio_config_ops virtio_pci_config_ops = { .get = vp_get, .set = vp_set, @@ -620,6 +665,7 @@ static struct virtio_config_ops virtio_pci_config_ops = { .get_features = vp_get_features, .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, + .set_vq_affinity = vp_set_vq_affinity, }; static void virtio_pci_release_dev(struct device *_d) @@ -673,8 +719,10 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, goto out_enable_device; vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); - if (vp_dev->ioaddr == NULL) + if (vp_dev->ioaddr == NULL) { + err = -ENOMEM; goto out_req_regions; + } pci_set_drvdata(pci_dev, vp_dev); pci_set_master(pci_dev); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 5aa43c3..e639584 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -106,6 +106,9 @@ struct vring_virtqueue /* How to notify other side. FIXME: commonalize hcalls! */ void (*notify)(struct virtqueue *vq); + /* Index of the queue */ + int queue_index; + #ifdef DEBUG /* They're supposed to lock for us. */ unsigned int in_use; @@ -171,6 +174,13 @@ static int vring_add_indirect(struct vring_virtqueue *vq, return head; } +int virtqueue_get_queue_index(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + return vq->queue_index; +} +EXPORT_SYMBOL_GPL(virtqueue_get_queue_index); + /** * virtqueue_add_buf - expose buffer to other end * @vq: the struct virtqueue we're talking about. @@ -616,7 +626,8 @@ irqreturn_t vring_interrupt(int irq, void *_vq) } EXPORT_SYMBOL_GPL(vring_interrupt); -struct virtqueue *vring_new_virtqueue(unsigned int num, +struct virtqueue *vring_new_virtqueue(unsigned int index, + unsigned int num, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, @@ -647,6 +658,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, vq->broken = false; vq->last_used_idx = 0; vq->num_added = 0; + vq->queue_index = index; list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; |