From f68d24082e22ccee3077d11aeb6dc5354f0ca7f1 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 23 Sep 2009 22:26:29 -0600
Subject: virtio_pci: minor MSI-X cleanups

1) Rename vp_request_vectors to vp_request_msix_vectors, and take
   non-MSI-X case out to caller.
2) Comment weird pci_enable_msix API (see the sketch after this list)
3) Rename vp_find_vq to setup_vq.
4) Fix spaces to tabs
5) Make the nvectors calculation internal to vp_try_to_find_vqs()
6) Rename vector to msix_vector for more clarity.
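
As a sketch of what (2) refers to (illustrative only: pdev, entries and
nvec are placeholder names, and virtio_pci itself just maps a positive
return to -ENOSPC rather than retrying):

	/*
	 * pci_enable_msix() is unusual: a negative return is an error,
	 * zero is success, and a positive return is the number of
	 * vectors the device could actually provide.
	 */
	int rc = pci_enable_msix(pdev, entries, nvec);
	while (rc > 0) {
		nvec = rc;	/* retry with what the device offers */
		rc = pci_enable_msix(pdev, entries, nvec);
	}
	if (rc < 0)
		return rc;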

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
---
 drivers/virtio/virtio_pci.c | 125 +++++++++++++++++++++++++-------------------
 1 file changed, 70 insertions(+), 55 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 248e00e..4a1f1eb 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -84,7 +84,7 @@ struct virtio_pci_vq_info
 	struct list_head node;
 
 	/* MSI-X vector (or none) */
-	unsigned vector;
+	unsigned msix_vector;
 };
 
 /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
@@ -280,25 +280,14 @@ static void vp_free_vectors(struct virtio_device *vdev)
 	vp_dev->msix_entries = NULL;
 }
 
-static int vp_request_vectors(struct virtio_device *vdev, int nvectors,
-			      bool per_vq_vectors)
+static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
+				   bool per_vq_vectors)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	const char *name = dev_name(&vp_dev->vdev.dev);
 	unsigned i, v;
 	int err = -ENOMEM;
 
-	if (!nvectors) {
-		/* Can't allocate MSI-X vectors, use regular interrupt */
-		vp_dev->msix_vectors = 0;
-		err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
-				  IRQF_SHARED, name, vp_dev);
-		if (err)
-			return err;
-		vp_dev->intx_enabled = 1;
-		return 0;
-	}
-
 	vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
 				       GFP_KERNEL);
 	if (!vp_dev->msix_entries)
@@ -311,6 +300,7 @@ static int vp_request_vectors(struct virtio_device *vdev, int nvectors,
 	for (i = 0; i < nvectors; ++i)
 		vp_dev->msix_entries[i].entry = i;
 
+	/* pci_enable_msix returns positive if we can't get this many. */
 	err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors);
 	if (err > 0)
 		err = -ENOSPC;
@@ -356,10 +346,22 @@ error:
 	return err;
 }
 
-static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
-				    void (*callback)(struct virtqueue *vq),
-				    const char *name,
-				    u16 vector)
+static int vp_request_intx(struct virtio_device *vdev)
+{
+	int err;
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
+			  IRQF_SHARED, dev_name(&vdev->dev), vp_dev);
+	if (!err)
+		vp_dev->intx_enabled = 1;
+	return err;
+}
+
+static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
+				  void (*callback)(struct virtqueue *vq),
+				  const char *name,
+				  u16 msix_vec)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	struct virtio_pci_vq_info *info;
@@ -384,7 +386,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 
 	info->queue_index = index;
 	info->num = num;
-	info->vector = vector;
+	info->msix_vector = msix_vec;
 
 	size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
 	info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
@@ -408,10 +410,10 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 	vq->priv = info;
 	info->vq = vq;
 
-	 if (vector != VIRTIO_MSI_NO_VECTOR) {
-		iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
-		vector = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
-		if (vector == VIRTIO_MSI_NO_VECTOR) {
+	if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
+		iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+		msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+		if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
 			err = -EBUSY;
 			goto out_assign;
 		}
@@ -472,7 +474,8 @@ static void vp_del_vqs(struct virtio_device *vdev)
 	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
 		info = vq->priv;
 		if (vp_dev->per_vq_vectors)
-			free_irq(vp_dev->msix_entries[info->vector].vector, vq);
+			free_irq(vp_dev->msix_entries[info->msix_vector].vector,
+				 vq);
 		vp_del_vq(vq);
 	}
 	vp_dev->per_vq_vectors = false;
@@ -484,38 +487,58 @@ static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 			      struct virtqueue *vqs[],
 			      vq_callback_t *callbacks[],
 			      const char *names[],
-			      int nvectors,
+			      bool use_msix,
 			      bool per_vq_vectors)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	u16 vector;
-	int i, err, allocated_vectors;
+	u16 msix_vec;
+	int i, err, nvectors, allocated_vectors;
 
-	err = vp_request_vectors(vdev, nvectors, per_vq_vectors);
-	if (err)
-		goto error_request;
+	if (!use_msix) {
+		/* Old style: one normal interrupt for change and all vqs. */
+		err = vp_request_intx(vdev);
+		if (err)
+			goto error_request;
+	} else {
+		if (per_vq_vectors) {
+			/* Best option: one for change interrupt, one per vq. */
+			nvectors = 1;
+			for (i = 0; i < nvqs; ++i)
+				if (callbacks[i])
+					++nvectors;
+		} else {
+			/* Second best: one for change, shared for all vqs. */
+			nvectors = 2;
+		}
+
+		err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
+		if (err)
+			goto error_request;
+	}
 
 	vp_dev->per_vq_vectors = per_vq_vectors;
 	allocated_vectors = vp_dev->msix_used_vectors;
 	for (i = 0; i < nvqs; ++i) {
 		if (!callbacks[i] || !vp_dev->msix_enabled)
-			vector = VIRTIO_MSI_NO_VECTOR;
+			msix_vec = VIRTIO_MSI_NO_VECTOR;
 		else if (vp_dev->per_vq_vectors)
-			vector = allocated_vectors++;
+			msix_vec = allocated_vectors++;
 		else
-			vector = VP_MSIX_VQ_VECTOR;
-		vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i], vector);
+			msix_vec = VP_MSIX_VQ_VECTOR;
+		vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
 		if (IS_ERR(vqs[i])) {
 			err = PTR_ERR(vqs[i]);
 			goto error_find;
 		}
 		/* allocate per-vq irq if available and necessary */
-		if (vp_dev->per_vq_vectors && vector != VIRTIO_MSI_NO_VECTOR) {
-			snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names,
-				 "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]);
-			err = request_irq(vp_dev->msix_entries[vector].vector,
-					  vring_interrupt, 0,
-					  vp_dev->msix_names[vector], vqs[i]);
+		if (vp_dev->per_vq_vectors) {
+			snprintf(vp_dev->msix_names[msix_vec],
+				 sizeof *vp_dev->msix_names,
+				 "%s-%s",
+				 dev_name(&vp_dev->vdev.dev), names[i]);
+			err = request_irq(msix_vec, vring_interrupt, 0,
+					  vp_dev->msix_names[msix_vec],
+					  vqs[i]);
 			if (err) {
 				vp_del_vq(vqs[i]);
 				goto error_find;
@@ -537,28 +560,20 @@ static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 		       vq_callback_t *callbacks[],
 		       const char *names[])
 {
-	int vectors = 0;
-	int i, uninitialized_var(err);
-
-	/* How many vectors would we like? */
-	for (i = 0; i < nvqs; ++i)
-		if (callbacks[i])
-			++vectors;
+	int err;
 
-	/* We want at most one vector per queue and one for config changes. */
-	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
-				 vectors + 1, true);
+	/* Try MSI-X with one vector per queue. */
+	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
 	if (!err)
 		return 0;
-	/* Fallback to separate vectors for config and a shared for queues. */
+	/* Fallback: MSI-X with one vector for config, one shared for queues. */
 	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
-				 2, false);
+				 true, false);
 	if (!err)
 		return 0;
 	/* Finally fall back to regular interrupts. */
-	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
-				 0, false);
-	return err;
+	return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
+				  false, false);
 }
 
 static struct virtio_config_ops virtio_pci_config_ops = {
-- 


From 3c1b27d5043086a485f8526353ae9fe37bfa1065 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 23 Sep 2009 22:26:31 -0600
Subject: virtio: make add_buf return capacity remaining

This API change means that virtio_net can tell how much capacity
remains for buffers.  It's necessarily fuzzy, since
VIRTIO_RING_F_INDIRECT_DESC means we can fit any number of descriptors
into a single ring slot, *if* we can kmalloc the indirect table.
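
Callers now check the sign rather than non-zero, roughly (a sketch;
handle_error() and stop_queue() are placeholders, not real driver code):

	int err = vq->vq_ops->add_buf(vq, sg, out, in, token);
	if (err < 0)		/* failed, e.g. -ENOSPC: ring full */
		handle_error(err);
	else if (err == 0)	/* added, but no capacity left */
		stop_queue();
	/* else err is (roughly) how many more buffers will fit */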

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Dinesh Subhraveti <dineshs@us.ibm.com>
---
 drivers/block/virtio_blk.c          |  2 +-
 drivers/char/hw_random/virtio-rng.c |  2 +-
 drivers/char/virtio_console.c       |  4 ++--
 drivers/net/virtio_net.c            | 14 +++++++-------
 drivers/virtio/virtio_balloon.c     |  2 +-
 drivers/virtio/virtio_ring.c        |  6 +++++-
 6 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index aa1a3d5..d739ee4 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -139,7 +139,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		}
 	}
 
-	if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr)) {
+	if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) {
 		mempool_free(vbr, vblk->pool);
 		return false;
 	}
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 32216b6..b6c24dc 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -51,7 +51,7 @@ static void register_buffer(void)
 
 	sg_init_one(&sg, random_data+data_left, RANDOM_DATA_SIZE-data_left);
 	/* There should always be room for one buffer. */
-	if (vq->vq_ops->add_buf(vq, &sg, 0, 1, random_data) != 0)
+	if (vq->vq_ops->add_buf(vq, &sg, 0, 1, random_data) < 0)
 		BUG();
 	vq->vq_ops->kick(vq);
 }
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index c74dacf..a035ae3 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -65,7 +65,7 @@ static int put_chars(u32 vtermno, const char *buf, int count)
 
 	/* add_buf wants a token to identify this buffer: we hand it any
 	 * non-NULL pointer, since there's only ever one buffer. */
-	if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) == 0) {
+	if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) >= 0) {
 		/* Tell Host to go! */
 		out_vq->vq_ops->kick(out_vq);
 		/* Chill out until it's done with the buffer. */
@@ -85,7 +85,7 @@ static void add_inbuf(void)
 	sg_init_one(sg, inbuf, PAGE_SIZE);
 
 	/* We should always be able to add one buffer to an empty queue. */
-	if (in_vq->vq_ops->add_buf(in_vq, sg, 0, 1, inbuf) != 0)
+	if (in_vq->vq_ops->add_buf(in_vq, sg, 0, 1, inbuf) < 0)
 		BUG();
 	in_vq->vq_ops->kick(in_vq);
 }
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 32266fb..fbf04a5 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -320,7 +320,7 @@ static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp)
 		skb_queue_head(&vi->recv, skb);
 
 		err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb);
-		if (err) {
+		if (err < 0) {
 			skb_unlink(skb, &vi->recv);
 			trim_pages(vi, skb);
 			kfree_skb(skb);
@@ -373,7 +373,7 @@ static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
 		skb_queue_head(&vi->recv, skb);
 
 		err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 1, skb);
-		if (err) {
+		if (err < 0) {
 			skb_unlink(skb, &vi->recv);
 			kfree_skb(skb);
 			break;
@@ -527,7 +527,7 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
 	num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
 
 	err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
-	if (!err && !vi->free_in_tasklet)
+	if (err >= 0 && !vi->free_in_tasklet)
 		mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10));
 
 	return err;
@@ -538,7 +538,7 @@ static void xmit_tasklet(unsigned long data)
 	struct virtnet_info *vi = (void *)data;
 
 	netif_tx_lock_bh(vi->dev);
-	if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) == 0) {
+	if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) >= 0) {
 		vi->svq->vq_ops->kick(vi->svq);
 		vi->last_xmit_skb = NULL;
 	}
@@ -557,7 +557,7 @@ again:
 
 	/* If we has a buffer left over from last time, send it now. */
 	if (unlikely(vi->last_xmit_skb) &&
-	    xmit_skb(vi, vi->last_xmit_skb) != 0)
+	    xmit_skb(vi, vi->last_xmit_skb) < 0)
 		goto stop_queue;
 
 	vi->last_xmit_skb = NULL;
@@ -565,7 +565,7 @@ again:
 	/* Put new one in send queue and do transmit */
 	if (likely(skb)) {
 		__skb_queue_head(&vi->send, skb);
-		if (xmit_skb(vi, skb) != 0) {
+		if (xmit_skb(vi, skb) < 0) {
 			vi->last_xmit_skb = skb;
 			skb = NULL;
 			goto stop_queue;
@@ -668,7 +668,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 		sg_set_buf(&sg[i + 1], sg_virt(s), s->length);
 	sg_set_buf(&sg[out + in - 1], &status, sizeof(status));
 
-	BUG_ON(vi->cvq->vq_ops->add_buf(vi->cvq, sg, out, in, vi));
+	BUG_ON(vi->cvq->vq_ops->add_buf(vi->cvq, sg, out, in, vi) < 0);
 
 	vi->cvq->vq_ops->kick(vi->cvq);
 
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 26b2782..3978923 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -84,7 +84,7 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
 	init_completion(&vb->acked);
 
 	/* We should always be able to add one buffer to an empty queue. */
-	if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) != 0)
+	if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) < 0)
 		BUG();
 	vq->vq_ops->kick(vq);
 
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index a882f26..f536005 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -208,7 +208,11 @@ add_head:
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
 	END_USE(vq);
-	return 0;
+
+	/* If we're indirect, we can fit many (assuming not OOM). */
+	if (vq->indirect)
+		return vq->num_free ? vq->vring.num : 0;
+	return vq->num_free;
 }
 
 static void vring_kick(struct virtqueue *_vq)
-- 


From 3ca4f5ca73057a617f9444a91022d7127041970a Mon Sep 17 00:00:00 2001
From: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Date: Fri, 31 Jul 2009 15:25:56 +0900
Subject: virtio: add virtio IDs file

Virtio IDs are spread all over the tree, which makes assigning new IDs
bothersome. Putting them together should make the process less error-prone.
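
The new <linux/virtio_ids.h> collects definitions along these lines
(device IDs as assigned by the virtio spec):

	#define VIRTIO_ID_NET		1 /* virtio net */
	#define VIRTIO_ID_BLOCK		2 /* virtio block */
	#define VIRTIO_ID_CONSOLE	3 /* virtio console */
	#define VIRTIO_ID_RNG		4 /* virtio rng */
	#define VIRTIO_ID_BALLOON	5 /* virtio balloon */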

Signed-off-by: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/block/virtio_blk.c          | 1 +
 drivers/char/hw_random/virtio-rng.c | 1 +
 drivers/char/virtio_console.c       | 1 +
 drivers/net/virtio_net.c            | 1 +
 drivers/virtio/virtio_balloon.c     | 1 +
 5 files changed, 5 insertions(+)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index d739ee4..73de753 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -3,6 +3,7 @@
 #include <linux/blkdev.h>
 #include <linux/hdreg.h>
 #include <linux/virtio.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_blk.h>
 #include <linux/scatterlist.h>
 
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index b6c24dc..962968f 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -21,6 +21,7 @@
 #include <linux/scatterlist.h>
 #include <linux/spinlock.h>
 #include <linux/virtio.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_rng.h>
 
 /* The host will fill any buffer we give it with sweet, sweet randomness.  We
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index a035ae3..0d328b5 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -31,6 +31,7 @@
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/virtio.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_console.h>
 #include "hvc_console.h"
 
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index fbf04a5..5c498d2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -22,6 +22,7 @@
 #include <linux/ethtool.h>
 #include <linux/module.h>
 #include <linux/virtio.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_net.h>
 #include <linux/scatterlist.h>
 #include <linux/if_vlan.h>
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 3978923..200c22f 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -19,6 +19,7 @@
  */
 //#define DEBUG
 #include <linux/virtio.h>
+#include <linux/virtio_ids.h>
 #include <linux/virtio_balloon.h>
 #include <linux/swap.h>
 #include <linux/kthread.h>
-- 


From f1b0ef062602713c2c7cfa12362d5d90ed01c5f6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 17 Sep 2009 19:57:42 +0200
Subject: virtio_blk: add support for cache flush

Recent qemu has added a VIRTIO_BLK_F_FLUSH flag to advertise that the
virtual disk has a volatile write cache that needs to be flushed.  If we
see this feature, tell the Linux block layer about the fact and use the
new VIRTIO_BLK_T_FLUSH command to flush the cache when required.  This
allows for a correct and simple implementation of write barriers.
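
On the wire a flush is just a request header with no data descriptors,
along these lines (a sketch of the guest-visible layout, matching the
REQ_TYPE_LINUX_BLOCK case below):

	struct virtio_blk_outhdr hdr = {
		.type   = VIRTIO_BLK_T_FLUSH,	/* flush volatile cache */
		.sector = 0,			/* unused for flushes */
	};
	/* followed only by the status byte for the host to fill in */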

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/block/virtio_blk.c | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 73de753..3d5fe975 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -92,15 +92,26 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		return false;
 
 	vbr->req = req;
-	if (blk_fs_request(vbr->req)) {
+	switch (req->cmd_type) {
+	case REQ_TYPE_FS:
 		vbr->out_hdr.type = 0;
 		vbr->out_hdr.sector = blk_rq_pos(vbr->req);
 		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
-	} else if (blk_pc_request(vbr->req)) {
+		break;
+	case REQ_TYPE_BLOCK_PC:
 		vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
 		vbr->out_hdr.sector = 0;
 		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
-	} else {
+		break;
+	case REQ_TYPE_LINUX_BLOCK:
+		if (req->cmd[0] == REQ_LB_OP_FLUSH) {
+			vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
+			vbr->out_hdr.sector = 0;
+			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
+			break;
+		}
+		/*FALLTHRU*/
+	default:
 		/* We don't put anything else in the queue. */
 		BUG();
 	}
@@ -200,6 +211,12 @@ out:
 	return err;
 }
 
+static void virtblk_prepare_flush(struct request_queue *q, struct request *req)
+{
+	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
+	req->cmd[0] = REQ_LB_OP_FLUSH;
+}
+
 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 			 unsigned cmd, unsigned long data)
 {
@@ -338,7 +355,10 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 	index++;
 
 	/* If barriers are supported, tell block layer that queue is ordered */
-	if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER))
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
+		blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_DRAIN_FLUSH,
+				  virtblk_prepare_flush);
+	else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER))
 		blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);
 
 	/* If disk is read-only in the host, the guest should obey */
@@ -425,7 +445,7 @@ static struct virtio_device_id id_table[] = {
 static unsigned int features[] = {
 	VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
 	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
-	VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_IDENTIFY
+	VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_IDENTIFY, VIRTIO_BLK_F_FLUSH
 };
 
 /*
-- 


From 4c1ea3dd718a1d93a726cb3e66665ac4170dcccd Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 23 Sep 2009 22:26:45 -0600
Subject: lguest: use set_pte/set_pmd uniformly for real page table entries

If we're building a pte, we can use simple assignment; only use set_pte
etc. when we're actually going to use that destination as a PTE.  I
don't know that we'll ever run under Xen, but it's neater.

And use set_pte/set_pmd rather than assuming native_ versions, even
though that's probably true for most people.
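
In other words (illustrative only; pfn and ptep are placeholders):

	pte_t pte;

	/* Building a value in a local: plain assignment is fine. */
	pte = pfn_pte(pfn, PAGE_KERNEL);

	/* Storing into a live page table: use the accessor, so a
	 * paravirt backend could intercept the write. */
	set_pte(ptep, pte);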

(Includes compile fix by Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>)
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Matias Zabaljauregui <zabaljauregui@gmail.com>
Cc: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
---
 drivers/lguest/page_tables.c | 39 +++++++++++++++++----------------------
 1 file changed, 17 insertions(+), 22 deletions(-)

diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index a8d0aee..232fba6 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -380,7 +380,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 		 * And we copy the flags to the shadow PMD entry.  The page
 		 * number in the shadow PMD is the page we just allocated.
 		 */
-		native_set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
+		set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
 	}
 
 	/*
@@ -447,7 +447,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 		 * we will come back here when a write does actually occur, so
 		 * we can update the Guest's _PAGE_DIRTY flag.
 		 */
-		native_set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));
+		set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));
 
 	/*
 	 * Finally, we write the Guest PTE entry back: we've set the
@@ -528,7 +528,7 @@ static void release_pmd(pmd_t *spmd)
 		/* Now we can free the page of PTEs */
 		free_page((long)ptepage);
 		/* And zero out the PMD entry so we never release it twice. */
-		native_set_pmd(spmd, __pmd(0));
+		set_pmd(spmd, __pmd(0));
 	}
 }
 
@@ -833,15 +833,15 @@ static void do_set_pte(struct lg_cpu *cpu, int idx,
 			 */
 			if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
 				check_gpte(cpu, gpte);
-				native_set_pte(spte,
-						gpte_to_spte(cpu, gpte,
+				set_pte(spte,
+					gpte_to_spte(cpu, gpte,
 						pte_flags(gpte) & _PAGE_DIRTY));
 			} else {
 				/*
 				 * Otherwise kill it and we can demand_page()
 				 * it in later.
 				 */
-				native_set_pte(spte, __pte(0));
+				set_pte(spte, __pte(0));
 			}
 #ifdef CONFIG_X86_PAE
 		}
@@ -983,16 +983,15 @@ static unsigned long setup_pagetables(struct lguest *lg,
 	 */
 	for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD;
 	     i += PTRS_PER_PTE, j++) {
-		/* FIXME: native_set_pmd is overkill here. */
-		native_set_pmd(&pmd, __pmd(((unsigned long)(linear + i)
-		- mem_base) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
+		pmd = pfn_pmd(((unsigned long)&linear[i] - mem_base)/PAGE_SIZE,
+			      __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
 
 		if (copy_to_user(&pmds[j], &pmd, sizeof(pmd)) != 0)
 			return -EFAULT;
 	}
 
 	/* One PGD entry, pointing to that PMD page. */
-	set_pgd(&pgd, __pgd(((u32)pmds - mem_base) | _PAGE_PRESENT));
+	pgd = __pgd(((unsigned long)pmds - mem_base) | _PAGE_PRESENT);
 	/* Copy it in as the first PGD entry (ie. addresses 0-1G). */
 	if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0)
 		return -EFAULT;
@@ -1141,15 +1140,13 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
 {
 	pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
 	pte_t regs_pte;
-	unsigned long pfn;
 
 #ifdef CONFIG_X86_PAE
 	pmd_t switcher_pmd;
 	pmd_t *pmd_table;
 
-	/* FIXME: native_set_pmd is overkill here. */
-	native_set_pmd(&switcher_pmd, pfn_pmd(__pa(switcher_pte_page) >>
-		       PAGE_SHIFT, PAGE_KERNEL_EXEC));
+	switcher_pmd = pfn_pmd(__pa(switcher_pte_page) >> PAGE_SHIFT,
+			       PAGE_KERNEL_EXEC);
 
 	/* Figure out where the pmd page is, by reading the PGD, and converting
 	 * it to a virtual address. */
@@ -1157,7 +1154,7 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
 			pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX])
 								<< PAGE_SHIFT);
 	/* Now write it into the shadow page table. */
-	native_set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd);
+	set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd);
 #else
 	pgd_t switcher_pgd;
 
@@ -1179,10 +1176,8 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
 	 * page is already mapped there, we don't have to copy them out
 	 * again.
 	 */
-	pfn = __pa(cpu->regs_page) >> PAGE_SHIFT;
-	native_set_pte(&regs_pte, pfn_pte(pfn, PAGE_KERNEL));
-	native_set_pte(&switcher_pte_page[pte_index((unsigned long)pages)],
-			regs_pte);
+	regs_pte = pfn_pte(__pa(cpu->regs_page) >> PAGE_SHIFT, PAGE_KERNEL);
+	set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], regs_pte);
 }
 /*:*/
 
@@ -1209,7 +1204,7 @@ static __init void populate_switcher_pte_page(unsigned int cpu,
 
 	/* The first entries are easy: they map the Switcher code. */
 	for (i = 0; i < pages; i++) {
-		native_set_pte(&pte[i], mk_pte(switcher_page[i],
+		set_pte(&pte[i], mk_pte(switcher_page[i],
 				__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
 	}
 
@@ -1217,14 +1212,14 @@ static __init void populate_switcher_pte_page(unsigned int cpu,
 	i = pages + cpu*2;
 
 	/* First page (Guest registers) is writable from the Guest */
-	native_set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]),
+	set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]),
 			 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW)));
 
 	/*
 	 * The second page contains the "struct lguest_ro_state", and is
 	 * read-only.
 	 */
-	native_set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]),
+	set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]),
 			   __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
 }
 
-- 


From fb100d78c04ff6053047625d0368d0d4b1d9912a Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 23 Sep 2009 22:26:46 -0600
Subject: lguest: use PGDIR_SHIFT for PAE code to allow different PAGE_OFFSET

We still assume the Guest and Host have the same PAGE_OFFSET settings,
but now we don't assume 0xC0000000.
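
(The old hard-coded entry falls out as a special case: with PAGE_OFFSET
at 0xC0000000 and the PAE PGDIR_SHIFT of 30, KERNEL_PGD_BOUNDARY,
i.e. pgd_index(PAGE_OFFSET), is 0xC0000000 >> 30 = 3, exactly the
pgdir[3] the code used to assume.)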

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Matias Zabaljauregui <zabaljauregui@gmail.com>
---
 drivers/lguest/page_tables.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 232fba6..bf37a31 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -996,11 +996,9 @@ static unsigned long setup_pagetables(struct lguest *lg,
 	if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0)
 		return -EFAULT;
 	/*
-	 * And the third PGD entry (ie. addresses 3G-4G).
-	 *
-	 * FIXME: This assumes that PAGE_OFFSET for the Guest is 0xC0000000.
+	 * And the other PGD entry to make the linear mapping at PAGE_OFFSET
 	 */
-	if (copy_to_user(&pgdir[3], &pgd, sizeof(pgd)) != 0)
+	if (copy_to_user(&pgdir[KERNEL_PGD_BOUNDARY], &pgd, sizeof(pgd)))
 		return -EFAULT;
 #else
 	/*
-- 


From 6c189d8312246af776c2587c233d6afcf3714438 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Wed, 5 Aug 2009 17:42:37 +0800
Subject: lguest: cleanup for map_switcher()

We can use alloc_page() instead of get_zeroed_page() and virt_to_page().
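
That is, roughly:

	/* before: allocate by virtual address, then convert */
	unsigned long addr = get_zeroed_page(GFP_KERNEL);
	switcher_page[i] = virt_to_page(addr);

	/* after: get the zeroed struct page directly */
	switcher_page[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);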

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/lguest/core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 1e2cb84..8744d24 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -67,12 +67,11 @@ static __init int map_switcher(void)
 	 * so we make sure they're zeroed.
 	 */
 	for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) {
-		unsigned long addr = get_zeroed_page(GFP_KERNEL);
-		if (!addr) {
+		switcher_page[i] = alloc_page(GFP_KERNEL|__GFP_ZERO);
+		if (!switcher_page[i]) {
 			err = -ENOMEM;
 			goto free_some_pages;
 		}
-		switcher_page[i] = virt_to_page(addr);
 	}
 
 	/*
-- 