From 81aa8efe0190cf5bf7eaafb57341cd7d0aea96cd Mon Sep 17 00:00:00 2001
From: Jens Freimann <jfrei@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 16:13:44 +0200
Subject: KVM: s390: add and extend interrupt information data structs

With the currently available struct kvm_s390_interrupt it is not possible to
inject every kind of interrupt as defined in the z/Architecture. Add
additional interruption parameters to the structures and move it to kvm.h

Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h | 34 +---------------------------------
 1 file changed, 1 insertion(+), 33 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index eef3dd3..3ffc964 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -16,6 +16,7 @@
 #include <linux/hrtimer.h>
 #include <linux/interrupt.h>
 #include <linux/kvm_host.h>
+#include <linux/kvm.h>
 #include <asm/debug.h>
 #include <asm/cpu.h>
 
@@ -168,18 +169,6 @@ struct kvm_vcpu_stat {
 	u32 diagnose_9c;
 };
 
-struct kvm_s390_io_info {
-	__u16        subchannel_id;            /* 0x0b8 */
-	__u16        subchannel_nr;            /* 0x0ba */
-	__u32        io_int_parm;              /* 0x0bc */
-	__u32        io_int_word;              /* 0x0c0 */
-};
-
-struct kvm_s390_ext_info {
-	__u32 ext_params;
-	__u64 ext_params2;
-};
-
 #define PGM_OPERATION            0x01
 #define PGM_PRIVILEGED_OP	 0x02
 #define PGM_EXECUTE              0x03
@@ -188,27 +177,6 @@ struct kvm_s390_ext_info {
 #define PGM_SPECIFICATION        0x06
 #define PGM_DATA                 0x07
 
-struct kvm_s390_pgm_info {
-	__u16 code;
-};
-
-struct kvm_s390_prefix_info {
-	__u32 address;
-};
-
-struct kvm_s390_extcall_info {
-	__u16 code;
-};
-
-struct kvm_s390_emerg_info {
-	__u16 code;
-};
-
-struct kvm_s390_mchk_info {
-	__u64 cr14;
-	__u64 mcic;
-};
-
 struct kvm_s390_interrupt_info {
 	struct list_head list;
 	u64	type;
-- 
cgit v1.1


From c05c4186bbe4e99d64e8a36f7ca7f480da5d109f Mon Sep 17 00:00:00 2001
From: Jens Freimann <jfrei@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 16:13:45 +0200
Subject: KVM: s390: add floating irq controller

This patch adds a floating irq controller as a kvm_device.
It will be necessary for migration of floating interrupts as well
as for hardening the reset code by allowing user space to explicitly
remove all pending floating interrupts.

Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h |   1 +
 arch/s390/include/uapi/asm/kvm.h |  14 ++
 arch/s390/kvm/interrupt.c        | 304 ++++++++++++++++++++++++++++++++-------
 arch/s390/kvm/kvm-s390.c         |   1 +
 4 files changed, 269 insertions(+), 51 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3ffc964..59635b5 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -243,6 +243,7 @@ struct kvm_arch{
 	struct sca_block *sca;
 	debug_info_t *dbf;
 	struct kvm_s390_float_interrupt float_int;
+	struct kvm_device *flic;
 	struct gmap *gmap;
 	int css_support;
 };
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index d25da59..38d5f98 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -16,6 +16,20 @@
 
 #define __KVM_S390
 
+/* Device control API: s390-specific devices */
+#define KVM_DEV_FLIC_GET_ALL_IRQS	1
+#define KVM_DEV_FLIC_ENQUEUE		2
+#define KVM_DEV_FLIC_CLEAR_IRQS		3
+/*
+ * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
+ * as well as up  to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
+ * There are also sclp and machine checks. This gives us
+ * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000
+ * Lets round up to 8192 pages.
+ */
+
+#define KVM_S390_FLIC_MAX_BUFFER	0x2000000
+
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
 	/* general purpose regs for s390 */
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 5f79d2d..a5f18ba 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -659,53 +659,86 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 	return inti;
 }
 
-int kvm_s390_inject_vm(struct kvm *kvm,
-		       struct kvm_s390_interrupt *s390int)
+static void __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 {
 	struct kvm_s390_local_interrupt *li;
 	struct kvm_s390_float_interrupt *fi;
-	struct kvm_s390_interrupt_info *inti, *iter;
+	struct kvm_s390_interrupt_info *iter;
 	int sigcpu;
 
+	mutex_lock(&kvm->lock);
+	fi = &kvm->arch.float_int;
+	spin_lock(&fi->lock);
+	if (!is_ioint(inti->type)) {
+		list_add_tail(&inti->list, &fi->list);
+	} else {
+		u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
+
+		/* Keep I/O interrupts sorted in isc order. */
+		list_for_each_entry(iter, &fi->list, list) {
+			if (!is_ioint(iter->type))
+				continue;
+			if (int_word_to_isc_bits(iter->io.io_int_word)
+			    <= isc_bits)
+				continue;
+			break;
+		}
+		list_add_tail(&inti->list, &iter->list);
+	}
+	atomic_set(&fi->active, 1);
+	sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
+	if (sigcpu == KVM_MAX_VCPUS) {
+		do {
+			sigcpu = fi->next_rr_cpu++;
+			if (sigcpu == KVM_MAX_VCPUS)
+				sigcpu = fi->next_rr_cpu = 0;
+		} while (fi->local_int[sigcpu] == NULL);
+	}
+	li = fi->local_int[sigcpu];
+	spin_lock_bh(&li->lock);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	if (waitqueue_active(li->wq))
+		wake_up_interruptible(li->wq);
+	spin_unlock_bh(&li->lock);
+	spin_unlock(&fi->lock);
+	mutex_unlock(&kvm->lock);
+}
+
+int kvm_s390_inject_vm(struct kvm *kvm,
+		       struct kvm_s390_interrupt *s390int)
+{
+	struct kvm_s390_interrupt_info *inti;
+
 	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
 	if (!inti)
 		return -ENOMEM;
 
-	switch (s390int->type) {
+	inti->type = s390int->type;
+	switch (inti->type) {
 	case KVM_S390_INT_VIRTIO:
 		VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx",
 			 s390int->parm, s390int->parm64);
-		inti->type = s390int->type;
 		inti->ext.ext_params = s390int->parm;
 		inti->ext.ext_params2 = s390int->parm64;
 		break;
 	case KVM_S390_INT_SERVICE:
 		VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
-		inti->type = s390int->type;
 		inti->ext.ext_params = s390int->parm;
 		break;
-	case KVM_S390_PROGRAM_INT:
-	case KVM_S390_SIGP_STOP:
-	case KVM_S390_INT_EXTERNAL_CALL:
-	case KVM_S390_INT_EMERGENCY:
-		kfree(inti);
-		return -EINVAL;
 	case KVM_S390_MCHK:
 		VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
 			 s390int->parm64);
-		inti->type = s390int->type;
 		inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
 		inti->mchk.mcic = s390int->parm64;
 		break;
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		if (s390int->type & IOINT_AI_MASK)
+		if (inti->type & IOINT_AI_MASK)
 			VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
 		else
 			VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
 				 s390int->type & IOINT_CSSID_MASK,
 				 s390int->type & IOINT_SSID_MASK,
 				 s390int->type & IOINT_SCHID_MASK);
-		inti->type = s390int->type;
 		inti->io.subchannel_id = s390int->parm >> 16;
 		inti->io.subchannel_nr = s390int->parm & 0x0000ffffu;
 		inti->io.io_int_parm = s390int->parm64 >> 32;
@@ -718,42 +751,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 	trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
 				 2);
 
-	mutex_lock(&kvm->lock);
-	fi = &kvm->arch.float_int;
-	spin_lock(&fi->lock);
-	if (!is_ioint(inti->type))
-		list_add_tail(&inti->list, &fi->list);
-	else {
-		u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
-
-		/* Keep I/O interrupts sorted in isc order. */
-		list_for_each_entry(iter, &fi->list, list) {
-			if (!is_ioint(iter->type))
-				continue;
-			if (int_word_to_isc_bits(iter->io.io_int_word)
-			    <= isc_bits)
-				continue;
-			break;
-		}
-		list_add_tail(&inti->list, &iter->list);
-	}
-	atomic_set(&fi->active, 1);
-	sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
-	if (sigcpu == KVM_MAX_VCPUS) {
-		do {
-			sigcpu = fi->next_rr_cpu++;
-			if (sigcpu == KVM_MAX_VCPUS)
-				sigcpu = fi->next_rr_cpu = 0;
-		} while (fi->local_int[sigcpu] == NULL);
-	}
-	li = fi->local_int[sigcpu];
-	spin_lock_bh(&li->lock);
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
-	if (waitqueue_active(li->wq))
-		wake_up_interruptible(li->wq);
-	spin_unlock_bh(&li->lock);
-	spin_unlock(&fi->lock);
-	mutex_unlock(&kvm->lock);
+	__inject_vm(kvm, inti);
 	return 0;
 }
 
@@ -841,3 +839,207 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 	mutex_unlock(&vcpu->kvm->lock);
 	return 0;
 }
+
+static void clear_floating_interrupts(struct kvm *kvm)
+{
+	struct kvm_s390_float_interrupt *fi;
+	struct kvm_s390_interrupt_info	*n, *inti = NULL;
+
+	mutex_lock(&kvm->lock);
+	fi = &kvm->arch.float_int;
+	spin_lock(&fi->lock);
+	list_for_each_entry_safe(inti, n, &fi->list, list) {
+		list_del(&inti->list);
+		kfree(inti);
+	}
+	atomic_set(&fi->active, 0);
+	spin_unlock(&fi->lock);
+	mutex_unlock(&kvm->lock);
+}
+
+static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
+				   u8 *addr)
+{
+	struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
+	struct kvm_s390_irq irq = {0};
+
+	irq.type = inti->type;
+	switch (inti->type) {
+	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_SERVICE:
+		irq.u.ext = inti->ext;
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		irq.u.io = inti->io;
+		break;
+	case KVM_S390_MCHK:
+		irq.u.mchk = inti->mchk;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (copy_to_user(uptr, &irq, sizeof(irq)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
+{
+	struct kvm_s390_interrupt_info *inti;
+	struct kvm_s390_float_interrupt *fi;
+	int ret = 0;
+	int n = 0;
+
+	mutex_lock(&kvm->lock);
+	fi = &kvm->arch.float_int;
+	spin_lock(&fi->lock);
+
+	list_for_each_entry(inti, &fi->list, list) {
+		if (len < sizeof(struct kvm_s390_irq)) {
+			/* signal userspace to try again */
+			ret = -ENOMEM;
+			break;
+		}
+		ret = copy_irq_to_user(inti, buf);
+		if (ret)
+			break;
+		buf += sizeof(struct kvm_s390_irq);
+		len -= sizeof(struct kvm_s390_irq);
+		n++;
+	}
+
+	spin_unlock(&fi->lock);
+	mutex_unlock(&kvm->lock);
+
+	return ret < 0 ? ret : n;
+}
+
+static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	int r;
+
+	switch (attr->group) {
+	case KVM_DEV_FLIC_GET_ALL_IRQS:
+		r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr,
+					  attr->attr);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
+static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti,
+				     u64 addr)
+{
+	struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
+	void *target = NULL;
+	void __user *source;
+	u64 size;
+
+	if (get_user(inti->type, (u64 __user *)addr))
+		return -EFAULT;
+
+	switch (inti->type) {
+	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_SERVICE:
+		target = (void *) &inti->ext;
+		source = &uptr->u.ext;
+		size = sizeof(inti->ext);
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		target = (void *) &inti->io;
+		source = &uptr->u.io;
+		size = sizeof(inti->io);
+		break;
+	case KVM_S390_MCHK:
+		target = (void *) &inti->mchk;
+		source = &uptr->u.mchk;
+		size = sizeof(inti->mchk);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (copy_from_user(target, source, size))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int enqueue_floating_irq(struct kvm_device *dev,
+				struct kvm_device_attr *attr)
+{
+	struct kvm_s390_interrupt_info *inti = NULL;
+	int r = 0;
+	int len = attr->attr;
+
+	if (len % sizeof(struct kvm_s390_irq) != 0)
+		return -EINVAL;
+	else if (len > KVM_S390_FLIC_MAX_BUFFER)
+		return -EINVAL;
+
+	while (len >= sizeof(struct kvm_s390_irq)) {
+		inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+		if (!inti)
+			return -ENOMEM;
+
+		r = copy_irq_from_user(inti, attr->addr);
+		if (r) {
+			kfree(inti);
+			return r;
+		}
+		__inject_vm(dev->kvm, inti);
+		len -= sizeof(struct kvm_s390_irq);
+		attr->addr += sizeof(struct kvm_s390_irq);
+	}
+
+	return r;
+}
+
+static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	int r = 0;
+
+	switch (attr->group) {
+	case KVM_DEV_FLIC_ENQUEUE:
+		r = enqueue_floating_irq(dev, attr);
+		break;
+	case KVM_DEV_FLIC_CLEAR_IRQS:
+		r = 0;
+		clear_floating_interrupts(dev->kvm);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
+static int flic_create(struct kvm_device *dev, u32 type)
+{
+	if (!dev)
+		return -EINVAL;
+	if (dev->kvm->arch.flic)
+		return -EINVAL;
+	dev->kvm->arch.flic = dev;
+	return 0;
+}
+
+static void flic_destroy(struct kvm_device *dev)
+{
+	dev->kvm->arch.flic = NULL;
+	kfree(dev);
+}
+
+/* s390 floating irq controller (flic) */
+struct kvm_device_ops kvm_flic_ops = {
+	.name = "kvm-flic",
+	.get_attr = flic_get_attr,
+	.set_attr = flic_set_attr,
+	.create = flic_create,
+	.destroy = flic_destroy,
+};
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e0676f3..782420f 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -157,6 +157,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_S390_CSS_SUPPORT:
 	case KVM_CAP_IOEVENTFD:
+	case KVM_CAP_DEVICE_CTRL:
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
-- 
cgit v1.1


From a91b8ebe8671980151e0a19ee9fec6b0e1ae1d58 Mon Sep 17 00:00:00 2001
From: Jens Freimann <jfrei@linux.vnet.ibm.com>
Date: Thu, 30 Jan 2014 08:40:23 +0100
Subject: KVM: s390: limit floating irqs

Userspace can flood the kernel with interrupts as of now, so let's
limit the number of pending floating interrupts injected via either
the floating interrupt controller or the KVM_S390_INTERRUPT ioctl.

We can have up to 4*64k pending subchannels + 8 adapter interrupts,
as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
There are also sclp and machine checks. This gives us
(4*65536+8+64*64+1+1) = 266250 interrupts.

Suggested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h |  1 +
 arch/s390/include/uapi/asm/kvm.h |  2 +-
 arch/s390/kvm/interrupt.c        | 26 +++++++++++++++++++++-----
 3 files changed, 23 insertions(+), 6 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 59635b5..c3c5e10 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -214,6 +214,7 @@ struct kvm_s390_float_interrupt {
 	unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1)
 				/ sizeof(long)];
 	struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS];
+	unsigned int irq_count;
 };
 
 
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 38d5f98..058b178 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -27,7 +27,7 @@
  * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000
  * Lets round up to 8192 pages.
  */
-
+#define KVM_S390_MAX_FLOAT_IRQS	266250
 #define KVM_S390_FLIC_MAX_BUFFER	0x2000000
 
 /* for KVM_GET_REGS and KVM_SET_REGS */
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index a5f18ba..9c9192b 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -528,6 +528,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 			list_for_each_entry_safe(inti, n, &fi->list, list) {
 				if (__interrupt_is_deliverable(vcpu, inti)) {
 					list_del(&inti->list);
+					fi->irq_count--;
 					deliver = 1;
 					break;
 				}
@@ -583,6 +584,7 @@ void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu)
 				if ((inti->type == KVM_S390_MCHK) &&
 				    __interrupt_is_deliverable(vcpu, inti)) {
 					list_del(&inti->list);
+					fi->irq_count--;
 					deliver = 1;
 					break;
 				}
@@ -650,8 +652,10 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 		inti = iter;
 		break;
 	}
-	if (inti)
+	if (inti) {
 		list_del_init(&inti->list);
+		fi->irq_count--;
+	}
 	if (list_empty(&fi->list))
 		atomic_set(&fi->active, 0);
 	spin_unlock(&fi->lock);
@@ -659,16 +663,22 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 	return inti;
 }
 
-static void __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 {
 	struct kvm_s390_local_interrupt *li;
 	struct kvm_s390_float_interrupt *fi;
 	struct kvm_s390_interrupt_info *iter;
 	int sigcpu;
+	int rc = 0;
 
 	mutex_lock(&kvm->lock);
 	fi = &kvm->arch.float_int;
 	spin_lock(&fi->lock);
+	if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) {
+		rc = -EINVAL;
+		goto unlock_fi;
+	}
+	fi->irq_count++;
 	if (!is_ioint(inti->type)) {
 		list_add_tail(&inti->list, &fi->list);
 	} else {
@@ -700,8 +710,10 @@ static void __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 	if (waitqueue_active(li->wq))
 		wake_up_interruptible(li->wq);
 	spin_unlock_bh(&li->lock);
+unlock_fi:
 	spin_unlock(&fi->lock);
 	mutex_unlock(&kvm->lock);
+	return rc;
 }
 
 int kvm_s390_inject_vm(struct kvm *kvm,
@@ -751,8 +763,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 	trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
 				 2);
 
-	__inject_vm(kvm, inti);
-	return 0;
+	return __inject_vm(kvm, inti);
 }
 
 int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
@@ -852,6 +863,7 @@ static void clear_floating_interrupts(struct kvm *kvm)
 		list_del(&inti->list);
 		kfree(inti);
 	}
+	fi->irq_count = 0;
 	atomic_set(&fi->active, 0);
 	spin_unlock(&fi->lock);
 	mutex_unlock(&kvm->lock);
@@ -992,7 +1004,11 @@ static int enqueue_floating_irq(struct kvm_device *dev,
 			kfree(inti);
 			return r;
 		}
-		__inject_vm(dev->kvm, inti);
+		r = __inject_vm(dev->kvm, inti);
+		if (r) {
+			kfree(inti);
+			return r;
+		}
 		len -= sizeof(struct kvm_s390_irq);
 		attr->addr += sizeof(struct kvm_s390_irq);
 	}
-- 
cgit v1.1


From 24eb3a824c4f3ccfaa2305dc1d9d9e2a708828c5 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Mon, 17 Jun 2013 16:25:18 +0200
Subject: KVM: s390: Add FAULT_FLAG_RETRY_NOWAIT for guest fault

In the case of a fault, we will retry to exit sie64 but with gmap fault
indication for this thread set. This makes it possible to handle async
page faults.

Based on a patch from Martin Schwidefsky.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/pgtable.h   |  2 ++
 arch/s390/include/asm/processor.h |  1 +
 arch/s390/kvm/kvm-s390.c          | 23 +++++++++++++++++++++--
 arch/s390/mm/fault.c              | 26 ++++++++++++++++++++++----
 4 files changed, 46 insertions(+), 6 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 2204400..66101f6 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -767,6 +767,7 @@ static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
  * @table: pointer to the page directory
  * @asce: address space control element for gmap page table
  * @crst_list: list of all crst tables used in the guest address space
+ * @pfault_enabled: defines if pfaults are applicable for the guest
  */
 struct gmap {
 	struct list_head list;
@@ -775,6 +776,7 @@ struct gmap {
 	unsigned long asce;
 	void *private;
 	struct list_head crst_list;
+	bool pfault_enabled;
 };
 
 /**
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 0a876bc..dc5fc4f 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -79,6 +79,7 @@ struct thread_struct {
         unsigned long ksp;              /* kernel stack pointer             */
 	mm_segment_t mm_segment;
 	unsigned long gmap_addr;	/* address of last gmap fault. */
+	unsigned int gmap_pfault;	/* signal of a pending guest pfault */
 	struct per_regs per_user;	/* User specified PER registers */
 	struct per_event per_event;	/* Cause of the last PER trap */
 	unsigned long per_flags;	/* Flags to control debug behavior */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 782420f..9eec794 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -255,6 +255,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 		if (!kvm->arch.gmap)
 			goto out_nogmap;
 		kvm->arch.gmap->private = kvm;
+		kvm->arch.gmap->pfault_enabled = 0;
 	}
 
 	kvm->arch.css_support = 0;
@@ -701,6 +702,17 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
+{
+	long rc;
+	hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
+	struct mm_struct *mm = current->mm;
+	down_read(&mm->mmap_sem);
+	rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL);
+	up_read(&mm->mmap_sem);
+	return rc;
+}
+
 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 {
 	int rc, cpuflags;
@@ -730,7 +742,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 
 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 {
-	int rc;
+	int rc = -1;
 
 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
 		   vcpu->arch.sie_block->icptcode);
@@ -744,7 +756,14 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 						current->thread.gmap_addr;
 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
 		rc = -EREMOTE;
-	} else {
+
+	} else if (current->thread.gmap_pfault) {
+		current->thread.gmap_pfault = 0;
+		if (kvm_arch_fault_in_sync(vcpu) >= 0)
+			rc = 0;
+	}
+
+	if (rc == -1) {
 		VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
 		trace_kvm_s390_sie_fault(vcpu);
 		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index d95265b2..88cef50 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -50,6 +50,7 @@
 #define VM_FAULT_BADMAP		0x020000
 #define VM_FAULT_BADACCESS	0x040000
 #define VM_FAULT_SIGNAL		0x080000
+#define VM_FAULT_PFAULT		0x100000
 
 static unsigned long store_indication __read_mostly;
 
@@ -227,6 +228,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
 			return;
 		}
 	case VM_FAULT_BADCONTEXT:
+	case VM_FAULT_PFAULT:
 		do_no_context(regs);
 		break;
 	case VM_FAULT_SIGNAL:
@@ -264,6 +266,9 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
  */
 static inline int do_exception(struct pt_regs *regs, int access)
 {
+#ifdef CONFIG_PGSTE
+	struct gmap *gmap;
+#endif
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
@@ -304,9 +309,10 @@ static inline int do_exception(struct pt_regs *regs, int access)
 	down_read(&mm->mmap_sem);
 
 #ifdef CONFIG_PGSTE
-	if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
-		address = __gmap_fault(address,
-				     (struct gmap *) S390_lowcore.gmap);
+	gmap = (struct gmap *)
+		((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0);
+	if (gmap) {
+		address = __gmap_fault(address, gmap);
 		if (address == -EFAULT) {
 			fault = VM_FAULT_BADMAP;
 			goto out_up;
@@ -315,6 +321,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
 			fault = VM_FAULT_OOM;
 			goto out_up;
 		}
+		if (gmap->pfault_enabled)
+			flags |= FAULT_FLAG_RETRY_NOWAIT;
 	}
 #endif
 
@@ -371,9 +379,19 @@ retry:
 				      regs, address);
 		}
 		if (fault & VM_FAULT_RETRY) {
+#ifdef CONFIG_PGSTE
+			if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
+				/* FAULT_FLAG_RETRY_NOWAIT has been set,
+				 * mmap_sem has not been released */
+				current->thread.gmap_pfault = 1;
+				fault = VM_FAULT_PFAULT;
+				goto out_up;
+			}
+#endif
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			 * of starvation. */
-			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags &= ~(FAULT_FLAG_ALLOW_RETRY |
+				   FAULT_FLAG_RETRY_NOWAIT);
 			flags |= FAULT_FLAG_TRIED;
 			down_read(&mm->mmap_sem);
 			goto retry;
-- 
cgit v1.1


From 3c038e6be0e299d4d3762d0a9a29f02de6e04991 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 17:11:48 +0200
Subject: KVM: async_pf: Async page fault support on s390

This patch enables async page faults for s390 kvm guests.
It provides the userspace API to enable and disable_wait this feature.
The disable_wait will enforce that the feature is off by waiting on it.
Also it includes the diagnose code, called by the guest to enable async page faults.

The async page faults will use an already existing guest interface for this
purpose, as described in "CP Programming Services (SC24-6084)".

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h | 22 ++++++++++
 arch/s390/include/uapi/asm/kvm.h |  2 +
 arch/s390/kvm/Kconfig            |  2 +
 arch/s390/kvm/Makefile           |  2 +-
 arch/s390/kvm/diag.c             | 84 +++++++++++++++++++++++++++++++++++++
 arch/s390/kvm/interrupt.c        | 65 +++++++++++++++++++++++++----
 arch/s390/kvm/kvm-s390.c         | 89 +++++++++++++++++++++++++++++++++++++++-
 arch/s390/kvm/kvm-s390.h         |  4 ++
 arch/s390/kvm/sigp.c             |  7 ++++
 arch/s390/kvm/trace.h            | 46 +++++++++++++++++++++
 10 files changed, 314 insertions(+), 9 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index c3c5e10..2c69ba2 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -231,6 +231,10 @@ struct kvm_vcpu_arch {
 		u64		stidp_data;
 	};
 	struct gmap *gmap;
+#define KVM_S390_PFAULT_TOKEN_INVALID	(-1UL)
+	unsigned long pfault_token;
+	unsigned long pfault_select;
+	unsigned long pfault_compare;
 };
 
 struct kvm_vm_stat {
@@ -257,6 +261,24 @@ static inline bool kvm_is_error_hva(unsigned long addr)
 	return IS_ERR_VALUE(addr);
 }
 
+#define ASYNC_PF_PER_VCPU	64
+struct kvm_vcpu;
+struct kvm_async_pf;
+struct kvm_arch_async_pf {
+	unsigned long pfault_token;
+};
+
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+			       struct kvm_async_pf *work);
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+				     struct kvm_async_pf *work);
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+				 struct kvm_async_pf *work);
+
 extern int sie64a(struct kvm_s390_sie_block *, u64 *);
 extern char sie_exit;
 #endif
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 058b178..ccfd0b1 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -20,6 +20,8 @@
 #define KVM_DEV_FLIC_GET_ALL_IRQS	1
 #define KVM_DEV_FLIC_ENQUEUE		2
 #define KVM_DEV_FLIC_CLEAR_IRQS		3
+#define KVM_DEV_FLIC_APF_ENABLE		4
+#define KVM_DEV_FLIC_APF_DISABLE_WAIT	5
 /*
  * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
  * as well as up  to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 70b46ea..c8bacbc 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -23,6 +23,8 @@ config KVM
 	select ANON_INODES
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select HAVE_KVM_EVENTFD
+	select KVM_ASYNC_PF
+	select KVM_ASYNC_PF_SYNC
 	---help---
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 40b4c64..a47d2c3 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -7,7 +7,7 @@
 # as published by the Free Software Foundation.
 
 KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 8216c0e..bf9ed34 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -17,6 +17,7 @@
 #include "kvm-s390.h"
 #include "trace.h"
 #include "trace-s390.h"
+#include "gaccess.h"
 
 static int diag_release_pages(struct kvm_vcpu *vcpu)
 {
@@ -46,6 +47,87 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
+{
+	struct prs_parm {
+		u16 code;
+		u16 subcode;
+		u16 parm_len;
+		u16 parm_version;
+		u64 token_addr;
+		u64 select_mask;
+		u64 compare_mask;
+		u64 zarch;
+	};
+	struct prs_parm parm;
+	int rc;
+	u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
+	u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
+	unsigned long hva_token = KVM_HVA_ERR_BAD;
+
+	if (vcpu->run->s.regs.gprs[rx] & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	switch (parm.subcode) {
+	case 0: /* TOKEN */
+		if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
+			/*
+			 * If the pagefault handshake is already activated,
+			 * the token must not be changed.  We have to return
+			 * decimal 8 instead, as mandated in SC24-6084.
+			 */
+			vcpu->run->s.regs.gprs[ry] = 8;
+			return 0;
+		}
+
+		if ((parm.compare_mask & parm.select_mask) != parm.compare_mask ||
+		    parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
+			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+		hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
+		if (kvm_is_error_hva(hva_token))
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+		vcpu->arch.pfault_token = parm.token_addr;
+		vcpu->arch.pfault_select = parm.select_mask;
+		vcpu->arch.pfault_compare = parm.compare_mask;
+		vcpu->run->s.regs.gprs[ry] = 0;
+		rc = 0;
+		break;
+	case 1: /*
+		 * CANCEL
+		 * Specification allows to let already pending tokens survive
+		 * the cancel, therefore to reduce code complexity, we assume
+		 * all outstanding tokens are already pending.
+		 */
+		if (parm.token_addr || parm.select_mask ||
+		    parm.compare_mask || parm.zarch)
+			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+		vcpu->run->s.regs.gprs[ry] = 0;
+		/*
+		 * If the pfault handling was not established or is already
+		 * canceled SC24-6084 requests to return decimal 4.
+		 */
+		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+			vcpu->run->s.regs.gprs[ry] = 4;
+		else
+			vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+
+		rc = 0;
+		break;
+	default:
+		rc = -EOPNOTSUPP;
+		break;
+	}
+
+	return rc;
+}
+
 static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
 {
 	VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
@@ -150,6 +232,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 		return __diag_time_slice_end(vcpu);
 	case 0x9c:
 		return __diag_time_slice_end_directed(vcpu);
+	case 0x258:
+		return __diag_page_ref_service(vcpu);
 	case 0x308:
 		return __diag_ipl_functions(vcpu);
 	case 0x500:
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 9c9192b..1848080 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -31,7 +31,7 @@ static int is_ioint(u64 type)
 	return ((type & 0xfffe0000u) != 0xfffe0000u);
 }
 
-static int psw_extint_disabled(struct kvm_vcpu *vcpu)
+int psw_extint_disabled(struct kvm_vcpu *vcpu)
 {
 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
 }
@@ -78,11 +78,8 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
 			return 1;
 		return 0;
 	case KVM_S390_INT_SERVICE:
-		if (psw_extint_disabled(vcpu))
-			return 0;
-		if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
-			return 1;
-		return 0;
+	case KVM_S390_INT_PFAULT_INIT:
+	case KVM_S390_INT_PFAULT_DONE:
 	case KVM_S390_INT_VIRTIO:
 		if (psw_extint_disabled(vcpu))
 			return 0;
@@ -150,6 +147,8 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
 	case KVM_S390_INT_EXTERNAL_CALL:
 	case KVM_S390_INT_EMERGENCY:
 	case KVM_S390_INT_SERVICE:
+	case KVM_S390_INT_PFAULT_INIT:
+	case KVM_S390_INT_PFAULT_DONE:
 	case KVM_S390_INT_VIRTIO:
 		if (psw_extint_disabled(vcpu))
 			__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
@@ -223,6 +222,30 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		rc |= put_guest(vcpu, inti->ext.ext_params,
 				(u32 __user *)__LC_EXT_PARAMS);
 		break;
+	case KVM_S390_INT_PFAULT_INIT:
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
+						 inti->ext.ext_params2);
+		rc  = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
+		rc |= put_guest(vcpu, 0x0600, (u16 __user *) __LC_EXT_CPU_ADDR);
+		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_EXT_NEW_PSW, sizeof(psw_t));
+		rc |= put_guest(vcpu, inti->ext.ext_params2,
+				(u64 __user *) __LC_EXT_PARAMS2);
+		break;
+	case KVM_S390_INT_PFAULT_DONE:
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
+						 inti->ext.ext_params2);
+		rc  = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
+		rc |= put_guest(vcpu, 0x0680, (u16 __user *) __LC_EXT_CPU_ADDR);
+		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_EXT_NEW_PSW, sizeof(psw_t));
+		rc |= put_guest(vcpu, inti->ext.ext_params2,
+				(u64 __user *) __LC_EXT_PARAMS2);
+		break;
 	case KVM_S390_INT_VIRTIO:
 		VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
 			   inti->ext.ext_params, inti->ext.ext_params2);
@@ -357,7 +380,7 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
@@ -737,6 +760,10 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 		VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
 		inti->ext.ext_params = s390int->parm;
 		break;
+	case KVM_S390_INT_PFAULT_DONE:
+		inti->type = s390int->type;
+		inti->ext.ext_params2 = s390int->parm64;
+		break;
 	case KVM_S390_MCHK:
 		VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
 			 s390int->parm64);
@@ -823,6 +850,10 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 		inti->type = s390int->type;
 		inti->mchk.mcic = s390int->parm64;
 		break;
+	case KVM_S390_INT_PFAULT_INIT:
+		inti->type = s390int->type;
+		inti->ext.ext_params2 = s390int->parm64;
+		break;
 	case KVM_S390_INT_VIRTIO:
 	case KVM_S390_INT_SERVICE:
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
@@ -877,6 +908,8 @@ static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
 
 	irq.type = inti->type;
 	switch (inti->type) {
+	case KVM_S390_INT_PFAULT_INIT:
+	case KVM_S390_INT_PFAULT_DONE:
 	case KVM_S390_INT_VIRTIO:
 	case KVM_S390_INT_SERVICE:
 		irq.u.ext = inti->ext;
@@ -956,6 +989,8 @@ static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti,
 		return -EFAULT;
 
 	switch (inti->type) {
+	case KVM_S390_INT_PFAULT_INIT:
+	case KVM_S390_INT_PFAULT_DONE:
 	case KVM_S390_INT_VIRTIO:
 	case KVM_S390_INT_SERVICE:
 		target = (void *) &inti->ext;
@@ -1019,6 +1054,8 @@ static int enqueue_floating_irq(struct kvm_device *dev,
 static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
 	int r = 0;
+	unsigned int i;
+	struct kvm_vcpu *vcpu;
 
 	switch (attr->group) {
 	case KVM_DEV_FLIC_ENQUEUE:
@@ -1028,6 +1065,20 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		r = 0;
 		clear_floating_interrupts(dev->kvm);
 		break;
+	case KVM_DEV_FLIC_APF_ENABLE:
+		dev->kvm->arch.gmap->pfault_enabled = 1;
+		break;
+	case KVM_DEV_FLIC_APF_DISABLE_WAIT:
+		dev->kvm->arch.gmap->pfault_enabled = 0;
+		/*
+		 * Make sure no async faults are in transition when
+		 * clearing the queues. So we don't need to worry
+		 * about late coming workers.
+		 */
+		synchronize_srcu(&dev->kvm->srcu);
+		kvm_for_each_vcpu(i, vcpu, dev->kvm)
+			kvm_clear_async_pf_completion_queue(vcpu);
+		break;
 	default:
 		r = -EINVAL;
 	}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 9eec794..d8e9f04 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -152,6 +152,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 #ifdef CONFIG_KVM_S390_UCONTROL
 	case KVM_CAP_S390_UCONTROL:
 #endif
+	case KVM_CAP_ASYNC_PF:
 	case KVM_CAP_SYNC_REGS:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_ENABLE_CAP:
@@ -273,6 +274,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
+	kvm_clear_async_pf_completion_queue(vcpu);
 	if (!kvm_is_ucontrol(vcpu->kvm)) {
 		clear_bit(63 - vcpu->vcpu_id,
 			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
@@ -322,6 +324,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 /* Section: vcpu related */
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+	kvm_clear_async_pf_completion_queue(vcpu);
 	if (kvm_is_ucontrol(vcpu->kvm)) {
 		vcpu->arch.gmap = gmap_alloc(current->mm);
 		if (!vcpu->arch.gmap)
@@ -382,6 +386,8 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.guest_fpregs.fpc = 0;
 	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
 	vcpu->arch.sie_block->gbea = 1;
+	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+	kvm_clear_async_pf_completion_queue(vcpu);
 	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
 }
 
@@ -713,10 +719,89 @@ static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
 	return rc;
 }
 
+static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
+				      unsigned long token)
+{
+	struct kvm_s390_interrupt inti;
+	inti.parm64 = token;
+
+	if (start_token) {
+		inti.type = KVM_S390_INT_PFAULT_INIT;
+		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
+	} else {
+		inti.type = KVM_S390_INT_PFAULT_DONE;
+		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
+	}
+}
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+				     struct kvm_async_pf *work)
+{
+	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
+	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
+}
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+				 struct kvm_async_pf *work)
+{
+	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
+	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
+}
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+			       struct kvm_async_pf *work)
+{
+	/* s390 will always inject the page directly */
+}
+
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * s390 will always inject the page directly,
+	 * but we still want check_async_completion to cleanup
+	 */
+	return true;
+}
+
+static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
+{
+	hva_t hva;
+	struct kvm_arch_async_pf arch;
+	int rc;
+
+	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+		return 0;
+	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
+	    vcpu->arch.pfault_compare)
+		return 0;
+	if (psw_extint_disabled(vcpu))
+		return 0;
+	if (kvm_cpu_has_interrupt(vcpu))
+		return 0;
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+		return 0;
+	if (!vcpu->arch.gmap->pfault_enabled)
+		return 0;
+
+	hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
+	if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
+		return 0;
+
+	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
+	return rc;
+}
+
 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 {
 	int rc, cpuflags;
 
+	/*
+	 * On s390 notifications for arriving pages will be delivered directly
+	 * to the guest but the house keeping for completed pfaults is
+	 * handled outside the worker.
+	 */
+	kvm_check_async_pf_completion(vcpu);
+
 	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
 
 	if (need_resched())
@@ -758,8 +843,10 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 		rc = -EREMOTE;
 
 	} else if (current->thread.gmap_pfault) {
+		trace_kvm_s390_major_guest_pfault(vcpu);
 		current->thread.gmap_pfault = 0;
-		if (kvm_arch_fault_in_sync(vcpu) >= 0)
+		if (kvm_arch_setup_async_pf(vcpu) ||
+		    (kvm_arch_fault_in_sync(vcpu) >= 0))
 			rc = 0;
 	}
 
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index f9559b0..ed4750a 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -159,4 +159,8 @@ void exit_sie_sync(struct kvm_vcpu *vcpu);
 /* implemented in diag.c */
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
 
+/* implemented in interrupt.c */
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
+int psw_extint_disabled(struct kvm_vcpu *vcpu);
+
 #endif
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 87c2b3a..fe9442d 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -224,6 +224,8 @@ unlock:
 static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
 {
 	int rc;
+	unsigned int i;
+	struct kvm_vcpu *v;
 
 	switch (parameter & 0xff) {
 	case 0:
@@ -231,6 +233,11 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
 		break;
 	case 1:
 	case 2:
+		kvm_for_each_vcpu(i, v, vcpu->kvm) {
+			v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+			kvm_clear_async_pf_completion_queue(v);
+		}
+
 		rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 		break;
 	default:
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index 3db76b2..e8e7213 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -30,6 +30,52 @@
 	TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id,		\
 		  __entry->pswmask, __entry->pswaddr, p_args)
 
+TRACE_EVENT(kvm_s390_major_guest_pfault,
+	    TP_PROTO(VCPU_PROTO_COMMON),
+	    TP_ARGS(VCPU_ARGS_COMMON),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    ),
+	    VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault")
+	);
+
+TRACE_EVENT(kvm_s390_pfault_init,
+	    TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
+	    TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(long, pfault_token)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->pfault_token = pfault_token;
+		    ),
+	    VCPU_TP_PRINTK("init pfault token %ld", __entry->pfault_token)
+	);
+
+TRACE_EVENT(kvm_s390_pfault_done,
+	    TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
+	    TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(long, pfault_token)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->pfault_token = pfault_token;
+		    ),
+	    VCPU_TP_PRINTK("done pfault token %ld", __entry->pfault_token)
+	);
+
 /*
  * Tracepoints for SIE entry and exit.
  */
-- 
cgit v1.1


From 536336c21697551ceca44bdffb9f53e6cc5f2f20 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Mon, 30 Sep 2013 10:55:33 +0200
Subject: KVM: async_pf: Exploit one reg interface for pfault

To enable pfault after live migration we need to expose pfault_token,
pfault_select and pfault_compare, as one reg registers to userspace.

So that qemu is able to transfer this between the source and the target.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/uapi/asm/kvm.h |  3 +++
 arch/s390/kvm/kvm-s390.c         | 24 ++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'arch/s390')

diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index ccfd0b1..cb4c1eb8 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -73,4 +73,7 @@ struct kvm_sync_regs {
 #define KVM_REG_S390_EPOCHDIFF	(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)
 #define KVM_REG_S390_CPU_TIMER  (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3)
 #define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4)
+#define KVM_REG_S390_PFTOKEN	(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5)
+#define KVM_REG_S390_PFCOMPARE	(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6)
+#define KVM_REG_S390_PFSELECT	(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7)
 #endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d8e9f04..a5da2cc 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -561,6 +561,18 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
 		r = put_user(vcpu->arch.sie_block->ckc,
 			     (u64 __user *)reg->addr);
 		break;
+	case KVM_REG_S390_PFTOKEN:
+		r = put_user(vcpu->arch.pfault_token,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PFCOMPARE:
+		r = put_user(vcpu->arch.pfault_compare,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PFSELECT:
+		r = put_user(vcpu->arch.pfault_select,
+			     (u64 __user *)reg->addr);
+		break;
 	default:
 		break;
 	}
@@ -590,6 +602,18 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
 		r = get_user(vcpu->arch.sie_block->ckc,
 			     (u64 __user *)reg->addr);
 		break;
+	case KVM_REG_S390_PFTOKEN:
+		r = get_user(vcpu->arch.pfault_token,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PFCOMPARE:
+		r = get_user(vcpu->arch.pfault_compare,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PFSELECT:
+		r = get_user(vcpu->arch.pfault_select,
+			     (u64 __user *)reg->addr);
+		break;
 	default:
 		break;
 	}
-- 
cgit v1.1