From 72e1ad4200d5ed5c5adf120b77fb2900a29a48e5 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 19 Sep 2017 12:34:06 +0200 Subject: KVM: s390: document memory ordering for kvm_s390_vcpu_wakeup swait_active does not enforce any ordering and it can therefore trigger some subtle races when the CPU moves the read for the check before a previous store and that store is then used on another CPU that is preparing the swait. On s390 there is a call to swait_active in kvm_s390_vcpu_wakeup. The good thing is, on s390 all potential races cannot happen because all callers of kvm_s390_vcpu_wakeup do not store (no race) or use an atomic operation, which handles memory ordering. Since this is not guaranteed by the Linux semantics (but by the implementation on s390) let's add smp_mb_after_atomic to make this obvious and document the ordering. Suggested-by: Paolo Bonzini Acked-by: Halil Pasic Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/s390/kvm/interrupt.c') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index a832ad0..23d8fb2 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1074,6 +1074,12 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) * in kvm_vcpu_block without having the waitqueue set (polling) */ vcpu->valid_wakeup = true; + /* + * This is mostly to document, that the read in swait_active could + * be moved before other stores, leading to subtle races. + * All current users do not store or use an atomic like update + */ + smp_mb__after_atomic(); if (swait_active(&vcpu->wq)) { /* * The vcpu gave up the cpu voluntarily, mark it as a good -- cgit v1.1 From ee739f4b216e9394281cf99e6d93c67bdf4a37d2 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Mon, 3 Jul 2017 15:32:50 +0200 Subject: KVM: s390: abstract conversion between isc and enum irq_types The abstraction of the conversion between an isc value and an irq_type by means of functions isc_to_irq_type() and irq_type_to_isc() allows to clarify the respective operations where used. Signed-off-by: Michael Mueller Reviewed-by: Halil Pasic Reviewed-by: Pierre Morel Reviewed-by: Christian Borntraeger Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'arch/s390/kvm/interrupt.c') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 23d8fb2..a3da4f3 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -213,6 +213,16 @@ static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) vcpu->arch.local_int.pending_irqs; } +static inline int isc_to_irq_type(unsigned long isc) +{ + return IRQ_PEND_IO_ISC_0 + isc; +} + +static inline int irq_type_to_isc(unsigned long irq_type) +{ + return irq_type - IRQ_PEND_IO_ISC_0; +} + static unsigned long disable_iscs(struct kvm_vcpu *vcpu, unsigned long active_mask) { @@ -220,7 +230,7 @@ static unsigned long disable_iscs(struct kvm_vcpu *vcpu, for (i = 0; i <= MAX_ISC; i++) if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i))) - active_mask &= ~(1UL << (IRQ_PEND_IO_ISC_0 + i)); + active_mask &= ~(1UL << (isc_to_irq_type(i))); return active_mask; } @@ -901,7 +911,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, fi = &vcpu->kvm->arch.float_int; spin_lock(&fi->lock); - isc_list = &fi->lists[irq_type - IRQ_PEND_IO_ISC_0]; + isc_list = &fi->lists[irq_type_to_isc(irq_type)]; inti = list_first_entry_or_null(isc_list, struct kvm_s390_interrupt_info, list); @@ -1401,7 +1411,7 @@ static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm, list_del_init(&iter->list); fi->counters[FIRQ_CNTR_IO] -= 1; if (list_empty(isc_list)) - clear_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs); + clear_bit(isc_to_irq_type(isc), &fi->pending_irqs); spin_unlock(&fi->lock); return iter; } @@ -1528,7 +1538,7 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) isc = int_word_to_isc(inti->io.io_int_word); list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc]; list_add_tail(&inti->list, list); - set_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs); + set_bit(isc_to_irq_type(isc), &fi->pending_irqs); spin_unlock(&fi->lock); return 0; } -- cgit v1.1 From 4dd6f17eb913d3d23dd6c07950627ac2c3068dca Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Thu, 6 Jul 2017 14:22:20 +0200 Subject: KVM: s390: clear_io_irq() requests are not expected for adapter interrupts There is a chance to delete not yet delivered I/O interrupts if an exploiter uses the subsystem identification word 0x0000 while processing a KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl. -EINVAL will be returned now instead in that case. Classic interrupts will always have bit 0x10000 set in the schid while adapter interrupts have a zero schid. The clear_io_irq interface is only useful for classic interrupts (as adapter interrupts belong to many devices). Let's make this interface more strict and forbid a schid of 0. Signed-off-by: Michael Mueller Reviewed-by: Halil Pasic Reviewed-by: Christian Borntraeger Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390/kvm/interrupt.c') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index a3da4f3..c8aacce 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2191,6 +2191,8 @@ static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr) return -EINVAL; if (copy_from_user(&schid, (void __user *) attr->addr, sizeof(schid))) return -EFAULT; + if (!schid) + return -EINVAL; kfree(kvm_s390_get_io_int(kvm, isc_mask, schid)); /* * If userspace is conforming to the architecture, we can have at most -- cgit v1.1