From b8cee18cc75d7b9dbe6c6526dfae9ab49e84fa95 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 21 May 2008 13:37:16 +0200 Subject: KVM: s390: use yield instead of schedule to implement diag 0x44 diag 0x44 is the common way on s390 to yield the cpu to the hypervisor. It is called by the guest in cpu_relax and in the spinlock code to yield to other guest cpus. This semantic is similar to yield. Lets replace the call to schedule with yield to make sure that current is really yielding. Signed-off-by: Christian Borntraeger Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/kvm/diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index f639a15..a0775e1 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -20,7 +20,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); vcpu->stat.diagnose_44++; vcpu_put(vcpu); - schedule(); + yield(); vcpu_load(vcpu); return 0; } -- cgit v1.1 From 74b6b522ec83f9c44fc7743f2adcb24664aa8f45 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 21 May 2008 13:37:29 +0200 Subject: KVM: s390: fix locking order problem in enable_sie There are potential locking problem in enable_sie. We take the task_lock and the mmap_sem. As exit_mm uses the same locks vice versa, this triggers a lockdep warning. The second problem is that dup_mm and mmput might sleep, so we must not hold the task_lock at that moment. The solution is to dup the mm unconditional and use the task_lock before and afterwards to check if we can use the new mm. dup_mm and mmput are called outside the task_lock, but we run update_mm while holding the task_lock, protection us against ptrace. Signed-off-by: Christian Borntraeger Signed-off-by: Carsten Otte Acked-by: Martin Schwidefsky Signed-off-by: Avi Kivity --- arch/s390/mm/pgtable.c | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 5c1aea9..3d98ba8 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -254,36 +254,46 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) int s390_enable_sie(void) { struct task_struct *tsk = current; - struct mm_struct *mm; - int rc; + struct mm_struct *mm, *old_mm; - task_lock(tsk); - - rc = 0; + /* Do we have pgstes? if yes, we are done */ if (tsk->mm->context.pgstes) - goto unlock; + return 0; - rc = -EINVAL; + /* lets check if we are allowed to replace the mm */ + task_lock(tsk); if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || - tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) - goto unlock; + tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) { + task_unlock(tsk); + return -EINVAL; + } + task_unlock(tsk); - tsk->mm->context.pgstes = 1; /* dirty little tricks .. */ + /* we copy the mm with pgstes enabled */ + tsk->mm->context.pgstes = 1; mm = dup_mm(tsk); tsk->mm->context.pgstes = 0; - - rc = -ENOMEM; if (!mm) - goto unlock; - mmput(tsk->mm); + return -ENOMEM; + + /* Now lets check again if somebody attached ptrace etc */ + task_lock(tsk); + if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || + tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) { + mmput(mm); + task_unlock(tsk); + return -EINVAL; + } + + /* ok, we are alone. No ptrace, no threads, etc. */ + old_mm = tsk->mm; tsk->mm = tsk->active_mm = mm; preempt_disable(); update_mm(mm, tsk); cpu_set(smp_processor_id(), mm->cpu_vm_mask); preempt_enable(); - rc = 0; -unlock: task_unlock(tsk); - return rc; + mmput(old_mm); + return 0; } EXPORT_SYMBOL_GPL(s390_enable_sie); -- cgit v1.1 From 71cde5879f10b639506bc0b9f29a89f58b42a17e Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 21 May 2008 13:37:34 +0200 Subject: KVM: s390: handle machine checks when guest is running The low-level interrupt handler on s390 checks for _TIF_WORK_INT and exits the guest context, if work is pending. TIF_WORK_INT is defined as_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING. Currently the sie loop checks for signals and reschedule, but it does not check for machine checks. That means that we exit the guest context if a machine check is pending, but we do not handle the machine check. Signed-off-by: Christian Borntraeger CC: Heiko Carstens Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/kvm/kvm-s390.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0ac36a6..40e4f2de 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -423,6 +423,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return -EINVAL; /* not implemented yet */ } +extern void s390_handle_mcck(void); + static void __vcpu_run(struct kvm_vcpu *vcpu) { memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16); @@ -430,6 +432,9 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) if (need_resched()) schedule(); + if (test_thread_flag(TIF_MCCK_PENDING)) + s390_handle_mcck(); + vcpu->arch.sie_block->icptcode = 0; local_irq_disable(); kvm_guest_enter(); -- cgit v1.1 From 0ff318674503ce3787ef62d84f4d948db204b268 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Wed, 21 May 2008 13:37:37 +0200 Subject: KVM: s390: fix interrupt delivery The current code delivers pending interrupts before it checks for need_resched. On a busy host, this can lead to a longer interrupt latency if the interrupt is injected while the process is scheduled away. This patch moves delivering the interrupt _after_ schedule(), which makes more sense. Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/kvm/kvm-s390.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 40e4f2de..ded27c7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -435,6 +435,8 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) if (test_thread_flag(TIF_MCCK_PENDING)) s390_handle_mcck(); + kvm_s390_deliver_pending_interrupts(vcpu); + vcpu->arch.sie_block->icptcode = 0; local_irq_disable(); kvm_guest_enter(); @@ -480,7 +482,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) might_sleep(); do { - kvm_s390_deliver_pending_interrupts(vcpu); __vcpu_run(vcpu); rc = kvm_handle_sie_intercept(vcpu); } while (!signal_pending(current) && !rc); -- cgit v1.1 From 1f0d0f094df9a570dfc26d5eb825986b7e165e1d Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Wed, 21 May 2008 13:37:40 +0200 Subject: KVM: s390: Send program check on access error If the guest accesses non-existing memory, the sie64a function returns -EFAULT. We must check the return value and send a program check to the guest if the sie instruction faulted, otherwise the guest will loop at the faulting code. Signed-off-by: Christian Borntraeger Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/kvm/kvm-s390.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ded27c7..6558b09 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -443,7 +443,10 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) local_irq_enable(); VCPU_EVENT(vcpu, 6, "entering sie flags %x", atomic_read(&vcpu->arch.sie_block->cpuflags)); - sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs); + if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) { + VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); local_irq_disable(); -- cgit v1.1 From e52b2af541bcb299212a63cfa3e3231618a415be Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Wed, 21 May 2008 13:37:44 +0200 Subject: KVM: s390: Fix race condition in kvm_s390_handle_wait The call to add_timer was issued before local_int.lock was taken and before timer_due was set to 0. If the timer expires before the lock is being taken, the timer function will set timer_due to 1 and exit before the vcpu falls asleep. Depending on other external events, the vcpu might sleep forever. This fix pulls setting timer_due to the beginning of the function before add_timer, which ensures correct behavior. Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/kvm/interrupt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index fcd1ed8..84a7fed 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -339,6 +339,11 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) if (kvm_cpu_has_interrupt(vcpu)) return 0; + __set_cpu_idle(vcpu); + spin_lock_bh(&vcpu->arch.local_int.lock); + vcpu->arch.local_int.timer_due = 0; + spin_unlock_bh(&vcpu->arch.local_int.lock); + if (psw_interrupts_disabled(vcpu)) { VCPU_EVENT(vcpu, 3, "%s", "disabled wait"); __unset_cpu_idle(vcpu); @@ -366,8 +371,6 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) no_timer: spin_lock_bh(&vcpu->arch.local_int.float_int->lock); spin_lock_bh(&vcpu->arch.local_int.lock); - __set_cpu_idle(vcpu); - vcpu->arch.local_int.timer_due = 0; add_wait_queue(&vcpu->arch.local_int.wq, &wait); while (list_empty(&vcpu->arch.local_int.list) && list_empty(&vcpu->arch.local_int.float_int->list) && -- cgit v1.1