x86/paravirt: add hooks for spinlock operations

Ticket spinlocks have absolutely ghastly worst-case performance characteristics in a virtual environment. If there is any contention for physical CPUs (ie, there are more runnable vcpus than cpus), then ticket locks can cause the system to end up spending 90+% of its time spinning. The problem is that (v)cpus waiting on a ticket spinlock will be granted access to the lock in strict order they got their tickets. If the hypervisor scheduler doesn't give the vcpus time in that order, they will burn timeslices waiting for the scheduler to give the right vcpu some time. In the worst case it could take O(n^2) vcpu scheduler timeslices for everyone waiting on the lock to get it, not counting new cpus trying to take the lock while the log-jam is sorted out. These hooks allow a paravirt backend to replace the spinlock implementation. At the very least, this could revert the implementation back to the old lock algorithm, which allows the next scheduled vcpu to take the lock, and has basically fairly good performance. It also allows the spinlocks to take advantages of the hypervisor features to make locks more efficient (spin and block, for example). The cost to native execution is an extra direct call when using a spinlock function. There's no overhead if CONFIG_PARAVIRT is turned off. The lock structure is fixed at a single "unsigned int", initialized to zero, but the spinlock implementation can use it as it wishes. Thanks to Thomas Friebel's Xen Summit talk "Preventing Guests from Spinning Around" for pointing out this problem. Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Christoph Lameter <clameter@linux-foundation.org> Cc: Petr Tesarik <ptesarik@suse.cz> Cc: Virtualization <virtualization@lists.linux-foundation.org> Cc: Xen devel <xen-devel@lists.xensource.com> Cc: Thomas Friebel <thomas.friebel@amd.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Jeremy Fitzhardinge <jeremy@goop.org> 2008-07-07 12:07:50 -0700
committer: Ingo Molnar <mingo@elte.hu> 2008-07-16 11:15:52 +0200
commit: 74d4affde8feb8d5bdebf7fba8e90e4eae3b7b1d (patch)
tree: ea70d2323c8a424e8c20389514c6c91f149cdf72 /include/asm-x86/paravirt.h
parent: 094029479be8eb380447f42eff1b35362ef1a464 (diff)
download: op-kernel-dev-74d4affde8feb8d5bdebf7fba8e90e4eae3b7b1d.zip
op-kernel-dev-74d4affde8feb8d5bdebf7fba8e90e4eae3b7b1d.tar.gz
1 files changed, 37 insertions, 0 deletions
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index eef8095..feb6bb6 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -326,6 +326,15 @@ struct pv_mmu_ops {
 			   unsigned long phys, pgprot_t flags);
 };
 
+struct raw_spinlock;
+struct pv_lock_ops {
+	int (*spin_is_locked)(struct raw_spinlock *lock);
+	int (*spin_is_contended)(struct raw_spinlock *lock);
+	void (*spin_lock)(struct raw_spinlock *lock);
+	int (*spin_trylock)(struct raw_spinlock *lock);
+	void (*spin_unlock)(struct raw_spinlock *lock);
+};
+
 /* This contains all the paravirt structures: we get a convenient
  * number for each function using the offset which we use to indicate
  * what to patch. */
@@ -336,6 +345,7 @@ struct paravirt_patch_template {
 	struct pv_irq_ops pv_irq_ops;
 	struct pv_apic_ops pv_apic_ops;
 	struct pv_mmu_ops pv_mmu_ops;
+	struct pv_lock_ops pv_lock_ops;
 };
 
 extern struct pv_info pv_info;
@@ -345,6 +355,7 @@ extern struct pv_cpu_ops pv_cpu_ops;
 extern struct pv_irq_ops pv_irq_ops;
 extern struct pv_apic_ops pv_apic_ops;
 extern struct pv_mmu_ops pv_mmu_ops;
+extern struct pv_lock_ops pv_lock_ops;
 
 #define PARAVIRT_PATCH(x)					\
 	(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
@@ -1374,6 +1385,31 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
 void _paravirt_nop(void);
 #define paravirt_nop	((void *)_paravirt_nop)
 
+static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
+{
+	return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
+}
+
+static inline int __raw_spin_is_contended(struct raw_spinlock *lock)
+{
+	return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
+}
+
+static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
+{
+	return PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
+}
+
+static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
+{
+	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
+}
+
+static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
+{
+	return PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
+}
+
 /* These all sit in the .parainstructions section to tell us what to patch. */
 struct paravirt_patch_site {
 	u8 *instr; 		/* original instructions */
@@ -1458,6 +1494,7 @@ static inline unsigned long __raw_local_irq_save(void)
 	return f;
 }
 
+
 /* Make sure as little as possible of this mess escapes. */
 #undef PARAVIRT_CALL
 #undef __PVOP_CALL
author	Jeremy Fitzhardinge <jeremy@goop.org>	2008-07-07 12:07:50 -0700
committer	Ingo Molnar <mingo@elte.hu>	2008-07-16 11:15:52 +0200
commit	74d4affde8feb8d5bdebf7fba8e90e4eae3b7b1d (patch)
tree	ea70d2323c8a424e8c20389514c6c91f149cdf72 /include/asm-x86/paravirt.h
parent	094029479be8eb380447f42eff1b35362ef1a464 (diff)
download	op-kernel-dev-74d4affde8feb8d5bdebf7fba8e90e4eae3b7b1d.zip op-kernel-dev-74d4affde8feb8d5bdebf7fba8e90e4eae3b7b1d.tar.gz