summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordillon <dillon@FreeBSD.org>1999-11-19 16:49:30 +0000
committerdillon <dillon@FreeBSD.org>1999-11-19 16:49:30 +0000
commit1ac9471dbc55b5fa42d36aa8041ffbe7f64bf917 (patch)
treea4124d514335e75961eb168aa446f957cbedfe22
parent3544d646b5ba034af1430a8d204a232f969894de (diff)
downloadFreeBSD-src-1ac9471dbc55b5fa42d36aa8041ffbe7f64bf917.zip
FreeBSD-src-1ac9471dbc55b5fa42d36aa8041ffbe7f64bf917.tar.gz
Optimize two cases in the MP locking code. First, it is not necessary
to use a locked cmpxchg when unlocking a lock that we already hold, since nobody else can touch the lock while we hold it. Second, it is not necessary to use a locked cmpxchg when locking a lock that we already hold, for the same reason. These changes will allow MP locks to be used recursively without impacting performance. Modify two procedures that are called only by assembly and are already NOPROF entries to pass a critical argument in %edx instead of on the stack, removing a significant amount of code from the critical path as a consequence. Reviewed by: Alfred Perlstein <bright@wintelcom.net>, Peter Wemm <peter@netplex.com.au>
-rw-r--r--sys/amd64/amd64/apic_vector.S5
-rw-r--r--sys/amd64/include/apicreg.h1
-rw-r--r--sys/i386/i386/apic_vector.s5
-rw-r--r--sys/i386/i386/mplock.s185
-rw-r--r--sys/i386/include/apic.h1
-rw-r--r--sys/i386/include/apicreg.h1
-rw-r--r--sys/i386/include/asnames.h2
-rw-r--r--sys/i386/isa/apic_vector.s5
-rw-r--r--sys/i386/isa/ipl.s5
9 files changed, 85 insertions, 125 deletions
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index 8e7ac03..ca909d9 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -26,9 +26,8 @@
call _get_isrlock
#define REL_FAST_INTR_LOCK \
- pushl $_mp_lock ; /* GIANT_LOCK */ \
- call _MPrellock ; \
- add $4, %esp
+ movl $_mp_lock, %edx ; /* GIANT_LOCK */ \
+ call _MPrellock_edx
#endif /* FAST_SIMPLELOCK */
diff --git a/sys/amd64/include/apicreg.h b/sys/amd64/include/apicreg.h
index 38c7070..be9de4f 100644
--- a/sys/amd64/include/apicreg.h
+++ b/sys/amd64/include/apicreg.h
@@ -223,6 +223,7 @@ typedef struct IOAPIC ioapic_t;
/* XXX these 2 don't really belong here... */
#define COUNT_FIELD 0x00ffffff /* count portion of the lock */
+#define CPU_FIELD 0xff000000 /* cpu portion of the lock */
#define FREE_LOCK 0xffffffff /* value of lock when free */
/*
diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s
index 8e7ac03..ca909d9 100644
--- a/sys/i386/i386/apic_vector.s
+++ b/sys/i386/i386/apic_vector.s
@@ -26,9 +26,8 @@
call _get_isrlock
#define REL_FAST_INTR_LOCK \
- pushl $_mp_lock ; /* GIANT_LOCK */ \
- call _MPrellock ; \
- add $4, %esp
+ movl $_mp_lock, %edx ; /* GIANT_LOCK */ \
+ call _MPrellock_edx
#endif /* FAST_SIMPLELOCK */
diff --git a/sys/i386/i386/mplock.s b/sys/i386/i386/mplock.s
index bb1a4bf..3f073fc 100644
--- a/sys/i386/i386/mplock.s
+++ b/sys/i386/i386/mplock.s
@@ -52,7 +52,7 @@
*/
/* location of saved TPR on stack */
-#define TPR_TARGET 12(%esp)
+#define TPR_TARGET 8(%esp)
/* after 1st acquire of lock we attempt to grab all hardware INTs */
#define GRAB_HWI movl $ALLHWI_LEVEL, TPR_TARGET
@@ -72,50 +72,41 @@
.text
/***********************************************************************
- * void MPgetlock(unsigned int *lock)
+ * void MPgetlock_edx(unsigned int *lock : %edx)
* ----------------------------------
- * Destroys %eax, %ecx, %edx and 12(%esp).
+ * Destroys %eax, %ecx. %edx must hold lock argument.
+ * Note: TPR_TARGET (relative to the stack) is destroyed in GRAB_HWI
*/
-NON_GPROF_ENTRY(MPgetlock)
- movl 4(%esp), %edx /* Get the address of the lock */
+NON_GPROF_ENTRY(MPgetlock_edx)
1:
- movl $FREE_LOCK, %eax /* Assume it's free */
- movl _cpu_lockid, %ecx /* - get pre-shifted logical cpu id */
- incl %ecx /* - new count is one */
- lock
- cmpxchg %ecx, (%edx) /* - try it atomically */
- jne 2f /* ...do not collect $200 */
-#ifdef GLPROFILE
- incl _gethits2
-#endif /* GLPROFILE */
- GRAB_HWI /* 1st acquire, grab hw INTs */
+ movl (%edx), %eax /* Get current contents of lock */
+ movl %eax, %ecx
+ andl $CPU_FIELD,%ecx
+ cmpl _cpu_lockid, %ecx /* Do we already own the lock? */
+ jne 2f
+ incl %eax /* yes, just bump the count */
+ movl %eax, (%edx)
ret
2:
- movl (%edx), %eax /* Try to see if we have it already */
- andl $COUNT_FIELD, %eax /* - get count */
- movl _cpu_lockid, %ecx /* - get pre-shifted logical cpu id */
- orl %ecx, %eax /* - combine them */
- movl %eax, %ecx
- incl %ecx /* - new count is one more */
+ movl $FREE_LOCK, %eax /* lock must be free */
+ movl _cpu_lockid, %ecx
+ incl %ecx
lock
- cmpxchg %ecx, (%edx) /* - try it atomically */
+ cmpxchg %ecx, (%edx) /* attempt to replace %eax<->%ecx */
#ifdef GLPROFILE
- jne 4f /* - miss */
- incl _gethits
+ jne 3f
+ incl _gethits2
#else
- jne 3f /* - miss */
+ jne 1b
#endif /* GLPROFILE */
+ GRAB_HWI /* 1st acquire, grab hw INTs */
ret
#ifdef GLPROFILE
-4:
- incl _gethits3
-#endif /* GLPROFILE */
3:
- cmpl $FREE_LOCK, (%edx) /* Wait for it to become free */
- jne 3b
+ incl _gethits3
jmp 1b
-
+#endif
/***********************************************************************
* int MPtrylock(unsigned int *lock)
@@ -163,28 +154,22 @@ NON_GPROF_ENTRY(MPtrylock)
/***********************************************************************
- * void MPrellock(unsigned int *lock)
+ * void MPrellock_edx(unsigned int *lock : %edx)
* ----------------------------------
- * Destroys %eax, %ecx and %edx.
+ * Destroys %ecx, argument must be in %edx
*/
-NON_GPROF_ENTRY(MPrellock)
- movl 4(%esp), %edx /* Get the address of the lock */
-1:
- movl (%edx), %eax /* - get the value */
- movl %eax, %ecx
+NON_GPROF_ENTRY(MPrellock_edx)
+ movl (%edx), %ecx /* - get the value */
decl %ecx /* - new count is one less */
testl $COUNT_FIELD, %ecx /* - Unless it's zero... */
jnz 2f
ARB_HWI /* last release, arbitrate hw INTs */
movl $FREE_LOCK, %ecx /* - In which case we release it */
2:
- lock
- cmpxchg %ecx, (%edx) /* - try it atomically */
- jne 1b /* ...do not collect $200 */
+ movl %ecx, (%edx)
ret
-
/***********************************************************************
* void get_mplock()
* -----------------
@@ -192,12 +177,11 @@ NON_GPROF_ENTRY(MPrellock)
*
* Stack (after call to _MPgetlock):
*
- * &mp_lock 4(%esp)
- * EFLAGS 8(%esp)
- * local APIC TPR 12(%esp)
- * edx 16(%esp)
- * ecx 20(%esp)
- * eax 24(%esp)
+ * EFLAGS 4(%esp)
+ * local APIC TPR 8(%esp) <-- note, TPR_TARGET
+ * edx 12(%esp)
+ * ecx 16(%esp)
+ * eax 20(%esp)
*/
NON_GPROF_ENTRY(get_mplock)
@@ -212,9 +196,8 @@ NON_GPROF_ENTRY(get_mplock)
jnz 1f /* INTs currently enabled */
sti /* allow IPI and FAST INTs */
1:
- pushl $_mp_lock
- call _MPgetlock
- add $4, %esp
+ movl $_mp_lock, %edx
+ call _MPgetlock_edx
popfl /* restore original EFLAGS */
popl lapic_tpr /* restore TPR */
@@ -235,13 +218,12 @@ NON_GPROF_ENTRY(boot_get_mplock)
pushl %edx
#ifdef GRAB_LOPRIO
- pushl $0
+ pushl $0 /* dummy TPR (TPR_TARGET) */
pushfl
#endif
- pushl $_mp_lock
- call _MPgetlock
- add $4, %esp
+ movl $_mp_lock, %edx
+ call _MPgetlock_edx
#ifdef GRAB_LOPRIO
popfl
@@ -276,15 +258,12 @@ NON_GPROF_ENTRY(try_mplock)
*/
NON_GPROF_ENTRY(rel_mplock)
- pushl %eax
pushl %ecx
pushl %edx
- pushl $_mp_lock
- call _MPrellock
- add $4, %esp
+ movl $_mp_lock,%edx
+ call _MPrellock_edx
popl %edx
popl %ecx
- popl %eax
ret
/***********************************************************************
@@ -294,21 +273,19 @@ NON_GPROF_ENTRY(rel_mplock)
*
* Stack (after call to _MPgetlock):
*
- * &mp_lock 4(%esp)
- * EFLAGS 8(%esp)
- * local APIC TPR 12(%esp)
+ * EFLAGS 4(%esp)
+ * local APIC TPR 8(%esp)
*/
NON_GPROF_ENTRY(get_isrlock)
/* block all HW INTs via Task Priority Register */
- pushl lapic_tpr /* save current TPR */
+ pushl lapic_tpr /* save current TPR (TPR_TARGET) */
pushfl /* save current EFLAGS */
sti /* allow IPI and FAST INTs */
- pushl $_mp_lock
- call _MPgetlock
- add $4, %esp
+ movl $_mp_lock, %edx
+ call _MPgetlock_edx
popfl /* restore original EFLAGS */
popl lapic_tpr /* restore TPR */
@@ -336,10 +313,8 @@ NON_GPROF_ENTRY(try_isrlock)
*/
NON_GPROF_ENTRY(rel_isrlock)
- pushl $_mp_lock
- call _MPrellock
- add $4, %esp
- ret
+ movl $_mp_lock,%edx
+ jmp _MPrellock_edx
/***********************************************************************
@@ -347,12 +322,11 @@ NON_GPROF_ENTRY(rel_isrlock)
*/
NON_GPROF_ENTRY(get_fpu_lock)
- pushl lapic_tpr
+ pushl lapic_tpr /* save current TPR (TPR_TARGET) */
pushfl
sti
- pushl $_mp_lock
- call _MPgetlock
- add $4, %esp
+ movl $_mp_lock, %edx
+ call _MPgetlock_edx
popfl
popl lapic_tpr
ret
@@ -365,10 +339,8 @@ NON_GPROF_ENTRY(try_fpu_lock)
ret
NON_GPROF_ENTRY(rel_fpu_lock)
- pushl $_mp_lock
- call _MPrellock
- add $4, %esp
- ret
+ movl $_mp_lock,%edx
+ jmp _MPrellock_edx
#endif /* notneeded */
@@ -377,12 +349,11 @@ NON_GPROF_ENTRY(rel_fpu_lock)
*/
NON_GPROF_ENTRY(get_align_lock)
- pushl lapic_tpr
+ pushl lapic_tpr /* save current TPR (TPR_TARGET) */
pushfl
sti
- pushl $_mp_lock
- call _MPgetlock
- add $4, %esp
+ movl $_mp_lock, %edx
+ call _MPgetlock_edx
popfl
popl lapic_tpr
ret
@@ -395,10 +366,8 @@ NON_GPROF_ENTRY(try_align_lock)
ret
NON_GPROF_ENTRY(rel_align_lock)
- pushl $_mp_lock
- call _MPrellock
- add $4, %esp
- ret
+ movl $_mp_lock,%edx
+ jmp _MPrellock_edx
#endif /* notneeded */
@@ -407,12 +376,11 @@ NON_GPROF_ENTRY(rel_align_lock)
*/
NON_GPROF_ENTRY(get_syscall_lock)
- pushl lapic_tpr
+ pushl lapic_tpr /* save current TPR (TPR_TARGET) */
pushfl
sti
- pushl $_mp_lock
- call _MPgetlock
- add $4, %esp
+ movl $_mp_lock, %edx
+ call _MPgetlock_edx
popfl
popl lapic_tpr
ret
@@ -423,13 +391,11 @@ NON_GPROF_ENTRY(try_syscall_lock)
call _MPtrylock
add $4, %esp
ret
+#endif /* notneeded */
NON_GPROF_ENTRY(rel_syscall_lock)
- pushl $_mp_lock
- call _MPrellock
- add $4, %esp
- ret
-#endif /* notneeded */
+ movl $_mp_lock,%edx
+ jmp _MPrellock_edx
/***********************************************************************
@@ -437,12 +403,11 @@ NON_GPROF_ENTRY(rel_syscall_lock)
*/
NON_GPROF_ENTRY(get_altsyscall_lock)
- pushl lapic_tpr
+ pushl lapic_tpr /* save current TPR (TPR_TARGET) */
pushfl
sti
- pushl $_mp_lock
- call _MPgetlock
- add $4, %esp
+ movl $_mp_lock, %edx
+ call _MPgetlock_edx
popfl
popl lapic_tpr
ret
@@ -455,10 +420,8 @@ NON_GPROF_ENTRY(try_altsyscall_lock)
ret
NON_GPROF_ENTRY(rel_altsyscall_lock)
- pushl $_mp_lock
- call _MPrellock
- add $4, %esp
- ret
+ movl $_mp_lock,%edx
+ jmp _MPrellock_edx
#endif /* notneeded */
@@ -475,13 +438,12 @@ NON_GPROF_ENTRY(get_mpintrlock)
pushl %edx
#ifdef GRAB_LOPRIO
- pushl lapic_tpr
+ pushl lapic_tpr /* save current TPR (TPR_TARGET) */
pushfl
#endif
- pushl $_mpintr_lock
- call _MPgetlock
- add $4, %esp
+ movl $_mpintr_lock, %edx
+ call _MPgetlock_edx
#ifdef GRAB_LOPRIO
popfl
@@ -500,17 +462,14 @@ NON_GPROF_ENTRY(get_mpintrlock)
*/
NON_GPROF_ENTRY(rel_mpintrlock)
- pushl %eax
pushl %ecx
pushl %edx
- pushl $_mpintr_lock
- call _MPrellock
- add $4, %esp
+ movl $_mpintr_lock,%edx
+ call _MPrellock_edx
popl %edx
popl %ecx
- popl %eax
ret
#endif /* RECURSIVE_MPINTRLOCK */
diff --git a/sys/i386/include/apic.h b/sys/i386/include/apic.h
index 38c7070..be9de4f 100644
--- a/sys/i386/include/apic.h
+++ b/sys/i386/include/apic.h
@@ -223,6 +223,7 @@ typedef struct IOAPIC ioapic_t;
/* XXX these 2 don't really belong here... */
#define COUNT_FIELD 0x00ffffff /* count portion of the lock */
+#define CPU_FIELD 0xff000000 /* cpu portion of the lock */
#define FREE_LOCK 0xffffffff /* value of lock when free */
/*
diff --git a/sys/i386/include/apicreg.h b/sys/i386/include/apicreg.h
index 38c7070..be9de4f 100644
--- a/sys/i386/include/apicreg.h
+++ b/sys/i386/include/apicreg.h
@@ -223,6 +223,7 @@ typedef struct IOAPIC ioapic_t;
/* XXX these 2 don't really belong here... */
#define COUNT_FIELD 0x00ffffff /* count portion of the lock */
+#define CPU_FIELD 0xff000000 /* cpu portion of the lock */
#define FREE_LOCK 0xffffffff /* value of lock when free */
/*
diff --git a/sys/i386/include/asnames.h b/sys/i386/include/asnames.h
index 8ab8dc7..17b20b5 100644
--- a/sys/i386/include/asnames.h
+++ b/sys/i386/include/asnames.h
@@ -60,7 +60,9 @@
#define _KPTphys KPTphys
#define _MP_GDT MP_GDT
#define _MPgetlock MPgetlock
+#define _MPgetlock_edx MPgetlock_edx
#define _MPrellock MPrellock
+#define _MPrellock_edx MPrellock_edx
#define _MPtrylock MPtrylock
#define _PTD PTD
#define _PTDpde PTDpde
diff --git a/sys/i386/isa/apic_vector.s b/sys/i386/isa/apic_vector.s
index 8e7ac03..ca909d9 100644
--- a/sys/i386/isa/apic_vector.s
+++ b/sys/i386/isa/apic_vector.s
@@ -26,9 +26,8 @@
call _get_isrlock
#define REL_FAST_INTR_LOCK \
- pushl $_mp_lock ; /* GIANT_LOCK */ \
- call _MPrellock ; \
- add $4, %esp
+ movl $_mp_lock, %edx ; /* GIANT_LOCK */ \
+ call _MPrellock_edx
#endif /* FAST_SIMPLELOCK */
diff --git a/sys/i386/isa/ipl.s b/sys/i386/isa/ipl.s
index 964834d..7c1fca1 100644
--- a/sys/i386/isa/ipl.s
+++ b/sys/i386/isa/ipl.s
@@ -174,9 +174,8 @@ doreti_exit:
#error code needed here to decide which lock to release, INTR or giant
#endif
/* release the kernel lock */
- pushl $_mp_lock /* GIANT_LOCK */
- call _MPrellock
- add $4, %esp
+ movl $_mp_lock, %edx /* GIANT_LOCK */
+ call _MPrellock_edx
#endif /* SMP */
.globl doreti_popl_fs
OpenPOWER on IntegriCloud