Diffstat (limited to 'libexec/rtld-elf/amd64/lockdflt.c')
-rw-r--r--  libexec/rtld-elf/amd64/lockdflt.c | 242
 1 file changed, 203 insertions(+), 39 deletions(-)
diff --git a/libexec/rtld-elf/amd64/lockdflt.c b/libexec/rtld-elf/amd64/lockdflt.c
index 4233b36..b2ca9a5 100644
--- a/libexec/rtld-elf/amd64/lockdflt.c
+++ b/libexec/rtld-elf/amd64/lockdflt.c
@@ -26,64 +26,228 @@
*/
/*
- * Default thread locking implementation for the dynamic linker. It
- * is used until the client registers a different implementation with
- * dllockinit(). The default implementation does mutual exclusion by
- * blocking almost all signals. This is based on the observation that
- * most userland thread packages use signals to support preemption.
+ * Thread locking implementation for the dynamic linker.
+ *
+ * On 80486 and later CPUs we use the "simple, non-scalable
+ * reader-preference lock" from:
+ *
+ * J. M. Mellor-Crummey and M. L. Scott. "Scalable Reader-Writer
+ * Synchronization for Shared-Memory Multiprocessors." 3rd ACM Symp. on
+ * Principles and Practice of Parallel Programming, April 1991.
+ *
+ * In this algorithm the lock is a single word. Its low-order bit is
+ * set when a writer holds the lock. The remaining high-order bits
+ * contain a count of readers desiring the lock. The algorithm requires
+ * atomic "compare_and_store" and "add" operations.
+ *
+ * The "compare_and_store" operation requires the "cmpxchg" instruction
+ * on the x86. Unfortunately, the 80386 CPU does not support that
+ * instruction -- only the 80486 and later models support it. So on the
+ * 80386 we must use simple test-and-set exclusive locks instead. We
+ * determine which kind of lock to use by trying to execute a "cmpxchg"
+ * instruction and catching the SIGILL which results on the 80386.
+ *
+ * These are spinlocks. When spinning we call nanosleep() for 1
+ * microsecond each time around the loop. This will most likely yield
+ * the CPU to other threads (including, we hope, the lockholder) allowing
+ * them to make some progress.
*/
-#include <dlfcn.h>
+#include <setjmp.h>
#include <signal.h>
#include <stdlib.h>
+#include <string.h>
+#include <time.h>
#include "debug.h"
#include "rtld.h"
-typedef struct Struct_LockDflt {
- sigset_t lock_mask;
- sigset_t old_mask;
- int depth;
-} LockDflt;
+#define CACHE_LINE_SIZE 32
-void
-lockdflt_acquire(void *lock)
+#define WAFLAG 0x1 /* A writer holds the lock */
+#define RC_INCR 0x2 /* Adjusts count of readers desiring lock */
+
+typedef struct Struct_Lock {
+ volatile int lock;
+ void *base;
+} Lock;
+
+static const struct timespec usec = { 0, 1000 }; /* 1 usec. */
+
+static inline int
+cmpxchgl(int old, int new, volatile int *m)
{
- LockDflt *l = (LockDflt *)lock;
- sigprocmask(SIG_BLOCK, &l->lock_mask, &l->old_mask);
- assert(l->depth == 0);
- l->depth++;
+ int result;
+
+ __asm __volatile ("lock; cmpxchgl %2, %0"
+ : "=m"(*m), "=a"(result)
+ : "r"(new), "0"(*m), "1"(old)
+ : "cc");
+
+ return result;
}
-void *
-lockdflt_create(void *context)
+static inline int
+xchgl(int v, volatile int *m)
{
- LockDflt *l;
-
- l = NEW(LockDflt);
- l->depth = 0;
- sigfillset(&l->lock_mask);
- sigdelset(&l->lock_mask, SIGTRAP);
- sigdelset(&l->lock_mask, SIGABRT);
- sigdelset(&l->lock_mask, SIGBUS);
- sigdelset(&l->lock_mask, SIGSEGV);
- sigdelset(&l->lock_mask, SIGKILL);
- sigdelset(&l->lock_mask, SIGSTOP);
+ int result;
+
+ __asm __volatile ("xchgl %0, %1"
+ : "=r"(result), "=m"(*m)
+ : "0"(v), "1"(*m));
+
+ return result;
+}
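Note that xchgl carries no "lock" prefix: the x86 xchg instruction asserts the bus lock implicitly whenever one operand is in memory, so the exchange is atomic as written.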
+
+static void *
+lock_create(void *context)
+{
+ void *base;
+ char *p;
+ uintptr_t r;
+ Lock *l;
+
+ /*
+ * Arrange for the lock to occupy its own cache line. First, we
+ * optimistically allocate just a cache line, hoping that malloc
+ * will give us a well-aligned block of memory. If that doesn't
+ * work, we allocate a larger block and take a well-aligned cache
+ * line from it.
+ */
+ base = xmalloc(CACHE_LINE_SIZE);
+ p = (char *)base;
+ if ((uintptr_t)p % CACHE_LINE_SIZE != 0) {
+ free(base);
+ base = xmalloc(2 * CACHE_LINE_SIZE);
+ p = (char *)base;
+ if ((r = (uintptr_t)p % CACHE_LINE_SIZE) != 0)
+ p += CACHE_LINE_SIZE - r;
+ }
+ l = (Lock *)p;
+ l->base = base;
+ l->lock = 0;
return l;
}
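The fallback dance above exists because malloc promises only the ABI's minimum alignment, not cache-line alignment. On a system providing posix_memalign(3) the same effect could be had in one call; a hypothetical sketch (posix_memalign was not available to rtld when this code was written):

	static void *
	lock_create_aligned(void *context)
	{
		Lock *l;

		/* One cache-line-aligned allocation; no fallback needed. */
		if (posix_memalign((void **)&l, CACHE_LINE_SIZE, sizeof(Lock)) != 0)
			return NULL;
		l->base = l;	/* lock_destroy's free(l->base) still works */
		l->lock = 0;
		return l;
	}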
-void
-lockdflt_destroy(void *lock)
+static void
+lock_destroy(void *lock)
+{
+ Lock *l = (Lock *)lock;
+
+ free(l->base);
+}
+
+/*
+ * Crude exclusive locks for the 80386, which does not support the
+ * cmpxchg instruction.
+ */
+static void
+lock80386_acquire(void *lock)
+{
+ Lock *l = (Lock *)lock;
+
+ while (xchgl(1, &l->lock) != 0)
+ while (l->lock != 0)
+ nanosleep(&usec, NULL);
+}
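The nested loops are the classic test-and-test-and-set refinement: the outer xchgl is the bus-locking attempt to take the lock, while the inner loop spins on plain reads (plus the nanosleep) so that a contended lock does not keep generating locked bus cycles.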
+
+static void
+lock80386_release(void *lock)
+{
+ Lock *l = (Lock *)lock;
+
+ l->lock = 0;
+}
+
+/*
+ * Better reader/writer locks for the 80486 and later CPUs.
+ */
+static void
+rlock_acquire(void *lock)
{
- LockDflt *l = (LockDflt *)lock;
- free(l);
+ Lock *l = (Lock *)lock;
+
+ atomic_add_int(&l->lock, RC_INCR);
+ while (l->lock & WAFLAG)
+ nanosleep(&usec, NULL);
+}
+
+static void
+wlock_acquire(void *lock)
+{
+ Lock *l = (Lock *)lock;
+
+ while (cmpxchgl(0, WAFLAG, &l->lock) != 0)
+ nanosleep(&usec, NULL);
+}
+
+static void
+rlock_release(void *lock)
+{
+ Lock *l = (Lock *)lock;
+
+ atomic_add_int(&l->lock, -RC_INCR);
+}
+
+static void
+wlock_release(void *lock)
+{
+ Lock *l = (Lock *)lock;
+
+ atomic_add_int(&l->lock, -WAFLAG);
+}
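To see how the pieces interact, a short worked trace of the lock word (WAFLAG = 0x1, RC_INCR = 0x2):

	lock == 0x0	wlock_acquire: cmpxchgl(0, 0x1) succeeds	-> 0x1
	lock == 0x1	rlock_acquire: adds 0x2, spins on WAFLAG	-> 0x3
	lock == 0x3	wlock_release: adds -0x1; the reader proceeds	-> 0x2
	lock == 0x2	a second writer's cmpxchgl(0, 0x1) fails; it spins
	lock == 0x2	rlock_release: adds -0x2; the writer can now win -> 0x0

Note the reader preference named in the comment at the top of the file: a writer gets in only when the word is exactly zero, so a steady stream of readers can starve writers.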
+
+/*
+ * Code to determine at runtime whether the CPU supports the cmpxchg
+ * instruction. This instruction allows us to use locks that are more
+ * efficient, but it didn't exist on the 80386.
+ */
+static jmp_buf sigill_env;
+
+static void
+sigill(int sig)
+{
+ longjmp(sigill_env, 1);
+}
+
+static int
+cpu_supports_cmpxchg(void)
+{
+ struct sigaction act, oact;
+ int result;
+ volatile int lock;
+
+ memset(&act, 0, sizeof act);
+ act.sa_handler = sigill;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+
+ sigaction(SIGILL, &act, &oact);
+ if (setjmp(sigill_env) == 0) {
+ lock = 0;
+ cmpxchgl(0, 1, &lock);
+ result = 1;
+ } else
+ result = 0;
+ sigaction(SIGILL, &oact, NULL);
+ return result;
}
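Two details of the probe are worth noting. The longjmp escapes from a signal handler, which is safe here because BSD setjmp()/longjmp() save and restore the signal mask; on systems where they do not, sigsetjmp()/siglongjmp() would be the portable spelling. And the probe runs once, at initialization time in lockdflt_init below, before any lock can be taken.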
void
-lockdflt_release(void *lock)
+lockdflt_init(LockInfo *li)
{
- LockDflt *l = (LockDflt *)lock;
- assert(l->depth == 1);
- l->depth--;
- sigprocmask(SIG_SETMASK, &l->old_mask, NULL);
+ li->context = NULL;
+ li->context_destroy = NULL;
+ li->lock_create = lock_create;
+ li->lock_destroy = lock_destroy;
+ if (cpu_supports_cmpxchg()) {
+ /* Use fast locks that require an 80486 or later. */
+ li->rlock_acquire = rlock_acquire;
+ li->wlock_acquire = wlock_acquire;
+ li->rlock_release = rlock_release;
+ li->wlock_release = wlock_release;
+ } else {
+ /* It's a cruddy old 80386. */
+ li->rlock_acquire = li->wlock_acquire = lock80386_acquire;
+ li->rlock_release = li->wlock_release = lock80386_release;
+ }
}
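For context, a hypothetical caller-side sketch of how the callbacks registered above would be exercised; the names rtld_lockinfo, bind_lock, and example_* are illustrative, not taken from rtld:

	static LockInfo rtld_lockinfo;
	static void *bind_lock;

	static void
	example_setup(void)
	{
		lockdflt_init(&rtld_lockinfo);
		bind_lock = rtld_lockinfo.lock_create(rtld_lockinfo.context);
	}

	static void
	example_symbol_lookup(void)
	{
		rtld_lockinfo.rlock_acquire(bind_lock);
		/* ... read-only walk of the linker's data structures ... */
		rtld_lockinfo.rlock_release(bind_lock);
	}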