summaryrefslogtreecommitdiffstats
path: root/sys/kern
diff options
context:
space:
mode:
authorjeff <jeff@FreeBSD.org>2009-06-23 22:42:39 +0000
committerjeff <jeff@FreeBSD.org>2009-06-23 22:42:39 +0000
commit5bc3a65e406b90cd9e2a47b79117e453bdb56413 (patch)
tree5644c551dea0298e335cc77383345323c6eb3662 /sys/kern
parentd8bf8e1e8ad280542b4de90763d6d552c4f27b3a (diff)
downloadFreeBSD-src-5bc3a65e406b90cd9e2a47b79117e453bdb56413.zip
FreeBSD-src-5bc3a65e406b90cd9e2a47b79117e453bdb56413.tar.gz
Implement a facility for dynamic per-cpu variables.
- Modules and kernel code alike may use DPCPU_DEFINE(), DPCPU_GET(), DPCPU_SET(), etc., akin to the statically defined PCPU_*. This requires only one more instruction than PCPU_*, is virtually the same as __thread for builtin code, and is much faster for shared objects. DPCPU variables can be initialized when defined. - Modules are supported by relocating the module's per-cpu linker set over space reserved in the kernel. Modules may fail to load if there is insufficient space available. - Track space available for modules with a one-off extent allocator. Free may block for memory to allocate space for an extent. Reviewed by: jhb, rwatson, kan, sam, grehan, marius, marcel, stas
Diffstat (limited to 'sys/kern')
-rw-r--r--sys/kern/link_elf.c52
-rw-r--r--sys/kern/link_elf_obj.c60
-rw-r--r--sys/kern/subr_pcpu.c201
3 files changed, 303 insertions, 10 deletions
diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c
index 57f0206..cd0f3e9 100644
--- a/sys/kern/link_elf.c
+++ b/sys/kern/link_elf.c
@@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/mount.h>
+#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/fcntl.h>
@@ -107,6 +108,9 @@ typedef struct elf_file {
caddr_t ctfoff; /* CTF offset table */
caddr_t typoff; /* Type offset table */
long typlen; /* Number of type entries. */
+ Elf_Addr pcpu_start; /* Pre-relocation pcpu set start. */
+ Elf_Addr pcpu_stop; /* Pre-relocation pcpu set stop. */
+ Elf_Addr pcpu_base; /* Relocated pcpu set address. */
#ifdef GDB
struct link_map gdb; /* hooks for gdb */
#endif
@@ -475,6 +479,34 @@ parse_dynamic(elf_file_t ef)
}
static int
+/*
+ * Locate the module's "pcpu" linker set and relocate it into the kernel's
+ * dynamic per-cpu area.
+ *
+ * On success ef->pcpu_start/pcpu_stop hold the pre-relocation bounds of the
+ * set and ef->pcpu_base the address of the space obtained from dpcpu_alloc().
+ * A file without a pcpu set (or with an empty one) is not an error; all
+ * three fields are left at zero.
+ *
+ * Returns 0 on success, or ENOSPC if dpcpu_alloc() cannot provide the space.
+ */
+parse_dpcpu(elf_file_t ef)
+{
+	int count;
+	int error;
+	int size;
+
+	ef->pcpu_start = 0;
+	ef->pcpu_stop = 0;
+	/* Keep the unload path from freeing space that was never allocated. */
+	ef->pcpu_base = 0;
+	error = link_elf_lookup_set(&ef->lf, "pcpu", (void ***)&ef->pcpu_start,
+	    (void ***)&ef->pcpu_stop, &count);
+	/* Error just means there is no pcpu set to relocate. */
+	if (error)
+		return (0);
+	/*
+	 * Size the region from the set bounds rather than from the entry
+	 * count: the count is in pointer-sized units while per-cpu variables
+	 * may be of any size, and the unload path frees exactly
+	 * pcpu_stop - pcpu_start bytes.
+	 */
+	size = (int)(ef->pcpu_stop - ef->pcpu_start);
+	/* Nothing to relocate for an empty set. */
+	if (size <= 0)
+		return (0);
+	/*
+	 * Allocate space in the primary pcpu area.  Copy in our initialization
+	 * from the data section and then initialize all per-cpu storage from
+	 * that.
+	 */
+	ef->pcpu_base = (Elf_Addr)(uintptr_t)dpcpu_alloc(size);
+	if (ef->pcpu_base == (Elf_Addr)NULL)
+		return (ENOSPC);
+	memcpy((void *)ef->pcpu_base, (void *)ef->pcpu_start, size);
+	dpcpu_copy((void *)ef->pcpu_base, size);
+
+	return (0);
+}
+
+static int
link_elf_link_preload(linker_class_t cls,
const char* filename, linker_file_t *result)
{
@@ -519,6 +551,8 @@ link_elf_link_preload(linker_class_t cls,
lf->size = *(size_t *)sizeptr;
error = parse_dynamic(ef);
+ if (error == 0)
+ error = parse_dpcpu(ef);
if (error) {
linker_file_unload(lf, LINKER_UNLOAD_FORCE);
return error;
@@ -801,6 +835,9 @@ link_elf_load_file(linker_class_t cls, const char* filename,
error = parse_dynamic(ef);
if (error)
goto out;
+ error = parse_dpcpu(ef);
+ if (error)
+ goto out;
link_elf_reloc_local(lf);
VOP_UNLOCK(nd.ni_vp, 0);
@@ -897,11 +934,26 @@ out:
return error;
}
+/*
+ * Translate an address that falls inside the file's pre-relocation pcpu set
+ * to the matching address in its relocated per-cpu region.  Any other
+ * address is returned unchanged.
+ */
+Elf_Addr
+elf_relocaddr(linker_file_t lf, Elf_Addr x)
+{
+	elf_file_t file = (elf_file_t)lf;
+
+	/* Outside [pcpu_start, pcpu_stop): nothing to relocate. */
+	if (x < file->pcpu_start || x >= file->pcpu_stop)
+		return (x);
+	/* Keep the offset within the set, rebased onto the relocated copy. */
+	return (file->pcpu_base + (x - file->pcpu_start));
+}
+
+
static void
link_elf_unload_file(linker_file_t file)
{
elf_file_t ef = (elf_file_t) file;
+ if (ef->pcpu_base) {
+ dpcpu_free((void *)ef->pcpu_base, ef->pcpu_stop - ef->pcpu_start);
+ }
#ifdef GDB
if (ef->gdb.l_ld) {
GDB_STATE(RT_DELETE);
diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c
index 2adbe19..9d4d70c 100644
--- a/sys/kern/link_elf_obj.c
+++ b/sys/kern/link_elf_obj.c
@@ -333,6 +333,20 @@ link_elf_link_preload(linker_class_t cls, const char *filename,
if (ef->shstrtab && shdr[i].sh_name != 0)
ef->progtab[pb].name =
ef->shstrtab + shdr[i].sh_name;
+ if (ef->progtab[pb].name != NULL &&
+ !strcmp(ef->progtab[pb].name, "set_pcpu")) {
+ void *dpcpu;
+
+ dpcpu = dpcpu_alloc(shdr[i].sh_size);
+ if (dpcpu == NULL) {
+ error = ENOSPC;
+ goto out;
+ }
+ memcpy(dpcpu, ef->progtab[pb].addr,
+ ef->progtab[pb].size);
+ dpcpu_copy(dpcpu, shdr[i].sh_size);
+ ef->progtab[pb].addr = dpcpu;
+ }
/* Update all symbol values with the offset. */
for (j = 0; j < ef->ddbsymcnt; j++) {
@@ -712,9 +726,27 @@ link_elf_load_file(linker_class_t cls, const char *filename,
alignmask = shdr[i].sh_addralign - 1;
mapbase += alignmask;
mapbase &= ~alignmask;
- ef->progtab[pb].addr = (void *)(uintptr_t)mapbase;
- if (shdr[i].sh_type == SHT_PROGBITS) {
+ if (ef->shstrtab && shdr[i].sh_name != 0)
+ ef->progtab[pb].name =
+ ef->shstrtab + shdr[i].sh_name;
+ else if (shdr[i].sh_type == SHT_PROGBITS)
ef->progtab[pb].name = "<<PROGBITS>>";
+ else
+ ef->progtab[pb].name = "<<NOBITS>>";
+ if (ef->progtab[pb].name != NULL &&
+ !strcmp(ef->progtab[pb].name, "set_pcpu"))
+ ef->progtab[pb].addr =
+ dpcpu_alloc(shdr[i].sh_size);
+ else
+ ef->progtab[pb].addr =
+ (void *)(uintptr_t)mapbase;
+ if (ef->progtab[pb].addr == NULL) {
+ error = ENOSPC;
+ goto out;
+ }
+ ef->progtab[pb].size = shdr[i].sh_size;
+ ef->progtab[pb].sec = i;
+ if (shdr[i].sh_type == SHT_PROGBITS) {
error = vn_rdwr(UIO_READ, nd.ni_vp,
ef->progtab[pb].addr,
shdr[i].sh_size, shdr[i].sh_offset,
@@ -726,15 +758,12 @@ link_elf_load_file(linker_class_t cls, const char *filename,
error = EINVAL;
goto out;
}
- } else {
- ef->progtab[pb].name = "<<NOBITS>>";
+ /* Initialize the per-cpu area. */
+ if (ef->progtab[pb].addr != (void *)mapbase)
+ dpcpu_copy(ef->progtab[pb].addr,
+ shdr[i].sh_size);
+ } else
bzero(ef->progtab[pb].addr, shdr[i].sh_size);
- }
- ef->progtab[pb].size = shdr[i].sh_size;
- ef->progtab[pb].sec = i;
- if (ef->shstrtab && shdr[i].sh_name != 0)
- ef->progtab[pb].name =
- ef->shstrtab + shdr[i].sh_name;
/* Update all symbol values with the offset. */
for (j = 0; j < ef->ddbsymcnt; j++) {
@@ -839,6 +868,17 @@ link_elf_unload_file(linker_file_t file)
/* Notify MD code that a module is being unloaded. */
elf_cpu_unload_file(file);
+ if (ef->progtab) {
+ for (i = 0; i < ef->nprogtab; i++) {
+ if (ef->progtab[i].size == 0)
+ continue;
+ if (ef->progtab[i].name == NULL)
+ continue;
+ if (!strcmp(ef->progtab[i].name, "set_pcpu"))
+ dpcpu_free(ef->progtab[i].addr,
+ ef->progtab[i].size);
+ }
+ }
if (ef->preloaded) {
if (ef->reltab)
free(ef->reltab, M_LINKER);
diff --git a/sys/kern/subr_pcpu.c b/sys/kern/subr_pcpu.c
index 74082c2..53437e5 100644
--- a/sys/kern/subr_pcpu.c
+++ b/sys/kern/subr_pcpu.c
@@ -3,6 +3,9 @@
* All rights reserved.
* Written by: John Baldwin <jhb@FreeBSD.org>
*
+ * Copyright (c) 2009 Jeffrey Roberson <jeff@freebsd.org>
+ * All rights reserved.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -49,13 +52,28 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/sysctl.h>
#include <sys/linker_set.h>
#include <sys/lock.h>
+#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/smp.h>
+#include <sys/sx.h>
#include <ddb/ddb.h>
+MALLOC_DEFINE(M_PCPU, "Per-cpu", "Per-cpu resource accouting.");
+
+/*
+ * One free extent of the per-cpu space managed by dpcpu_alloc() and
+ * dpcpu_free().  Extents are linked on dpcpu_head.
+ */
+struct dpcpu_free {
+ /* Address of the first byte of the free extent. */
+ uintptr_t df_start;
+ /* Length of the free extent in bytes. */
+ int df_len;
+ /* Linkage on the dpcpu_head free list. */
+ TAILQ_ENTRY(dpcpu_free) df_link;
+};
+
+/* Per-cpu space whose address seeds the free list in dpcpu_startup(). */
+static DPCPU_DEFINE(char, modspace[DPCPU_MODMIN]);
+/* List of free extents; searched first-fit by dpcpu_alloc(). */
+static TAILQ_HEAD(, dpcpu_free) dpcpu_head = TAILQ_HEAD_INITIALIZER(dpcpu_head);
+/* Taken exclusively by dpcpu_alloc() and dpcpu_free() around list access. */
+static struct sx dpcpu_lock;
+/* Per-CPU dynamic offset set by dpcpu_init(); readers skip entries that are 0. */
+uintptr_t dpcpu_off[MAXCPU];
struct pcpu *cpuid_to_pcpu[MAXCPU];
struct cpuhead cpuhead = SLIST_HEAD_INITIALIZER(cpuhead);
@@ -79,7 +97,146 @@ pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
#ifdef KTR
snprintf(pcpu->pc_name, sizeof(pcpu->pc_name), "CPU %d", cpuid);
#endif
+}
+
+/*
+ * Set up the dynamic per-cpu region 'dpcpu' for CPU 'cpuid': record its
+ * offset from DPCPU_START in the CPU's pcpu structure, fill the region with
+ * the defaults from the linker section, and publish the offset in dpcpu_off.
+ */
+void
+dpcpu_init(void *dpcpu, int cpuid)
+{
+	struct pcpu *pc = pcpu_find(cpuid);
+
+	/* Offset that turns a linker-section address into this CPU's copy. */
+	pc->pc_dynamic = (uintptr_t)dpcpu - DPCPU_START;
+
+	/* Start from the initial values held in the linker section. */
+	memcpy(dpcpu, (void *)DPCPU_START, DPCPU_BYTES);
+
+	/* Make the offset visible through the global per-cpu offset array. */
+	dpcpu_off[cpuid] = pc->pc_dynamic;
+}
+
+/*
+ * SYSINIT hook: seed the free list with a single extent that starts at the
+ * per-cpu modspace variable and is DPCPU_MODSIZE bytes long, then initialize
+ * the allocator lock.
+ */
+static void
+dpcpu_startup(void *dummy __unused)
+{
+	struct dpcpu_free *extent;
+
+	extent = malloc(sizeof(struct dpcpu_free), M_PCPU, M_WAITOK | M_ZERO);
+	extent->df_len = DPCPU_MODSIZE;
+	extent->df_start = (uintptr_t)&DPCPU_NAME(modspace);
+	TAILQ_INSERT_HEAD(&dpcpu_head, extent, df_link);
+	sx_init(&dpcpu_lock, "dpcpu alloc lock");
+}
+SYSINIT(dpcpu, SI_SUB_KLD, SI_ORDER_FIRST, dpcpu_startup, 0);
+
+/*
+ * First-fit extent allocator for the per-cpu space reserved for modules.
+ * Intended only for the kernel linkers, which use it to place module
+ * linker sets.
+ *
+ * The request is rounded up to a multiple of the pointer size.  Returns the
+ * start of the allocated space, or NULL if no free extent is large enough.
+ */
+void *
+dpcpu_alloc(int size)
+{
+	struct dpcpu_free *extent;
+	void *addr = NULL;
+
+	size = roundup2(size, sizeof(void *));
+	sx_xlock(&dpcpu_lock);
+	TAILQ_FOREACH(extent, &dpcpu_head, df_link) {
+		if (extent->df_len >= size) {
+			addr = (void *)extent->df_start;
+			if (extent->df_len > size) {
+				/* Carve the request off the front of the extent. */
+				extent->df_len -= size;
+				extent->df_start = extent->df_start + size;
+			} else {
+				/* Exact fit: the extent is used up entirely. */
+				TAILQ_REMOVE(&dpcpu_head, extent, df_link);
+				free(extent, M_PCPU);
+			}
+			break;
+		}
+	}
+	sx_xunlock(&dpcpu_lock);
+
+	return (addr);
+}
+
+/*
+ * Free dynamic per-cpu space at module unload time.
+ *
+ * 's' is the start of the region and 'size' its length in bytes; the length
+ * is rounded up to a multiple of the pointer size, as dpcpu_alloc() does.
+ * The region is merged with adjacent free extents where possible, otherwise
+ * a new extent is allocated with M_WAITOK and inserted in address order.
+ */
+void
+dpcpu_free(void *s, int size)
+{
+	struct dpcpu_free *df;
+	struct dpcpu_free *dn;
+	uintptr_t start;
+	uintptr_t end;
+
+	size = roundup2(size, sizeof(void *));
+	start = (uintptr_t)s;
+	end = start + size;
+	/*
+	 * Free a region of space and merge it with as many neighbors as
+	 * possible.  Keeping the list sorted simplifies this operation.
+	 */
+	sx_xlock(&dpcpu_lock);
+	TAILQ_FOREACH(df, &dpcpu_head, df_link) {
+		/* Every remaining extent lies beyond the freed region. */
+		if (df->df_start > end)
+			break;
+		/*
+		 * If we expand at the end of an entry we may have to
+		 * merge it with the one following it as well.
+		 */
+		if (df->df_start + df->df_len == start) {
+			df->df_len += size;
+			dn = TAILQ_NEXT(df, df_link);
+			/*
+			 * There is no following entry when df is the last
+			 * one on the list, so check before looking at it.
+			 */
+			if (dn != NULL &&
+			    df->df_start + df->df_len == dn->df_start) {
+				df->df_len += dn->df_len;
+				TAILQ_REMOVE(&dpcpu_head, dn, df_link);
+				free(dn, M_PCPU);
+			}
+			sx_xunlock(&dpcpu_lock);
+			return;
+		}
+		/* The freed region ends where this entry begins: grow it down. */
+		if (df->df_start == end) {
+			df->df_start = start;
+			df->df_len += size;
+			sx_xunlock(&dpcpu_lock);
+			return;
+		}
+	}
+	/* No neighbor to merge with; record the region as a new extent. */
+	dn = malloc(sizeof(*dn), M_PCPU, M_WAITOK | M_ZERO);
+	dn->df_start = start;
+	dn->df_len = size;
+	/* df is the first entry past the region, or NULL at the end of the list. */
+	if (df)
+		TAILQ_INSERT_BEFORE(df, dn, df_link);
+	else
+		TAILQ_INSERT_TAIL(&dpcpu_head, dn, df_link);
+	sx_xunlock(&dpcpu_lock);
+}
+
+/*
+ * Initialize the per-cpu storage from an updated linker-set region: copy
+ * the 'size' bytes at 's' to the same address offset by each CPU's
+ * dpcpu_off entry.  On SMP, CPUs whose entry is 0 are skipped.
+ */
+void
+dpcpu_copy(void *s, int size)
+{
+#ifdef SMP
+	uintptr_t off;
+	int cpu;
+
+	for (cpu = 0; cpu < mp_ncpus; ++cpu) {
+		off = dpcpu_off[cpu];
+		if (off != 0)
+			memcpy((void *)(off + (uintptr_t)s), s, size);
+	}
+#else
+	memcpy((void *)(dpcpu_off[0] + (uintptr_t)s), s, size);
+#endif
}
/*
@@ -91,6 +248,7 @@ pcpu_destroy(struct pcpu *pcpu)
SLIST_REMOVE(&cpuhead, pcpu, pcpu, pc_allcpu);
cpuid_to_pcpu[pcpu->pc_cpuid] = NULL;
+ dpcpu_off[pcpu->pc_cpuid] = 0;
}
/*
@@ -103,6 +261,48 @@ pcpu_find(u_int cpuid)
return (cpuid_to_pcpu[cpuid]);
}
+/*
+ * Sysctl handler reporting a 64-bit dynamic per-cpu variable.  arg1 is the
+ * variable's address before the per-cpu offset is applied.  On SMP the
+ * copies of all CPUs with a non-zero dpcpu_off entry are summed; otherwise
+ * the single copy at dpcpu_off[0] is reported.
+ */
+int
+sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS)
+{
+	int64_t total;
+#ifdef SMP
+	uintptr_t off;
+	int cpu;
+
+	total = 0;
+	for (cpu = 0; cpu < mp_ncpus; ++cpu) {
+		off = dpcpu_off[cpu];
+		if (off != 0)
+			total += *(int64_t *)(off + (uintptr_t)arg1);
+	}
+#else
+	total = *(int64_t *)(dpcpu_off[0] + (uintptr_t)arg1);
+#endif
+	return (SYSCTL_OUT(req, &total, sizeof(total)));
+}
+
+/*
+ * Sysctl handler reporting an int-sized dynamic per-cpu variable.  arg1 is
+ * the variable's address before the per-cpu offset is applied.  On SMP the
+ * copies of all CPUs with a non-zero dpcpu_off entry are summed; otherwise
+ * the single copy at dpcpu_off[0] is reported.
+ */
+int
+sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS)
+{
+	int total;
+#ifdef SMP
+	uintptr_t off;
+	int cpu;
+
+	total = 0;
+	for (cpu = 0; cpu < mp_ncpus; ++cpu) {
+		off = dpcpu_off[cpu];
+		if (off != 0)
+			total += *(int *)(off + (uintptr_t)arg1);
+	}
+#else
+	total = *(int *)(dpcpu_off[0] + (uintptr_t)arg1);
+#endif
+	return (SYSCTL_OUT(req, &total, sizeof(total)));
+}
+
#ifdef DDB
static void
@@ -111,6 +311,7 @@ show_pcpu(struct pcpu *pc)
struct thread *td;
db_printf("cpuid = %d\n", pc->pc_cpuid);
+ db_printf("dynamic pcpu = %p\n", (void *)pc->pc_dynamic);
db_printf("curthread = ");
td = pc->pc_curthread;
if (td != NULL)
OpenPOWER on IntegriCloud