diff options
author | jeff <jeff@FreeBSD.org> | 2009-06-23 22:42:39 +0000 |
---|---|---|
committer | jeff <jeff@FreeBSD.org> | 2009-06-23 22:42:39 +0000 |
commit | 5bc3a65e406b90cd9e2a47b79117e453bdb56413 (patch) | |
tree | 5644c551dea0298e335cc77383345323c6eb3662 /sys/kern | |
parent | d8bf8e1e8ad280542b4de90763d6d552c4f27b3a (diff) | |
download | FreeBSD-src-5bc3a65e406b90cd9e2a47b79117e453bdb56413.zip FreeBSD-src-5bc3a65e406b90cd9e2a47b79117e453bdb56413.tar.gz |
Implement a facility for dynamic per-cpu variables.
- Modules and kernel code alike may use DPCPU_DEFINE(),
DPCPU_GET(), DPCPU_SET(), etc. akin to the statically defined
PCPU_*. Requires only one more instruction than PCPU_* and is
virtually the same as __thread for builtin and much faster for shared
objects. DPCPU variables can be initialized when defined.
- Modules are supported by relocating the module's per-cpu linker set
over space reserved in the kernel. Modules may fail to load if there
is insufficient space available.
- Track space available for modules with a one-off extent allocator.
dpcpu_free() may block waiting for memory when it must allocate a new extent.
Reviewed by: jhb, rwatson, kan, sam, grehan, marius, marcel, stas
Diffstat (limited to 'sys/kern')
-rw-r--r-- | sys/kern/link_elf.c | 52 | ||||
-rw-r--r-- | sys/kern/link_elf_obj.c | 60 | ||||
-rw-r--r-- | sys/kern/subr_pcpu.c | 201 |
3 files changed, 303 insertions, 10 deletions
diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c index 57f0206..cd0f3e9 100644 --- a/sys/kern/link_elf.c +++ b/sys/kern/link_elf.c @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <sys/mutex.h> #include <sys/mount.h> +#include <sys/pcpu.h> #include <sys/proc.h> #include <sys/namei.h> #include <sys/fcntl.h> @@ -107,6 +108,9 @@ typedef struct elf_file { caddr_t ctfoff; /* CTF offset table */ caddr_t typoff; /* Type offset table */ long typlen; /* Number of type entries. */ + Elf_Addr pcpu_start; /* Pre-relocation pcpu set start. */ + Elf_Addr pcpu_stop; /* Pre-relocation pcpu set stop. */ + Elf_Addr pcpu_base; /* Relocated pcpu set address. */ #ifdef GDB struct link_map gdb; /* hooks for gdb */ #endif @@ -475,6 +479,34 @@ parse_dynamic(elf_file_t ef) } static int +parse_dpcpu(elf_file_t ef) +{ + int count; + int error; + + ef->pcpu_start = 0; + ef->pcpu_stop = 0; + error = link_elf_lookup_set(&ef->lf, "pcpu", (void ***)&ef->pcpu_start, + (void ***)&ef->pcpu_stop, &count); + /* Error just means there is no pcpu set to relocate. */ + if (error) + return (0); + count *= sizeof(void *); + /* + * Allocate space in the primary pcpu area. Copy in our initialization + * from the data section and then initialize all per-cpu storage from + * that. 
+ */ + ef->pcpu_base = (Elf_Addr)(uintptr_t)dpcpu_alloc(count); + if (ef->pcpu_base == (Elf_Addr)NULL) + return (ENOSPC); + memcpy((void *)ef->pcpu_base, (void *)ef->pcpu_start, count); + dpcpu_copy((void *)ef->pcpu_base, count); + + return (0); +} + +static int link_elf_link_preload(linker_class_t cls, const char* filename, linker_file_t *result) { @@ -519,6 +551,8 @@ link_elf_link_preload(linker_class_t cls, lf->size = *(size_t *)sizeptr; error = parse_dynamic(ef); + if (error == 0) + error = parse_dpcpu(ef); if (error) { linker_file_unload(lf, LINKER_UNLOAD_FORCE); return error; @@ -801,6 +835,9 @@ link_elf_load_file(linker_class_t cls, const char* filename, error = parse_dynamic(ef); if (error) goto out; + error = parse_dpcpu(ef); + if (error) + goto out; link_elf_reloc_local(lf); VOP_UNLOCK(nd.ni_vp, 0); @@ -897,11 +934,26 @@ out: return error; } +Elf_Addr +elf_relocaddr(linker_file_t lf, Elf_Addr x) +{ + elf_file_t ef; + + ef = (elf_file_t)lf; + if (x >= ef->pcpu_start && x < ef->pcpu_stop) + return ((x - ef->pcpu_start) + ef->pcpu_base); + return (x); +} + + static void link_elf_unload_file(linker_file_t file) { elf_file_t ef = (elf_file_t) file; + if (ef->pcpu_base) { + dpcpu_free((void *)ef->pcpu_base, ef->pcpu_stop - ef->pcpu_start); + } #ifdef GDB if (ef->gdb.l_ld) { GDB_STATE(RT_DELETE); diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c index 2adbe19..9d4d70c 100644 --- a/sys/kern/link_elf_obj.c +++ b/sys/kern/link_elf_obj.c @@ -333,6 +333,20 @@ link_elf_link_preload(linker_class_t cls, const char *filename, if (ef->shstrtab && shdr[i].sh_name != 0) ef->progtab[pb].name = ef->shstrtab + shdr[i].sh_name; + if (ef->progtab[pb].name != NULL && + !strcmp(ef->progtab[pb].name, "set_pcpu")) { + void *dpcpu; + + dpcpu = dpcpu_alloc(shdr[i].sh_size); + if (dpcpu == NULL) { + error = ENOSPC; + goto out; + } + memcpy(dpcpu, ef->progtab[pb].addr, + ef->progtab[pb].size); + dpcpu_copy(dpcpu, shdr[i].sh_size); + ef->progtab[pb].addr = dpcpu; + } /* 
Update all symbol values with the offset. */ for (j = 0; j < ef->ddbsymcnt; j++) { @@ -712,9 +726,27 @@ link_elf_load_file(linker_class_t cls, const char *filename, alignmask = shdr[i].sh_addralign - 1; mapbase += alignmask; mapbase &= ~alignmask; - ef->progtab[pb].addr = (void *)(uintptr_t)mapbase; - if (shdr[i].sh_type == SHT_PROGBITS) { + if (ef->shstrtab && shdr[i].sh_name != 0) + ef->progtab[pb].name = + ef->shstrtab + shdr[i].sh_name; + else if (shdr[i].sh_type == SHT_PROGBITS) ef->progtab[pb].name = "<<PROGBITS>>"; + else + ef->progtab[pb].name = "<<NOBITS>>"; + if (ef->progtab[pb].name != NULL && + !strcmp(ef->progtab[pb].name, "set_pcpu")) + ef->progtab[pb].addr = + dpcpu_alloc(shdr[i].sh_size); + else + ef->progtab[pb].addr = + (void *)(uintptr_t)mapbase; + if (ef->progtab[pb].addr == NULL) { + error = ENOSPC; + goto out; + } + ef->progtab[pb].size = shdr[i].sh_size; + ef->progtab[pb].sec = i; + if (shdr[i].sh_type == SHT_PROGBITS) { error = vn_rdwr(UIO_READ, nd.ni_vp, ef->progtab[pb].addr, shdr[i].sh_size, shdr[i].sh_offset, @@ -726,15 +758,12 @@ link_elf_load_file(linker_class_t cls, const char *filename, error = EINVAL; goto out; } - } else { - ef->progtab[pb].name = "<<NOBITS>>"; + /* Initialize the per-cpu area. */ + if (ef->progtab[pb].addr != (void *)mapbase) + dpcpu_copy(ef->progtab[pb].addr, + shdr[i].sh_size); + } else bzero(ef->progtab[pb].addr, shdr[i].sh_size); - } - ef->progtab[pb].size = shdr[i].sh_size; - ef->progtab[pb].sec = i; - if (ef->shstrtab && shdr[i].sh_name != 0) - ef->progtab[pb].name = - ef->shstrtab + shdr[i].sh_name; /* Update all symbol values with the offset. */ for (j = 0; j < ef->ddbsymcnt; j++) { @@ -839,6 +868,17 @@ link_elf_unload_file(linker_file_t file) /* Notify MD code that a module is being unloaded. 
*/ elf_cpu_unload_file(file); + if (ef->progtab) { + for (i = 0; i < ef->nprogtab; i++) { + if (ef->progtab[i].size == 0) + continue; + if (ef->progtab[i].name == NULL) + continue; + if (!strcmp(ef->progtab[i].name, "set_pcpu")) + dpcpu_free(ef->progtab[i].addr, + ef->progtab[i].size); + } + } if (ef->preloaded) { if (ef->reltab) free(ef->reltab, M_LINKER); diff --git a/sys/kern/subr_pcpu.c b/sys/kern/subr_pcpu.c index 74082c2..53437e5 100644 --- a/sys/kern/subr_pcpu.c +++ b/sys/kern/subr_pcpu.c @@ -3,6 +3,9 @@ * All rights reserved. * Written by: John Baldwin <jhb@FreeBSD.org> * + * Copyright (c) 2009 Jeffrey Roberson <jeff@freebsd.org> + * All rights reserved. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -49,13 +52,28 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/sysctl.h> #include <sys/linker_set.h> #include <sys/lock.h> +#include <sys/malloc.h> #include <sys/pcpu.h> #include <sys/proc.h> #include <sys/smp.h> +#include <sys/sx.h> #include <ddb/ddb.h> +MALLOC_DEFINE(M_PCPU, "Per-cpu", "Per-cpu resource accouting."); + +struct dpcpu_free { + uintptr_t df_start; + int df_len; + TAILQ_ENTRY(dpcpu_free) df_link; +}; + +static DPCPU_DEFINE(char, modspace[DPCPU_MODMIN]); +static TAILQ_HEAD(, dpcpu_free) dpcpu_head = TAILQ_HEAD_INITIALIZER(dpcpu_head); +static struct sx dpcpu_lock; +uintptr_t dpcpu_off[MAXCPU]; struct pcpu *cpuid_to_pcpu[MAXCPU]; struct cpuhead cpuhead = SLIST_HEAD_INITIALIZER(cpuhead); @@ -79,7 +97,146 @@ pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) #ifdef KTR snprintf(pcpu->pc_name, sizeof(pcpu->pc_name), "CPU %d", cpuid); #endif +} + +void +dpcpu_init(void *dpcpu, int cpuid) +{ + struct pcpu *pcpu; + + pcpu = pcpu_find(cpuid); + pcpu->pc_dynamic = (uintptr_t)dpcpu - DPCPU_START; + + /* + * Initialize defaults from our linker section. 
+ */ + memcpy(dpcpu, (void *)DPCPU_START, DPCPU_BYTES); + + /* + * Place it in the global pcpu offset array. + */ + dpcpu_off[cpuid] = pcpu->pc_dynamic; +} + +static void +dpcpu_startup(void *dummy __unused) +{ + struct dpcpu_free *df; + + df = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO); + df->df_start = (uintptr_t)&DPCPU_NAME(modspace); + df->df_len = DPCPU_MODSIZE; + TAILQ_INSERT_HEAD(&dpcpu_head, df, df_link); + sx_init(&dpcpu_lock, "dpcpu alloc lock"); +} +SYSINIT(dpcpu, SI_SUB_KLD, SI_ORDER_FIRST, dpcpu_startup, 0); + +/* + * First-fit extent based allocator for allocating space in the per-cpu + * region reserved for modules. This is only intended for use by the + * kernel linkers to place module linker sets. + */ +void * +dpcpu_alloc(int size) +{ + struct dpcpu_free *df; + void *s; + + s = NULL; + size = roundup2(size, sizeof(void *)); + sx_xlock(&dpcpu_lock); + TAILQ_FOREACH(df, &dpcpu_head, df_link) { + if (df->df_len < size) + continue; + if (df->df_len == size) { + s = (void *)df->df_start; + TAILQ_REMOVE(&dpcpu_head, df, df_link); + free(df, M_PCPU); + break; + } + s = (void *)df->df_start; + df->df_len -= size; + df->df_start = df->df_start + size; + break; + } + sx_xunlock(&dpcpu_lock); + + return (s); +} + +/* + * Free dynamic per-cpu space at module unload time. + */ +void +dpcpu_free(void *s, int size) +{ + struct dpcpu_free *df; + struct dpcpu_free *dn; + uintptr_t start; + uintptr_t end; + + size = roundup2(size, sizeof(void *)); + start = (uintptr_t)s; + end = start + size; + /* + * Free a region of space and merge it with as many neighbors as + * possible. Keeping the list sorted simplifies this operation. + */ + sx_xlock(&dpcpu_lock); + TAILQ_FOREACH(df, &dpcpu_head, df_link) { + if (df->df_start > end) + break; + /* + * If we expand at the end of an entry we may have to + * merge it with the one following it as well. 
+ */ + if (df->df_start + df->df_len == start) { + df->df_len += size; + dn = TAILQ_NEXT(df, df_link); + if (df->df_start + df->df_len == dn->df_start) { + df->df_len += dn->df_len; + TAILQ_REMOVE(&dpcpu_head, dn, df_link); + free(dn, M_PCPU); + } + sx_xunlock(&dpcpu_lock); + return; + } + if (df->df_start == end) { + df->df_start = start; + df->df_len += size; + sx_xunlock(&dpcpu_lock); + return; + } + } + dn = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO); + dn->df_start = start; + dn->df_len = size; + if (df) + TAILQ_INSERT_BEFORE(df, dn, df_link); + else + TAILQ_INSERT_TAIL(&dpcpu_head, dn, df_link); + sx_xunlock(&dpcpu_lock); +} + +/* + * Initialize the per-cpu storage from an updated linker-set region. + */ +void +dpcpu_copy(void *s, int size) +{ +#ifdef SMP + uintptr_t dpcpu; + int i; + for (i = 0; i < mp_ncpus; ++i) { + dpcpu = dpcpu_off[i]; + if (dpcpu == 0) + continue; + memcpy((void *)(dpcpu + (uintptr_t)s), s, size); + } +#else + memcpy((void *)(dpcpu_off[0] + (uintptr_t)s), s, size); +#endif } /* @@ -91,6 +248,7 @@ pcpu_destroy(struct pcpu *pcpu) SLIST_REMOVE(&cpuhead, pcpu, pcpu, pc_allcpu); cpuid_to_pcpu[pcpu->pc_cpuid] = NULL; + dpcpu_off[pcpu->pc_cpuid] = 0; } /* @@ -103,6 +261,48 @@ pcpu_find(u_int cpuid) return (cpuid_to_pcpu[cpuid]); } +int +sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS) +{ + int64_t count; +#ifdef SMP + uintptr_t dpcpu; + int i; + + count = 0; + for (i = 0; i < mp_ncpus; ++i) { + dpcpu = dpcpu_off[i]; + if (dpcpu == 0) + continue; + count += *(int64_t *)(dpcpu + (uintptr_t)arg1); + } +#else + count = *(int64_t *)(dpcpu_off[0] + (uintptr_t)arg1); +#endif + return (SYSCTL_OUT(req, &count, sizeof(count))); +} + +int +sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS) +{ + int count; +#ifdef SMP + uintptr_t dpcpu; + int i; + + count = 0; + for (i = 0; i < mp_ncpus; ++i) { + dpcpu = dpcpu_off[i]; + if (dpcpu == 0) + continue; + count += *(int *)(dpcpu + (uintptr_t)arg1); + } +#else + count = *(int *)(dpcpu_off[0] + (uintptr_t)arg1); +#endif + return 
(SYSCTL_OUT(req, &count, sizeof(count))); +} + #ifdef DDB static void @@ -111,6 +311,7 @@ show_pcpu(struct pcpu *pc) struct thread *td; db_printf("cpuid = %d\n", pc->pc_cpuid); + db_printf("dynamic pcpu = %p\n", (void *)pc->pc_dynamic); db_printf("curthread = "); td = pc->pc_curthread; if (td != NULL) |