summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorglebius <glebius@FreeBSD.org>2013-04-08 19:10:45 +0000
committerglebius <glebius@FreeBSD.org>2013-04-08 19:10:45 +0000
commit7f9db020a246190e72c9b9656997403221c48a3a (patch)
treeaadd654864cd16caea405f5ff2772f2fcd665103
parent4312ec3f0dd92a3dedf58a28ebc6f4ae3a1a7672 (diff)
downloadFreeBSD-src-7f9db020a246190e72c9b9656997403221c48a3a.zip
FreeBSD-src-7f9db020a246190e72c9b9656997403221c48a3a.tar.gz
Merge from projects/counters: UMA_ZONE_PCPU zones.
These zones have slab size == sizeof(struct pcpu), but request from the VM enough pages to fit (uk_slabsize * mp_ncpus). An item allocated from such a zone has a separate twin for each CPU in the system, and these twins are at a distance of sizeof(struct pcpu) from each other. This magic value of distance will allow us to make some optimizations later. To address a private item from a CPU, simple arithmetic should be used: item = (type *)((char *)base + sizeof(struct pcpu) * curcpu) This arithmetic is available as the zpcpu_get() macro in pcpu.h. To introduce non-page-size slabs, a new field, uk_slabsize, has been added to uma_keg. This shifted some frequently used fields of uma_keg to the fourth cache line on amd64. To mitigate this pessimization, the uma_keg fields were rearranged a bit, and the least frequently used fields, uk_name and uk_link, were moved down to the fourth cache line. All other fields that are dereferenced frequently fit into the first three cache lines. Sponsored by: Nginx, Inc.
-rw-r--r--share/man/man9/zone.917
-rw-r--r--sys/vm/uma.h6
-rw-r--r--sys/vm/uma_core.c88
-rw-r--r--sys/vm/uma_int.h12
4 files changed, 85 insertions, 38 deletions
diff --git a/share/man/man9/zone.9 b/share/man/man9/zone.9
index 03bb33b..d7eebe4 100644
--- a/share/man/man9/zone.9
+++ b/share/man/man9/zone.9
@@ -153,6 +153,23 @@ See
.Fn uma_find_refcnt .
.It Dv UMA_ZONE_NODUMP
Pages belonging to the zone will not be included into mini-dumps.
+.It Dv UMA_ZONE_PCPU
+An allocation from zone would have
+.Va mp_ncpu
+shadow copies, that are privately assigned to CPUs.
+A CPU can address its private copy using base allocation address plus
+multiple of current CPU id and
+.Fn sizeof "struct pcpu" :
+.Bd -literal -offset indent
+foo_zone = uma_zcreate(..., UMA_ZONE_PCPU);
+ ...
+foo_base = uma_zalloc(foo_zone, ...);
+ ...
+critical_enter();
+foo_pcpu = (foo_t *)zpcpu_get(foo_base);
+/* do something with foo_pcpu */
+critical_exit();
+.Ed
.It Dv UMA_ZONE_OFFPAGE
By default book-keeping of items within a slab is done in the slab page itself.
This flag explicitly tells subsystem that book-keeping structure should be
diff --git a/sys/vm/uma.h b/sys/vm/uma.h
index 2569b88..b18693b 100644
--- a/sys/vm/uma.h
+++ b/sys/vm/uma.h
@@ -252,6 +252,10 @@ int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
* Zone's pages will not be included in
* mini-dumps.
*/
+#define UMA_ZONE_PCPU 0x8000 /*
+ * Allocates mp_ncpus slabs sized to
+ * sizeof(struct pcpu).
+ */
/*
* These flags are shared between the keg and zone. In zones wishing to add
@@ -260,7 +264,7 @@ int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
*/
#define UMA_ZONE_INHERIT \
(UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE | \
- UMA_ZONE_HASH | UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB)
+ UMA_ZONE_HASH | UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU)
/* Definitions for align */
#define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index 1879b7e..e55768f 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -765,9 +765,9 @@ finished:
SKIP_NONE, ZFREE_STATFREE);
#ifdef UMA_DEBUG
printf("%s: Returning %d bytes.\n",
- keg->uk_name, UMA_SLAB_SIZE * keg->uk_ppera);
+ keg->uk_name, PAGE_SIZE * keg->uk_ppera);
#endif
- keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
+ keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
}
}
@@ -865,7 +865,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
wait |= M_NODUMP;
/* zone is passed for legacy reasons. */
- mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
+ mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
if (mem == NULL) {
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
zone_free_item(keg->uk_slabzone, slab, NULL,
@@ -927,7 +927,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
zone_free_item(keg->uk_slabzone, slab,
NULL, SKIP_NONE, ZFREE_STATFREE);
- keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
+ keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera,
flags);
KEG_LOCK(keg);
return (NULL);
@@ -1138,16 +1138,27 @@ keg_small_init(uma_keg_t keg)
u_int wastedspace;
u_int shsize;
- KASSERT(keg != NULL, ("Keg is null in keg_small_init"));
+ if (keg->uk_flags & UMA_ZONE_PCPU) {
+ keg->uk_slabsize = sizeof(struct pcpu);
+ keg->uk_ppera = howmany(mp_ncpus * sizeof(struct pcpu),
+ PAGE_SIZE);
+ } else {
+ keg->uk_slabsize = UMA_SLAB_SIZE;
+ keg->uk_ppera = 1;
+ }
+
rsize = keg->uk_size;
- if (rsize < UMA_SMALLEST_UNIT)
- rsize = UMA_SMALLEST_UNIT;
if (rsize & keg->uk_align)
rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
+ if (rsize < keg->uk_slabsize / 256)
+ rsize = keg->uk_slabsize / 256;
keg->uk_rsize = rsize;
- keg->uk_ppera = 1;
+
+ KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
+ keg->uk_rsize < sizeof(struct pcpu),
+ ("%s: size %u too large", __func__, keg->uk_rsize));
if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
shsize = 0;
@@ -1159,10 +1170,12 @@ keg_small_init(uma_keg_t keg)
shsize = sizeof(struct uma_slab);
}
- keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
- KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
+ keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize;
+ KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= 255,
+ ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
+
memused = keg->uk_ipers * rsize + shsize;
- wastedspace = UMA_SLAB_SIZE - memused;
+ wastedspace = keg->uk_slabsize - memused;
/*
* We can't do OFFPAGE if we're internal or if we've been
@@ -1175,24 +1188,26 @@ keg_small_init(uma_keg_t keg)
(keg->uk_flags & UMA_ZFLAG_CACHEONLY))
return;
- if ((wastedspace >= UMA_MAX_WASTE) &&
- (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
- keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
- KASSERT(keg->uk_ipers <= 255,
- ("keg_small_init: keg->uk_ipers too high!"));
+ if ((wastedspace >= keg->uk_slabsize / UMA_MAX_WASTE) &&
+ (keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) {
+ keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize;
+ KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= 255,
+ ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
#ifdef UMA_DEBUG
printf("UMA decided we need offpage slab headers for "
"keg: %s, calculated wastedspace = %d, "
"maximum wasted space allowed = %d, "
"calculated ipers = %d, "
"new wasted space = %d\n", keg->uk_name, wastedspace,
- UMA_MAX_WASTE, keg->uk_ipers,
- UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
+ keg->uk_slabsize / UMA_MAX_WASTE, keg->uk_ipers,
+ keg->uk_slabsize - keg->uk_ipers * keg->uk_rsize);
#endif
keg->uk_flags |= UMA_ZONE_OFFPAGE;
- if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
- keg->uk_flags |= UMA_ZONE_HASH;
}
+
+ if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
+ (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
+ keg->uk_flags |= UMA_ZONE_HASH;
}
/*
@@ -1209,19 +1224,15 @@ keg_small_init(uma_keg_t keg)
static void
keg_large_init(uma_keg_t keg)
{
- int pages;
KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
+ KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
+ ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
- pages = keg->uk_size / UMA_SLAB_SIZE;
-
- /* Account for remainder */
- if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
- pages++;
-
- keg->uk_ppera = pages;
+ keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
+ keg->uk_slabsize = keg->uk_ppera * PAGE_SIZE;
keg->uk_ipers = 1;
keg->uk_rsize = keg->uk_size;
@@ -1242,6 +1253,9 @@ keg_cachespread_init(uma_keg_t keg)
int pages;
int rsize;
+ KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
+ ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
+
alignsize = keg->uk_align + 1;
rsize = keg->uk_size;
/*
@@ -1259,6 +1273,7 @@ keg_cachespread_init(uma_keg_t keg)
pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
keg->uk_rsize = rsize;
keg->uk_ppera = pages;
+ keg->uk_slabsize = UMA_SLAB_SIZE;
keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
KASSERT(keg->uk_ipers <= uma_max_ipers,
@@ -1308,6 +1323,13 @@ keg_ctor(void *mem, int size, void *udata, int flags)
if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
keg->uk_flags |= UMA_ZONE_VTOSLAB;
+ if (arg->flags & UMA_ZONE_PCPU)
+#ifdef SMP
+ keg->uk_flags |= UMA_ZONE_OFFPAGE;
+#else
+ keg->uk_flags &= ~UMA_ZONE_PCPU;
+#endif
+
/*
* The +UMA_FRITM_SZ added to uk_size is to account for the
* linkage that is added to the size in keg_small_init(). If
@@ -1385,7 +1407,7 @@ keg_ctor(void *mem, int size, void *udata, int flags)
if (totsize & UMA_ALIGN_PTR)
totsize = (totsize & ~UMA_ALIGN_PTR) +
(UMA_ALIGN_PTR + 1);
- keg->uk_pgoff = (UMA_SLAB_SIZE * keg->uk_ppera) - totsize;
+ keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
if (keg->uk_flags & UMA_ZONE_REFCNT)
totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
@@ -1401,7 +1423,7 @@ keg_ctor(void *mem, int size, void *udata, int flags)
* mathematically possible for all cases, so we make
* sure here anyway.
*/
- if (totsize > UMA_SLAB_SIZE * keg->uk_ppera) {
+ if (totsize > PAGE_SIZE * keg->uk_ppera) {
printf("zone %s ipers %d rsize %d size %d\n",
zone->uz_name, keg->uk_ipers, keg->uk_rsize,
keg->uk_size);
@@ -1676,7 +1698,8 @@ uma_startup(void *bootmem, int boot_pages)
* that we need to go to offpage slab headers. Or, if we do,
* then we trap that condition below and panic in the INVARIANTS case.
*/
- wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
+ wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) -
+ (UMA_SLAB_SIZE / UMA_MAX_WASTE);
totsize = wsize;
objsize = UMA_SMALLEST_UNIT;
while (totsize >= wsize) {
@@ -1689,7 +1712,8 @@ uma_startup(void *bootmem, int boot_pages)
objsize--;
uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64);
- wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
+ wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) -
+ (UMA_SLAB_SIZE / UMA_MAX_WASTE);
totsize = wsize;
objsize = UMA_SMALLEST_UNIT;
while (totsize >= wsize) {
diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h
index b8c2be0..ee02a46 100644
--- a/sys/vm/uma_int.h
+++ b/sys/vm/uma_int.h
@@ -120,8 +120,8 @@
#define UMA_BOOT_PAGES 64 /* Pages allocated for startup */
-/* Max waste before going to off page slab management */
-#define UMA_MAX_WASTE (UMA_SLAB_SIZE / 10)
+/* Max waste percentage before going to off page slab management */
+#define UMA_MAX_WASTE 10
/*
* I doubt there will be many cases where this is exceeded. This is the initial
@@ -197,12 +197,9 @@ typedef struct uma_cache * uma_cache_t;
*
*/
struct uma_keg {
- LIST_ENTRY(uma_keg) uk_link; /* List of all kegs */
-
struct mtx uk_lock; /* Lock for the keg */
struct uma_hash uk_hash;
- const char *uk_name; /* Name of creating zone. */
LIST_HEAD(,uma_zone) uk_zones; /* Keg's zones */
LIST_HEAD(,uma_slab) uk_part_slab; /* partially allocated slabs */
LIST_HEAD(,uma_slab) uk_free_slab; /* empty slab list */
@@ -225,10 +222,15 @@ struct uma_keg {
vm_offset_t uk_kva; /* Zone base KVA */
uma_zone_t uk_slabzone; /* Slab zone backing us, if OFFPAGE */
+ u_int16_t uk_slabsize; /* Slab size for this keg */
u_int16_t uk_pgoff; /* Offset to uma_slab struct */
u_int16_t uk_ppera; /* pages per allocation from backend */
u_int16_t uk_ipers; /* Items per slab */
u_int32_t uk_flags; /* Internal flags */
+
+ /* Least used fields go to the last cache line. */
+ const char *uk_name; /* Name of creating zone. */
+ LIST_ENTRY(uma_keg) uk_link; /* List of all kegs */
};
typedef struct uma_keg * uma_keg_t;
OpenPOWER on IntegriCloud