author     green <green@FreeBSD.org>  2004-10-08 20:19:29 +0000
committer  green <green@FreeBSD.org>  2004-10-08 20:19:29 +0000
commit     9128ff1ce9a84a3099a1b9e8ae5cfc56b2276565 (patch)
tree       50327d4c1c80261747d0d09c606631d722403fd2 /sys
parent     a0fc078f8632157112a60136eb368dbc8203e93f (diff)
Fix critical stability problems that can cause UMA mbuf cluster
state management corruption, mbuf leaks, general mbuf corruption,
and at least on i386 a first-level splash damage radius that
encompasses up to about half a megabyte of the memory after an mbuf
cluster's allocation slab.  In short, this has caused instability
nightmares anywhere the right kind of network traffic is present.

When the polymorphic refcount slabs were added to UMA, the new types
were not used pervasively.  In particular, the slab management
structure was split into one for refcounts and one for non-refcounts
(the latter supposed to be mostly like the old slab management
structure), but the non-refcount structure was almost always used
throughout.  In general, every access to zones with UMA_ZONE_REFCNT
turned on corrupted the "next free" slab offset and the refcount with
each other and with other allocations (on i386, 2 mbuf clusters per
4096-byte slab).

Fix things so that the right type is used to access refcounted zones
where it was not before.

There are additional errors of gross overestimation of padding, it
seems, that would cause large kegs (nee zones) to be allocated when
small ones would do.  Unless I have analyzed this incorrectly, it is
not directly harmful.
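
To make the layout hazard concrete, the following userland sketch uses
simplified stand-ins for the two per-item free-list entry layouts (these
are illustrative structures, not the exact definitions from vm/uma_int.h)
and builds a refcounted free list through the plain one-byte-stride view,
roughly what the unfixed code did for UMA_ZONE_REFCNT zones: the writes
land inside the first entries and scribble over refcount bytes, while the
later entries' us_item fields are never set at all.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for a plain slab's per-item entry: 1-byte stride. */
struct plain_freent {
	uint8_t us_item;		/* index of the next free item */
};

/* Illustrative stand-in for a UMA_ZONE_REFCNT slab's per-item entry. */
struct refcnt_freent {
	uint8_t  us_item;
	uint32_t us_refcnt;		/* padded out to a larger stride */
};

int
main(void)
{
	struct refcnt_freent freelist[8] = { { 0, 0 } };
	struct plain_freent *wrong = (struct plain_freent *)freelist;
	int i;

	/*
	 * Build the "next free" chain through the wrong (1-byte-stride)
	 * layout, as the pre-fix code effectively did for refcounted zones.
	 */
	for (i = 0; i < 8; i++)
		wrong[i].us_item = i + 1;

	/* Refcounts are now scribbled on and most us_item slots are unset. */
	for (i = 0; i < 8; i++)
		printf("entry %d: us_item=%u us_refcnt=%u\n", i,
		    (unsigned)freelist[i].us_item,
		    (unsigned)freelist[i].us_refcnt);
	return (0);
}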
Diffstat (limited to 'sys')
-rw-r--r--  sys/vm/uma_core.c | 38
-rw-r--r--  sys/vm/uma_dbg.c  | 50
2 files changed, 64 insertions(+), 24 deletions(-)
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index 31c7fe8..c4c70de 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -232,7 +232,7 @@ void uma_print_stats(void);
static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
#ifdef WITNESS
-static int nosleepwithlocks = 1;
+static int nosleepwithlocks = 0;
SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
#else
@@ -825,13 +825,16 @@ slab_zalloc(uma_zone_t zone, int wait)
slab->us_freecount = keg->uk_ipers;
slab->us_firstfree = 0;
slab->us_flags = flags;
- for (i = 0; i < keg->uk_ipers; i++)
- slab->us_freelist[i].us_item = i+1;
if (keg->uk_flags & UMA_ZONE_REFCNT) {
slabref = (uma_slabrefcnt_t)slab;
- for (i = 0; i < keg->uk_ipers; i++)
+ for (i = 0; i < keg->uk_ipers; i++) {
slabref->us_freelist[i].us_refcnt = 0;
+ slabref->us_freelist[i].us_item = i+1;
+ }
+ } else {
+ for (i = 0; i < keg->uk_ipers; i++)
+ slab->us_freelist[i].us_item = i+1;
}
if (keg->uk_init != NULL) {
@@ -1983,13 +1986,19 @@ static void *
uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
{
uma_keg_t keg;
+ uma_slabrefcnt_t slabref;
void *item;
u_int8_t freei;
keg = zone->uz_keg;
freei = slab->us_firstfree;
- slab->us_firstfree = slab->us_freelist[freei].us_item;
+ if (keg->uk_flags & UMA_ZONE_REFCNT) {
+ slabref = (uma_slabrefcnt_t)slab;
+ slab->us_firstfree = slabref->us_freelist[freei].us_item;
+ } else {
+ slab->us_firstfree = slab->us_freelist[freei].us_item;
+ }
item = slab->us_data + (keg->uk_rsize * freei);
slab->us_freecount--;
@@ -2339,6 +2348,7 @@ uma_zfree_internal(uma_zone_t zone, void *item, void *udata,
enum zfreeskip skip)
{
uma_slab_t slab;
+ uma_slabrefcnt_t slabref;
uma_keg_t keg;
u_int8_t *mem;
u_int8_t freei;
@@ -2382,7 +2392,12 @@ uma_zfree_internal(uma_zone_t zone, void *item, void *udata,
uma_dbg_free(zone, slab, item);
#endif
- slab->us_freelist[freei].us_item = slab->us_firstfree;
+ if (keg->uk_flags & UMA_ZONE_REFCNT) {
+ slabref = (uma_slabrefcnt_t)slab;
+ slabref->us_freelist[freei].us_item = slab->us_firstfree;
+ } else {
+ slab->us_freelist[freei].us_item = slab->us_firstfree;
+ }
slab->us_firstfree = freei;
slab->us_freecount++;
@@ -2545,18 +2560,19 @@ uma_prealloc(uma_zone_t zone, int items)
u_int32_t *
uma_find_refcnt(uma_zone_t zone, void *item)
{
- uma_slabrefcnt_t slab;
+ uma_slabrefcnt_t slabref;
uma_keg_t keg;
u_int32_t *refcnt;
int idx;
keg = zone->uz_keg;
- slab = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
- KASSERT(slab != NULL,
+ slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
+ (~UMA_SLAB_MASK));
+ KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
- idx = ((unsigned long)item - (unsigned long)slab->us_data)
+ idx = ((unsigned long)item - (unsigned long)slabref->us_data)
/ keg->uk_rsize;
- refcnt = &(slab->us_freelist[idx].us_refcnt);
+ refcnt = &slabref->us_freelist[idx].us_refcnt;
return refcnt;
}
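
For context on how the repaired uma_find_refcnt() is meant to be used,
here is a minimal kernel-side sketch, assuming a zone created with
UMA_ZONE_REFCNT; the zone name, item size, and wrapper functions are
illustrative, not code from the tree.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <vm/uma.h>

static uma_zone_t example_zone;		/* hypothetical refcounted zone */

static void
example_zone_init(void)
{
	/* Each item carries a 32-bit refcount kept in its slab header. */
	example_zone = uma_zcreate("example_refcnt", 2048,
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
}

static void *
example_alloc(u_int32_t **refp)
{
	void *item;

	item = uma_zalloc(example_zone, M_NOWAIT);
	if (item == NULL)
		return (NULL);
	/* Locate the item's refcount slot and take the first reference. */
	*refp = uma_find_refcnt(example_zone, item);
	**refp = 1;
	return (item);
}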
diff --git a/sys/vm/uma_dbg.c b/sys/vm/uma_dbg.c
index cbf164a..963a36e 100644
--- a/sys/vm/uma_dbg.c
+++ b/sys/vm/uma_dbg.c
@@ -218,6 +218,7 @@ void
uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
{
uma_keg_t keg;
+ uma_slabrefcnt_t slabref;
int freei;
keg = zone->uz_keg;
@@ -231,7 +232,12 @@ uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
freei = ((unsigned long)item - (unsigned long)slab->us_data)
/ keg->uk_rsize;
- slab->us_freelist[freei].us_item = 255;
+ if (keg->uk_flags & UMA_ZONE_REFCNT) {
+ slabref = (uma_slabrefcnt_t)slab;
+ slabref->us_freelist[freei].us_item = 255;
+ } else {
+ slab->us_freelist[freei].us_item = 255;
+ }
return;
}
@@ -246,6 +252,7 @@ void
uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
{
uma_keg_t keg;
+ uma_slabrefcnt_t slabref;
int freei;
keg = zone->uz_keg;
@@ -270,17 +277,34 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
(freei * keg->uk_rsize) + slab->us_data);
}
- if (slab->us_freelist[freei].us_item != 255) {
- printf("Slab at %p, freei %d = %d.\n",
- slab, freei, slab->us_freelist[freei].us_item);
- panic("Duplicate free of item %p from zone %p(%s)\n",
- item, zone, zone->uz_name);
- }
+ if (keg->uk_flags & UMA_ZONE_REFCNT) {
+ slabref = (uma_slabrefcnt_t)slab;
+ if (slabref->us_freelist[freei].us_item != 255) {
+ printf("Slab at %p, freei %d = %d.\n",
+ slab, freei, slabref->us_freelist[freei].us_item);
+ panic("Duplicate free of item %p from zone %p(%s)\n",
+ item, zone, zone->uz_name);
+ }
- /*
- * When this is actually linked into the slab this will change.
- * Until then the count of valid slabs will make sure we don't
- * accidentally follow this and assume it's a valid index.
- */
- slab->us_freelist[freei].us_item = 0;
+ /*
+ * When this is actually linked into the slab this will change.
+ * Until then the count of valid slabs will make sure we don't
+ * accidentally follow this and assume it's a valid index.
+ */
+ slabref->us_freelist[freei].us_item = 0;
+ } else {
+ if (slab->us_freelist[freei].us_item != 255) {
+ printf("Slab at %p, freei %d = %d.\n",
+ slab, freei, slab->us_freelist[freei].us_item);
+ panic("Duplicate free of item %p from zone %p(%s)\n",
+ item, zone, zone->uz_name);
+ }
+
+ /*
+ * When this is actually linked into the slab this will change.
+ * Until then the count of valid slabs will make sure we don't
+ * accidentally follow this and assume it's a valid index.
+ */
+ slab->us_freelist[freei].us_item = 0;
+ }
}
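
The same dispatch recurs in slab_zalloc(), uma_slab_alloc(),
uma_zfree_internal(), uma_dbg_alloc(), and uma_dbg_free(): check
UMA_ZONE_REFCNT and cast to uma_slabrefcnt_t before touching
us_freelist, because the two slab layouts use different per-item
strides.  A hypothetical pair of helpers (not part of this commit)
that captures that pattern might look like:

/* Hypothetical helpers, not in this commit: one place for the dispatch. */
static __inline u_int8_t
slab_freelist_get_item(uma_keg_t keg, uma_slab_t slab, int idx)
{

	if (keg->uk_flags & UMA_ZONE_REFCNT)
		return (((uma_slabrefcnt_t)slab)->us_freelist[idx].us_item);
	return (slab->us_freelist[idx].us_item);
}

static __inline void
slab_freelist_set_item(uma_keg_t keg, uma_slab_t slab, int idx, u_int8_t val)
{

	if (keg->uk_flags & UMA_ZONE_REFCNT)
		((uma_slabrefcnt_t)slab)->us_freelist[idx].us_item = val;
	else
		slab->us_freelist[idx].us_item = val;
}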