diff options
author | green <green@FreeBSD.org> | 2004-10-08 20:19:29 +0000 |
---|---|---|
committer | green <green@FreeBSD.org> | 2004-10-08 20:19:29 +0000 |
commit | 9128ff1ce9a84a3099a1b9e8ae5cfc56b2276565 (patch) | |
tree | 50327d4c1c80261747d0d09c606631d722403fd2 /sys/vm | |
parent | a0fc078f8632157112a60136eb368dbc8203e93f (diff) | |
download | FreeBSD-src-9128ff1ce9a84a3099a1b9e8ae5cfc56b2276565.zip FreeBSD-src-9128ff1ce9a84a3099a1b9e8ae5cfc56b2276565.tar.gz |
Fix critical stability problems that can cause UMA mbuf cluster
state management corruption, mbuf leaks, general mbuf corruption,
and at least on i386 a first level splash damage radius that
encompasses up to about half a megabyte of the memory after
an mbuf cluster's allocation slab. In short, this has caused
instability nightmares anywhere the right kind of network traffic
is present.
When the polymorphic refcount slabs were added to UMA, the new types
were not used pervasively. In particular, the slab management
structure was turned into one for refcounts, and one for non-refcounts
(supposed to be mostly like the old slab management structure),
but the latter was almost always used throughout. In general, every
access to zones with UMA_ZONE_REFCNT turned on corrupted the
"next free" slab offset and the refcount with each other and
with other allocations (on i386, 2 mbuf clusters per 4096 byte slab).
Fix things so that the right type is used to access refcounted zones
where it was not before. There are additional errors in gross
overestimation of padding, it seems, that would cause large kegs
(nee zones) to be allocated when small ones would do. Unless I have
analyzed this incorrectly, it is not directly harmful.
Diffstat (limited to 'sys/vm')
-rw-r--r-- | sys/vm/uma_core.c | 38 | ||||
-rw-r--r-- | sys/vm/uma_dbg.c | 50 |
2 files changed, 64 insertions, 24 deletions
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 31c7fe8..c4c70de 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -232,7 +232,7 @@ void uma_print_stats(void); static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS); #ifdef WITNESS -static int nosleepwithlocks = 1; +static int nosleepwithlocks = 0; SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks, 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths"); #else @@ -825,13 +825,16 @@ slab_zalloc(uma_zone_t zone, int wait) slab->us_freecount = keg->uk_ipers; slab->us_firstfree = 0; slab->us_flags = flags; - for (i = 0; i < keg->uk_ipers; i++) - slab->us_freelist[i].us_item = i+1; if (keg->uk_flags & UMA_ZONE_REFCNT) { slabref = (uma_slabrefcnt_t)slab; - for (i = 0; i < keg->uk_ipers; i++) + for (i = 0; i < keg->uk_ipers; i++) { slabref->us_freelist[i].us_refcnt = 0; + slabref->us_freelist[i].us_item = i+1; + } + } else { + for (i = 0; i < keg->uk_ipers; i++) + slab->us_freelist[i].us_item = i+1; } if (keg->uk_init != NULL) { @@ -1983,13 +1986,19 @@ static void * uma_slab_alloc(uma_zone_t zone, uma_slab_t slab) { uma_keg_t keg; + uma_slabrefcnt_t slabref; void *item; u_int8_t freei; keg = zone->uz_keg; freei = slab->us_firstfree; - slab->us_firstfree = slab->us_freelist[freei].us_item; + if (keg->uk_flags & UMA_ZONE_REFCNT) { + slabref = (uma_slabrefcnt_t)slab; + slab->us_firstfree = slabref->us_freelist[freei].us_item; + } else { + slab->us_firstfree = slab->us_freelist[freei].us_item; + } item = slab->us_data + (keg->uk_rsize * freei); slab->us_freecount--; @@ -2339,6 +2348,7 @@ uma_zfree_internal(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip) { uma_slab_t slab; + uma_slabrefcnt_t slabref; uma_keg_t keg; u_int8_t *mem; u_int8_t freei; @@ -2382,7 +2392,12 @@ uma_zfree_internal(uma_zone_t zone, void *item, void *udata, uma_dbg_free(zone, slab, item); #endif - slab->us_freelist[freei].us_item = slab->us_firstfree; + if (keg->uk_flags & UMA_ZONE_REFCNT) 
{ + slabref = (uma_slabrefcnt_t)slab; + slabref->us_freelist[freei].us_item = slab->us_firstfree; + } else { + slab->us_freelist[freei].us_item = slab->us_firstfree; + } slab->us_firstfree = freei; slab->us_freecount++; @@ -2545,18 +2560,19 @@ uma_prealloc(uma_zone_t zone, int items) u_int32_t * uma_find_refcnt(uma_zone_t zone, void *item) { - uma_slabrefcnt_t slab; + uma_slabrefcnt_t slabref; uma_keg_t keg; u_int32_t *refcnt; int idx; keg = zone->uz_keg; - slab = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK)); - KASSERT(slab != NULL, + slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item & + (~UMA_SLAB_MASK)); + KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT, ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT")); - idx = ((unsigned long)item - (unsigned long)slab->us_data) + idx = ((unsigned long)item - (unsigned long)slabref->us_data) / keg->uk_rsize; - refcnt = &(slab->us_freelist[idx].us_refcnt); + refcnt = &slabref->us_freelist[idx].us_refcnt; return refcnt; } diff --git a/sys/vm/uma_dbg.c b/sys/vm/uma_dbg.c index cbf164a..963a36e 100644 --- a/sys/vm/uma_dbg.c +++ b/sys/vm/uma_dbg.c @@ -218,6 +218,7 @@ void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item) { uma_keg_t keg; + uma_slabrefcnt_t slabref; int freei; keg = zone->uz_keg; @@ -231,7 +232,12 @@ uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item) freei = ((unsigned long)item - (unsigned long)slab->us_data) / keg->uk_rsize; - slab->us_freelist[freei].us_item = 255; + if (keg->uk_flags & UMA_ZONE_REFCNT) { + slabref = (uma_slabrefcnt_t)slab; + slabref->us_freelist[freei].us_item = 255; + } else { + slab->us_freelist[freei].us_item = 255; + } return; } @@ -246,6 +252,7 @@ void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item) { uma_keg_t keg; + uma_slabrefcnt_t slabref; int freei; keg = zone->uz_keg; @@ -270,17 +277,34 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item) (freei * keg->uk_rsize) + slab->us_data); } - if 
(slab->us_freelist[freei].us_item != 255) { - printf("Slab at %p, freei %d = %d.\n", - slab, freei, slab->us_freelist[freei].us_item); - panic("Duplicate free of item %p from zone %p(%s)\n", - item, zone, zone->uz_name); - } + if (keg->uk_flags & UMA_ZONE_REFCNT) { + slabref = (uma_slabrefcnt_t)slab; + if (slabref->us_freelist[freei].us_item != 255) { + printf("Slab at %p, freei %d = %d.\n", + slab, freei, slabref->us_freelist[freei].us_item); + panic("Duplicate free of item %p from zone %p(%s)\n", + item, zone, zone->uz_name); + } - /* - * When this is actually linked into the slab this will change. - * Until then the count of valid slabs will make sure we don't - * accidentally follow this and assume it's a valid index. - */ - slab->us_freelist[freei].us_item = 0; + /* + * When this is actually linked into the slab this will change. + * Until then the count of valid slabs will make sure we don't + * accidentally follow this and assume it's a valid index. + */ + slabref->us_freelist[freei].us_item = 0; + } else { + if (slab->us_freelist[freei].us_item != 255) { + printf("Slab at %p, freei %d = %d.\n", + slab, freei, slab->us_freelist[freei].us_item); + panic("Duplicate free of item %p from zone %p(%s)\n", + item, zone, zone->uz_name); + } + + /* + * When this is actually linked into the slab this will change. + * Until then the count of valid slabs will make sure we don't + * accidentally follow this and assume it's a valid index. + */ + slab->us_freelist[freei].us_item = 0; + } } |