diff options
author | alc <alc@FreeBSD.org> | 2017-06-15 17:06:04 +0000 |
---|---|---|
committer | alc <alc@FreeBSD.org> | 2017-06-15 17:06:04 +0000 |
commit | 46991df369721e1c56721a358ac8dd2ddfc9d4bb (patch) | |
tree | efef8fe9a30e01ac8dd0cb8301753fa2cb3a54fc | |
parent | a81a73102ab59e959ce4a5ea3ddf6cd098c53828 (diff) | |
download | FreeBSD-src-46991df369721e1c56721a358ac8dd2ddfc9d4bb.zip FreeBSD-src-46991df369721e1c56721a358ac8dd2ddfc9d4bb.tar.gz |
MFC r318995
In r118390, the swap pager's approach to striping swap allocation over
multiple devices was changed. However, swapoff_one() was not fully and
correctly converted. In particular, with r118390's introduction of a per-
device blist, the maximum swap block size, "dmmax", became irrelevant to
swapoff_one()'s operation. Moreover, swapoff_one() was performing out-of-
range operations on the per-device blist that were silently ignored by
blist_fill().
This change corrects both of these problems with swapoff_one(), which will
allow us to potentially increase MAX_PAGEOUT_CLUSTER. Previously,
swapoff_one() would panic inside of blist_fill() if you increased
MAX_PAGEOUT_CLUSTER.
MFC r319001
After r118390, the variable "dmmax" was neither the correct strip size
nor the correct maximum block size. Moreover, after r318995, it serves
no purpose except to provide information to user space through a
read-only sysctl.
This change eliminates the variable "dmmax" but retains the sysctl. It
also corrects the value returned by the sysctl.
MFC r319604
Halve the memory being internally allocated by the blist allocator. In
short, half of the memory that is allocated to implement the radix tree is
wasted because we did not change "u_daddr_t" to be a 64-bit unsigned int
when we changed "daddr_t" to be a 64-bit (signed) int. (See r96849 and
r96851.)
MFC r319612
When the function blist_fill() was added to the kernel in r107913, the swap
pager used a different scheme for striping the allocation of swap space
across multiple devices. And, although blist_fill() was intended to support
fill operations with large counts, the old striping scheme never performed a
fill larger than the stripe size. Consequently, the misplacement of a
sanity check in blst_meta_fill() went undetected. Now, moving forward in
time to r118390, a new scheme for striping was introduced that maintained a
blist allocator per device, but as noted in r318995, swapoff_one() was not
fully and correctly converted to the new scheme. This change completes what
was started in r318995 by fixing the underlying bug in blst_meta_fill() that
stops swapoff_one() from simply performing a single blist_fill() operation.
MFC r319627
Starting in r118390, swaponsomething() began to reserve the blocks at the
beginning of a swap area for a disk label. However, neither r118390 nor
r118544, which increased the reservation from one to two blocks, correctly
accounted for these blocks when updating the variable "swap_pager_avail".
This change corrects that error.
MFC r319655
Originally, this file could be compiled as a user-space application for
testing purposes. However, over the years, various changes to the kernel
have broken this feature. This revision applies some fixes to get user-
space compilation working again. There are no changes in this revision
to code that is used by the kernel.
Approved by: re (kib)
-rw-r--r-- | sys/kern/subr_blist.c | 29 | ||||
-rw-r--r-- | sys/sys/blist.h | 4 | ||||
-rw-r--r-- | sys/vm/swap_pager.c | 27 |
3 files changed, 20 insertions, 40 deletions
diff --git a/sys/kern/subr_blist.c b/sys/kern/subr_blist.c index fb3526b..9855502 100644 --- a/sys/kern/subr_blist.c +++ b/sys/kern/subr_blist.c @@ -99,9 +99,8 @@ __FBSDID("$FreeBSD$"); #define BLIST_DEBUG #endif -#define SWAPBLK_NONE ((daddr_t)-1) - #include <sys/types.h> +#include <sys/malloc.h> #include <stdio.h> #include <string.h> #include <stdlib.h> @@ -110,8 +109,6 @@ __FBSDID("$FreeBSD$"); #define malloc(a,b,c) calloc(a, 1) #define free(a,b) free(a) -typedef unsigned int u_daddr_t; - #include <sys/blist.h> void panic(const char *ctl, ...); @@ -366,7 +363,7 @@ blst_leaf_alloc( j >>= 1; mask >>= j; } - scan->u.bmu_bitmap &= ~(1 << r); + scan->u.bmu_bitmap &= ~((u_daddr_t)1 << r); return(blk + r); } if (count <= BLIST_BMAP_RADIX) { @@ -658,7 +655,7 @@ static void blst_copy( int i; for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) { - if (v & (1 << i)) + if (v & ((u_daddr_t)1 << i)) blist_free(dest, blk + i, 1); } } @@ -769,6 +766,8 @@ blst_meta_fill( int next_skip = ((u_int)skip / BLIST_META_RADIX); int nblks = 0; + if (count > radix) + panic("blist_meta_fill: allocation too large"); if (count == radix || scan->u.bmu_avail == 0) { /* * ALL-ALLOCATED special case @@ -800,9 +799,6 @@ blst_meta_fill( radix /= BLIST_META_RADIX; } - if (count > radix) - panic("blist_meta_fill: allocation too large"); - i = (allocBlk - blk) / radix; blk += i * radix; i = i * next_skip + 1; @@ -922,7 +918,7 @@ blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab) if (radix == BLIST_BMAP_RADIX) { printf( - "%*.*s(%08llx,%lld): bitmap %08llx big=%lld\n", + "%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n", tab, tab, "", (long long)blk, (long long)radix, (long long)scan->u.bmu_bitmap, @@ -1016,9 +1012,8 @@ main(int ac, char **av) for (;;) { char buf[1024]; - daddr_t da = 0; - daddr_t count = 0; - + long long da = 0; + long long count = 0; printf("%lld/%lld/%lld> ", (long long)bl->bl_free, (long long)size, (long long)bl->bl_radix); @@ -1028,7 +1023,7 @@ main(int 
ac, char **av) switch(buf[0]) { case 'r': if (sscanf(buf + 1, "%lld", &count) == 1) { - blist_resize(&bl, count, 1); + blist_resize(&bl, count, 1, M_WAITOK); } else { printf("?\n"); } @@ -1044,16 +1039,14 @@ main(int ac, char **av) } break; case 'f': - if (sscanf(buf + 1, "%llx %lld", - (long long *)&da, (long long *)&count) == 2) { + if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) { blist_free(bl, da, count); } else { printf("?\n"); } break; case 'l': - if (sscanf(buf + 1, "%llx %lld", - (long long *)&da, (long long *)&count) == 2) { + if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) { printf(" n=%d\n", blist_fill(bl, da, count)); } else { diff --git a/sys/sys/blist.h b/sys/sys/blist.h index 1ddffed..7705353 100644 --- a/sys/sys/blist.h +++ b/sys/sys/blist.h @@ -44,7 +44,7 @@ * ops. * * SWAPBLK_NONE is returned on failure. This module is typically - * capable of managing up to (2^31) blocks per blist, though + * capable of managing up to (2^63) blocks per blist, though * the memory utilization would be insane if you actually did * that. Managing something like 512MB worth of 4K blocks * eats around 32 KBytes of memory. @@ -56,7 +56,7 @@ #ifndef _SYS_BLIST_H_ #define _SYS_BLIST_H_ -typedef u_int32_t u_daddr_t; /* unsigned disk address */ +typedef uint64_t u_daddr_t; /* unsigned disk address */ /* * note: currently use SWAPBLK_NONE as an absolute value rather then diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 0167117..b89a254 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -115,9 +115,8 @@ __FBSDID("$FreeBSD$"); #include <geom/geom.h> /* - * SWB_NPAGES must be a power of 2. It may be set to 1, 2, 4, 8, 16 - * or 32 pages per allocation. - * The 32-page limit is due to the radix code (kern/subr_blist.c). + * MAX_PAGEOUT_CLUSTER must be a power of 2 between 1 and 64. + * The 64-page limit is due to the radix code (kern/subr_blist.c). 
*/ #ifndef MAX_PAGEOUT_CLUSTER #define MAX_PAGEOUT_CLUSTER 16 @@ -380,18 +379,14 @@ struct pagerops swappagerops = { }; /* - * dmmax is in page-sized chunks with the new swap system. It was - * dev-bsized chunks in the old. dmmax is always a power of 2. - * * swap_*() routines are externally accessible. swp_*() routines are * internal. */ -static int dmmax; static int nswap_lowat = 128; /* in pages, swap_pager_almost_full warn */ static int nswap_hiwat = 512; /* in pages, swap_pager_almost_full warn */ -SYSCTL_INT(_vm, OID_AUTO, dmmax, CTLFLAG_RD, &dmmax, 0, - "Maximum size of a swap block"); +SYSCTL_INT(_vm, OID_AUTO, dmmax, CTLFLAG_RD, &nsw_cluster_max, 0, + "Maximum size of a swap block in pages"); static void swp_sizecheck(void); static void swp_pager_async_iodone(struct buf *bp); @@ -488,11 +483,6 @@ swap_pager_init(void) mtx_init(&sw_dev_mtx, "swapdev", NULL, MTX_DEF); sx_init(&sw_alloc_sx, "swspsx"); sx_init(&swdev_syscall_lock, "swsysc"); - - /* - * Device Stripe, in PAGE_SIZE'd blocks - */ - dmmax = SWB_NPAGES * 2; } /* @@ -2204,7 +2194,7 @@ swaponsomething(struct vnode *vp, void *id, u_long nblks, sp->sw_end = dvbase + nblks; TAILQ_INSERT_TAIL(&swtailq, sp, sw_list); nswapdev++; - swap_pager_avail += nblks; + swap_pager_avail += nblks - 2; swap_total += (vm_ooffset_t)nblks * PAGE_SIZE; swapon_check_swzone(swap_total / PAGE_SIZE); swp_sizecheck(); @@ -2271,7 +2261,7 @@ done: static int swapoff_one(struct swdevt *sp, struct ucred *cred) { - u_long nblks, dvbase; + u_long nblks; #ifdef MAC int error; #endif @@ -2302,10 +2292,7 @@ swapoff_one(struct swdevt *sp, struct ucred *cred) */ mtx_lock(&sw_dev_mtx); sp->sw_flags |= SW_CLOSING; - for (dvbase = 0; dvbase < sp->sw_end; dvbase += dmmax) { - swap_pager_avail -= blist_fill(sp->sw_blist, - dvbase, dmmax); - } + swap_pager_avail -= blist_fill(sp->sw_blist, 0, nblks); swap_total -= (vm_ooffset_t)nblks * PAGE_SIZE; mtx_unlock(&sw_dev_mtx); |