summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoralc <alc@FreeBSD.org>2017-06-15 17:06:04 +0000
committeralc <alc@FreeBSD.org>2017-06-15 17:06:04 +0000
commit46991df369721e1c56721a358ac8dd2ddfc9d4bb (patch)
treeefef8fe9a30e01ac8dd0cb8301753fa2cb3a54fc
parenta81a73102ab59e959ce4a5ea3ddf6cd098c53828 (diff)
downloadFreeBSD-src-46991df369721e1c56721a358ac8dd2ddfc9d4bb.zip
FreeBSD-src-46991df369721e1c56721a358ac8dd2ddfc9d4bb.tar.gz
MFC r318995
In r118390, the swap pager's approach to striping swap allocation over multiple devices was changed. However, swapoff_one() was not fully and correctly converted. In particular, with r118390's introduction of a per-device blist, the maximum swap block size, "dmmax", became irrelevant to swapoff_one()'s operation. Moreover, swapoff_one() was performing out-of-range operations on the per-device blist that were silently ignored by blist_fill(). This change corrects both of these problems with swapoff_one(), which will allow us to potentially increase MAX_PAGEOUT_CLUSTER. Previously, swapoff_one() would panic inside of blist_fill() if you increased MAX_PAGEOUT_CLUSTER. MFC r319001 After r118390, the variable "dmmax" was neither the correct strip size nor the correct maximum block size. Moreover, after r318995, it serves no purpose except to provide information to user space through a read-only sysctl. This change eliminates the variable "dmmax" but retains the sysctl. It also corrects the value returned by the sysctl. MFC r319604 Halve the memory being internally allocated by the blist allocator. In short, half of the memory that is allocated to implement the radix tree is wasted because we did not change "u_daddr_t" to be a 64-bit unsigned int when we changed "daddr_t" to be a 64-bit (signed) int. (See r96849 and r96851.) MFC r319612 When the function blist_fill() was added to the kernel in r107913, the swap pager used a different scheme for striping the allocation of swap space across multiple devices. And, although blist_fill() was intended to support fill operations with large counts, the old striping scheme never performed a fill larger than the stripe size. Consequently, the misplacement of a sanity check in blst_meta_fill() went undetected. Now, moving forward in time to r118390, a new scheme for striping was introduced that maintained a blist allocator per device, but as noted in r318995, swapoff_one() was not fully and correctly converted to the new scheme.
This change completes what was started in r318995 by fixing the underlying bug in blst_meta_fill() that stops swapoff_one() from simply performing a single blist_fill() operation. MFC r319627 Starting in r118390, swaponsomething() began to reserve the blocks at the beginning of a swap area for a disk label. However, neither r118390 nor r118544, which increased the reservation from one to two blocks, correctly accounted for these blocks when updating the variable "swap_pager_avail". This change corrects that error. MFC r319655 Originally, this file could be compiled as a user-space application for testing purposes. However, over the years, various changes to the kernel have broken this feature. This revision applies some fixes to get user-space compilation working again. There are no changes in this revision to code that is used by the kernel. Approved by: re (kib)
-rw-r--r--sys/kern/subr_blist.c29
-rw-r--r--sys/sys/blist.h4
-rw-r--r--sys/vm/swap_pager.c27
3 files changed, 20 insertions, 40 deletions
diff --git a/sys/kern/subr_blist.c b/sys/kern/subr_blist.c
index fb3526b..9855502 100644
--- a/sys/kern/subr_blist.c
+++ b/sys/kern/subr_blist.c
@@ -99,9 +99,8 @@ __FBSDID("$FreeBSD$");
#define BLIST_DEBUG
#endif
-#define SWAPBLK_NONE ((daddr_t)-1)
-
#include <sys/types.h>
+#include <sys/malloc.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -110,8 +109,6 @@ __FBSDID("$FreeBSD$");
#define malloc(a,b,c) calloc(a, 1)
#define free(a,b) free(a)
-typedef unsigned int u_daddr_t;
-
#include <sys/blist.h>
void panic(const char *ctl, ...);
@@ -366,7 +363,7 @@ blst_leaf_alloc(
j >>= 1;
mask >>= j;
}
- scan->u.bmu_bitmap &= ~(1 << r);
+ scan->u.bmu_bitmap &= ~((u_daddr_t)1 << r);
return(blk + r);
}
if (count <= BLIST_BMAP_RADIX) {
@@ -658,7 +655,7 @@ static void blst_copy(
int i;
for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
- if (v & (1 << i))
+ if (v & ((u_daddr_t)1 << i))
blist_free(dest, blk + i, 1);
}
}
@@ -769,6 +766,8 @@ blst_meta_fill(
int next_skip = ((u_int)skip / BLIST_META_RADIX);
int nblks = 0;
+ if (count > radix)
+ panic("blist_meta_fill: allocation too large");
if (count == radix || scan->u.bmu_avail == 0) {
/*
* ALL-ALLOCATED special case
@@ -800,9 +799,6 @@ blst_meta_fill(
radix /= BLIST_META_RADIX;
}
- if (count > radix)
- panic("blist_meta_fill: allocation too large");
-
i = (allocBlk - blk) / radix;
blk += i * radix;
i = i * next_skip + 1;
@@ -922,7 +918,7 @@ blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab)
if (radix == BLIST_BMAP_RADIX) {
printf(
- "%*.*s(%08llx,%lld): bitmap %08llx big=%lld\n",
+ "%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n",
tab, tab, "",
(long long)blk, (long long)radix,
(long long)scan->u.bmu_bitmap,
@@ -1016,9 +1012,8 @@ main(int ac, char **av)
for (;;) {
char buf[1024];
- daddr_t da = 0;
- daddr_t count = 0;
-
+ long long da = 0;
+ long long count = 0;
printf("%lld/%lld/%lld> ", (long long)bl->bl_free,
(long long)size, (long long)bl->bl_radix);
@@ -1028,7 +1023,7 @@ main(int ac, char **av)
switch(buf[0]) {
case 'r':
if (sscanf(buf + 1, "%lld", &count) == 1) {
- blist_resize(&bl, count, 1);
+ blist_resize(&bl, count, 1, M_WAITOK);
} else {
printf("?\n");
}
@@ -1044,16 +1039,14 @@ main(int ac, char **av)
}
break;
case 'f':
- if (sscanf(buf + 1, "%llx %lld",
- (long long *)&da, (long long *)&count) == 2) {
+ if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
blist_free(bl, da, count);
} else {
printf("?\n");
}
break;
case 'l':
- if (sscanf(buf + 1, "%llx %lld",
- (long long *)&da, (long long *)&count) == 2) {
+ if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
printf(" n=%d\n",
blist_fill(bl, da, count));
} else {
diff --git a/sys/sys/blist.h b/sys/sys/blist.h
index 1ddffed..7705353 100644
--- a/sys/sys/blist.h
+++ b/sys/sys/blist.h
@@ -44,7 +44,7 @@
* ops.
*
* SWAPBLK_NONE is returned on failure. This module is typically
- * capable of managing up to (2^31) blocks per blist, though
+ * capable of managing up to (2^63) blocks per blist, though
* the memory utilization would be insane if you actually did
* that. Managing something like 512MB worth of 4K blocks
* eats around 32 KBytes of memory.
@@ -56,7 +56,7 @@
#ifndef _SYS_BLIST_H_
#define _SYS_BLIST_H_
-typedef u_int32_t u_daddr_t; /* unsigned disk address */
+typedef uint64_t u_daddr_t; /* unsigned disk address */
/*
* note: currently use SWAPBLK_NONE as an absolute value rather then
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 0167117..b89a254 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -115,9 +115,8 @@ __FBSDID("$FreeBSD$");
#include <geom/geom.h>
/*
- * SWB_NPAGES must be a power of 2. It may be set to 1, 2, 4, 8, 16
- * or 32 pages per allocation.
- * The 32-page limit is due to the radix code (kern/subr_blist.c).
+ * MAX_PAGEOUT_CLUSTER must be a power of 2 between 1 and 64.
+ * The 64-page limit is due to the radix code (kern/subr_blist.c).
*/
#ifndef MAX_PAGEOUT_CLUSTER
#define MAX_PAGEOUT_CLUSTER 16
@@ -380,18 +379,14 @@ struct pagerops swappagerops = {
};
/*
- * dmmax is in page-sized chunks with the new swap system. It was
- * dev-bsized chunks in the old. dmmax is always a power of 2.
- *
* swap_*() routines are externally accessible. swp_*() routines are
* internal.
*/
-static int dmmax;
static int nswap_lowat = 128; /* in pages, swap_pager_almost_full warn */
static int nswap_hiwat = 512; /* in pages, swap_pager_almost_full warn */
-SYSCTL_INT(_vm, OID_AUTO, dmmax, CTLFLAG_RD, &dmmax, 0,
- "Maximum size of a swap block");
+SYSCTL_INT(_vm, OID_AUTO, dmmax, CTLFLAG_RD, &nsw_cluster_max, 0,
+ "Maximum size of a swap block in pages");
static void swp_sizecheck(void);
static void swp_pager_async_iodone(struct buf *bp);
@@ -488,11 +483,6 @@ swap_pager_init(void)
mtx_init(&sw_dev_mtx, "swapdev", NULL, MTX_DEF);
sx_init(&sw_alloc_sx, "swspsx");
sx_init(&swdev_syscall_lock, "swsysc");
-
- /*
- * Device Stripe, in PAGE_SIZE'd blocks
- */
- dmmax = SWB_NPAGES * 2;
}
/*
@@ -2204,7 +2194,7 @@ swaponsomething(struct vnode *vp, void *id, u_long nblks,
sp->sw_end = dvbase + nblks;
TAILQ_INSERT_TAIL(&swtailq, sp, sw_list);
nswapdev++;
- swap_pager_avail += nblks;
+ swap_pager_avail += nblks - 2;
swap_total += (vm_ooffset_t)nblks * PAGE_SIZE;
swapon_check_swzone(swap_total / PAGE_SIZE);
swp_sizecheck();
@@ -2271,7 +2261,7 @@ done:
static int
swapoff_one(struct swdevt *sp, struct ucred *cred)
{
- u_long nblks, dvbase;
+ u_long nblks;
#ifdef MAC
int error;
#endif
@@ -2302,10 +2292,7 @@ swapoff_one(struct swdevt *sp, struct ucred *cred)
*/
mtx_lock(&sw_dev_mtx);
sp->sw_flags |= SW_CLOSING;
- for (dvbase = 0; dvbase < sp->sw_end; dvbase += dmmax) {
- swap_pager_avail -= blist_fill(sp->sw_blist,
- dvbase, dmmax);
- }
+ swap_pager_avail -= blist_fill(sp->sw_blist, 0, nblks);
swap_total -= (vm_ooffset_t)nblks * PAGE_SIZE;
mtx_unlock(&sw_dev_mtx);
OpenPOWER on IntegriCloud