summaryrefslogtreecommitdiffstats
path: root/sys/vm/vm_page.c
diff options
context:
space:
mode:
authorscottl <scottl@FreeBSD.org>2015-04-29 15:57:14 +0000
committerscottl <scottl@FreeBSD.org>2015-04-29 15:57:14 +0000
commitcac1f63fc81939aa1d40ae21f1ba61254985c8c2 (patch)
tree30c1b4d4f42d182a3a951b4ae53f192154ae3ffc /sys/vm/vm_page.c
parentb7942492e4ad09c98b966b7ea5ba3dd8f4ba763f (diff)
downloadFreeBSD-src-cac1f63fc81939aa1d40ae21f1ba61254985c8c2.zip
FreeBSD-src-cac1f63fc81939aa1d40ae21f1ba61254985c8c2.tar.gz
Improve support for blacklisting bad memory locations. The user can supply
a text file with a list of physical memory addresses to exclude, and have it loaded at boot time via the provided example in loader.conf. The tunable 'vm.blacklist' remains, but using an external file means that there's no practical limit to the size of the list. This change also improves the scanning algorithm for processing the list, scanning the list only once instead of scanning it for every page in the system. Both the sysctl and the file can be unsorted and contain duplicates so long as each entry is numeric (decimal or hex) and is separated by a space, comma, or newline character. The sysctl 'vm.page_blacklist' is now provided to report what memory locations were successfully excluded. Reviewed by: imp, emax Obtained from: Netflix, Inc. MFC after: 3 days
Diffstat (limited to 'sys/vm/vm_page.c')
-rw-r--r--sys/vm/vm_page.c178
1 files changed, 153 insertions, 25 deletions
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 64ca130..6cdbbc7 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -91,12 +91,14 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/limits.h>
+#include <sys/linker.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
+#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
@@ -142,6 +144,12 @@ static int pa_tryrelock_restart;
SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD,
&pa_tryrelock_restart, 0, "Number of tryrelock restarts");
+static TAILQ_HEAD(, vm_page) blacklist_head;
+static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD |
+ CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages");
+
+
static uma_zone_t fakepg_zone;
static struct vnode *vm_page_alloc_init(vm_page_t m);
@@ -216,35 +224,153 @@ vm_set_page_size(void)
}
/*
- * vm_page_blacklist_lookup:
+ * vm_page_blacklist_next:
*
- * See if a physical address in this page has been listed
- * in the blacklist tunable. Entries in the tunable are
- * separated by spaces or commas. If an invalid integer is
- * encountered then the rest of the string is skipped.
+ * Find the next entry in the provided string of blacklist
+ * addresses. Entries are separated by space, comma, or newline.
+ * If an invalid integer is encountered then the rest of the
+ * string is skipped. Updates the list pointer to the next
+ * character, or NULL if the string is exhausted or invalid.
*/
-static int
-vm_page_blacklist_lookup(char *list, vm_paddr_t pa)
+static vm_paddr_t
+vm_page_blacklist_next(char **list, char *end)
{
vm_paddr_t bad;
char *cp, *pos;
- for (pos = list; *pos != '\0'; pos = cp) {
+ if (list == NULL || *list == NULL)
+ return (0);
+ if (**list =='\0') {
+ *list = NULL;
+ return (0);
+ }
+
+ /*
+ * If there's no end pointer then the buffer is coming from
+ * the kenv and we know it's null-terminated.
+ */
+ if (end == NULL)
+ end = *list + strlen(*list);
+
+ /* Ensure that strtoq() won't walk off the end */
+ if (*end != '\0') {
+ if (*end == '\n' || *end == ' ' || *end == ',')
+ *end = '\0';
+ else {
+ printf("Blacklist not terminated, skipping\n");
+ *list = NULL;
+ return (0);
+ }
+ }
+
+ for (pos = *list; *pos != '\0'; pos = cp) {
bad = strtoq(pos, &cp, 0);
- if (*cp != '\0') {
- if (*cp == ' ' || *cp == ',') {
- cp++;
- if (cp == pos)
+ if (*cp == '\0' || *cp == ' ' || *cp == ',' || *cp == '\n') {
+ if (bad == 0) {
+ if (++cp < end)
continue;
- } else
- break;
- }
- if (pa == trunc_page(bad))
- return (1);
+ else
+ break;
+ }
+ } else
+ break;
+ if (*cp == '\0' || ++cp >= end)
+ *list = NULL;
+ else
+ *list = cp;
+ return (trunc_page(bad));
}
+ printf("Garbage in RAM blacklist, skipping\n");
+ *list = NULL;
return (0);
}
+/*
+ * vm_page_blacklist_check:
+ *
+ * Iterate through the provided string of blacklist addresses, pulling
+ * each entry out of the physical allocator free list and putting it
+ * onto a list for reporting via the vm.page_blacklist sysctl.
+ */
+static void
+vm_page_blacklist_check(char *list, char *end)
+{
+ vm_paddr_t pa;
+ vm_page_t m;
+ char *next;
+ int ret;
+
+ next = list;
+ while (next != NULL) {
+ if ((pa = vm_page_blacklist_next(&next, end)) == 0)
+ continue;
+ m = vm_phys_paddr_to_vm_page(pa);
+ if (m == NULL)
+ continue;
+ mtx_lock(&vm_page_queue_free_mtx);
+ ret = vm_phys_unfree_page(m);
+ mtx_unlock(&vm_page_queue_free_mtx);
+ if (ret == TRUE) {
+ TAILQ_INSERT_TAIL(&blacklist_head, m, listq);
+ if (bootverbose)
+ printf("Skipping page with pa 0x%jx\n",
+ (uintmax_t)pa);
+ }
+ }
+}
+
+/*
+ * vm_page_blacklist_load:
+ *
+ * Search for a special module named "ram_blacklist". It'll be a
+ * plain text file provided by the user via the loader directive
+ * of the same name.
+ */
+static void
+vm_page_blacklist_load(char **list, char **end)
+{
+ void *mod;
+ u_char *ptr;
+ u_int len;
+
+ mod = NULL;
+ ptr = NULL;
+
+ mod = preload_search_by_type("ram_blacklist");
+ if (mod != NULL) {
+ ptr = preload_fetch_addr(mod);
+ len = preload_fetch_size(mod);
+ }
+ *list = ptr;
+ if (ptr != NULL)
+ *end = ptr + len;
+ else
+ *end = NULL;
+ return;
+}
+
+static int
+sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS)
+{
+ vm_page_t m;
+ struct sbuf sbuf;
+ int error, first;
+
+ first = 1;
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+ sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
+ TAILQ_FOREACH(m, &blacklist_head, listq) {
+ sbuf_printf(&sbuf, "%s%#jx", first ? "" : ",",
+ (uintmax_t)m->phys_addr);
+ first = 0;
+ }
+ error = sbuf_finish(&sbuf);
+ sbuf_delete(&sbuf);
+ return (error);
+}
+
static void
vm_page_domain_init(struct vm_domain *vmd)
{
@@ -290,7 +416,7 @@ vm_page_startup(vm_offset_t vaddr)
int i;
vm_paddr_t pa;
vm_paddr_t last_pa;
- char *list;
+ char *list, *listend;
vm_paddr_t end;
vm_paddr_t biggestsize;
vm_paddr_t low_water, high_water;
@@ -477,20 +603,22 @@ vm_page_startup(vm_offset_t vaddr)
*/
vm_cnt.v_page_count = 0;
vm_cnt.v_free_count = 0;
- list = kern_getenv("vm.blacklist");
for (i = 0; phys_avail[i + 1] != 0; i += 2) {
pa = phys_avail[i];
last_pa = phys_avail[i + 1];
while (pa < last_pa) {
- if (list != NULL &&
- vm_page_blacklist_lookup(list, pa))
- printf("Skipping page with pa 0x%jx\n",
- (uintmax_t)pa);
- else
- vm_phys_add_page(pa);
+ vm_phys_add_page(pa);
pa += PAGE_SIZE;
}
}
+
+ TAILQ_INIT(&blacklist_head);
+ vm_page_blacklist_load(&list, &listend);
+ vm_page_blacklist_check(list, listend);
+
+ list = kern_getenv("vm.blacklist");
+ vm_page_blacklist_check(list, NULL);
+
freeenv(list);
#if VM_NRESERVLEVEL > 0
/*
OpenPOWER on IntegriCloud