From 44220d7e1e07bbf1c88215543c5877abcdb88d29 Mon Sep 17 00:00:00 2001
From: jhb
Date: Fri, 24 Jul 2009 13:50:29 +0000
Subject: Add a new type of VM object: OBJT_SG.  An OBJT_SG object is very
 similar to a device pager (OBJT_DEVICE) object in that it uses fictitious
 pages to provide aliases to other memory addresses.  The primary difference
 is that it uses an sglist(9) to determine the physical addresses for a given
 offset into the object instead of invoking the d_mmap() method in a device
 driver.

Reviewed by:	alc
Approved by:	re (kensmith)
MFC after:	2 weeks
---
 sys/vm/sg_pager.c   | 290 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 sys/vm/vm.h         |   2 +-
 sys/vm/vm_fault.c   |   3 +-
 sys/vm/vm_map.c     |  14 ++-
 sys/vm/vm_meter.c   |   2 +-
 sys/vm/vm_object.c  |   1 +
 sys/vm/vm_object.h  |   9 ++
 sys/vm/vm_page.c    |   2 +-
 sys/vm/vm_pageout.c |   4 +-
 sys/vm/vm_pager.c   |   3 +-
 sys/vm/vm_pager.h   |   1 +
 11 files changed, 320 insertions(+), 11 deletions(-)
 create mode 100644 sys/vm/sg_pager.c

(limited to 'sys/vm')

diff --git a/sys/vm/sg_pager.c b/sys/vm/sg_pager.c
new file mode 100644
index 0000000..bf3390f
--- /dev/null
+++ b/sys/vm/sg_pager.c
@@ -0,0 +1,290 @@
+/*-
+ * Copyright (c) 2009 Advanced Computing Technologies LLC
+ * Written by: John H. Baldwin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * This pager manages OBJT_SG objects.  These objects are backed by
+ * a scatter/gather list of physical address ranges.
+ */
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sglist.h>
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/uma.h>
+
+static void sg_pager_init(void);
+static vm_object_t sg_pager_alloc(void *, vm_ooffset_t, vm_prot_t,
+    vm_ooffset_t, struct ucred *);
+static void sg_pager_dealloc(vm_object_t);
+static int sg_pager_getpages(vm_object_t, vm_page_t *, int, int);
+static void sg_pager_putpages(vm_object_t, vm_page_t *, int,
+    boolean_t, int *);
+static boolean_t sg_pager_haspage(vm_object_t, vm_pindex_t, int *,
+    int *);
+
+static uma_zone_t fakepg_zone;
+
+static vm_page_t sg_pager_getfake(vm_paddr_t, vm_memattr_t);
+static void sg_pager_putfake(vm_page_t);
+
+struct pagerops sgpagerops = {
+	.pgo_init =	sg_pager_init,
+	.pgo_alloc =	sg_pager_alloc,
+	.pgo_dealloc =	sg_pager_dealloc,
+	.pgo_getpages =	sg_pager_getpages,
+	.pgo_putpages =	sg_pager_putpages,
+	.pgo_haspage =	sg_pager_haspage,
+};
+
+/*
+ * Create the UMA zone from which fictitious pages are allocated.
+ */
+static void
+sg_pager_init(void)
+{
+
+	fakepg_zone = uma_zcreate("SG fakepg", sizeof(struct vm_page),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
+	    UMA_ZONE_NOFREE|UMA_ZONE_VM);
+}
+
+/*
+ * Create an OBJT_SG object for the scatter/gather list "handle".
+ * Returns NULL if "foff" is not page aligned, if any segment of the
+ * list is not page aligned, or if "foff" + "size" extends beyond the
+ * end of the list.  The list is retained with sglist_hold().
+ */
+static vm_object_t
+sg_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
+    vm_ooffset_t foff, struct ucred *cred)
+{
+	struct sglist *sg;
+	vm_object_t object;
+	vm_pindex_t npages, pindex;
+	int i;
+
+	/*
+	 * Offset should be page aligned.
+	 */
+	if (foff & PAGE_MASK)
+		return (NULL);
+
+	/*
+	 * The scatter/gather list must only include page-aligned
+	 * ranges.
+	 */
+	npages = 0;
+	sg = handle;
+	for (i = 0; i < sg->sg_nseg; i++) {
+		if ((sg->sg_segs[i].ss_paddr % PAGE_SIZE) != 0 ||
+		    (sg->sg_segs[i].ss_len % PAGE_SIZE) != 0)
+			return (NULL);
+		npages += sg->sg_segs[i].ss_len / PAGE_SIZE;
+	}
+
+	/*
+	 * The scatter/gather list has a fixed size.  Refuse requests
+	 * to map beyond that.  The page indices are summed rather than
+	 * the byte offsets so that a wrapped sum can be detected.
+	 */
+	size = round_page(size);
+	pindex = OFF_TO_IDX(foff) + OFF_TO_IDX(size);
+	if (pindex > npages || pindex < OFF_TO_IDX(foff))
+		return (NULL);
+
+	/*
+	 * Allocate a new object and associate it with the
+	 * scatter/gather list.  It is ok for our purposes to have
+	 * multiple VM objects associated with the same scatter/gather
+	 * list because scatter/gather lists are static.  This is also
+	 * simpler than ensuring a unique object per scatter/gather
+	 * list.
+	 */
+	object = vm_object_allocate(OBJT_SG, npages);
+	object->handle = sglist_hold(sg);
+	TAILQ_INIT(&object->un_pager.sgp.sgp_pglist);
+	return (object);
+}
+
+/*
+ * Free every fictitious page on the object's page list and then hand
+ * the scatter/gather list stored in the object to sglist_free().
+ */
+static void
+sg_pager_dealloc(vm_object_t object)
+{
+	struct sglist *sg;
+	vm_page_t m;
+
+	/*
+	 * Free up our fake pages.
+	 */
+	while ((m = TAILQ_FIRST(&object->un_pager.sgp.sgp_pglist)) != NULL) {
+		TAILQ_REMOVE(&object->un_pager.sgp.sgp_pglist, m, pageq);
+		sg_pager_putfake(m);
+	}
+
+	sg = object->handle;
+	sglist_free(sg);
+}
+
+/*
+ * Replace the requested page "m[reqpage]" with a fictitious page for the
+ * physical address that the scatter/gather list gives for its index.
+ * All "count" pages passed in are freed.  The object lock is dropped
+ * while the list is searched and taken again before the pages change.
+ */
+static int
+sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
+{
+	struct sglist *sg;
+	vm_page_t m_paddr, page;
+	vm_pindex_t offset;
+	vm_paddr_t paddr;
+	vm_memattr_t memattr;
+	size_t space;
+	int i;
+
+	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
+	sg = object->handle;
+	memattr = object->memattr;
+	VM_OBJECT_UNLOCK(object);
+	offset = m[reqpage]->pindex;
+
+	/*
+	 * Lookup the physical address of the requested page.  An initial
+	 * value of '1' instead of '0' is used so we can assert that the
+	 * page is found since '0' can be a valid page-aligned physical
+	 * address.
+	 */
+	space = 0;
+	paddr = 1;
+	for (i = 0; i < sg->sg_nseg; i++) {
+		if (space + sg->sg_segs[i].ss_len <= (offset * PAGE_SIZE)) {
+			space += sg->sg_segs[i].ss_len;
+			continue;
+		}
+		paddr = sg->sg_segs[i].ss_paddr + offset * PAGE_SIZE - space;
+		break;
+	}
+	KASSERT(paddr != 1, ("invalid SG page index"));
+
+	/* If "paddr" is a real page, perform a sanity check on "memattr". */
+	if ((m_paddr = vm_phys_paddr_to_vm_page(paddr)) != NULL &&
+	    pmap_page_get_memattr(m_paddr) != memattr) {
+		memattr = pmap_page_get_memattr(m_paddr);
+		printf(
+	    "WARNING: A device driver has set \"memattr\" inconsistently.\n");
+	}
+
+	/* Return a fake page for the requested page. */
+	KASSERT(!(m[reqpage]->flags & PG_FICTITIOUS),
+	    ("backing page for SG is fake"));
+
+	/* Construct a new fake page. */
+	page = sg_pager_getfake(paddr, memattr);
+	VM_OBJECT_LOCK(object);
+	TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, pageq);
+
+	/* Free the original pages and insert this fake page into the object. */
+	vm_page_lock_queues();
+	for (i = 0; i < count; i++)
+		vm_page_free(m[i]);
+	vm_page_unlock_queues();
+	vm_page_insert(page, object, offset);
+	m[reqpage] = page;
+
+	return (VM_PAGER_OK);
+}
+
+/*
+ * Writing pages out is not implemented for this pager; any call panics.
+ */
+static void
+sg_pager_putpages(vm_object_t object, vm_page_t *m, int count,
+    boolean_t sync, int *rtvals)
+{
+
+	panic("sg_pager_putpages called");
+}
+
+/*
+ * Always report the page as present, and report zero pages before and
+ * after it when the caller asks for those counts.
+ */
+static boolean_t
+sg_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
+    int *after)
+{
+
+	if (before != NULL)
+		*before = 0;
+	if (after != NULL)
+		*after = 0;
+	return (TRUE);
+}
+
+/*
+ * Create a fictitious page with the specified physical address and memory
+ * attribute.  The memory attribute is the only machine-dependent aspect
+ * of a fictitious page that must be initialized.
+ */
+static vm_page_t
+sg_pager_getfake(vm_paddr_t paddr, vm_memattr_t memattr)
+{
+	vm_page_t m;
+
+	m = uma_zalloc(fakepg_zone, M_WAITOK | M_ZERO);
+	m->phys_addr = paddr;
+	/* Fictitious pages don't use "segind". */
+	m->flags = PG_FICTITIOUS;
+	/* Fictitious pages don't use "order" or "pool". */
+	m->oflags = VPO_BUSY;
+	m->wire_count = 1;
+	pmap_page_set_memattr(m, memattr);
+	return (m);
+}
+
+/*
+ * Return a fictitious page to its zone.  Panics if "m" is not marked
+ * PG_FICTITIOUS.
+ */
+static void
+sg_pager_putfake(vm_page_t m)
+{
+
+	if (!(m->flags & PG_FICTITIOUS))
+		panic("sg_pager_putfake: bad page");
+	uma_zfree(fakepg_zone, m);
+}
diff --git a/sys/vm/vm.h b/sys/vm/vm.h
index 9159e93..b547514 100644
--- a/sys/vm/vm.h
+++ b/sys/vm/vm.h
@@ -89,7 +89,7 @@ typedef u_char vm_prot_t;	/* protection codes */
 #define	VM_PROT_DEFAULT		VM_PROT_ALL
 
 enum obj_type { OBJT_DEFAULT, OBJT_SWAP, OBJT_VNODE, OBJT_DEVICE, OBJT_PHYS,
-		OBJT_DEAD };
+		OBJT_DEAD, OBJT_SG };
 typedef u_char objtype_t;
 
 union vm_map_object;
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index ff8852b..98b6b09 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -472,7 +472,8 @@ readrest:
 		    (fs.first_object == fs.object ||
 		     (is_first_object_locked = VM_OBJECT_TRYLOCK(fs.first_object))) &&
 		    fs.first_object->type != OBJT_DEVICE &&
-		    fs.first_object->type != OBJT_PHYS) {
+		    fs.first_object->type != OBJT_PHYS &&
+		    fs.first_object->type != OBJT_SG) {
 			vm_pindex_t firstpindex, tmppindex;
 
 			if (fs.first_pindex < 2 * VM_FAULT_READ)
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index dc7a490..b5651ef 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -1719,7 +1719,7 @@ vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
 	if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL)
 		return;
 	VM_OBJECT_LOCK(object);
-	if (object->type == OBJT_DEVICE) {
+	if (object->type == OBJT_DEVICE || object->type == OBJT_SG) {
 		pmap_object_init_pt(map->pmap, addr, object, pindex, size);
 		goto unlock_return;
 	}
@@ -2247,7 +2247,8 @@ done:
 				 */
 				vm_fault_unwire(map, entry->start, entry->end,
 				    entry->object.vm_object != NULL &&
-				    entry->object.vm_object->type == OBJT_DEVICE);
+				    (entry->object.vm_object->type == OBJT_DEVICE ||
+				    entry->object.vm_object->type == OBJT_SG));
 			}
 		}
 		KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
@@ -2366,7 +2367,8 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
 			saved_start = entry->start;
 			saved_end = entry->end;
 			fictitious = entry->object.vm_object != NULL &&
-			    entry->object.vm_object->type == OBJT_DEVICE;
+			    (entry->object.vm_object->type == OBJT_DEVICE ||
+			    entry->object.vm_object->type == OBJT_SG);
 			/*
 			 * Release the map lock, relying on the in-transition
 			 * mark.
@@ -2462,7 +2464,8 @@ done:
 				 */
 				vm_fault_unwire(map, entry->start, entry->end,
 				    entry->object.vm_object != NULL &&
-				    entry->object.vm_object->type == OBJT_DEVICE);
+				    (entry->object.vm_object->type == OBJT_DEVICE ||
+				    entry->object.vm_object->type == OBJT_SG));
 			}
 		}
 	next_entry_done:
@@ -2595,7 +2598,8 @@ vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
 {
 	vm_fault_unwire(map, entry->start, entry->end,
 	    entry->object.vm_object != NULL &&
-	    entry->object.vm_object->type == OBJT_DEVICE);
+	    (entry->object.vm_object->type == OBJT_DEVICE ||
+	    entry->object.vm_object->type == OBJT_SG));
 	entry->wired_count = 0;
 }
 
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 07d1bd6..269de51 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -205,7 +205,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
 		 * synchronization should not impair the accuracy of
 		 * the reported statistics.
 		 */
-		if (object->type == OBJT_DEVICE) {
+		if (object->type == OBJT_DEVICE || object->type == OBJT_SG) {
 			/*
 			 * Devices, like /dev/mem, will badly skew our totals.
 			 */
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 10d58e7..d7a6e97 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -309,6 +309,7 @@ vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr)
 	case OBJT_DEFAULT:
 	case OBJT_DEVICE:
 	case OBJT_PHYS:
+	case OBJT_SG:
 	case OBJT_SWAP:
 	case OBJT_VNODE:
 		if (!TAILQ_EMPTY(&object->memq))
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index 0e57aca..0b06fed 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -124,6 +124,15 @@ struct vm_object {
 		} devp;
 
 		/*
+		 * SG pager
+		 *
+		 *	sgp_pglist - list of allocated pages
+		 */
+		struct {
+			TAILQ_HEAD(, vm_page) sgp_pglist;
+		} sgp;
+
+		/*
 		 * Swap pager
 		 *
 		 *	swp_bcount - number of swap 'swblock' metablocks, each
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index d8d74c0..ac363b7 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -1163,7 +1163,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
 	if (object != NULL) {
 		/* Ignore device objects; the pager sets "memattr" for them. */
 		if (object->memattr != VM_MEMATTR_DEFAULT &&
-		    object->type != OBJT_DEVICE)
+		    object->type != OBJT_DEVICE && object->type != OBJT_SG)
 			pmap_page_set_memattr(m, object->memattr);
 		vm_page_insert(m, object, pindex);
 	} else
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index b64215b..19edce1 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -513,7 +513,9 @@ vm_pageout_object_deactivate_pages(pmap, first_object, desired)
 	int actcount, rcount, remove_mode;
 
 	VM_OBJECT_LOCK_ASSERT(first_object, MA_OWNED);
-	if (first_object->type == OBJT_DEVICE || first_object->type == OBJT_PHYS)
+	if (first_object->type == OBJT_DEVICE ||
+	    first_object->type == OBJT_SG ||
+	    first_object->type == OBJT_PHYS)
 		return;
 	for (object = first_object;; object = backing_object) {
 		if (pmap_resident_count(pmap) <= desired)
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
index 3758d2b..473d518 100644
--- a/sys/vm/vm_pager.c
+++ b/sys/vm/vm_pager.c
@@ -160,7 +160,8 @@ struct pagerops *pagertab[] = {
 	&vnodepagerops,		/* OBJT_VNODE */
 	&devicepagerops,	/* OBJT_DEVICE */
 	&physpagerops,		/* OBJT_PHYS */
-	&deadpagerops		/* OBJT_DEAD */
+	&deadpagerops,		/* OBJT_DEAD */
+	&sgpagerops		/* OBJT_SG */
 };
 
 static const int npagers = sizeof(pagertab) / sizeof(pagertab[0]);
diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h
index 7c8aeba..75d8f0a 100644
--- a/sys/vm/vm_pager.h
+++ b/sys/vm/vm_pager.h
@@ -70,6 +70,7 @@ extern struct pagerops swappagerops;
 extern struct pagerops vnodepagerops;
 extern struct pagerops devicepagerops;
 extern struct pagerops physpagerops;
+extern struct pagerops sgpagerops;
 
 /*
  * get/put return values
-- 
cgit v1.1