author     dillon <dillon@FreeBSD.org>    1999-12-12 03:19:33 +0000
committer  dillon <dillon@FreeBSD.org>    1999-12-12 03:19:33 +0000
commit     b66fb2c64801a0ee59e638561bfd8d3fe36b647c (patch)
tree       d1cf00b34925743e2181910ae5e72af2d03be373 /sys/vm
parent     aeee88b81a6982928d45c0d80c325cd8372bbab0 (diff)
Add MAP_NOSYNC feature to mmap(), and MADV_NOSYNC and MADV_AUTOSYNC to
madvise().
This feature prevents the update daemon from gratuitously flushing
dirty pages associated with a mapped file-backed region of memory. The
system pager will still page the memory as necessary and the VM system
will still be fully coherent with the filesystem. Modifications made
by other means to the same area of memory, for example by write(), are
unaffected. The feature works on a page-granularity basis.
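
As an illustrative sketch (an assumption-laden example, not part of the
commit itself), the madvise() half of the feature toggles the behavior on
an existing mapping; addr and len are assumed to describe a file-backed
MAP_SHARED region created earlier with mmap():

#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>

/* Toggle syncer handling for an existing file-backed mapping. */
static void
set_nosync(void *addr, size_t len, int nosync)
{
	/*
	 * MADV_NOSYNC marks the range so the update daemon skips its
	 * dirty pages; MADV_AUTOSYNC restores the default behavior.
	 */
	if (madvise(addr, len, nosync ? MADV_NOSYNC : MADV_AUTOSYNC) == -1)
		err(1, "madvise");
}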
MAP_NOSYNC allows one to use mmap() to share memory between processes
without incurring any significant filesystem overhead, putting it in
the same performance category as SysV shared memory and anonymous memory.
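
A minimal userland sketch of that use case follows; the file path and
region size are placeholders and error handling is abbreviated:

#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <err.h>

int
main(void)
{
	size_t len = 65536;		/* placeholder region size */
	char *p;
	int fd;

	fd = open("/tmp/shared.dat", O_RDWR | O_CREAT, 0600);
	if (fd == -1)
		err(1, "open");
	if (ftruncate(fd, (off_t)len) == -1)
		err(1, "ftruncate");

	/*
	 * MAP_NOSYNC: dirty pages in this shared mapping stay in memory
	 * until msync(2) is called or the pageout daemon needs them.
	 */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_SHARED | MAP_NOSYNC, fd, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");

	p[0] = 1;	/* dirties a page the syncer will now leave alone */
	return (0);
}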
Reviewed by: julian, alc, dg
Diffstat (limited to 'sys/vm')
-rw-r--r--   sys/vm/vm_fault.c   | 18
-rw-r--r--   sys/vm/vm_map.c     | 17
-rw-r--r--   sys/vm/vm_map.h     |  3
-rw-r--r--   sys/vm/vm_mmap.c    |  7
-rw-r--r--   sys/vm/vm_object.c  | 35
-rw-r--r--   sys/vm/vm_object.h  |  5
-rw-r--r--   sys/vm/vm_page.c    | 10
-rw-r--r--   sys/vm/vm_page.h    |  8
8 files changed, 82 insertions, 21 deletions
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index ca1ee1c..23e2ba0 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -779,15 +779,29 @@ readrest:
 		vm_page_flag_set(fs.m, PG_WRITEABLE);
 		vm_object_set_flag(fs.m->object,
 				   OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
+
 		/*
 		 * If the fault is a write, we know that this page is being
-		 * written NOW. This will save on the pmap_is_modified() calls
-		 * later.
+		 * written NOW so dirty it explicitly to save on
+		 * pmap_is_modified() calls later.
+		 *
+		 * If this is a NOSYNC mmap we do not want to set PG_NOSYNC
+		 * if the page is already dirty to prevent data written with
+		 * the expectation of being synced from not being synced.
+		 * Likewise if this entry does not request NOSYNC then make
+		 * sure the page isn't marked NOSYNC.  Applications sharing
+		 * data should use the same flags to avoid ping ponging.
 		 *
 		 * Also tell the backing pager, if any, that it should remove
 		 * any swap backing since the page is now dirty.
 		 */
 		if (fault_flags & VM_FAULT_DIRTY) {
+			if (fs.entry->eflags & MAP_ENTRY_NOSYNC) {
+				if (fs.m->dirty == 0)
+					vm_page_flag_set(fs.m, PG_NOSYNC);
+			} else {
+				vm_page_flag_clear(fs.m, PG_NOSYNC);
+			}
 			vm_page_dirty(fs.m);
 			vm_pager_page_unswapped(fs.m);
 		}
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index eaa65f7..a1f422d3 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -460,6 +460,9 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
 		KASSERT(object == NULL,
 			("vm_map_insert: paradoxical MAP_NOFAULT request"));
 	}
+	if (cow & MAP_DISABLE_SYNCER)
+		protoeflags |= MAP_ENTRY_NOSYNC;
+
 	if (object) {
 		/*
 		 * When object is non-NULL, it could be shared with another
@@ -539,13 +542,15 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
 	 * Update the free space hint
 	 */
 	if ((map->first_free == prev_entry) &&
-	    (prev_entry->end >= new_entry->start))
+	    (prev_entry->end >= new_entry->start)) {
 		map->first_free = new_entry;
+	}
 
-	if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL))
+	if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
 		pmap_object_init_pt(map->pmap, start,
 			object, OFF_TO_IDX(offset), end - start,
 			cow & MAP_PREFAULT_PARTIAL);
+	}
 
 	return (KERN_SUCCESS);
 }
@@ -1026,6 +1031,8 @@ vm_map_madvise(map, start, end, behav)
 	case MADV_NORMAL:
 	case MADV_SEQUENTIAL:
 	case MADV_RANDOM:
+	case MADV_NOSYNC:
+	case MADV_AUTOSYNC:
 		modify_map = 1;
 		vm_map_lock(map);
 		break;
@@ -1077,6 +1084,12 @@ vm_map_madvise(map, start, end, behav)
 		case MADV_RANDOM:
 			vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
 			break;
+		case MADV_NOSYNC:
+			current->eflags |= MAP_ENTRY_NOSYNC;
+			break;
+		case MADV_AUTOSYNC:
+			current->eflags &= ~MAP_ENTRY_NOSYNC;
+			break;
 		default:
 			break;
 		}
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index b02f970..b02317b 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -112,7 +112,7 @@ struct vm_map_entry {
 	vm_pindex_t lastr;		/* last read */
 };
 
-#define MAP_ENTRY_UNUSED_01	0x1
+#define MAP_ENTRY_NOSYNC	0x1
 #define MAP_ENTRY_IS_SUB_MAP	0x2
 #define MAP_ENTRY_COW		0x4
 #define MAP_ENTRY_NEEDS_COPY	0x8
@@ -329,6 +329,7 @@ vmspace_resident_count(struct vmspace *vmspace)
 #define MAP_NOFAULT		0x4
 #define MAP_PREFAULT		0x8
 #define MAP_PREFAULT_PARTIAL	0x10
+#define MAP_DISABLE_SYNCER	0x20
 
 /*
  * vm_fault option flags
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index ce349acb..1721fd3 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -626,7 +626,7 @@ madvise(p, uap)
 	/*
 	 * Check for illegal behavior
 	 */
-	if (uap->behav < 0 || uap->behav > MADV_FREE)
+	if (uap->behav < 0 || uap->behav > MADV_AUTOSYNC)
 		return (EINVAL);
 	/*
 	 * Check for illegal addresses.  Watch out for address wrap... Note
@@ -1046,9 +1046,10 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
 			flags |= MAP_SHARED;
 	}
 
-	if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
+	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
 		docow |= MAP_COPY_ON_WRITE;
-	}
+	if (flags & MAP_NOSYNC)
+		docow |= MAP_DISABLE_SYNCER;
 
 #if defined(VM_PROT_READ_IS_EXEC)
 	if (prot & VM_PROT_READ)
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 1df9faf..6da6c9b 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -478,8 +478,10 @@ vm_object_terminate(object)
 /*
  *	vm_object_page_clean
  *
- *	Clean all dirty pages in the specified range of object.
- *	Leaves page on whatever queue it is currently on.
+ *	Clean all dirty pages in the specified range of object.  Leaves page
+ *	on whatever queue it is currently on.  If NOSYNC is set then do not
+ *	write out pages with PG_NOSYNC set (originally comes from MAP_NOSYNC),
+ *	leaving the object dirty.
 *
  *	Odd semantics: if start == end, we clean everything.
  *
@@ -503,6 +505,7 @@ vm_object_page_clean(object, start, end, flags)
 	int chkb;
 	int maxb;
 	int i;
+	int clearobjflags;
 	int pagerflags;
 	vm_page_t maf[vm_pageout_page_count];
 	vm_page_t mab[vm_pageout_page_count];
@@ -527,12 +530,26 @@ vm_object_page_clean(object, start, end, flags)
 		tend = end;
 	}
 
+	/*
+	 * Generally set CLEANCHK interlock and make the page read-only so
+	 * we can then clear the object flags.
+	 *
+	 * However, if this is a nosync mmap then the object is likely to
+	 * stay dirty so do not mess with the page and do not clear the
+	 * object flags.
+	 */
+
+	clearobjflags = 1;
+
 	for(p = TAILQ_FIRST(&object->memq); p; p = TAILQ_NEXT(p, listq)) {
 		vm_page_flag_set(p, PG_CLEANCHK);
-		vm_page_protect(p, VM_PROT_READ);
+		if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC))
+			clearobjflags = 0;
+		else
+			vm_page_protect(p, VM_PROT_READ);
 	}
 
-	if ((tstart == 0) && (tend == object->size)) {
+	if (clearobjflags && (tstart == 0) && (tend == object->size)) {
 		vm_object_clear_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
 	}
 
@@ -557,6 +574,16 @@ rescan:
 			continue;
 		}
 
+		/*
+		 * If we have been asked to skip nosync pages and this is a
+		 * nosync page, skip it.  Note that the object flags were
+		 * not cleared in this case so we do not have to set them.
+		 */
+		if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
+			vm_page_flag_clear(p, PG_CLEANCHK);
+			continue;
+		}
+
 		s = splvm();
 		while (vm_page_sleep_busy(p, TRUE, "vpcwai")) {
 			if (object->generation != curgeneration) {
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index 315cedb..165e0e1 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -153,8 +153,9 @@ struct vm_object {
 
 #ifdef KERNEL
 
-#define OBJPC_SYNC	0x1	/* sync I/O */
-#define OBJPC_INVAL	0x2	/* invalidate */
+#define OBJPC_SYNC	0x1		/* sync I/O */
+#define OBJPC_INVAL	0x2		/* invalidate */
+#define OBJPC_NOSYNC	0x4		/* skip if PG_NOSYNC */
 
 TAILQ_HEAD(object_q, vm_object);
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 078743f..edde291 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -1522,15 +1522,19 @@ vm_page_set_validclean(m, base, size)
 
 	/*
 	 * Set valid, clear dirty bits.  If validating the entire
-	 * page we can safely clear the pmap modify bit.
+	 * page we can safely clear the pmap modify bit.  We also
+	 * use this opportunity to clear the PG_NOSYNC flag.  If a process
+	 * takes a write fault on a MAP_NOSYNC memory area the flag will
+	 * be set again.
 	 */
 	pagebits = vm_page_bits(base, size);
 	m->valid |= pagebits;
 	m->dirty &= ~pagebits;
-
-	if (base == 0 && size == PAGE_SIZE)
+	if (base == 0 && size == PAGE_SIZE) {
 		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+		vm_page_flag_clear(m, PG_NOSYNC);
+	}
 }
 
 #if 0
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 0cfc618..c5d2827 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -234,6 +234,7 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT];
 #define PG_REFERENCED	0x0080		/* page has been referenced */
 #define PG_CLEANCHK	0x0100		/* page will be checked for cleaning */
 #define PG_SWAPINPROG	0x0200		/* swap I/O in progress on page */
+#define PG_NOSYNC	0x0400		/* do not collect for syncer */
 
 /*
  * Misc constants.
@@ -437,10 +438,9 @@ vm_page_unhold(vm_page_t mem)
 /*
  *	vm_page_protect:
  *
- *	Reduce the protection of a page.  This routine never
- *	raises the protection and therefore can be safely
- *	called if the page is already at VM_PROT_NONE ( it
- *	will be a NOP effectively ).
+ *	Reduce the protection of a page.  This routine never raises the
+ *	protection and therefore can be safely called if the page is already
+ *	at VM_PROT_NONE (it will be a NOP effectively ).
 */
 static __inline void
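
Note that only the update daemon is exempted: vm_object_page_clean()
skips PG_NOSYNC pages only when its caller passes OBJPC_NOSYNC, so an
explicit msync(2) on a MAP_NOSYNC region still writes the dirty pages
out. A hedged sketch of forcing such a flush from userland:

#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>

/* Push the dirty pages of a MAP_NOSYNC region back to the file. */
static void
flush_region(void *addr, size_t len)
{
	if (msync(addr, len, MS_SYNC) == -1)
		err(1, "msync");
}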