author     kib <kib@FreeBSD.org>  2017-07-05 06:18:00 +0000
committer  kib <kib@FreeBSD.org>  2017-07-05 06:18:00 +0000
commit     c3699e91289a5a02b0c16eec22ee4d6ad7d9602e (patch)
tree       8464efcaf14fe801b7ebaa3f4f94ec5b7aa6d532 /sys/vm
parent     341bd3d910d113eef840ff3f778c79345f879315 (diff)
Add MAP_GUARD and use it for stack grow area protection.
Bump __FreeBSD_version.

MFC r320317: Implement address space guards.
MFC r320338: Remove stale part of the comment.
MFC r320339: Correctly handle small MAP_STACK requests.
MFC r320344: For now, allow mprotect(2) over the guards to succeed regardless of the requested protection.
MFC r320430: Treat the addr argument for mmap(2) request without MAP_FIXED flag as a hint.
MFC r320560 (by alc): Modify vm_map_growstack() to protect itself from the possibility of the gap entry in the vm map being smaller than the sysctl-derived stack guard size.
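
Usage note (not part of this commit): a minimal userspace sketch of how a guard region would be requested with the new flag, assuming a kernel and headers that include this change. Per the kern_mmap() validation added in sys/vm/vm_mmap.c below, the request must use PROT_NONE, fd == -1, offset 0, and none of the other mapping-type flags (MAP_SHARED, MAP_PRIVATE, MAP_ANON, MAP_STACK, ...).

/*
 * Illustrative sketch only: reserve a guard region with MAP_GUARD.
 * Accesses to the region fault instead of being transparently backed
 * by anonymous memory, so it can be placed below a manually managed
 * stack or arena.
 */
#include <sys/mman.h>
#include <err.h>
#include <stdlib.h>

int
main(void)
{
	size_t len = 4096;
	void *guard;

	guard = mmap(NULL, len, PROT_NONE, MAP_GUARD, -1, 0);
	if (guard == MAP_FAILED)
		err(1, "mmap(MAP_GUARD)");
	/* ... use the reservation as a guard below some other mapping ... */
	if (munmap(guard, len) == -1)
		err(1, "munmap");
	return (0);
}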
Diffstat (limited to 'sys/vm')
-rw-r--r--  sys/vm/vm.h         1
-rw-r--r--  sys/vm/vm_fault.c   18
-rw-r--r--  sys/vm/vm_map.c     446
-rw-r--r--  sys/vm/vm_map.h     10
-rw-r--r--  sys/vm/vm_mmap.c    34
5 files changed, 263 insertions, 246 deletions
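
Before the diff itself, a simplified, standalone sketch of the gap-placement arithmetic that the reworked vm_map_stack_locked() performs for a MAP_STACK_GROWS_DOWN request (see the sys/vm/vm_map.c hunk below). The variable names mirror the kernel code, but the program is illustrative only and all values are made up; the authoritative logic is in the diff.

/*
 * Sketch of the grows-down layout computed by vm_map_stack_locked():
 * the top init_ssize bytes are mapped as the initial stack, and
 * [addrbos, bot) becomes the guard/gap entry that vm_map_growstack()
 * later shrinks as the stack grows.
 */
#include <stdio.h>

#define PGSZ	4096UL			/* example page size */

int
main(void)
{
	unsigned long addrbos = 0x7fff0000UL;	/* bottom of reservation */
	unsigned long max_ssize = 64 * PGSZ;	/* total MAP_STACK size */
	unsigned long growsize = 32 * PGSZ;	/* initial stack size */
	unsigned long sgp = 1 * PGSZ;		/* stack_guard_page pages */
	unsigned long init_ssize, bot, top, gap_bot, gap_top;

	init_ssize = growsize;
	if (max_ssize < init_ssize + sgp)	/* small MAP_STACK request */
		init_ssize = max_ssize - sgp;

	bot = addrbos + max_ssize - init_ssize;	/* MAP_STACK_GROWS_DOWN */
	top = bot + init_ssize;
	gap_bot = addrbos;
	gap_top = bot;

	printf("stack [%#lx, %#lx)  gap [%#lx, %#lx)\n",
	    bot, top, gap_bot, gap_top);
	return (0);
}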
diff --git a/sys/vm/vm.h b/sys/vm/vm.h
index 68bb4d1..0e523ea 100644
--- a/sys/vm/vm.h
+++ b/sys/vm/vm.h
@@ -78,6 +78,7 @@ typedef u_char vm_prot_t; /* protection codes */
#define VM_PROT_WRITE ((vm_prot_t) 0x02)
#define VM_PROT_EXECUTE ((vm_prot_t) 0x04)
#define VM_PROT_COPY ((vm_prot_t) 0x08) /* copy-on-read */
+#define VM_PROT_FAULT_LOOKUP ((vm_prot_t) 0x010)
#define VM_PROT_ALL (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
#define VM_PROT_RW (VM_PROT_READ|VM_PROT_WRITE)
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 2a90c15..d2147f6 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -495,13 +495,12 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int locked, nera, result, rv;
u_char behavior;
boolean_t wired; /* Passed by reference. */
- bool dead, growstack, hardfault, is_first_object_locked;
+ bool dead, hardfault, is_first_object_locked;
PCPU_INC(cnt.v_vm_faults);
fs.vp = NULL;
faultcount = 0;
nera = -1;
- growstack = true;
hardfault = false;
RetryFault:;
@@ -511,17 +510,10 @@ RetryFault:;
* search.
*/
fs.map = map;
- result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry,
- &fs.first_object, &fs.first_pindex, &prot, &wired);
+ result = vm_map_lookup(&fs.map, vaddr, fault_type |
+ VM_PROT_FAULT_LOOKUP, &fs.entry, &fs.first_object,
+ &fs.first_pindex, &prot, &wired);
if (result != KERN_SUCCESS) {
- if (growstack && result == KERN_INVALID_ADDRESS &&
- map != kernel_map) {
- result = vm_map_growstack(curproc, vaddr);
- if (result != KERN_SUCCESS)
- return (KERN_FAILURE);
- growstack = false;
- goto RetryFault;
- }
unlock_vp(&fs);
return (result);
}
@@ -547,6 +539,8 @@ RetryFault:;
goto RetryFault;
}
+ MPASS((fs.entry->eflags & MAP_ENTRY_GUARD) == 0);
+
if (wired)
fault_type = prot | (fault_type & VM_PROT_COPY);
else
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 12fa6a6..70a632e 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -133,6 +133,8 @@ static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min,
static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map);
static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry);
+static int vm_map_growstack(vm_map_t map, vm_offset_t addr,
+ vm_map_entry_t gap_entry);
static void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags);
#ifdef INVARIANTS
@@ -1214,6 +1216,10 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
if (prev_entry->next != &map->header && prev_entry->next->start < end)
return (KERN_NO_SPACE);
+ if ((cow & MAP_CREATE_GUARD) != 0 && (object != NULL ||
+ max != VM_PROT_NONE))
+ return (KERN_INVALID_ARGUMENT);
+
protoeflags = 0;
if (cow & MAP_COPY_ON_WRITE)
protoeflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY;
@@ -1229,13 +1235,19 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
protoeflags |= MAP_ENTRY_GROWS_UP;
if (cow & MAP_VN_WRITECOUNT)
protoeflags |= MAP_ENTRY_VN_WRITECNT;
+ if ((cow & MAP_CREATE_GUARD) != 0)
+ protoeflags |= MAP_ENTRY_GUARD;
+ if ((cow & MAP_CREATE_STACK_GAP_DN) != 0)
+ protoeflags |= MAP_ENTRY_STACK_GAP_DN;
+ if ((cow & MAP_CREATE_STACK_GAP_UP) != 0)
+ protoeflags |= MAP_ENTRY_STACK_GAP_UP;
if (cow & MAP_INHERIT_SHARE)
inheritance = VM_INHERIT_SHARE;
else
inheritance = VM_INHERIT_DEFAULT;
cred = NULL;
- if (cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT))
+ if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0)
goto charged;
if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) &&
((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) {
@@ -1284,7 +1296,8 @@ charged:
if (prev_entry->inheritance == inheritance &&
prev_entry->protection == prot &&
prev_entry->max_protection == max) {
- map->size += end - prev_entry->end;
+ if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0)
+ map->size += end - prev_entry->end;
prev_entry->end = end;
vm_map_entry_resize_free(map, prev_entry);
vm_map_simplify_entry(map, prev_entry);
@@ -1321,7 +1334,6 @@ charged:
new_entry->eflags = protoeflags;
new_entry->object.vm_object = object;
new_entry->offset = offset;
- new_entry->avail_ssize = 0;
new_entry->inheritance = inheritance;
new_entry->protection = prot;
@@ -1339,7 +1351,8 @@ charged:
* Insert the new entry into the list
*/
vm_map_entry_link(map, prev_entry, new_entry);
- map->size += new_entry->end - new_entry->start;
+ if ((new_entry->eflags & MAP_ENTRY_GUARD) == 0)
+ map->size += new_entry->end - new_entry->start;
/*
* Try to coalesce the new entry with both the previous and next
@@ -1543,6 +1556,25 @@ again:
return (result);
}
+int
+vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
+ vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr,
+ vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max,
+ int cow)
+{
+ vm_offset_t hint;
+ int rv;
+
+ hint = *addr;
+ for (;;) {
+ rv = vm_map_find(map, object, offset, addr, length, max_addr,
+ find_space, prot, max, cow);
+ if (rv == KERN_SUCCESS || min_addr >= hint)
+ return (rv);
+ *addr = min_addr;
+ }
+}
+
/*
* vm_map_simplify_entry:
*
@@ -1674,7 +1706,8 @@ _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
* map. This is a bit of a hack, but is also about the best place to
* put this improvement.
*/
- if (entry->object.vm_object == NULL && !map->system_map) {
+ if (entry->object.vm_object == NULL && !map->system_map &&
+ (entry->eflags & MAP_ENTRY_GUARD) == 0) {
vm_object_t object;
object = vm_object_allocate(OBJT_DEFAULT,
atop(entry->end - entry->start));
@@ -1753,7 +1786,8 @@ _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
* map. This is a bit of a hack, but is also about the best place to
* put this improvement.
*/
- if (entry->object.vm_object == NULL && !map->system_map) {
+ if (entry->object.vm_object == NULL && !map->system_map &&
+ (entry->eflags & MAP_ENTRY_GUARD) == 0) {
vm_object_t object;
object = vm_object_allocate(OBJT_DEFAULT,
atop(entry->end - entry->start));
@@ -1988,6 +2022,8 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
*/
for (current = entry; current != &map->header && current->start < end;
current = current->next) {
+ if ((current->eflags & MAP_ENTRY_GUARD) != 0)
+ continue;
if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
vm_map_unlock(map);
return (KERN_INVALID_ARGUMENT);
@@ -2010,7 +2046,8 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
if (set_max ||
((new_prot & ~(current->protection)) & VM_PROT_WRITE) == 0 ||
- ENTRY_CHARGED(current)) {
+ ENTRY_CHARGED(current) ||
+ (current->eflags & MAP_ENTRY_GUARD) != 0) {
continue;
}
@@ -2059,6 +2096,9 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
*/
for (current = entry; current != &map->header && current->start < end;
current = current->next) {
+ if ((current->eflags & MAP_ENTRY_GUARD) != 0)
+ continue;
+
old_prot = current->protection;
if (set_max)
@@ -2312,7 +2352,9 @@ vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
entry = temp_entry->next;
while ((entry != &map->header) && (entry->start < end)) {
vm_map_clip_end(map, entry, end);
- entry->inheritance = new_inheritance;
+ if ((entry->eflags & MAP_ENTRY_GUARD) == 0 ||
+ new_inheritance != VM_INHERIT_ZERO)
+ entry->inheritance = new_inheritance;
vm_map_simplify_entry(map, entry);
entry = entry->next;
}
@@ -2918,6 +2960,15 @@ vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
vm_map_entry_unlink(map, entry);
object = entry->object.vm_object;
+
+ if ((entry->eflags & MAP_ENTRY_GUARD) != 0) {
+ MPASS(entry->cred == NULL);
+ MPASS((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0);
+ MPASS(object == NULL);
+ vm_map_entry_deallocate(entry, map->system_map);
+ return;
+ }
+
size = entry->end - entry->start;
map->size -= size;
@@ -3276,6 +3327,8 @@ vmspace_map_entry_forked(const struct vmspace *vm1, struct vmspace *vm2,
vm_size_t entrysize;
vm_offset_t newend;
+ if ((entry->eflags & MAP_ENTRY_GUARD) != 0)
+ return;
entrysize = entry->end - entry->start;
vm2->vm_map.size += entrysize;
if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) {
@@ -3312,6 +3365,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
vm_map_entry_t new_entry, old_entry;
vm_object_t object;
int locked;
+ vm_inherit_t inh;
old_map = &vm1->vm_map;
/* Copy immutable fields of vm1 to vm2. */
@@ -3334,7 +3388,12 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
panic("vm_map_fork: encountered a submap");
- switch (old_entry->inheritance) {
+ inh = old_entry->inheritance;
+ if ((old_entry->eflags & MAP_ENTRY_GUARD) != 0 &&
+ inh != VM_INHERIT_NONE)
+ inh = VM_INHERIT_COPY;
+
+ switch (inh) {
case VM_INHERIT_NONE:
break;
@@ -3467,7 +3526,6 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
new_entry->start = old_entry->start;
new_entry->end = old_entry->end;
- new_entry->avail_ssize = old_entry->avail_ssize;
new_entry->eflags = old_entry->eflags &
~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION |
MAP_ENTRY_VN_WRITECNT);
@@ -3530,30 +3588,40 @@ out:
return (rv);
}
+static int stack_guard_page = 1;
+SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN,
+ &stack_guard_page, 0,
+ "Specifies the number of guard pages for a stack that grows");
+
static int
vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow)
{
vm_map_entry_t new_entry, prev_entry;
- vm_offset_t bot, top;
- vm_size_t init_ssize;
+ vm_offset_t bot, gap_bot, gap_top, top;
+ vm_size_t init_ssize, sgp;
int orient, rv;
/*
* The stack orientation is piggybacked with the cow argument.
* Extract it into orient and mask the cow argument so that we
* don't pass it around further.
- * NOTE: We explicitly allow bi-directional stacks.
*/
- orient = cow & (MAP_STACK_GROWS_DOWN|MAP_STACK_GROWS_UP);
+ orient = cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP);
KASSERT(orient != 0, ("No stack grow direction"));
+ KASSERT(orient != (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP),
+ ("bi-dir stack"));
+ sgp = (vm_size_t)stack_guard_page * PAGE_SIZE;
if (addrbos < vm_map_min(map) ||
addrbos > vm_map_max(map) ||
- addrbos + max_ssize < addrbos)
+ addrbos + max_ssize < addrbos ||
+ sgp >= max_ssize)
return (KERN_NO_SPACE);
- init_ssize = (max_ssize < growsize) ? max_ssize : growsize;
+ init_ssize = growsize;
+ if (max_ssize < init_ssize + sgp)
+ init_ssize = max_ssize - sgp;
/* If addr is already mapped, no go */
if (vm_map_lookup_entry(map, addrbos, &prev_entry))
@@ -3561,12 +3629,6 @@ vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
/*
* If we can't accommodate max_ssize in the current mapping, no go.
- * However, we need to be aware that subsequent user mappings might
- * map into the space we have reserved for stack, and currently this
- * space is not protected.
- *
- * Hopefully we will at least detect this condition when we try to
- * grow the stack.
*/
if ((prev_entry->next != &map->header) &&
(prev_entry->next->start < addrbos + max_ssize))
@@ -3582,57 +3644,53 @@ vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
* and cow to be 0. Possibly we should eliminate these as input
* parameters, and just pass these values here in the insert call.
*/
- if (orient == MAP_STACK_GROWS_DOWN)
+ if (orient == MAP_STACK_GROWS_DOWN) {
bot = addrbos + max_ssize - init_ssize;
- else if (orient == MAP_STACK_GROWS_UP)
+ top = bot + init_ssize;
+ gap_bot = addrbos;
+ gap_top = bot;
+ } else /* if (orient == MAP_STACK_GROWS_UP) */ {
bot = addrbos;
- else
- bot = round_page(addrbos + max_ssize/2 - init_ssize/2);
- top = bot + init_ssize;
- rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
-
- /* Now set the avail_ssize amount. */
- if (rv == KERN_SUCCESS) {
- new_entry = prev_entry->next;
- if (new_entry->end != top || new_entry->start != bot)
- panic("Bad entry start/end for new stack entry");
-
- new_entry->avail_ssize = max_ssize - init_ssize;
- KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 ||
- (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0,
- ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
- KASSERT((orient & MAP_STACK_GROWS_UP) == 0 ||
- (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0,
- ("new entry lacks MAP_ENTRY_GROWS_UP"));
+ top = bot + init_ssize;
+ gap_bot = top;
+ gap_top = addrbos + max_ssize;
}
-
+ rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
+ if (rv != KERN_SUCCESS)
+ return (rv);
+ new_entry = prev_entry->next;
+ KASSERT(new_entry->end == top || new_entry->start == bot,
+ ("Bad entry start/end for new stack entry"));
+ KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 ||
+ (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0,
+ ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
+ KASSERT((orient & MAP_STACK_GROWS_UP) == 0 ||
+ (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0,
+ ("new entry lacks MAP_ENTRY_GROWS_UP"));
+ rv = vm_map_insert(map, NULL, 0, gap_bot, gap_top, VM_PROT_NONE,
+ VM_PROT_NONE, MAP_CREATE_GUARD | (orient == MAP_STACK_GROWS_DOWN ?
+ MAP_CREATE_STACK_GAP_DN : MAP_CREATE_STACK_GAP_UP));
+ if (rv != KERN_SUCCESS)
+ (void)vm_map_delete(map, bot, top);
return (rv);
}
-static int stack_guard_page = 0;
-SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN,
- &stack_guard_page, 0,
- "Insert stack guard page ahead of the growable segments.");
-
-/* Attempts to grow a vm stack entry. Returns KERN_SUCCESS if the
- * desired address is already mapped, or if we successfully grow
- * the stack. Also returns KERN_SUCCESS if addr is outside the
- * stack range (this is strange, but preserves compatibility with
- * the grow function in vm_machdep.c).
+/*
+ * Attempts to grow a vm stack entry. Returns KERN_SUCCESS if we
+ * successfully grow the stack.
*/
-int
-vm_map_growstack(struct proc *p, vm_offset_t addr)
+static int
+vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry)
{
- vm_map_entry_t next_entry, prev_entry;
- vm_map_entry_t new_entry, stack_entry;
- struct vmspace *vm = p->p_vmspace;
- vm_map_t map = &vm->vm_map;
- vm_offset_t end;
- vm_size_t growsize;
- size_t grow_amount, max_grow;
- rlim_t lmemlim, stacklim, vmemlim;
- int is_procstack, rv;
+ vm_map_entry_t stack_entry;
+ struct proc *p;
+ struct vmspace *vm;
struct ucred *cred;
+ vm_offset_t gap_end, gap_start, grow_start;
+ size_t grow_amount, guard, max_grow;
+ rlim_t lmemlim, stacklim, vmemlim;
+ int rv, rv1;
+ bool gap_deleted, grow_down, is_procstack;
#ifdef notyet
uint64_t limit;
#endif
@@ -3640,125 +3698,74 @@ vm_map_growstack(struct proc *p, vm_offset_t addr)
int error;
#endif
+ p = curproc;
+ vm = p->p_vmspace;
+ MPASS(map == &p->p_vmspace->vm_map);
+ MPASS(!map->system_map);
+
+ guard = stack_guard_page * PAGE_SIZE;
lmemlim = lim_cur(curthread, RLIMIT_MEMLOCK);
stacklim = lim_cur(curthread, RLIMIT_STACK);
vmemlim = lim_cur(curthread, RLIMIT_VMEM);
-Retry:
-
- vm_map_lock_read(map);
-
- /* If addr is already in the entry range, no need to grow.*/
- if (vm_map_lookup_entry(map, addr, &prev_entry)) {
- vm_map_unlock_read(map);
+retry:
+ /* If addr is not in a hole for a stack grow area, no need to grow. */
+ if (gap_entry == NULL && !vm_map_lookup_entry(map, addr, &gap_entry))
+ return (KERN_FAILURE);
+ if ((gap_entry->eflags & MAP_ENTRY_GUARD) == 0)
return (KERN_SUCCESS);
- }
-
- next_entry = prev_entry->next;
- if (!(prev_entry->eflags & MAP_ENTRY_GROWS_UP)) {
- /*
- * This entry does not grow upwards. Since the address lies
- * beyond this entry, the next entry (if one exists) has to
- * be a downward growable entry. The entry list header is
- * never a growable entry, so it suffices to check the flags.
- */
- if (!(next_entry->eflags & MAP_ENTRY_GROWS_DOWN)) {
- vm_map_unlock_read(map);
- return (KERN_SUCCESS);
- }
- stack_entry = next_entry;
- } else {
- /*
- * This entry grows upward. If the next entry does not at
- * least grow downwards, this is the entry we need to grow.
- * otherwise we have two possible choices and we have to
- * select one.
- */
- if (next_entry->eflags & MAP_ENTRY_GROWS_DOWN) {
- /*
- * We have two choices; grow the entry closest to
- * the address to minimize the amount of growth.
- */
- if (addr - prev_entry->end <= next_entry->start - addr)
- stack_entry = prev_entry;
- else
- stack_entry = next_entry;
- } else
- stack_entry = prev_entry;
- }
-
- if (stack_entry == next_entry) {
- KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_DOWN, ("foo"));
- KASSERT(addr < stack_entry->start, ("foo"));
- end = (prev_entry != &map->header) ? prev_entry->end :
- stack_entry->start - stack_entry->avail_ssize;
- grow_amount = roundup(stack_entry->start - addr, PAGE_SIZE);
- max_grow = stack_entry->start - end;
+ if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_DN) != 0) {
+ stack_entry = gap_entry->next;
+ if ((stack_entry->eflags & MAP_ENTRY_GROWS_DOWN) == 0 ||
+ stack_entry->start != gap_entry->end)
+ return (KERN_FAILURE);
+ grow_amount = round_page(stack_entry->start - addr);
+ grow_down = true;
+ } else if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_UP) != 0) {
+ stack_entry = gap_entry->prev;
+ if ((stack_entry->eflags & MAP_ENTRY_GROWS_UP) == 0 ||
+ stack_entry->end != gap_entry->start)
+ return (KERN_FAILURE);
+ grow_amount = round_page(addr + 1 - stack_entry->end);
+ grow_down = false;
} else {
- KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_UP, ("foo"));
- KASSERT(addr >= stack_entry->end, ("foo"));
- end = (next_entry != &map->header) ? next_entry->start :
- stack_entry->end + stack_entry->avail_ssize;
- grow_amount = roundup(addr + 1 - stack_entry->end, PAGE_SIZE);
- max_grow = end - stack_entry->end;
+ return (KERN_FAILURE);
}
-
- if (grow_amount > stack_entry->avail_ssize) {
- vm_map_unlock_read(map);
+ max_grow = gap_entry->end - gap_entry->start;
+ if (guard > max_grow)
return (KERN_NO_SPACE);
- }
-
- /*
- * If there is no longer enough space between the entries nogo, and
- * adjust the available space. Note: this should only happen if the
- * user has mapped into the stack area after the stack was created,
- * and is probably an error.
- *
- * This also effectively destroys any guard page the user might have
- * intended by limiting the stack size.
- */
- if (grow_amount + (stack_guard_page ? PAGE_SIZE : 0) > max_grow) {
- if (vm_map_lock_upgrade(map))
- goto Retry;
-
- stack_entry->avail_ssize = max_grow;
-
- vm_map_unlock(map);
+ max_grow -= guard;
+ if (grow_amount > max_grow)
return (KERN_NO_SPACE);
- }
-
- is_procstack = (addr >= (vm_offset_t)vm->vm_maxsaddr &&
- addr < (vm_offset_t)p->p_sysent->sv_usrstack) ? 1 : 0;
/*
* If this is the main process stack, see if we're over the stack
* limit.
*/
- if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
- vm_map_unlock_read(map);
+ is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr &&
+ addr < (vm_offset_t)p->p_sysent->sv_usrstack;
+ if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim))
return (KERN_NO_SPACE);
- }
+
#ifdef RACCT
if (racct_enable) {
PROC_LOCK(p);
if (is_procstack && racct_set(p, RACCT_STACK,
ctob(vm->vm_ssize) + grow_amount)) {
PROC_UNLOCK(p);
- vm_map_unlock_read(map);
return (KERN_NO_SPACE);
}
PROC_UNLOCK(p);
}
#endif
- /* Round up the grow amount modulo sgrowsiz */
- growsize = sgrowsiz;
- grow_amount = roundup(grow_amount, growsize);
- if (grow_amount > stack_entry->avail_ssize)
- grow_amount = stack_entry->avail_ssize;
+ grow_amount = roundup(grow_amount, sgrowsiz);
+ if (grow_amount > max_grow)
+ grow_amount = max_grow;
if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
grow_amount = trunc_page((vm_size_t)stacklim) -
ctob(vm->vm_ssize);
}
+
#ifdef notyet
PROC_LOCK(p);
limit = racct_get_available(p, RACCT_STACK);
@@ -3766,9 +3773,9 @@ Retry:
if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit))
grow_amount = limit - ctob(vm->vm_ssize);
#endif
- if (!old_mlock && map->flags & MAP_WIREFUTURE) {
+
+ if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) {
if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) {
- vm_map_unlock_read(map);
rv = KERN_NO_SPACE;
goto out;
}
@@ -3778,7 +3785,6 @@ Retry:
if (racct_set(p, RACCT_MEMLOCK,
ptoa(pmap_wired_count(map->pmap)) + grow_amount)) {
PROC_UNLOCK(p);
- vm_map_unlock_read(map);
rv = KERN_NO_SPACE;
goto out;
}
@@ -3786,9 +3792,9 @@ Retry:
}
#endif
}
+
/* If we would blow our VMEM resource limit, no go */
if (map->size + grow_amount > vmemlim) {
- vm_map_unlock_read(map);
rv = KERN_NO_SPACE;
goto out;
}
@@ -3797,7 +3803,6 @@ Retry:
PROC_LOCK(p);
if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) {
PROC_UNLOCK(p);
- vm_map_unlock_read(map);
rv = KERN_NO_SPACE;
goto out;
}
@@ -3805,62 +3810,42 @@ Retry:
}
#endif
- if (vm_map_lock_upgrade(map))
- goto Retry;
-
- if (stack_entry == next_entry) {
- /*
- * Growing downward.
- */
- /* Get the preliminary new entry start value */
- addr = stack_entry->start - grow_amount;
+ if (vm_map_lock_upgrade(map)) {
+ gap_entry = NULL;
+ vm_map_lock_read(map);
+ goto retry;
+ }
- /*
- * If this puts us into the previous entry, cut back our
- * growth to the available space. Also, see the note above.
- */
- if (addr < end) {
- stack_entry->avail_ssize = max_grow;
- addr = end;
- if (stack_guard_page)
- addr += PAGE_SIZE;
+ if (grow_down) {
+ grow_start = gap_entry->end - grow_amount;
+ if (gap_entry->start + grow_amount == gap_entry->end) {
+ gap_start = gap_entry->start;
+ gap_end = gap_entry->end;
+ vm_map_entry_delete(map, gap_entry);
+ gap_deleted = true;
+ } else {
+ MPASS(gap_entry->start < gap_entry->end - grow_amount);
+ gap_entry->end -= grow_amount;
+ vm_map_entry_resize_free(map, gap_entry);
+ gap_deleted = false;
}
-
- rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
- next_entry->protection, next_entry->max_protection,
+ rv = vm_map_insert(map, NULL, 0, grow_start,
+ grow_start + grow_amount,
+ stack_entry->protection, stack_entry->max_protection,
MAP_STACK_GROWS_DOWN);
-
- /* Adjust the available stack space by the amount we grew. */
- if (rv == KERN_SUCCESS) {
- new_entry = prev_entry->next;
- KASSERT(new_entry == stack_entry->prev, ("foo"));
- KASSERT(new_entry->end == stack_entry->start, ("foo"));
- KASSERT(new_entry->start == addr, ("foo"));
- KASSERT((new_entry->eflags & MAP_ENTRY_GROWS_DOWN) !=
- 0, ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
- grow_amount = new_entry->end - new_entry->start;
- new_entry->avail_ssize = stack_entry->avail_ssize -
- grow_amount;
- stack_entry->eflags &= ~MAP_ENTRY_GROWS_DOWN;
+ if (rv != KERN_SUCCESS) {
+ if (gap_deleted) {
+ rv1 = vm_map_insert(map, NULL, 0, gap_start,
+ gap_end, VM_PROT_NONE, VM_PROT_NONE,
+ MAP_CREATE_GUARD | MAP_CREATE_STACK_GAP_DN);
+ MPASS(rv1 == KERN_SUCCESS);
+ } else {
+ gap_entry->end += grow_amount;
+ vm_map_entry_resize_free(map, gap_entry);
+ }
}
} else {
- /*
- * Growing upward.
- */
- addr = stack_entry->end + grow_amount;
-
- /*
- * If this puts us into the next entry, cut back our growth
- * to the available space. Also, see the note above.
- */
- if (addr > end) {
- stack_entry->avail_ssize = end - stack_entry->end;
- addr = end;
- if (stack_guard_page)
- addr -= PAGE_SIZE;
- }
-
- grow_amount = addr - stack_entry->end;
+ grow_start = stack_entry->end;
cred = stack_entry->cred;
if (cred == NULL && stack_entry->object.vm_object != NULL)
cred = stack_entry->object.vm_object->cred;
@@ -3872,30 +3857,30 @@ Retry:
stack_entry->offset,
(vm_size_t)(stack_entry->end - stack_entry->start),
(vm_size_t)grow_amount, cred != NULL)) {
- map->size += (addr - stack_entry->end);
- /* Update the current entry. */
- stack_entry->end = addr;
- stack_entry->avail_ssize -= grow_amount;
+ if (gap_entry->start + grow_amount == gap_entry->end)
+ vm_map_entry_delete(map, gap_entry);
+ else
+ gap_entry->start += grow_amount;
+ stack_entry->end += grow_amount;
+ map->size += grow_amount;
vm_map_entry_resize_free(map, stack_entry);
rv = KERN_SUCCESS;
} else
rv = KERN_FAILURE;
}
-
if (rv == KERN_SUCCESS && is_procstack)
vm->vm_ssize += btoc(grow_amount);
- vm_map_unlock(map);
-
/*
* Heed the MAP_WIREFUTURE flag if it was set for this process.
*/
- if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE)) {
- vm_map_wire(map,
- (stack_entry == next_entry) ? addr : addr - grow_amount,
- (stack_entry == next_entry) ? stack_entry->start : addr,
+ if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) {
+ vm_map_unlock(map);
+ vm_map_wire(map, grow_start, grow_start + grow_amount,
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
- }
+ vm_map_lock_read(map);
+ } else
+ vm_map_lock_downgrade(map);
out:
#ifdef RACCT
@@ -4019,10 +4004,11 @@ vm_map_lookup(vm_map_t *var_map, /* IN/OUT */
vm_size_t size;
struct ucred *cred;
-RetryLookup:;
+RetryLookup:
vm_map_lock_read(map);
+RetryLookupLocked:
/*
* Lookup the faulting address.
*/
@@ -4048,7 +4034,16 @@ RetryLookup:;
* Check whether this task is allowed to have this page.
*/
prot = entry->protection;
- fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
+ if ((fault_typea & VM_PROT_FAULT_LOOKUP) != 0) {
+ fault_typea &= ~VM_PROT_FAULT_LOOKUP;
+ if (prot == VM_PROT_NONE && map != kernel_map &&
+ (entry->eflags & MAP_ENTRY_GUARD) != 0 &&
+ (entry->eflags & (MAP_ENTRY_STACK_GAP_DN |
+ MAP_ENTRY_STACK_GAP_UP)) != 0 &&
+ vm_map_growstack(map, vaddr, entry) == KERN_SUCCESS)
+ goto RetryLookupLocked;
+ }
+ fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) {
vm_map_unlock_read(map);
return (KERN_PROTECTION_FAILURE);
@@ -4282,8 +4277,9 @@ vm_map_print(vm_map_t map)
db_indent += 2;
for (entry = map->header.next; entry != &map->header;
entry = entry->next) {
- db_iprintf("map entry %p: start=%p, end=%p\n",
- (void *)entry, (void *)entry->start, (void *)entry->end);
+ db_iprintf("map entry %p: start=%p, end=%p, eflags=%#x, \n",
+ (void *)entry, (void *)entry->start, (void *)entry->end,
+ entry->eflags);
{
static char *inheritance_name[4] =
{"share", "copy", "none", "donate_copy"};
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 10e6564..6d63816 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -103,7 +103,6 @@ struct vm_map_entry {
struct vm_map_entry *right; /* right child in binary search tree */
vm_offset_t start; /* start address */
vm_offset_t end; /* end address */
- vm_offset_t avail_ssize; /* amt can grow if this is a stack */
vm_offset_t next_read; /* vaddr of the next sequential read */
vm_size_t adj_free; /* amount of adjacent free space */
vm_size_t max_free; /* max free space in subtree */
@@ -142,6 +141,9 @@ struct vm_map_entry {
#define MAP_ENTRY_WIRE_SKIPPED 0x4000
#define MAP_ENTRY_VN_WRITECNT 0x8000 /* writeable vnode mapping */
+#define MAP_ENTRY_GUARD 0x10000
+#define MAP_ENTRY_STACK_GAP_DN 0x20000
+#define MAP_ENTRY_STACK_GAP_UP 0x40000
#ifdef _KERNEL
static __inline u_char
@@ -315,6 +317,7 @@ long vmspace_resident_count(struct vmspace *vmspace);
#define MAP_PREFAULT_PARTIAL 0x0010
#define MAP_DISABLE_SYNCER 0x0020
#define MAP_CHECK_EXCL 0x0040
+#define MAP_CREATE_GUARD 0x0080
#define MAP_DISABLE_COREDUMP 0x0100
#define MAP_PREFAULT_MADVISE 0x0200 /* from (user) madvise request */
#define MAP_VN_WRITECOUNT 0x0400
@@ -322,6 +325,8 @@ long vmspace_resident_count(struct vmspace *vmspace);
#define MAP_STACK_GROWS_UP 0x2000
#define MAP_ACC_CHARGED 0x4000
#define MAP_ACC_NO_CHARGE 0x8000
+#define MAP_CREATE_STACK_GAP_UP 0x10000
+#define MAP_CREATE_STACK_GAP_DN 0x20000
/*
* vm_fault option flags
@@ -367,6 +372,8 @@ vm_map_t vm_map_create(pmap_t, vm_offset_t, vm_offset_t);
int vm_map_delete(vm_map_t, vm_offset_t, vm_offset_t);
int vm_map_find(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *, vm_size_t,
vm_offset_t, int, vm_prot_t, vm_prot_t, int);
+int vm_map_find_min(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *,
+ vm_size_t, vm_offset_t, vm_offset_t, int, vm_prot_t, vm_prot_t, int);
int vm_map_fixed(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t, vm_size_t,
vm_prot_t, vm_prot_t, int);
int vm_map_findspace (vm_map_t, vm_offset_t, vm_size_t, vm_offset_t *);
@@ -387,7 +394,6 @@ int vm_map_submap (vm_map_t, vm_offset_t, vm_offset_t, vm_map_t);
int vm_map_sync(vm_map_t, vm_offset_t, vm_offset_t, boolean_t, boolean_t);
int vm_map_madvise (vm_map_t, vm_offset_t, vm_offset_t, int);
int vm_map_stack (vm_map_t, vm_offset_t, vm_size_t, vm_prot_t, vm_prot_t, int);
-int vm_map_growstack (struct proc *p, vm_offset_t addr);
int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
int flags);
int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 68c2108..17631f9 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -226,7 +226,7 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t size, int prot, int flags,
}
if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE |
MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE |
- MAP_PREFAULT_READ |
+ MAP_PREFAULT_READ | MAP_GUARD |
#ifdef MAP_32BIT
MAP_32BIT |
#endif
@@ -239,6 +239,10 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t size, int prot, int flags,
if (prot != PROT_NONE &&
(prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0)
return (EINVAL);
+ if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 ||
+ pos != 0 || (flags & (MAP_SHARED | MAP_PRIVATE | MAP_PREFAULT |
+ MAP_PREFAULT_READ | MAP_ANON | MAP_STACK)) != 0))
+ return (EINVAL);
/*
* Align the file position to a page boundary,
@@ -314,7 +318,10 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t size, int prot, int flags,
* returns an error earlier.
*/
error = 0;
- } else if (flags & MAP_ANON) {
+ } else if ((flags & MAP_GUARD) != 0) {
+ error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE,
+ VM_PROT_NONE, flags, NULL, pos, FALSE, td);
+ } else if ((flags & MAP_ANON) != 0) {
/*
* Mapping blank space is trivial.
*
@@ -1431,10 +1438,12 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
boolean_t writecounted, struct thread *td)
{
- boolean_t fitit;
+ boolean_t curmap, fitit;
+ vm_offset_t max_addr;
int docow, error, findspace, rv;
- if (map == &td->td_proc->p_vmspace->vm_map) {
+ curmap = map == &td->td_proc->p_vmspace->vm_map;
+ if (curmap) {
PROC_LOCK(td->td_proc);
if (map->size + size > lim_cur_proc(td->td_proc, RLIMIT_VMEM)) {
PROC_UNLOCK(td->td_proc);
@@ -1511,6 +1520,8 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
}
if ((flags & MAP_EXCL) != 0)
docow |= MAP_CHECK_EXCL;
+ if ((flags & MAP_GUARD) != 0)
+ docow |= MAP_CREATE_GUARD;
if (fitit) {
if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
@@ -1520,11 +1531,20 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
MAP_ALIGNMENT_SHIFT);
else
findspace = VMFS_OPTIMAL_SPACE;
- rv = vm_map_find(map, object, foff, addr, size,
+ max_addr = 0;
#ifdef MAP_32BIT
- flags & MAP_32BIT ? MAP_32BIT_MAX_ADDR :
+ if ((flags & MAP_32BIT) != 0)
+ max_addr = MAP_32BIT_MAX_ADDR;
#endif
- 0, findspace, prot, maxprot, docow);
+ if (curmap) {
+ rv = vm_map_find_min(map, object, foff, addr, size,
+ round_page((vm_offset_t)td->td_proc->p_vmspace->
+ vm_daddr + lim_max(td, RLIMIT_DATA)), max_addr,
+ findspace, prot, maxprot, docow);
+ } else {
+ rv = vm_map_find(map, object, foff, addr, size,
+ max_addr, findspace, prot, maxprot, docow);
+ }
} else {
rv = vm_map_fixed(map, object, foff, *addr, size,
prot, maxprot, docow);