summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- lib/libc/sys/mmap.2 53
-rw-r--r-- sys/sys/mman.h 11
-rw-r--r-- sys/vm/vm_init.c 2
-rw-r--r-- sys/vm/vm_kern.c 2
-rw-r--r-- sys/vm/vm_map.c 21
-rw-r--r-- sys/vm/vm_map.h 8
-rw-r--r-- sys/vm/vm_mmap.c 28
-rw-r--r-- usr.bin/kdump/mksubr 35
-rw-r--r-- usr.bin/truss/syscalls.c 38
9 files changed, 174 insertions, 24 deletions
diff --git a/lib/libc/sys/mmap.2 b/lib/libc/sys/mmap.2
index 73ffb2e..130f70b 100644
--- a/lib/libc/sys/mmap.2
+++ b/lib/libc/sys/mmap.2
@@ -28,7 +28,7 @@
.\" @(#)mmap.2 8.4 (Berkeley) 5/11/95
.\" $FreeBSD$
.\"
-.Dd March 18, 2012
+.Dd August 16, 2013
.Dt MMAP 2
.Os
.Sh NAME
@@ -97,7 +97,30 @@ Sharing, mapping type and options are specified in the
argument by
.Em or Ns 'ing
the following values:
-.Bl -tag -width MAP_HASSEMAPHORE
+.Bl -tag -width MAP_PREFAULT_READ
+.It Dv MAP_ALIGNED Ns Pq Fa n
+Align the region on a requested boundary.
+If a suitable region cannot be found,
+.Fn mmap
+will fail.
+The
+.Fa n
+argument specifies the binary logarithm of the desired alignment.
+.It Dv MAP_ALIGNED_SUPER
+Align the region to maximize the potential use of large
+.Pq Dq super
+pages.
+If a suitable region cannot be found,
+.Fn mmap
+will fail.
+The system will choose a suitable page size based on the size of
+the mapping.
+The page size used as well as the alignment of the region may both be
+affected by properties of the file being mapped.
+In particular,
+the physical address of existing pages of a file may require a specific
+alignment.
+The region is not guaranteed to be aligned on any specific boundary.
.It Dv MAP_ANON
Map anonymous memory not associated with any specific file.
The file descriptor used for creating
@@ -274,6 +297,25 @@ Although this implementation does not impose any alignment restrictions on
the
.Fa offset
argument, a portable program must only use page-aligned values.
+.Pp
+Large page mappings require that the pages backing an object be
+aligned in matching blocks in both the virtual address space and RAM.
+The system will automatically attempt to use large page mappings when
+mapping an object that is already backed by large pages in RAM by
+aligning the mapping request in the virtual address space to match the
+alignment of the large physical pages.
+The system may also use large page mappings when mapping portions of an
+object that are not yet backed by pages in RAM.
+The
+.Dv MAP_ALIGNED_SUPER
+flag is an optimization that will align the mapping request to the
+size of a large page similar to
+.Dv MAP_ALIGNED ,
+except that the system will override this alignment if an object already
+uses large pages so that the mapping will be consistent with the existing
+large pages.
+This flag is mostly useful for maximizing the use of large pages on the
+first mapping of objects that do not yet have pages present in RAM.
.Sh RETURN VALUES
Upon successful completion,
.Fn mmap
@@ -325,6 +367,10 @@ The
argument
was equal to zero.
.It Bq Er EINVAL
+.Dv MAP_ALIGNED
+was specified and the desired alignment was either larger than the
+virtual address size of the machine or smaller than a page.
+.It Bq Er EINVAL
.Dv MAP_ANON
was specified and the
.Fa fd
@@ -356,7 +402,8 @@ was specified and insufficient memory was available.
.Xr msync 2 ,
.Xr munlock 2 ,
.Xr munmap 2 ,
-.Xr getpagesize 3
+.Xr getpagesize 3 ,
+.Xr getpagesizes 3
.Sh BUGS
The
.Fa len
diff --git a/sys/sys/mman.h b/sys/sys/mman.h
index a178d7c..c5e47a0 100644
--- a/sys/sys/mman.h
+++ b/sys/sys/mman.h
@@ -91,6 +91,17 @@
*/
#define MAP_NOCORE 0x00020000 /* dont include these pages in a coredump */
#define MAP_PREFAULT_READ 0x00040000 /* prefault mapping for reading */
+
+/*
+ * Request specific alignment (n == log2 of the desired alignment);
+ * the shift is unsigned so that MAP_ALIGNED(0xff) cannot overflow int.
+ * MAP_ALIGNED_SUPER requests optimal superpage alignment, but does
+ * not enforce a specific alignment.
+ */
+#define MAP_ALIGNED(n) ((int)((unsigned int)(n) << MAP_ALIGNMENT_SHIFT))
+#define MAP_ALIGNMENT_SHIFT 24
+#define MAP_ALIGNMENT_MASK MAP_ALIGNED(0xff)
+#define MAP_ALIGNED_SUPER MAP_ALIGNED(1) /* align on a superpage */
#endif /* __BSD_VISIBLE */
#if __POSIX_VISIBLE >= 199309
diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c
index b539f9d..7ab1ee0 100644
--- a/sys/vm/vm_init.c
+++ b/sys/vm/vm_init.c
@@ -112,7 +112,7 @@ kva_import(void *unused, vmem_size_t size, int flags, vmem_addr_t *addrp)
addr = vm_map_min(kernel_map);
result = vm_map_find(kernel_map, NULL, 0, &addr, size,
- VMFS_ALIGNED_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
+ VMFS_SUPER_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
if (result != KERN_SUCCESS)
return (ENOMEM);
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index c7cb409..9790653 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -286,7 +286,7 @@ kmem_suballoc(vm_map_t parent, vm_offset_t *min, vm_offset_t *max,
*min = vm_map_min(parent);
ret = vm_map_find(parent, NULL, 0, min, size, superpage_align ?
- VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL,
+ VMFS_SUPER_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL,
MAP_ACC_NO_CHARGE);
if (ret != KERN_SUCCESS)
panic("kmem_suballoc: bad status return of %d", ret);
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 1d92965..1a6146e 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -1434,12 +1434,17 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
vm_size_t length, int find_space, vm_prot_t prot,
vm_prot_t max, int cow)
{
- vm_offset_t start, initial_addr;
+ vm_offset_t alignment, initial_addr, start;
int result;
if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL ||
(object->flags & OBJ_COLORED) == 0))
- find_space = VMFS_ANY_SPACE;
+ find_space = VMFS_ANY_SPACE;
+ if (find_space >> 8 != 0) {
+ KASSERT((find_space & 0xff) == 0, ("bad VMFS flags"));
+ alignment = (vm_offset_t)1 << (find_space >> 8);
+ } else
+ alignment = 0;
initial_addr = *addr;
again:
start = initial_addr;
@@ -1455,12 +1460,18 @@ again:
return (KERN_NO_SPACE);
}
switch (find_space) {
- case VMFS_ALIGNED_SPACE:
+ case VMFS_SUPER_SPACE:
case VMFS_OPTIMAL_SPACE:
pmap_align_superpage(object, offset, addr,
length);
break;
+ case VMFS_ANY_SPACE:
+ break;
default:
+ if ((*addr & (alignment - 1)) != 0) {
+ *addr &= ~(alignment - 1);
+ *addr += alignment;
+ }
break;
}
@@ -1468,8 +1479,8 @@ again:
}
result = vm_map_insert(map, object, offset, start, start +
length, prot, max, cow);
- } while (result == KERN_NO_SPACE && (find_space == VMFS_ALIGNED_SPACE ||
- find_space == VMFS_OPTIMAL_SPACE));
+ } while (result == KERN_NO_SPACE && find_space != VMFS_NO_SPACE &&
+ find_space != VMFS_ANY_SPACE);
vm_map_unlock(map);
return (result);
}
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index ed8864e..054c506 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -339,12 +339,16 @@ long vmspace_resident_count(struct vmspace *vmspace);
#define VM_FAULT_READ_AHEAD_MAX min(atop(MAXPHYS) - 1, UINT8_MAX)
/*
- * The following "find_space" options are supported by vm_map_find()
+ * The following "find_space" options are supported by vm_map_find().
+ *
+ * For VMFS_ALIGNED_SPACE, the macro argument specifies the desired
+ * alignment as its base-2 logarithm (the alignment is 1 << x).
*/
#define VMFS_NO_SPACE 0 /* don't find; use the given range */
#define VMFS_ANY_SPACE 1 /* find a range with any alignment */
#define VMFS_OPTIMAL_SPACE 2 /* find a range with optimal alignment*/
-#define VMFS_ALIGNED_SPACE 3 /* find a superpage-aligned range */
+#define VMFS_SUPER_SPACE 3 /* find a superpage-aligned range */
+#define VMFS_ALIGNED_SPACE(x) ((x) << 8) /* find a range with fixed alignment */
/*
* vm_map_wire and vm_map_unwire option flags
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 1b08097..53a7be5 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -201,7 +201,7 @@ sys_mmap(td, uap)
vm_prot_t cap_maxprot, prot, maxprot;
void *handle;
objtype_t handle_type;
- int flags, error;
+ int align, error, flags;
off_t pos;
struct vmspace *vms = td->td_proc->p_vmspace;
cap_rights_t rights;
@@ -251,6 +251,13 @@ sys_mmap(td, uap)
size += pageoff; /* low end... */
size = (vm_size_t) round_page(size); /* hi end */
+ /* Ensure alignment is at least a page and fits in a pointer. */
+ align = flags & MAP_ALIGNMENT_MASK;
+ if (align != 0 && align != MAP_ALIGNED_SUPER &&
+ (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
+ align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
+ return (EINVAL);
+
/*
* Check for illegal addresses. Watch out for address wrap... Note
* that VM_*_ADDRESS are not constants due to casts (argh).
@@ -1490,7 +1497,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
boolean_t fitit;
vm_object_t object = NULL;
struct thread *td = curthread;
- int docow, error, rv;
+ int docow, error, findspace, rv;
boolean_t writecounted;
if (size == 0)
@@ -1605,12 +1612,17 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
if (flags & MAP_STACK)
rv = vm_map_stack(map, *addr, size, prot, maxprot,
docow | MAP_STACK_GROWS_DOWN);
- else if (fitit)
- rv = vm_map_find(map, object, foff, addr, size,
- object != NULL && object->type == OBJT_DEVICE ?
- VMFS_ALIGNED_SPACE : VMFS_OPTIMAL_SPACE, prot, maxprot,
- docow);
- else
+ else if (fitit) {
+ if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
+ findspace = VMFS_SUPER_SPACE;
+ else if ((flags & MAP_ALIGNMENT_MASK) != 0)
+ findspace = VMFS_ALIGNED_SPACE(flags >>
+ MAP_ALIGNMENT_SHIFT);
+ else
+ findspace = VMFS_OPTIMAL_SPACE;
+ rv = vm_map_find(map, object, foff, addr, size, findspace,
+ prot, maxprot, docow);
+ } else
rv = vm_map_fixed(map, object, foff, *addr, size,
prot, maxprot, docow);
diff --git a/usr.bin/kdump/mksubr b/usr.bin/kdump/mksubr
index bb70d13..5c82f30 100644
--- a/usr.bin/kdump/mksubr
+++ b/usr.bin/kdump/mksubr
@@ -385,7 +385,6 @@ auto_switch_type "lio_listioname" "LIO_(NO)?WAIT[[:space:]]+[0-9]+"
auto_switch_type "madvisebehavname" "_?MADV_[A-Z]+[[:space:]]+[0-9]+" "sys/mman.h"
auto_switch_type "minheritname" "INHERIT_[A-Z]+[[:space:]]+[0-9]+" "sys/mman.h"
auto_or_type "mlockallname" "MCL_[A-Z]+[[:space:]]+0x[0-9]+" "sys/mman.h"
-auto_or_type "mmapflagsname" "MAP_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/mman.h"
auto_or_type "mmapprotname" "PROT_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/mman.h"
auto_or_type "modename" "S_[A-Z]+[[:space:]]+[0-6]{7}" "sys/stat.h"
auto_or_type "mountflagsname" "MNT_[A-Z]+[[:space:]]+0x[0-9]+" "sys/mount.h"
@@ -469,6 +468,40 @@ cat <<_EOF_
/*
* AUTO - Special
*
+ * The MAP_ALIGNED flag requires special handling.
+ */
+void
+mmapflagsname(int flags)
+{
+ int align;
+ int or = 0;
+ printf("%#x<", flags);
+_EOF_
+egrep "^#[[:space:]]*define[[:space:]]+MAP_[A-Z_]+[[:space:]]+0x[0-9A-Fa-f]+[[:space:]]*" \
+ $include_dir/sys/mman.h | grep -v MAP_ALIGNED | \
+ awk '{ for (i = 1; i <= NF; i++) \
+ if ($i ~ /define/) \
+ break; \
+ ++i; \
+ printf "\tif (!((flags > 0) ^ ((%s) > 0)))\n\t\tif_print_or(flags, %s, or);\n", $i, $i }'
+cat <<_EOF_
+ align = flags & MAP_ALIGNMENT_MASK;
+ if (align != 0) {
+ if (align == MAP_ALIGNED_SUPER)
+ print_or("MAP_ALIGNED_SUPER", or);
+ else {
+ print_or("MAP_ALIGNED", or);
+ printf("(%d)", align >> MAP_ALIGNMENT_SHIFT);
+ }
+ }
+ printf(">");
+ if (or == 0)
+ printf("<invalid>%d", flags);
+}
+
+/*
+ * AUTO - Special
+ *
* The only reason this is not fully automated is due to the
* grep -v RTP_PRIO statement. A better egrep line should
* make this capable of being a auto_switch_type() function.
diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c
index 6668d7c..af4ed5e 100644
--- a/usr.bin/truss/syscalls.c
+++ b/usr.bin/truss/syscalls.c
@@ -296,7 +296,7 @@ static struct xlat mmap_flags[] = {
X(MAP_SHARED) X(MAP_PRIVATE) X(MAP_FIXED) X(MAP_RENAME)
X(MAP_NORESERVE) X(MAP_RESERVED0080) X(MAP_RESERVED0100)
X(MAP_HASSEMAPHORE) X(MAP_STACK) X(MAP_NOSYNC) X(MAP_ANON)
- X(MAP_NOCORE) XEND
+ X(MAP_NOCORE) X(MAP_PREFAULT_READ) XEND
};
static struct xlat mprot_flags[] = {
@@ -893,9 +893,41 @@ print_arg(struct syscall_args *sc, unsigned long *args, long retval,
case Mprot:
tmp = strdup(xlookup_bits(mprot_flags, args[sc->offset]));
break;
- case Mmapflags:
- tmp = strdup(xlookup_bits(mmap_flags, args[sc->offset]));
+ case Mmapflags: {
+ char *base, *alignstr;
+ int align, flags;
+
+ /*
+ * MAP_ALIGNED can't be handled by xlookup_bits(), so
+ * generate that string manually and prepend it to the
+ * string from xlookup_bits(). Have to be careful to
+ * avoid outputting MAP_ALIGNED|0 if MAP_ALIGNED is
+ * the only flag.
+ */
+ flags = args[sc->offset] & ~MAP_ALIGNMENT_MASK;
+ align = args[sc->offset] & MAP_ALIGNMENT_MASK;
+ if (align != 0) {
+ if (align == MAP_ALIGNED_SUPER)
+ alignstr = strdup("MAP_ALIGNED_SUPER");
+ else
+ asprintf(&alignstr, "MAP_ALIGNED(%d)",
+ align >> MAP_ALIGNMENT_SHIFT);
+ if (flags == 0) {
+ tmp = alignstr;
+ break;
+ }
+ } else
+ alignstr = NULL;
+ base = strdup(xlookup_bits(mmap_flags, flags));
+ if (alignstr == NULL) {
+ tmp = base;
+ break;
+ }
+ asprintf(&tmp, "%s|%s", alignstr, base);
+ free(alignstr);
+ free(base);
+ break;
+ }
case Whence:
tmp = strdup(xlookup(whence_arg, args[sc->offset]));
break;
OpenPOWER on IntegriCloud