summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author jhb <jhb@FreeBSD.org> 2013-08-16 21:13:55 +0000
committer jhb <jhb@FreeBSD.org> 2013-08-16 21:13:55 +0000
commit 3bfcb89de4b7ac6c54e2affe99eccd1482eb4327 (patch)
tree 7565121f7abca9570dd1e0d308a2dd838e3c735b
parent efcf22ed8cf8c3e8d2e5fa37ae4c4a9935a3d597 (diff)
download FreeBSD-src-3bfcb89de4b7ac6c54e2affe99eccd1482eb4327.zip
FreeBSD-src-3bfcb89de4b7ac6c54e2affe99eccd1482eb4327.tar.gz
Add new mmap(2) flags to permit applications to request specific virtual
address alignment of mappings.
- MAP_ALIGNED(n) requests a mapping aligned on a boundary of (1 << n).
  Requests for n >= number of bits in a pointer or less than the size of
  a page fail with EINVAL. This matches the API provided by NetBSD.
- MAP_ALIGNED_SUPER is a special case of MAP_ALIGNED. It can be used to
  optimize the chances of using large pages. By default it will align
  the mapping on a large page boundary (the system is free to choose any
  large page size to align to that seems best for the mapping request).
  However, if the object being mapped is already using large pages, then
  it will align the virtual mapping to match the existing large pages in
  the object instead.
- Internally, VMFS_ALIGNED_SPACE is now renamed to VMFS_SUPER_SPACE, and
  VMFS_ALIGNED_SPACE(n) is repurposed for specifying a specific alignment.
  MAP_ALIGNED(n) maps to using VMFS_ALIGNED_SPACE(n), while
  MAP_ALIGNED_SUPER maps to VMFS_SUPER_SPACE.
- mmap() of a device object now uses VMFS_OPTIMAL_SPACE rather than
  explicitly using VMFS_SUPER_SPACE. All device objects are forced to use
  a specific color on creation, so VMFS_OPTIMAL_SPACE is effectively
  equivalent.

Reviewed by: alc
MFC after: 1 month
-rw-r--r-- lib/libc/sys/mmap.2 53
-rw-r--r-- sys/sys/mman.h 11
-rw-r--r-- sys/vm/vm_init.c 2
-rw-r--r-- sys/vm/vm_kern.c 2
-rw-r--r-- sys/vm/vm_map.c 21
-rw-r--r-- sys/vm/vm_map.h 8
-rw-r--r-- sys/vm/vm_mmap.c 28
-rw-r--r-- usr.bin/kdump/mksubr 35
-rw-r--r-- usr.bin/truss/syscalls.c 38
9 files changed, 174 insertions, 24 deletions
diff --git a/lib/libc/sys/mmap.2 b/lib/libc/sys/mmap.2
index 73ffb2e..130f70b 100644
--- a/lib/libc/sys/mmap.2
+++ b/lib/libc/sys/mmap.2
@@ -28,7 +28,7 @@
.\" @(#)mmap.2 8.4 (Berkeley) 5/11/95
.\" $FreeBSD$
.\"
-.Dd March 18, 2012
+.Dd August 16, 2013
.Dt MMAP 2
.Os
.Sh NAME
@@ -97,7 +97,30 @@ Sharing, mapping type and options are specified in the
argument by
.Em or Ns 'ing
the following values:
-.Bl -tag -width MAP_HASSEMAPHORE
+.Bl -tag -width MAP_PREFAULT_READ
+.It Dv MAP_ALIGNED Ns Pq Fa n
+Align the region on a requested boundary.
+If a suitable region cannot be found,
+.Fn mmap
+will fail.
+The
+.Fa n
+argument specifies the binary logarithm of the desired alignment.
+.It Dv MAP_ALIGNED_SUPER
+Align the region to maximize the potential use of large
+.Pq Dq super
+pages.
+If a suitable region cannot be found,
+.Fn mmap
+will fail.
+The system will choose a suitable page size based on the size of
+mapping.
+The page size used as well as the alignment of the region may both be
+affected by properties of the file being mapped.
+In particular,
+the physical address of existing pages of a file may require a specific
+alignment.
+The region is not guaranteed to be aligned on any specific boundary.
.It Dv MAP_ANON
Map anonymous memory not associated with any specific file.
The file descriptor used for creating
@@ -274,6 +297,25 @@ Although this implementation does not impose any alignment restrictions on
the
.Fa offset
argument, a portable program must only use page-aligned values.
+.Pp
+Large page mappings require that the pages backing an object be
+aligned in matching blocks in both the virtual address space and RAM.
+The system will automatically attempt to use large page mappings when
+mapping an object that is already backed by large pages in RAM by
+aligning the mapping request in the virtual address space to match the
+alignment of the large physical pages.
+The system may also use large page mappings when mapping portions of an
+object that are not yet backed by pages in RAM.
+The
+.Dv MAP_ALIGNED_SUPER
+flag is an optimization that will align the mapping request to the
+size of a large page similar to
+.Dv MAP_ALIGNED ,
+except that the system will override this alignment if an object already
+uses large pages so that the mapping will be consistent with the existing
+large pages.
+This flag is mostly useful for maximizing the use of large pages on the
+first mapping of objects that do not yet have pages present in RAM.
.Sh RETURN VALUES
Upon successful completion,
.Fn mmap
@@ -325,6 +367,10 @@ The
argument
was equal to zero.
.It Bq Er EINVAL
+.Dv MAP_ALIGNED
+was specified and the desired alignment was either larger than the
+virtual address size of the machine or smaller than a page.
+.It Bq Er EINVAL
.Dv MAP_ANON
was specified and the
.Fa fd
@@ -356,7 +402,8 @@ was specified and insufficient memory was available.
.Xr msync 2 ,
.Xr munlock 2 ,
.Xr munmap 2 ,
-.Xr getpagesize 3
+.Xr getpagesize 3 ,
+.Xr getpagesizes 3
.Sh BUGS
The
.Fa len
diff --git a/sys/sys/mman.h b/sys/sys/mman.h
index a178d7c..c5e47a0 100644
--- a/sys/sys/mman.h
+++ b/sys/sys/mman.h
@@ -91,6 +91,17 @@
*/
#define MAP_NOCORE 0x00020000 /* dont include these pages in a coredump */
#define MAP_PREFAULT_READ 0x00040000 /* prefault mapping for reading */
+
+/*
+ * Request specific alignment (n == log2 of the desired alignment).
+ *
+ * MAP_ALIGNED_SUPER requests optimal superpage alignment, but does
+ * not enforce a specific alignment.
+ */
+#define MAP_ALIGNED(n) ((n) << MAP_ALIGNMENT_SHIFT)
+#define MAP_ALIGNMENT_SHIFT 24
+#define MAP_ALIGNMENT_MASK MAP_ALIGNED(0xff)
+#define MAP_ALIGNED_SUPER MAP_ALIGNED(1) /* align on a superpage */
#endif /* __BSD_VISIBLE */
#if __POSIX_VISIBLE >= 199309
diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c
index b539f9d..7ab1ee0 100644
--- a/sys/vm/vm_init.c
+++ b/sys/vm/vm_init.c
@@ -112,7 +112,7 @@ kva_import(void *unused, vmem_size_t size, int flags, vmem_addr_t *addrp)
addr = vm_map_min(kernel_map);
result = vm_map_find(kernel_map, NULL, 0, &addr, size,
- VMFS_ALIGNED_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
+ VMFS_SUPER_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
if (result != KERN_SUCCESS)
return (ENOMEM);
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index c7cb409..9790653 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -286,7 +286,7 @@ kmem_suballoc(vm_map_t parent, vm_offset_t *min, vm_offset_t *max,
*min = vm_map_min(parent);
ret = vm_map_find(parent, NULL, 0, min, size, superpage_align ?
- VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL,
+ VMFS_SUPER_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL,
MAP_ACC_NO_CHARGE);
if (ret != KERN_SUCCESS)
panic("kmem_suballoc: bad status return of %d", ret);
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 1d92965..1a6146e 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -1434,12 +1434,17 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
vm_size_t length, int find_space, vm_prot_t prot,
vm_prot_t max, int cow)
{
- vm_offset_t start, initial_addr;
+ vm_offset_t alignment, initial_addr, start;
int result;
if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL ||
(object->flags & OBJ_COLORED) == 0))
- find_space = VMFS_ANY_SPACE;
+ find_space = VMFS_ANY_SPACE;
+ if (find_space >> 8 != 0) {
+ KASSERT((find_space & 0xff) == 0, ("bad VMFS flags"));
+ alignment = (vm_offset_t)1 << (find_space >> 8);
+ } else
+ alignment = 0;
initial_addr = *addr;
again:
start = initial_addr;
@@ -1455,12 +1460,18 @@ again:
return (KERN_NO_SPACE);
}
switch (find_space) {
- case VMFS_ALIGNED_SPACE:
+ case VMFS_SUPER_SPACE:
case VMFS_OPTIMAL_SPACE:
pmap_align_superpage(object, offset, addr,
length);
break;
+ case VMFS_ANY_SPACE:
+ break;
default:
+ if ((*addr & (alignment - 1)) != 0) {
+ *addr &= ~(alignment - 1);
+ *addr += alignment;
+ }
break;
}
@@ -1468,8 +1479,8 @@ again:
}
result = vm_map_insert(map, object, offset, start, start +
length, prot, max, cow);
- } while (result == KERN_NO_SPACE && (find_space == VMFS_ALIGNED_SPACE ||
- find_space == VMFS_OPTIMAL_SPACE));
+ } while (result == KERN_NO_SPACE && find_space != VMFS_NO_SPACE &&
+ find_space != VMFS_ANY_SPACE);
vm_map_unlock(map);
return (result);
}
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index ed8864e..054c506 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -339,12 +339,16 @@ long vmspace_resident_count(struct vmspace *vmspace);
#define VM_FAULT_READ_AHEAD_MAX min(atop(MAXPHYS) - 1, UINT8_MAX)
/*
- * The following "find_space" options are supported by vm_map_find()
+ * The following "find_space" options are supported by vm_map_find().
+ *
+ * For VMFS_ALIGNED_SPACE, the desired alignment is specified to
+ * the macro argument as log base 2 of the desired alignment.
*/
#define VMFS_NO_SPACE 0 /* don't find; use the given range */
#define VMFS_ANY_SPACE 1 /* find a range with any alignment */
#define VMFS_OPTIMAL_SPACE 2 /* find a range with optimal alignment*/
-#define VMFS_ALIGNED_SPACE 3 /* find a superpage-aligned range */
+#define VMFS_SUPER_SPACE 3 /* find a superpage-aligned range */
+#define VMFS_ALIGNED_SPACE(x) ((x) << 8) /* find a range with fixed alignment */
/*
* vm_map_wire and vm_map_unwire option flags
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 1b08097..53a7be5 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -201,7 +201,7 @@ sys_mmap(td, uap)
vm_prot_t cap_maxprot, prot, maxprot;
void *handle;
objtype_t handle_type;
- int flags, error;
+ int align, error, flags;
off_t pos;
struct vmspace *vms = td->td_proc->p_vmspace;
cap_rights_t rights;
@@ -251,6 +251,13 @@ sys_mmap(td, uap)
size += pageoff; /* low end... */
size = (vm_size_t) round_page(size); /* hi end */
+ /* Ensure alignment is at least a page and fits in a pointer. */
+ align = flags & MAP_ALIGNMENT_MASK;
+ if (align != 0 && align != MAP_ALIGNED_SUPER &&
+ (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
+ align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
+ return (EINVAL);
+
/*
* Check for illegal addresses. Watch out for address wrap... Note
* that VM_*_ADDRESS are not constants due to casts (argh).
@@ -1490,7 +1497,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
boolean_t fitit;
vm_object_t object = NULL;
struct thread *td = curthread;
- int docow, error, rv;
+ int docow, error, findspace, rv;
boolean_t writecounted;
if (size == 0)
@@ -1605,12 +1612,17 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
if (flags & MAP_STACK)
rv = vm_map_stack(map, *addr, size, prot, maxprot,
docow | MAP_STACK_GROWS_DOWN);
- else if (fitit)
- rv = vm_map_find(map, object, foff, addr, size,
- object != NULL && object->type == OBJT_DEVICE ?
- VMFS_ALIGNED_SPACE : VMFS_OPTIMAL_SPACE, prot, maxprot,
- docow);
- else
+ else if (fitit) {
+ if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
+ findspace = VMFS_SUPER_SPACE;
+ else if ((flags & MAP_ALIGNMENT_MASK) != 0)
+ findspace = VMFS_ALIGNED_SPACE(flags >>
+ MAP_ALIGNMENT_SHIFT);
+ else
+ findspace = VMFS_OPTIMAL_SPACE;
+ rv = vm_map_find(map, object, foff, addr, size, findspace,
+ prot, maxprot, docow);
+ } else
rv = vm_map_fixed(map, object, foff, *addr, size,
prot, maxprot, docow);
diff --git a/usr.bin/kdump/mksubr b/usr.bin/kdump/mksubr
index bb70d13..5c82f30 100644
--- a/usr.bin/kdump/mksubr
+++ b/usr.bin/kdump/mksubr
@@ -385,7 +385,6 @@ auto_switch_type "lio_listioname" "LIO_(NO)?WAIT[[:space:]]+[0-9]+"
auto_switch_type "madvisebehavname" "_?MADV_[A-Z]+[[:space:]]+[0-9]+" "sys/mman.h"
auto_switch_type "minheritname" "INHERIT_[A-Z]+[[:space:]]+[0-9]+" "sys/mman.h"
auto_or_type "mlockallname" "MCL_[A-Z]+[[:space:]]+0x[0-9]+" "sys/mman.h"
-auto_or_type "mmapflagsname" "MAP_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/mman.h"
auto_or_type "mmapprotname" "PROT_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/mman.h"
auto_or_type "modename" "S_[A-Z]+[[:space:]]+[0-6]{7}" "sys/stat.h"
auto_or_type "mountflagsname" "MNT_[A-Z]+[[:space:]]+0x[0-9]+" "sys/mount.h"
@@ -469,6 +468,40 @@ cat <<_EOF_
/*
* AUTO - Special
*
+ * The MAP_ALIGNED flag requires special handling.
+ */
+void
+mmapflagsname(int flags)
+{
+ int align;
+ int or = 0;
+ printf("%#x<", flags);
+_EOF_
+egrep "^#[[:space:]]*define[[:space:]]+MAP_[A-Z_]+[[:space:]]+0x[0-9A-Fa-f]+[[:space:]]*" \
+ $include_dir/sys/mman.h | grep -v MAP_ALIGNED | \
+ awk '{ for (i = 1; i <= NF; i++) \
+ if ($i ~ /define/) \
+ break; \
+ ++i; \
+ printf "\tif (!((flags > 0) ^ ((%s) > 0)))\n\t\tif_print_or(flags, %s, or);\n", $i, $i }'
+cat <<_EOF_
+ align = flags & MAP_ALIGNMENT_MASK;
+ if (align != 0) {
+ if (align == MAP_ALIGNED_SUPER)
+ print_or("MAP_ALIGNED_SUPER", or);
+ else {
+ print_or("MAP_ALIGNED", or);
+ printf("(%d)", align >> MAP_ALIGNMENT_SHIFT);
+ }
+ }
+ printf(">");
+ if (or == 0)
+ printf("<invalid>%d", flags);
+}
+
+/*
+ * AUTO - Special
+ *
* The only reason this is not fully automated is due to the
* grep -v RTP_PRIO statement. A better egrep line should
* make this capable of being a auto_switch_type() function.
diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c
index 6668d7c..af4ed5e 100644
--- a/usr.bin/truss/syscalls.c
+++ b/usr.bin/truss/syscalls.c
@@ -296,7 +296,7 @@ static struct xlat mmap_flags[] = {
X(MAP_SHARED) X(MAP_PRIVATE) X(MAP_FIXED) X(MAP_RENAME)
X(MAP_NORESERVE) X(MAP_RESERVED0080) X(MAP_RESERVED0100)
X(MAP_HASSEMAPHORE) X(MAP_STACK) X(MAP_NOSYNC) X(MAP_ANON)
- X(MAP_NOCORE) XEND
+ X(MAP_NOCORE) X(MAP_PREFAULT_READ) XEND
};
static struct xlat mprot_flags[] = {
@@ -893,9 +893,41 @@ print_arg(struct syscall_args *sc, unsigned long *args, long retval,
case Mprot:
tmp = strdup(xlookup_bits(mprot_flags, args[sc->offset]));
break;
- case Mmapflags:
- tmp = strdup(xlookup_bits(mmap_flags, args[sc->offset]));
+ case Mmapflags: {
+ const char *base, *alignstr;
+ int align, flags;
+
+ /*
+ * MAP_ALIGNED can't be handled by xlookup_bits(), so
+ * generate that string manually and prepend it to the
+ * string from xlookup_bits(). Have to be careful to
+ * avoid outputting MAP_ALIGNED|0 if MAP_ALIGNED is
+ * the only flag.
+ */
+ flags = args[sc->offset] & ~MAP_ALIGNMENT_MASK;
+ align = args[sc->offset] & MAP_ALIGNMENT_MASK;
+ if (align != 0) {
+ if (align == MAP_ALIGNED_SUPER)
+ alignstr = strdup("MAP_ALIGNED_SUPER");
+ else
+ asprintf(&alignstr, "MAP_ALIGNED(%d)",
+ align >> MAP_ALIGNMENT_SHIFT);
+ if (flags == 0) {
+ tmp = alignstr;
+ break;
+ }
+ } else
+ alignstr = NULL;
+ base = strdup(xlookup_bits(mmap_flags, flags));
+ if (alignstr == NULL) {
+ tmp = base;
+ break;
+ }
+ asprintf(&tmp, "%s|%s", alignstr, base);
+ free(alignstr);
+ free(base);
break;
+ }
case Whence:
tmp = strdup(xlookup(whence_arg, args[sc->offset]));
break;
OpenPOWER on IntegriCloud