authorjhb <jhb@FreeBSD.org>2011-11-04 04:02:50 +0000
committerjhb <jhb@FreeBSD.org>2011-11-04 04:02:50 +0000
commit78c075174e74e727279365476d0d076d6c3e3075 (patch)
tree159ae25b13b965df34d0e93885cca08178c0b2a2 /sys
parent1e2d8c9d67bc3fa3bf3a560b9b8eac1745104048 (diff)
Add the posix_fadvise(2) system call. It is somewhat similar to
madvise(2) except that it operates on a file descriptor instead of a
memory region. It is currently only supported on regular files.

Just as with madvise(2), the advice given to posix_fadvise(2) can be
divided into two types. The first type provides hints about data access
patterns and is used in the file read and write routines to modify the
I/O flags passed down to VOP_READ() and VOP_WRITE(). These modes are
thus filesystem-independent. Note that to ease the implementation (and
since this API is only advisory anyway), only a single non-normal range
is allowed per file descriptor.

The second type of hints tells the OS whether the specified data will
or will not be used. These hints are implemented via a new VOP_ADVISE().
A default implementation is provided which does nothing for the WILLNEED
request and attempts to move any clean pages to the cache page queue for
the DONTNEED request. This latter case required two other changes.

First, a new V_CLEANONLY flag was added to vinvalbuf(). This requests
vinvalbuf() to only flush clean buffers for the vnode from the buffer
cache and to not remove any backing pages from the vnode. This is used
to ensure clean pages are not wired into the buffer cache before
attempting to move them to the cache page queue.

Second, a new vm_object_page_cache() method was added. It is somewhat
similar to vm_object_page_remove() except that instead of freeing each
page in the specified range, it attempts to move clean pages to the
cache queue if possible.

To preserve the ABI of struct file, the f_cdevpriv pointer is now
reused in a union to point to the currently active advice region, if
one is present, for regular files.

Reviewed by:	jilles, kib, arch@
Approved by:	re (kib)
MFC after:	1 month
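
For illustration, a minimal userland use of the new call might look like
the sketch below (the file name is invented; per POSIX, failure is
reported via the return value rather than errno, and a len of 0 applies
the hint from offset to the end of the file):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int error, fd;

	fd = open("/tmp/example.dat", O_RDONLY);
	if (fd == -1)
		return (1);

	/* Hint that the file will be read sequentially from the start. */
	error = posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
	if (error != 0)
		fprintf(stderr, "posix_fadvise: error %d\n", error);

	/* ... read through the file ... */

	/* Hint that the cached data will not be needed again. */
	(void)posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
	close(fd);
	return (0);
}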
Diffstat (limited to 'sys')
-rw-r--r--sys/compat/freebsd32/freebsd32_misc.c13
-rw-r--r--sys/compat/freebsd32/syscalls.master5
-rw-r--r--sys/kern/syscalls.master3
-rw-r--r--sys/kern/vfs_default.c53
-rw-r--r--sys/kern/vfs_subr.c7
-rw-r--r--sys/kern/vfs_syscalls.c134
-rw-r--r--sys/kern/vfs_vnops.c76
-rw-r--r--sys/kern/vnode_if.src9
-rw-r--r--sys/sys/fcntl.h11
-rw-r--r--sys/sys/file.h15
-rw-r--r--sys/sys/param.h2
-rw-r--r--sys/sys/vnode.h2
-rw-r--r--sys/vm/vm_object.c54
-rw-r--r--sys/vm/vm_object.h2
14 files changed, 364 insertions, 22 deletions
diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c
index fc2932b..83ee962 100644
--- a/sys/compat/freebsd32/freebsd32_misc.c
+++ b/sys/compat/freebsd32/freebsd32_misc.c
@@ -2835,3 +2835,16 @@ freebsd32_posix_fallocate(struct thread *td,
ap.len = PAIR32TO64(off_t, uap->len);
return (sys_posix_fallocate(td, &ap));
}
+
+int
+freebsd32_posix_fadvise(struct thread *td,
+ struct freebsd32_posix_fadvise_args *uap)
+{
+ struct posix_fadvise_args ap;
+
+ ap.fd = uap->fd;
+ ap.offset = PAIR32TO64(off_t, uap->offset);
+ ap.len = PAIR32TO64(off_t, uap->len);
+ ap.advice = uap->advice;
+ return (sys_posix_fadvise(td, &ap));
+}
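
The thunk above reassembles each 64-bit off_t argument from the two
32-bit halves that 32-bit userland passes. Conceptually it performs
something like the sketch below (illustrative only; the real
PAIR32TO64() macro in freebsd32.h also handles big-endian word order):

static inline off_t
pair32to64(uint32_t lo, uint32_t hi)
{

	/* Combine the low and high 32-bit words into one 64-bit value. */
	return ((off_t)hi << 32 | lo);
}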
diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master
index 9428b6c..cb22f8b 100644
--- a/sys/compat/freebsd32/syscalls.master
+++ b/sys/compat/freebsd32/syscalls.master
@@ -991,4 +991,7 @@
530 AUE_NULL STD { int freebsd32_posix_fallocate(int fd,\
uint32_t offset1, uint32_t offset2,\
uint32_t len1, uint32_t len2); }
-531 AUE_NULL UNIMPL posix_fadvise
+531 AUE_NULL STD { int freebsd32_posix_fadvise(int fd, \
+ uint32_t offset1, uint32_t offset2,\
+ uint32_t len1, uint32_t len2, \
+ int advice); }
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index b79c6c7..8188ccd 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -947,6 +947,7 @@
size_t outbuflen); }
530 AUE_NULL STD { int posix_fallocate(int fd, \
off_t offset, off_t len); }
-531 AUE_NULL UNIMPL posix_fadvise
+531 AUE_NULL STD { int posix_fadvise(int fd, off_t offset, \
+ off_t len, int advice); }
; Please copy any additions and changes to the following compatibility tables:
; sys/compat/freebsd32/syscalls.master
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index b89d990..e9f8151 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -96,6 +96,7 @@ struct vop_vector default_vnodeops = {
.vop_access = vop_stdaccess,
.vop_accessx = vop_stdaccessx,
+ .vop_advise = vop_stdadvise,
.vop_advlock = vop_stdadvlock,
.vop_advlockasync = vop_stdadvlockasync,
.vop_advlockpurge = vop_stdadvlockpurge,
@@ -984,6 +985,58 @@ vop_stdallocate(struct vop_allocate_args *ap)
return (error);
}
+int
+vop_stdadvise(struct vop_advise_args *ap)
+{
+ struct vnode *vp;
+ off_t start, end;
+ int error, vfslocked;
+
+ vp = ap->a_vp;
+ switch (ap->a_advice) {
+ case POSIX_FADV_WILLNEED:
+ /*
+ * Do nothing for now. Filesystems should provide a
+ * custom method which starts an asynchronous read of
+ * the requested region.
+ */
+ error = 0;
+ break;
+ case POSIX_FADV_DONTNEED:
+ /*
+ * Flush any open FS buffers and then remove pages
+ * from the backing VM object. Using vinvalbuf() here
+ * is a bit heavy-handed as it flushes all buffers for
+ * the given vnode, not just the buffers covering the
+ * requested range.
+ */
+ error = 0;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ if (vp->v_iflag & VI_DOOMED) {
+ VOP_UNLOCK(vp, 0);
+ VFS_UNLOCK_GIANT(vfslocked);
+ break;
+ }
+ vinvalbuf(vp, V_CLEANONLY, 0, 0);
+ if (vp->v_object != NULL) {
+ start = trunc_page(ap->a_start);
+ end = round_page(ap->a_end);
+ VM_OBJECT_LOCK(vp->v_object);
+ vm_object_page_cache(vp->v_object, OFF_TO_IDX(start),
+ OFF_TO_IDX(end));
+ VM_OBJECT_UNLOCK(vp->v_object);
+ }
+ VOP_UNLOCK(vp, 0);
+ VFS_UNLOCK_GIANT(vfslocked);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ return (error);
+}
+
/*
* vfs default ops
* used to fill the vfs function table to get reasonable default return values.
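
As the comment in vop_stdadvise() notes, a filesystem that can start
asynchronous read-ahead may install its own method and fall back to the
default for everything else. A hypothetical sketch (foofs_advise() and
foofs_vnodeops are invented names, not part of this commit):

static int
foofs_advise(struct vop_advise_args *ap)
{

	if (ap->a_advice == POSIX_FADV_WILLNEED) {
		/* Start asynchronous reads of [a_start, a_end] here. */
		return (0);
	}
	/* Defer DONTNEED and error handling to the default method. */
	return (vop_stdadvise(ap));
}

struct vop_vector foofs_vnodeops = {
	.vop_default =	&default_vnodeops,
	.vop_advise =	foofs_advise,
	/* ... remaining methods ... */
};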
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 10d3ad2..2872f77 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1191,7 +1191,7 @@ bufobj_invalbuf(struct bufobj *bo, int flags, int slpflag, int slptimeo)
do {
error = flushbuflist(&bo->bo_clean,
flags, bo, slpflag, slptimeo);
- if (error == 0)
+ if (error == 0 && !(flags & V_CLEANONLY))
error = flushbuflist(&bo->bo_dirty,
flags, bo, slpflag, slptimeo);
if (error != 0 && error != EAGAIN) {
@@ -1220,7 +1220,8 @@ bufobj_invalbuf(struct bufobj *bo, int flags, int slpflag, int slptimeo)
/*
* Destroy the copy in the VM cache, too.
*/
- if (bo->bo_object != NULL && (flags & (V_ALT | V_NORMAL)) == 0) {
+ if (bo->bo_object != NULL &&
+ (flags & (V_ALT | V_NORMAL | V_CLEANONLY)) == 0) {
VM_OBJECT_LOCK(bo->bo_object);
vm_object_page_remove(bo->bo_object, 0, 0, (flags & V_SAVE) ?
OBJPR_CLEANONLY : 0);
@@ -1229,7 +1230,7 @@ bufobj_invalbuf(struct bufobj *bo, int flags, int slpflag, int slptimeo)
#ifdef INVARIANTS
BO_LOCK(bo);
- if ((flags & (V_ALT | V_NORMAL)) == 0 &&
+ if ((flags & (V_ALT | V_NORMAL | V_CLEANONLY)) == 0 &&
(bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0))
panic("vinvalbuf: flush failed");
BO_UNLOCK(bo);
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index ec5ad06..e886079 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -86,6 +86,8 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/uma.h>
+static MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
+
SDT_PROVIDER_DEFINE(vfs);
SDT_PROBE_DEFINE(vfs, , stat, mode, mode);
SDT_PROBE_ARGTYPE(vfs, , stat, mode, 0, "char *");
@@ -4845,3 +4847,135 @@ sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len));
}
+
+/*
+ * Unlike madvise(2), we do not make a best effort to remember every
+ * possible caching hint. Instead, we remember the last setting with
+ * the exception that we will allow POSIX_FADV_NORMAL to adjust the
+ * region of any current setting.
+ */
+int
+sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
+{
+ struct fadvise_info *fa, *new;
+ struct file *fp;
+ struct vnode *vp;
+ off_t end;
+ int error;
+
+ if (uap->offset < 0 || uap->len < 0 ||
+ uap->offset > OFF_MAX - uap->len)
+ return (EINVAL);
+ switch (uap->advice) {
+ case POSIX_FADV_SEQUENTIAL:
+ case POSIX_FADV_RANDOM:
+ case POSIX_FADV_NOREUSE:
+ new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
+ break;
+ case POSIX_FADV_NORMAL:
+ case POSIX_FADV_WILLNEED:
+ case POSIX_FADV_DONTNEED:
+ new = NULL;
+ break;
+ default:
+ return (EINVAL);
+ }
+ /* XXX: CAP_POSIX_FADVISE? */
+ error = fget(td, uap->fd, 0, &fp);
+ if (error != 0)
+ goto out;
+
+ switch (fp->f_type) {
+ case DTYPE_VNODE:
+ break;
+ case DTYPE_PIPE:
+ case DTYPE_FIFO:
+ error = ESPIPE;
+ goto out;
+ default:
+ error = ENODEV;
+ goto out;
+ }
+ vp = fp->f_vnode;
+ if (vp->v_type != VREG) {
+ error = ENODEV;
+ goto out;
+ }
+ if (uap->len == 0)
+ end = OFF_MAX;
+ else
+ end = uap->offset + uap->len - 1;
+ switch (uap->advice) {
+ case POSIX_FADV_SEQUENTIAL:
+ case POSIX_FADV_RANDOM:
+ case POSIX_FADV_NOREUSE:
+ /*
+ * Try to merge any existing non-standard region with
+ * this new region if possible, otherwise create a new
+ * non-standard region for this request.
+ */
+ mtx_pool_lock(mtxpool_sleep, fp);
+ fa = fp->f_advice;
+ if (fa != NULL && fa->fa_advice == uap->advice &&
+ ((fa->fa_start <= end && fa->fa_end >= uap->offset) ||
+ (end != OFF_MAX && fa->fa_start == end + 1) ||
+ (fa->fa_end != OFF_MAX && fa->fa_end + 1 == uap->offset))) {
+ if (uap->offset < fa->fa_start)
+ fa->fa_start = uap->offset;
+ if (end > fa->fa_end)
+ fa->fa_end = end;
+ } else {
+ new->fa_advice = uap->advice;
+ new->fa_start = uap->offset;
+ new->fa_end = end;
+ fp->f_advice = new;
+ new = fa;
+ }
+ mtx_pool_unlock(mtxpool_sleep, fp);
+ break;
+ case POSIX_FADV_NORMAL:
+ /*
+	 * If the "normal" region overlaps with an existing
+ * non-standard region, trim or remove the
+ * non-standard region.
+ */
+ mtx_pool_lock(mtxpool_sleep, fp);
+ fa = fp->f_advice;
+ if (fa != NULL) {
+ if (uap->offset <= fa->fa_start &&
+ end >= fa->fa_end) {
+ new = fa;
+ fp->f_advice = NULL;
+ } else if (uap->offset <= fa->fa_start &&
+ end >= fa->fa_start)
+ fa->fa_start = end + 1;
+ else if (uap->offset <= fa->fa_end &&
+ end >= fa->fa_end)
+ fa->fa_end = uap->offset - 1;
+ else if (uap->offset >= fa->fa_start &&
+ end <= fa->fa_end) {
+ /*
+ * If the "normal" region is a middle
+ * portion of the existing
+ * non-standard region, just remove
+ * the whole thing rather than picking
+ * one side or the other to
+ * preserve.
+ */
+ new = fa;
+ fp->f_advice = NULL;
+ }
+ }
+ mtx_pool_unlock(mtxpool_sleep, fp);
+ break;
+ case POSIX_FADV_WILLNEED:
+ case POSIX_FADV_DONTNEED:
+ error = VOP_ADVISE(vp, uap->offset, end, uap->advice);
+ break;
+ }
+out:
+ if (fp != NULL)
+ fdrop(fp, td);
+ free(new, M_FADVISE);
+ return (error);
+}
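
The merge test in the SEQUENTIAL/RANDOM/NOREUSE case accepts regions
that overlap or are directly adjacent, with the OFF_MAX comparisons
guarding the "+ 1" arithmetic against off_t overflow. The same predicate
restated standalone (a sketch; struct region is invented here, and
OFF_MAX comes from <limits.h> on FreeBSD):

#include <sys/types.h>
#include <limits.h>
#include <stdbool.h>

struct region {
	off_t start, end;	/* Inclusive range [start, end]. */
};

/* Returns true if the existing region "a" can absorb request "b". */
static bool
regions_mergeable(struct region a, struct region b)
{

	/* The two ranges overlap... */
	if (a.start <= b.end && a.end >= b.start)
		return (true);
	/* ...or "a" starts immediately after "b" ends... */
	if (b.end != OFF_MAX && a.start == b.end + 1)
		return (true);
	/* ...or "a" ends immediately before "b" starts. */
	if (a.end != OFF_MAX && a.end + 1 == b.start)
		return (true);
	return (false);
}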
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 17dc5e7..e33592a 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -518,7 +518,7 @@ vn_read(fp, uio, active_cred, flags, td)
struct vnode *vp;
int error, ioflag;
struct mtx *mtxp;
- int vfslocked;
+ int advice, vfslocked;
KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
uio->uio_td, td));
@@ -529,27 +529,48 @@ vn_read(fp, uio, active_cred, flags, td)
ioflag |= IO_NDELAY;
if (fp->f_flag & O_DIRECT)
ioflag |= IO_DIRECT;
+ advice = POSIX_FADV_NORMAL;
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
/*
* According to McKusick the vn lock was protecting f_offset here.
* It is now protected by the FOFFSET_LOCKED flag.
*/
- if ((flags & FOF_OFFSET) == 0) {
+ if ((flags & FOF_OFFSET) == 0 || fp->f_advice != NULL) {
mtxp = mtx_pool_find(mtxpool_sleep, fp);
mtx_lock(mtxp);
- while(fp->f_vnread_flags & FOFFSET_LOCKED) {
- fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
- msleep(&fp->f_vnread_flags, mtxp, PUSER -1,
- "vnread offlock", 0);
+ if ((flags & FOF_OFFSET) == 0) {
+ while (fp->f_vnread_flags & FOFFSET_LOCKED) {
+ fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
+ msleep(&fp->f_vnread_flags, mtxp, PUSER -1,
+ "vnread offlock", 0);
+ }
+ fp->f_vnread_flags |= FOFFSET_LOCKED;
+ uio->uio_offset = fp->f_offset;
}
- fp->f_vnread_flags |= FOFFSET_LOCKED;
+ if (fp->f_advice != NULL &&
+ uio->uio_offset >= fp->f_advice->fa_start &&
+ uio->uio_offset + uio->uio_resid <= fp->f_advice->fa_end)
+ advice = fp->f_advice->fa_advice;
mtx_unlock(mtxp);
- vn_lock(vp, LK_SHARED | LK_RETRY);
- uio->uio_offset = fp->f_offset;
- } else
- vn_lock(vp, LK_SHARED | LK_RETRY);
+ }
+ vn_lock(vp, LK_SHARED | LK_RETRY);
- ioflag |= sequential_heuristic(uio, fp);
+ switch (advice) {
+ case POSIX_FADV_NORMAL:
+ case POSIX_FADV_SEQUENTIAL:
+ ioflag |= sequential_heuristic(uio, fp);
+ break;
+ case POSIX_FADV_RANDOM:
+ /* Disable read-ahead for random I/O. */
+ break;
+ case POSIX_FADV_NOREUSE:
+ /*
+ * Request the underlying FS to discard the buffers
+ * and pages after the I/O is complete.
+ */
+ ioflag |= IO_DIRECT;
+ break;
+ }
#ifdef MAC
error = mac_vnode_check_read(active_cred, fp->f_cred, vp);
@@ -584,7 +605,8 @@ vn_write(fp, uio, active_cred, flags, td)
struct vnode *vp;
struct mount *mp;
int error, ioflag, lock_flags;
- int vfslocked;
+ struct mtx *mtxp;
+ int advice, vfslocked;
KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
uio->uio_td, td));
@@ -618,7 +640,33 @@ vn_write(fp, uio, active_cred, flags, td)
vn_lock(vp, lock_flags | LK_RETRY);
if ((flags & FOF_OFFSET) == 0)
uio->uio_offset = fp->f_offset;
- ioflag |= sequential_heuristic(uio, fp);
+ advice = POSIX_FADV_NORMAL;
+ if (fp->f_advice != NULL) {
+ mtxp = mtx_pool_find(mtxpool_sleep, fp);
+ mtx_lock(mtxp);
+ if (fp->f_advice != NULL &&
+ uio->uio_offset >= fp->f_advice->fa_start &&
+ uio->uio_offset + uio->uio_resid <= fp->f_advice->fa_end)
+ advice = fp->f_advice->fa_advice;
+ mtx_unlock(mtxp);
+ }
+ switch (advice) {
+ case POSIX_FADV_NORMAL:
+ case POSIX_FADV_SEQUENTIAL:
+ ioflag |= sequential_heuristic(uio, fp);
+ break;
+ case POSIX_FADV_RANDOM:
+ /* XXX: Is this correct? */
+ break;
+ case POSIX_FADV_NOREUSE:
+ /*
+ * Request the underlying FS to discard the buffers
+ * and pages after the I/O is complete.
+ */
+ ioflag |= IO_DIRECT;
+ break;
+ }
+
#ifdef MAC
error = mac_vnode_check_write(active_cred, fp->f_cred, vp);
if (error == 0)
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index f75e7df..6f24d17 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -628,3 +628,12 @@ vop_allocate {
INOUT off_t *offset;
INOUT off_t *len;
};
+
+%% advise vp U U U
+
+vop_advise {
+ IN struct vnode *vp;
+ IN off_t start;
+ IN off_t end;
+ IN int advice;
+};
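
This entry is compiled into the generated vnode_if.h, producing a
VOP_ADVISE() wrapper that dispatches through the vnode's vop_vector;
sys_posix_fadvise() above calls it like any other vnode operation:

	error = VOP_ADVISE(vp, uap->offset, end, uap->advice);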
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
index 28a66d0..29b2a0c 100644
--- a/sys/sys/fcntl.h
+++ b/sys/sys/fcntl.h
@@ -277,9 +277,17 @@ struct oflock {
#define LOCK_UN 0x08 /* unlock file */
#endif
+#if __POSIX_VISIBLE >= 200112
/*
- * XXX missing posix_fadvise() and POSIX_FADV_* macros.
+ * Advice to posix_fadvise
*/
+#define POSIX_FADV_NORMAL 0 /* no special treatment */
+#define POSIX_FADV_RANDOM 1 /* expect random page references */
+#define POSIX_FADV_SEQUENTIAL 2 /* expect sequential page references */
+#define POSIX_FADV_WILLNEED 3 /* will need these pages */
+#define POSIX_FADV_DONTNEED 4 /* don't need these pages */
+#define POSIX_FADV_NOREUSE 5 /* access data only once */
+#endif
#ifndef _KERNEL
__BEGIN_DECLS
@@ -293,6 +301,7 @@ int flock(int, int);
int openat(int, const char *, int, ...);
#endif
#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200112
+int posix_fadvise(int, off_t, off_t, int);
int posix_fallocate(int, off_t, off_t);
#endif
__END_DECLS
diff --git a/sys/sys/file.h b/sys/sys/file.h
index 57e7047..99889ba 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -122,6 +122,12 @@ struct fileops {
* none not locked
*/
+struct fadvise_info {
+ int fa_advice; /* (f) FADV_* type. */
+ off_t fa_start; /* (f) Region start. */
+ off_t fa_end; /* (f) Region end. */
+};
+
struct file {
void *f_data; /* file descriptor specific data */
struct fileops *f_ops; /* File operations */
@@ -136,7 +142,11 @@ struct file {
*/
int f_seqcount; /* Count of sequential accesses. */
off_t f_nextoff; /* next expected read/write offset. */
- struct cdev_privdata *f_cdevpriv; /* (d) Private data for the cdev. */
+ union {
+ struct cdev_privdata *fvn_cdevpriv;
+ /* (d) Private data for the cdev. */
+ struct fadvise_info *fvn_advice;
+ } f_vnun;
/*
* DFLAG_SEEKABLE specific fields
*/
@@ -147,6 +157,9 @@ struct file {
void *f_label; /* Place-holder for MAC label. */
};
+#define f_cdevpriv f_vnun.fvn_cdevpriv
+#define f_advice f_vnun.fvn_advice
+
#define FOFFSET_LOCKED 0x1
#define FOFFSET_LOCK_WAITING 0x2
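
Because both union members are pointers, f_vnun occupies exactly the
slot the old f_cdevpriv pointer did, so the size and layout of struct
file are unchanged, and the #defines keep existing cdev code compiling
as-is. One way to assert that assumption at compile time (illustrative
only, not part of the commit):

CTASSERT(sizeof(((struct file *)0)->f_vnun) == sizeof(void *));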
diff --git a/sys/sys/param.h b/sys/sys/param.h
index 57ff3a3..893061a 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -58,7 +58,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1000000 /* Master, propagated to newvers */
+#define __FreeBSD_version 1000001 /* Master, propagated to newvers */
#ifdef _KERNEL
#define P_OSREL_SIGWAIT 700000
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 4cb6633..7382336 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -384,6 +384,7 @@ extern int vttoif_tab[];
#define V_SAVE 0x0001 /* vinvalbuf: sync file first */
#define V_ALT 0x0002 /* vinvalbuf: invalidate only alternate bufs */
#define V_NORMAL 0x0004 /* vinvalbuf: invalidate only regular bufs */
+#define V_CLEANONLY 0x0008 /* vinvalbuf: invalidate only clean bufs */
#define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */
#define V_WAIT 0x0001 /* vn_start_write: sleep for suspend */
#define V_NOWAIT 0x0002 /* vn_start_write: don't sleep for suspend */
@@ -685,6 +686,7 @@ int vop_stdunlock(struct vop_unlock_args *);
int vop_nopoll(struct vop_poll_args *);
int vop_stdaccess(struct vop_access_args *ap);
int vop_stdaccessx(struct vop_accessx_args *ap);
+int vop_stdadvise(struct vop_advise_args *ap);
int vop_stdadvlock(struct vop_advlock_args *ap);
int vop_stdadvlockasync(struct vop_advlockasync_args *ap);
int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap);
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 3de793b..600dea8 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -1863,6 +1863,60 @@ skipmemq:
}
/*
+ * vm_object_page_cache:
+ *
+ * For the given object, attempt to move the specified clean
+ * pages to the cache queue. If a page is wired for any reason,
+ * then it will not be changed. Pages are specified by the given
+ * range ["start", "end"). As a special case, if "end" is zero,
+ * then the range extends from "start" to the end of the object.
+ * Any mappings to the specified pages are removed before the
+ * pages are moved to the cache queue.
+ *
+ * This operation should only be performed on objects that
+ * contain managed pages.
+ *
+ * The object must be locked.
+ */
+void
+vm_object_page_cache(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
+{
+ struct mtx *mtx, *new_mtx;
+ vm_page_t p, next;
+
+ VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
+ KASSERT((object->type != OBJT_DEVICE && object->type != OBJT_SG &&
+ object->type != OBJT_PHYS),
+ ("vm_object_page_cache: illegal object %p", object));
+ if (object->resident_page_count == 0)
+ return;
+ p = vm_page_find_least(object, start);
+
+ /*
+ * Here, the variable "p" is either (1) the page with the least pindex
+ * greater than or equal to the parameter "start" or (2) NULL.
+ */
+ mtx = NULL;
+ for (; p != NULL && (p->pindex < end || end == 0); p = next) {
+ next = TAILQ_NEXT(p, listq);
+
+ /*
+ * Avoid releasing and reacquiring the same page lock.
+ */
+ new_mtx = vm_page_lockptr(p);
+ if (mtx != new_mtx) {
+ if (mtx != NULL)
+ mtx_unlock(mtx);
+ mtx = new_mtx;
+ mtx_lock(mtx);
+ }
+ vm_page_try_to_cache(p);
+ }
+ if (mtx != NULL)
+ mtx_unlock(mtx);
+}
+
+/*
* Populate the specified range of the object with valid pages. Returns
* TRUE if the range is successfully populated and FALSE otherwise.
*
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index a11f144..0c13786 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -223,6 +223,8 @@ void vm_object_destroy (vm_object_t);
void vm_object_terminate (vm_object_t);
void vm_object_set_writeable_dirty (vm_object_t);
void vm_object_init (void);
+void vm_object_page_cache(vm_object_t object, vm_pindex_t start,
+ vm_pindex_t end);
void vm_object_page_clean(vm_object_t object, vm_ooffset_t start,
vm_ooffset_t end, int flags);
void vm_object_page_remove(vm_object_t object, vm_pindex_t start,