author    alc <alc@FreeBSD.org>  2012-01-08 20:09:26 +0000
committer alc <alc@FreeBSD.org>  2012-01-08 20:09:26 +0000
commit    d27a56b062db860080f65f40097ac26e66b4a97b (patch)
tree      1c369d88e92873ad7b4400ceb38aefd95117ec81 /sys/kern/uipc_shm.c
parent    dc793ed733447dc4d157638ad2199e7761b39e7f (diff)
Correct an error of omission in the implementation of the truncation
operation on POSIX shared memory objects and tmpfs. Previously, neither
of these modules correctly handled the case in which the new size of the
object or file was not a multiple of the page size. Specifically, they
did not handle partial page truncation of data stored on swap. As a
result, stale data might later be returned to an application.

Interestingly, a data inconsistency was less likely to occur under tmpfs
than under POSIX shared memory objects, because a different mistake by
the tmpfs truncation operation helped to avoid it. If the data was still
resident in memory in a PG_CACHED page, then the tmpfs truncation
operation would reactivate that page, zero the truncated portion, and
leave the page pinned in memory. More precisely, the benevolent error
was that the truncation operation didn't add the reactivated page to any
of the paging queues, effectively pinning the page. This page would
remain pinned until the file was destroyed or the page was read or
written. With this change, the page is now added to the inactive queue.

Discussed with: jhb
Reviewed by:    kib (an earlier version)
MFC after:      3 weeks
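In userland terms, the failure mode described above is straightforward to
check for. What follows is a minimal sketch for FreeBSD; SHM_ANON, the
sizes, and the 0xAA fill pattern are illustrative choices, and note that
the original bug was only observable once the last page's data had
actually been written out to swap, which a short program like this
cannot force.

#include <sys/mman.h>

#include <err.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	long pgsz = sysconf(_SC_PAGESIZE);
	char *p;
	int fd;

	/* Anonymous POSIX shared memory object (FreeBSD-specific). */
	fd = shm_open(SHM_ANON, O_RDWR, 0600);
	if (fd == -1)
		err(1, "shm_open");
	if (ftruncate(fd, pgsz) == -1)
		err(1, "ftruncate");

	/* Dirty the whole page so it carries non-zero data. */
	p = mmap(NULL, pgsz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	memset(p, 0xAA, pgsz);
	munmap(p, pgsz);

	/*
	 * Shrink to a non-page-multiple size, then grow back.  The
	 * kernel must zero bytes [100, pgsz) of the last page, even
	 * when the page's only copy currently lives on swap.
	 */
	if (ftruncate(fd, 100) == -1 || ftruncate(fd, pgsz) == -1)
		err(1, "ftruncate");

	/* Every byte past offset 100 must now read back as zero. */
	p = mmap(NULL, pgsz, PROT_READ, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	for (long i = 100; i < pgsz; i++)
		if (p[i] != 0)
			errx(1, "stale byte at offset %ld", i);
	return (0);
}

Before this change, the final loop could observe the old 0xAA bytes after
the page made a round trip through swap; with the partial-page zeroing
added below, together with the vm_pager_page_unswapped() call that
discards the now-stale swap copy, every byte past the truncation point
reads as zero.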
Diffstat (limited to 'sys/kern/uipc_shm.c')
-rw-r--r--  sys/kern/uipc_shm.c  | 101
1 file changed, 54 insertions(+), 47 deletions(-)
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index 4d064b5..ce7c722 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -39,13 +39,6 @@
*
* (3) Resource limits? Does this need its own resource limits or are the
* existing limits in mmap(2) sufficient?
- *
- * (4) Partial page truncation. vnode_pager_setsize() will zero any parts
- * of a partially mapped page as a result of ftruncate(2)/truncate(2).
- * We can do the same (with the same pmap evil), but do we need to
- * worry about the bits on disk if the page is swapped out or will the
- * swapper zero the parts of a page that are invalid if the page is
- * swapped back in for us?
*/
#include <sys/cdefs.h>
@@ -86,6 +79,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
@@ -253,9 +247,10 @@ static int
shm_dotruncate(struct shmfd *shmfd, off_t length)
{
vm_object_t object;
- vm_page_t m;
- vm_pindex_t nobjsize;
+ vm_page_t m, ma[1];
+ vm_pindex_t idx, nobjsize;
vm_ooffset_t delta;
+ int base, rv;
object = shmfd->shm_object;
VM_OBJECT_LOCK(object);
@@ -275,6 +270,56 @@ shm_dotruncate(struct shmfd *shmfd, off_t length)
VM_OBJECT_UNLOCK(object);
return (EBUSY);
}
+
+ /*
+ * Zero the truncated part of the last page.
+ */
+ base = length & PAGE_MASK;
+ if (base != 0) {
+ idx = OFF_TO_IDX(length);
+retry:
+ m = vm_page_lookup(object, idx);
+ if (m != NULL) {
+ if ((m->oflags & VPO_BUSY) != 0 ||
+ m->busy != 0) {
+ vm_page_sleep(m, "shmtrc");
+ goto retry;
+ }
+ } else if (vm_pager_has_page(object, idx, NULL, NULL)) {
+ m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL);
+ if (m == NULL) {
+ VM_OBJECT_UNLOCK(object);
+ VM_WAIT;
+ VM_OBJECT_LOCK(object);
+ goto retry;
+ } else if (m->valid != VM_PAGE_BITS_ALL) {
+ ma[0] = m;
+ rv = vm_pager_get_pages(object, ma, 1,
+ 0);
+ m = vm_page_lookup(object, idx);
+ } else
+ /* A cached page was reactivated. */
+ rv = VM_PAGER_OK;
+ vm_page_lock(m);
+ if (rv == VM_PAGER_OK) {
+ vm_page_deactivate(m);
+ vm_page_unlock(m);
+ vm_page_wakeup(m);
+ } else {
+ vm_page_free(m);
+ vm_page_unlock(m);
+ VM_OBJECT_UNLOCK(object);
+ return (EIO);
+ }
+ }
+ if (m != NULL) {
+ pmap_zero_page_area(m, base, PAGE_SIZE - base);
+ KASSERT(m->valid == VM_PAGE_BITS_ALL,
+ ("shm_dotruncate: page %p is invalid", m));
+ vm_page_dirty(m);
+ vm_pager_page_unswapped(m);
+ }
+ }
delta = ptoa(object->size - nobjsize);
/* Toss in memory pages. */
@@ -289,45 +334,7 @@ shm_dotruncate(struct shmfd *shmfd, off_t length)
/* Free the swap accounted for shm */
swap_release_by_cred(delta, object->cred);
object->charge -= delta;
-
- /*
- * If the last page is partially mapped, then zero out
- * the garbage at the end of the page. See comments
- * in vnode_pager_setsize() for more details.
- *
- * XXXJHB: This handles in memory pages, but what about
- * a page swapped out to disk?
- */
- if ((length & PAGE_MASK) &&
- (m = vm_page_lookup(object, OFF_TO_IDX(length))) != NULL &&
- m->valid != 0) {
- int base = (int)length & PAGE_MASK;
- int size = PAGE_SIZE - base;
-
- pmap_zero_page_area(m, base, size);
-
- /*
- * Update the valid bits to reflect the blocks that
- * have been zeroed. Some of these valid bits may
- * have already been set.
- */
- vm_page_set_valid_range(m, base, size);
-
- /*
- * Round "base" to the next block boundary so that the
- * dirty bit for a partially zeroed block is not
- * cleared.
- */
- base = roundup2(base, DEV_BSIZE);
-
- vm_page_clear_dirty(m, base, PAGE_SIZE - base);
- } else if ((length & PAGE_MASK) &&
- __predict_false(object->cache != NULL)) {
- vm_page_cache_free(object, OFF_TO_IDX(length),
- nobjsize);
- }
} else {
-
/* Attempt to reserve the swap */
delta = ptoa(nobjsize - object->size);
if (!swap_reserve_by_cred(delta, object->cred)) {