From e7be7a0432de3e374a6d4cfedc0ef5c8b264a021 Mon Sep 17 00:00:00 2001
From: dillon
Date: Mon, 20 Jan 2003 17:46:48 +0000
Subject: Close the remaining user address mapping races for physical I/O,
 CAM, and AIO. Still TODO: streamline useracc() checks.

Reviewed by:	alc, tegge
MFC after:	7 days
---
 sys/cam/cam_periph.c   | 26 +++++++++++++++++++++++---
 sys/kern/kern_physio.c | 12 +++++++++++-
 sys/kern/vfs_aio.c     | 15 +++++++++++++--
 sys/kern/vfs_bio.c     | 32 ++++++++++++++++++++++++++------
 sys/sys/buf.h          |  2 +-
 sys/vm/vm_glue.c       | 12 ++++++++++++
 sys/vm/vm_map.c        | 11 ++++++++---
 7 files changed, 94 insertions(+), 16 deletions(-)

diff --git a/sys/cam/cam_periph.c b/sys/cam/cam_periph.c
index 5bab8fb..247ebb8 100644
--- a/sys/cam/cam_periph.c
+++ b/sys/cam/cam_periph.c
@@ -534,7 +534,7 @@ cam_periph_unlock(struct cam_periph *periph)
 int
 cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo)
 {
-	int numbufs, i;
+	int numbufs, i, j;
 	int flags[CAM_PERIPH_MAXMAPS];
 	u_int8_t **data_ptrs[CAM_PERIPH_MAXMAPS];
 	u_int32_t lengths[CAM_PERIPH_MAXMAPS];
@@ -659,8 +659,28 @@ cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo)
 		/* set the direction */
 		mapinfo->bp[i]->b_iocmd = flags[i];
 
-		/* map the buffer into kernel memory */
-		vmapbuf(mapinfo->bp[i]);
+		/*
+		 * Map the buffer into kernel memory.
+		 *
+		 * Note that useracc() alone is not a sufficient test.
+		 * vmapbuf() can still fail due to a smaller file mapped
+		 * into a larger area of VM, or if userland races against
+		 * vmapbuf() after the useracc() check.
+		 */
+		if (vmapbuf(mapinfo->bp[i]) < 0) {
+			printf("cam_periph_mapmem: error, "
+			    "address %p, length %lu isn't "
+			    "user accessible any more\n",
+			    (void *)*data_ptrs[i],
+			    (u_long)lengths[i]);
+			for (j = 0; j < i; ++j) {
+				*data_ptrs[j] = mapinfo->bp[j]->b_saveaddr;
+				mapinfo->bp[j]->b_flags &= ~B_PHYS;
+				relpbuf(mapinfo->bp[j], NULL);
+			}
+			PRELE(curproc);
+			return(EACCES);
+		}
 
 		/* set our pointer to the new mapped area */
 		*data_ptrs[i] = mapinfo->bp[i]->b_data;
diff --git a/sys/kern/kern_physio.c b/sys/kern/kern_physio.c
index f61b55c..01e3750 100644
--- a/sys/kern/kern_physio.c
+++ b/sys/kern/kern_physio.c
@@ -95,13 +95,23 @@ physio(dev_t dev, struct uio *uio, int ioflag)
 			bp->b_blkno = btodb(bp->b_offset);
 
 			if (uio->uio_segflg == UIO_USERSPACE) {
+				/*
+				 * Note that useracc() alone is not a
+				 * sufficient test. vmapbuf() can still fail
+				 * due to a smaller file mapped into a larger
+				 * area of VM, or if userland races against
+				 * vmapbuf() after the useracc() check.
+				 */
 				if (!useracc(bp->b_data, bp->b_bufsize,
 				    bp->b_iocmd == BIO_READ ?
 				    VM_PROT_WRITE : VM_PROT_READ)) {
 					error = EFAULT;
 					goto doerror;
 				}
-				vmapbuf(bp);
+				if (vmapbuf(bp) < 0) {
+					error = EFAULT;
+					goto doerror;
+				}
 			}
 
 			DEV_STRATEGY(bp);
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index bffdf71..9532ff2 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -1124,8 +1124,19 @@ aio_qphysio(struct proc *p, struct aiocblist *aiocbe)
 		}
 	}
 
-	/* Bring buffer into kernel space. */
-	vmapbuf(bp);
+	/*
+	 * Bring buffer into kernel space.
+	 *
+	 * Note that useracc() alone is not a
+	 * sufficient test. vmapbuf() can still fail
+	 * due to a smaller file mapped into a larger
+	 * area of VM, or if userland races against
+	 * vmapbuf() after the useracc() check.
+	 */
+	if (vmapbuf(bp) < 0) {
+		error = EFAULT;
+		goto doerror;
+	}
 
 	s = splbio();
 	aiocbe->bp = bp;
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index b8a156c..9de4fd1 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -3547,13 +3547,18 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
  * All requests are (re)mapped into kernel VA space.
  * Notice that we use b_bufsize for the size of the buffer
  * to be mapped. b_bcount might be modified by the driver.
+ *
+ * Note that even if the caller determines that the address space should
+ * be valid, a race or a smaller file mapped into a larger space may
+ * actually cause vmapbuf() to fail, so all callers of vmapbuf() MUST
+ * check the return value.
  */
-void
+int
 vmapbuf(struct buf *bp)
 {
 	caddr_t addr, kva;
 	vm_offset_t pa;
-	int pidx;
+	int pidx, i;
 	struct vm_page *m;
 	struct pmap *pmap = &curproc->p_vmspace->vm_pmap;
 
@@ -3573,11 +3578,25 @@ vmapbuf(struct buf *bp)
 	 * the userland address space, and kextract is only guaranteed
 	 * to work for the kernland address space (see: sparc64 port).
 	 */
-		vm_fault_quick((addr >= bp->b_data) ? addr : bp->b_data,
-			(bp->b_iocmd == BIO_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
+retry:
+		i = vm_fault_quick((addr >= bp->b_data) ? addr : bp->b_data,
+			(bp->b_iocmd == BIO_READ) ?
+			(VM_PROT_READ|VM_PROT_WRITE) : VM_PROT_READ);
+		if (i < 0) {
+			printf("vmapbuf: warning, bad user address during I/O\n");
+			vm_page_lock_queues();
+			for (i = 0; i < pidx; ++i) {
+				vm_page_unhold(bp->b_pages[i]);
+				bp->b_pages[i] = NULL;
+			}
+			vm_page_unlock_queues();
+			return(-1);
+		}
 		pa = trunc_page(pmap_extract(pmap, (vm_offset_t) addr));
-		if (pa == 0)
-			panic("vmapbuf: page not present");
+		if (pa == 0) {
+			printf("vmapbuf: warning, race against user address during I/O\n");
+			goto retry;
+		}
 		m = PHYS_TO_VM_PAGE(pa);
 		vm_page_lock_queues();
 		vm_page_hold(m);
@@ -3592,6 +3611,7 @@ vmapbuf(struct buf *bp)
 	bp->b_npages = pidx;
 	bp->b_saveaddr = bp->b_data;
 	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
+	return(0);
 }
 
 /*
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 6764ca0..f6bfc21 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -499,7 +499,7 @@ void	vfs_bio_clrbuf(struct buf *);
 void	vfs_busy_pages(struct buf *, int clear_modify);
 void	vfs_unbusy_pages(struct buf *);
 void	vwakeup(struct buf *);
-void	vmapbuf(struct buf *);
+int	vmapbuf(struct buf *);
 void	vunmapbuf(struct buf *);
 void	relpbuf(struct buf *, int *);
 void	brelvp(struct buf *);
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 81451c1..92e1005 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -121,6 +121,12 @@ static void vm_proc_swapout(struct proc *p);
 
 /*
  * MPSAFE
+ *
+ * WARNING! This code calls vm_map_check_protection() which only checks
+ * the associated vm_map_entry range. It does not determine whether the
+ * contents of the memory are actually readable or writable. In most cases
+ * just checking the vm_map_entry is sufficient within the kernel's address
+ * space.
  */
 int
 kernacc(addr, len, rw)
@@ -142,6 +148,12 @@ kernacc(addr, len, rw)
 
 /*
  * MPSAFE
+ *
+ * WARNING! This code calls vm_map_check_protection() which only checks
+ * the associated vm_map_entry range. It does not determine whether the
+ * contents of the memory are actually readable or writable. vmapbuf(),
+ * vm_fault_quick(), or copyin()/copyout()/su*()/fu*() functions should be
+ * used in conjunction with this call.
 */
 int
 useracc(addr, len, rw)
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 6c53b7f..f1f5b51 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -2179,9 +2179,14 @@ vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
 /*
  * vm_map_check_protection:
  *
- *	Assert that the target map allows the specified
- *	privilege on the entire address region given.
- *	The entire region must be allocated.
+ *	Assert that the target map allows the specified privilege on the
+ *	entire address region given. The entire region must be allocated.
+ *
+ *	WARNING! This code does not and should not check whether the
+ *	contents of the region are accessible. For example, a smaller file
+ *	might be mapped into a larger address space.
+ *
+ *	NOTE! This code is also called by munmap().
 */
 boolean_t
 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
-- 
cgit v1.1
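
The cam_periph_mapmem() hunk above shows the shape of the fix: the
useracc() check is advisory only, so the mapping step itself must be
checked, and a failure part way through a multi-buffer mapping must
unwind every buffer mapped so far. Below is a minimal userspace sketch
of that check-and-unwind pattern; map_one() and unmap_one() are
hypothetical stand-ins for vmapbuf() and the relpbuf() unwind, not
kernel interfaces.

#include <errno.h>
#include <stdio.h>

#define MAXMAPS	3

/* Hypothetical: 0 on success, -1 if the range became unmappable. */
static int
map_one(int idx)
{
	return (idx < 2 ? 0 : -1);	/* simulate a race on buffer 2 */
}

static void
unmap_one(int idx)
{
	printf("unmapped buffer %d\n", idx);
}

static int
map_all(int numbufs)
{
	int i, j;

	for (i = 0; i < numbufs; i++) {
		/*
		 * An earlier useracc()-style check may have passed, but
		 * userland can unmap or shrink the region in between, so
		 * the mapping call itself is the authoritative test.
		 */
		if (map_one(i) < 0) {
			/* Unwind: release everything mapped so far. */
			for (j = 0; j < i; j++)
				unmap_one(j);
			return (EACCES);
		}
	}
	return (0);
}

int
main(void)
{
	int error;

	error = map_all(MAXMAPS);
	if (error)
		fprintf(stderr, "map_all failed: %d\n", error);
	return (error != 0);
}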
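
The vmapbuf() hunk handles the second race differently: if
pmap_extract() finds no page even though vm_fault_quick() succeeded,
userland raced us between the fault and the lookup, and the fault is
simply retried; only a failed fault unwinds the held pages and returns
an error. Here is a sketch of that retry loop under the same caveat;
fault_page() and lookup_page() are hypothetical stand-ins, not the
real kernel calls.

#include <stdio.h>

static int attempts;

/* Hypothetical: 0 on success, -1 if the address is genuinely bad. */
static int
fault_page(void)
{
	attempts++;
	return (0);
}

/* Hypothetical: physical address, or 0 if the mapping vanished again. */
static unsigned long
lookup_page(void)
{
	return (attempts < 2 ? 0 : 0x1000UL);	/* lose the first race */
}

static int
map_page(void)
{
	unsigned long pa;

retry:
	if (fault_page() < 0)
		return (-1);	/* bad address: caller must check this */
	pa = lookup_page();
	if (pa == 0) {
		/* Userland raced us between fault and lookup: try again. */
		printf("map_page: race detected, retrying\n");
		goto retry;
	}
	printf("map_page: mapped at %#lx after %d attempt(s)\n", pa, attempts);
	return (0);
}

int
main(void)
{
	return (map_page() < 0);
}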