From 73cf913d5f9c8b5932556b4f8630059a82fe0b22 Mon Sep 17 00:00:00 2001 From: phk Date: Sun, 19 Sep 2004 08:14:55 +0000 Subject: The getpages VOP was a good stab at getting scatter/gather I/O without too much kernel copying, but it is not the right way to do it, and it is in the way for straightening out the buffer cache. The right way is to pass the VM page array down through the struct bio to the disk device driver and DMA directly into/out of the physical memory. Once the VM/buf thing is sorted out it is next on the list. Retire most of the vnode method ffs_getpages(). It is not clear if what is left shouldn't be in the default implementation which we now fall back to. Retire spec_getpages() as well, as it has no users now. --- sys/fs/specfs/spec_vnops.c | 181 --------------------------------------------- sys/ufs/ffs/ffs_vnops.c | 117 ++--------------------------- 2 files changed, 5 insertions(+), 293 deletions(-) (limited to 'sys') diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c index 79a924f..5133496 100644 --- a/sys/fs/specfs/spec_vnops.c +++ b/sys/fs/specfs/spec_vnops.c @@ -55,7 +55,6 @@ static int spec_advlock(struct vop_advlock_args *); static int spec_close(struct vop_close_args *); static int spec_fsync(struct vop_fsync_args *); -static int spec_getpages(struct vop_getpages_args *); static int spec_ioctl(struct vop_ioctl_args *); static int spec_kqfilter(struct vop_kqfilter_args *); static int spec_open(struct vop_open_args *); @@ -74,7 +73,6 @@ static struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vop_close_desc, (vop_t *) spec_close }, { &vop_create_desc, (vop_t *) vop_panic }, { &vop_fsync_desc, (vop_t *) spec_fsync }, - { &vop_getpages_desc, (vop_t *) spec_getpages }, { &vop_getwritemount_desc, (vop_t *) vop_stdgetwritemount }, { &vop_ioctl_desc, (vop_t *) spec_ioctl }, { &vop_kqfilter_desc, (vop_t *) spec_kqfilter }, @@ -640,182 +638,3 @@ spec_advlock(ap) return (ap->a_flags & F_FLOCK ? 
EOPNOTSUPP : EINVAL); } - -static int -spec_getpages(ap) - struct vop_getpages_args *ap; -{ - vm_offset_t kva; - int error; - int i, pcount, size, s; - daddr_t blkno; - struct buf *bp; - vm_page_t m; - vm_ooffset_t offset; - int toff, nextoff, nread; - struct vnode *vp = ap->a_vp; - int blksiz; - int gotreqpage; - - GIANT_REQUIRED; - - error = 0; - pcount = round_page(ap->a_count) / PAGE_SIZE; - - /* - * Calculate the offset of the transfer and do a sanity check. - * FreeBSD currently only supports an 8 TB range due to b_blkno - * being in DEV_BSIZE ( usually 512 ) byte chunks on call to - * VOP_STRATEGY. XXX - */ - offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset; - blkno = btodb(offset); - - /* - * Round up physical size for real devices. We cannot round using - * v_mount's block size data because v_mount has nothing to do with - * the device. i.e. it's usually '/dev'. We need the physical block - * size for the device itself. - * - * We can't use v_rdev->si_mountpoint because it only exists when the - * block device is mounted. However, we can use v_rdev. - */ - - if (vn_isdisk(vp, NULL)) - blksiz = vp->v_rdev->si_bsize_phys; - else - blksiz = DEV_BSIZE; - - size = (ap->a_count + blksiz - 1) & ~(blksiz - 1); - - bp = getpbuf(NULL); - kva = (vm_offset_t)bp->b_data; - - /* - * Map the pages to be read into the kva. - */ - pmap_qenter(kva, ap->a_m, pcount); - - /* Build a minimal buffer header. 
*/ - bp->b_iocmd = BIO_READ; - bp->b_iodone = bdone; - - KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred")); - KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred")); - bp->b_rcred = crhold(curthread->td_ucred); - bp->b_wcred = crhold(curthread->td_ucred); - bp->b_iooffset = offset; - bp->b_blkno = blkno; - bp->b_lblkno = blkno; - pbgetvp(ap->a_vp, bp); - bp->b_bcount = size; - bp->b_bufsize = size; - bp->b_resid = 0; - bp->b_runningbufspace = bp->b_bufsize; - runningbufspace += bp->b_runningbufspace; - - cnt.v_vnodein++; - cnt.v_vnodepgsin += pcount; - - /* Do the input. */ - spec_xstrategy(bp->b_vp, bp); - - s = splbio(); - bwait(bp, PVM, "spread"); - splx(s); - - if ((bp->b_ioflags & BIO_ERROR) != 0) { - if (bp->b_error) - error = bp->b_error; - else - error = EIO; - } - - nread = size - bp->b_resid; - - if (nread < ap->a_count) { - bzero((caddr_t)kva + nread, - ap->a_count - nread); - } - pmap_qremove(kva, pcount); - - gotreqpage = 0; - /* - * While the page is busy, its object field is immutable. - */ - VM_OBJECT_LOCK(ap->a_m[ap->a_reqpage]->object); - vm_page_lock_queues(); - for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) { - nextoff = toff + PAGE_SIZE; - m = ap->a_m[i]; - - if (nextoff <= nread) { - m->valid = VM_PAGE_BITS_ALL; - vm_page_undirty(m); - } else if (toff < nread) { - /* - * Since this is a VM request, we have to supply the - * unaligned offset to allow vm_page_set_validclean() - * to zero sub-DEV_BSIZE'd portions of the page. - */ - vm_page_set_validclean(m, 0, nread - toff); - } else { - m->valid = 0; - vm_page_undirty(m); - } - - if (i != ap->a_reqpage) { - /* - * Just in case someone was asking for this page we - * now tell them that it is ok to use. 
- */ - if (!error || (m->valid == VM_PAGE_BITS_ALL)) { - if (m->valid) { - if (m->flags & PG_WANTED) { - vm_page_activate(m); - } else { - vm_page_deactivate(m); - } - vm_page_wakeup(m); - } else { - vm_page_free(m); - } - } else { - vm_page_free(m); - } - } else if (m->valid) { - gotreqpage = 1; - /* - * Since this is a VM request, we need to make the - * entire page presentable by zeroing invalid sections. - */ - if (m->valid != VM_PAGE_BITS_ALL) - vm_page_zero_invalid(m, FALSE); - } - } - vm_page_unlock_queues(); - if (!gotreqpage) { - m = ap->a_m[ap->a_reqpage]; - printf( - "spec_getpages:(%s) I/O read failure: (error=%d) bp %p vp %p\n", - devtoname(bp->b_dev), error, bp, bp->b_vp); - printf( - " size: %d, resid: %ld, a_count: %d, valid: 0x%lx\n", - size, bp->b_resid, ap->a_count, (u_long)m->valid); - printf( - " nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n", - nread, ap->a_reqpage, (u_long)m->pindex, pcount); - VM_OBJECT_UNLOCK(m->object); - /* - * Free the buffer header back to the swap buffer pool. - */ - relpbuf(bp, NULL); - return VM_PAGER_ERROR; - } - VM_OBJECT_UNLOCK(ap->a_m[ap->a_reqpage]->object); - /* - * Free the buffer header back to the swap buffer pool. 
- */ - relpbuf(bp, NULL); - return VM_PAGER_OK; -} diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index bddd2e4..4f44688 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -746,20 +746,9 @@ static int ffs_getpages(ap) struct vop_getpages_args *ap; { - off_t foff, physoffset; - int i, size, bsize; - struct vnode *dp, *vp; - vm_object_t obj; - vm_pindex_t pindex; + int i; vm_page_t mreq; - int bbackwards, bforwards; - int pbackwards, pforwards; - int firstpage; - ufs2_daddr_t reqblkno, reqlblkno; - int poff; int pcount; - int rtval; - int pagesperblock; GIANT_REQUIRED; @@ -787,109 +776,13 @@ ffs_getpages(ap) return VM_PAGER_OK; } VM_OBJECT_UNLOCK(mreq->object); - vp = ap->a_vp; - obj = vp->v_object; - bsize = vp->v_mount->mnt_stat.f_iosize; - pindex = mreq->pindex; - foff = IDX_TO_OFF(pindex) /* + ap->a_offset should be zero */; - - if (bsize < PAGE_SIZE) - return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, - ap->a_count, - ap->a_reqpage); - - /* - * foff is the file offset of the required page - * reqlblkno is the logical block that contains the page - * poff is the index of the page into the logical block - */ - reqlblkno = foff / bsize; - poff = (foff % bsize) / PAGE_SIZE; - - dp = VTOI(vp)->i_devvp; - if (ufs_bmaparray(vp, reqlblkno, &reqblkno, 0, &bforwards, &bbackwards) - || (reqblkno == -1)) { - VM_OBJECT_LOCK(obj); - vm_page_lock_queues(); - for(i = 0; i < pcount; i++) { - if (i != ap->a_reqpage) - vm_page_free(ap->a_m[i]); - } - vm_page_unlock_queues(); - if (reqblkno == -1) { - if ((mreq->flags & PG_ZERO) == 0) - pmap_zero_page(mreq); - vm_page_undirty(mreq); - mreq->valid = VM_PAGE_BITS_ALL; - VM_OBJECT_UNLOCK(obj); - return VM_PAGER_OK; - } else { - VM_OBJECT_UNLOCK(obj); - return VM_PAGER_ERROR; - } - } - - physoffset = (off_t)reqblkno * DEV_BSIZE + poff * PAGE_SIZE; - pagesperblock = bsize / PAGE_SIZE; - /* - * find the first page that is contiguous... 
- * note that pbackwards is the number of pages that are contiguous - * backwards. - */ - firstpage = 0; - if (ap->a_count) { - pbackwards = poff + bbackwards * pagesperblock; - if (ap->a_reqpage > pbackwards) { - firstpage = ap->a_reqpage - pbackwards; - VM_OBJECT_LOCK(obj); - vm_page_lock_queues(); - for(i=0;i<firstpage;i++) - vm_page_free(ap->a_m[i]); - vm_page_unlock_queues(); - VM_OBJECT_UNLOCK(obj); - } - /* - * pforwards is the number of pages that are contiguous - * after the current page. - */ - pforwards = (pagesperblock - (poff + 1)) + - bforwards * pagesperblock; - if (pforwards < (pcount - (ap->a_reqpage + 1))) { - VM_OBJECT_LOCK(obj); - vm_page_lock_queues(); - for( i = ap->a_reqpage + pforwards + 1; i < pcount; i++) - vm_page_free(ap->a_m[i]); - vm_page_unlock_queues(); - VM_OBJECT_UNLOCK(obj); - pcount = ap->a_reqpage + pforwards + 1; - } - - /* - * number of pages for I/O corrected for the non-contig pages at - * the beginning of the array. - */ - pcount -= firstpage; - } - - /* - * calculate the size of the transfer - */ - - size = pcount * PAGE_SIZE; - - if ((IDX_TO_OFF(ap->a_m[firstpage]->pindex) + size) > - obj->un_pager.vnp.vnp_size) - size = obj->un_pager.vnp.vnp_size - - IDX_TO_OFF(ap->a_m[firstpage]->pindex); - - physoffset -= foff; - rtval = VOP_GETPAGES(dp, &ap->a_m[firstpage], size, - (ap->a_reqpage - firstpage), physoffset); - - return (rtval); + return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, + ap->a_count, + ap->a_reqpage); } + /* * Extended attribute area reading. */ -- cgit v1.1