Diffstat (limited to 'sys/kern/vfs_bio.c')
-rw-r--r-- | sys/kern/vfs_bio.c | 963
1 file changed, 701 insertions, 262 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index d20c829..cded596 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1,8 +1,12 @@ /*- * Copyright (c) 2004 Poul-Henning Kamp * Copyright (c) 1994,1997 John S. Dyson + * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -92,6 +96,7 @@ struct buf_ops buf_ops_bio = { * carnal knowledge of buffers. This knowledge should be moved to vfs_bio.c. */ struct buf *buf; /* buffer header pool */ +caddr_t unmapped_buf; static struct proc *bufdaemonproc; @@ -132,6 +137,10 @@ SYSCTL_PROC(_vfs, OID_AUTO, bufspace, CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RD, SYSCTL_LONG(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0, "Virtual memory used for buffers"); #endif +static long unmapped_bufspace; +SYSCTL_LONG(_vfs, OID_AUTO, unmapped_bufspace, CTLFLAG_RD, + &unmapped_bufspace, 0, + "Amount of unmapped buffers, inclusive in the bufspace"); static long maxbufspace; SYSCTL_LONG(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD, &maxbufspace, 0, "Maximum allowed value of bufspace (including buf_daemon)"); @@ -201,6 +210,10 @@ SYSCTL_INT(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RW, &getnewbufcalls, 0, static int getnewbufrestarts; SYSCTL_INT(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RW, &getnewbufrestarts, 0, "Number of times getnewbuf has had to restart a buffer aquisition"); +static int mappingrestarts; +SYSCTL_INT(_vfs, OID_AUTO, mappingrestarts, CTLFLAG_RW, &mappingrestarts, 0, + "Number of times getblk has had to restart a buffer mapping for " + "unmapped buffer"); static int flushbufqtarget = 100; SYSCTL_INT(_vfs, OID_AUTO, flushbufqtarget, CTLFLAG_RW, &flushbufqtarget, 0, "Amount of work to do in flushbufqueues when helping bufdaemon"); @@ -210,6 +223,9 @@ SYSCTL_LONG(_vfs, OID_AUTO, notbufdflashes, CTLFLAG_RD, ¬bufdflashes, 0, static long barrierwrites; SYSCTL_LONG(_vfs, OID_AUTO, barrierwrites, CTLFLAG_RW, &barrierwrites, 0, "Number of barrier writes"); +SYSCTL_INT(_vfs, OID_AUTO, unmapped_buf_allowed, CTLFLAG_RD, + &unmapped_buf_allowed, 0, + "Permit the use of the unmapped i/o"); /* * Wakeup point for bufdaemon, as well as indicator of whether it is already @@ -281,6 +297,9 @@ static struct mtx nblock; /* Queues for free buffers with various properties */ static TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES] = { { 0 } }; +#ifdef INVARIANTS +static int bq_len[BUFFER_QUEUES]; +#endif /* Lock for the bufqueues */ static struct mtx bqlock; @@ -511,7 +530,7 @@ caddr_t kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est) { int tuned_nbuf; - long maxbuf; + long maxbuf, maxbuf_sz, buf_sz, biotmap_sz; /* * physmem_est is in pages. Convert it to kilobytes (assumes @@ -555,6 +574,52 @@ kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est) } /* + * Ideal allocation size for the transient bio submap if 10% + * of the maximal space buffer map. This roughly corresponds + * to the amount of the buffer mapped for typical UFS load. + * + * Clip the buffer map to reserve space for the transient + * BIOs, if its extent is bigger than 90% of the maximum + * buffer map extent on the platform. + * + * The fall-back to the maxbuf in case of maxbcache unset, + * allows to not trim the buffer KVA for the architectures + * with ample KVA space. 
+ */ + if (bio_transient_maxcnt == 0) { + maxbuf_sz = maxbcache != 0 ? maxbcache : maxbuf * BKVASIZE; + buf_sz = (long)nbuf * BKVASIZE; + if (buf_sz < maxbuf_sz / 10 * 9) { + /* + * There is more KVA than memory. Do not + * adjust buffer map size, and assign the rest + * of maxbuf to transient map. + */ + biotmap_sz = maxbuf_sz - buf_sz; + } else { + /* + * Buffer map spans all KVA we could afford on + * this platform. Give 10% of the buffer map + * to the transient bio map. + */ + biotmap_sz = buf_sz / 10; + buf_sz -= biotmap_sz; + } + if (biotmap_sz / INT_MAX > MAXPHYS) + bio_transient_maxcnt = INT_MAX; + else + bio_transient_maxcnt = biotmap_sz / MAXPHYS; + /* + * Artifically limit to 1024 simultaneous in-flight I/Os + * using the transient mapping. + */ + if (bio_transient_maxcnt > 1024) + bio_transient_maxcnt = 1024; + if (tuned_nbuf) + nbuf = buf_sz / BKVASIZE; + } + + /* * swbufs are used as temporary holders for I/O, such as paging I/O. * We have no less then 16 and no more then 256. */ @@ -607,6 +672,9 @@ bufinit(void) LIST_INIT(&bp->b_dep); BUF_LOCKINIT(bp); TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); +#ifdef INVARIANTS + bq_len[QUEUE_EMPTY]++; +#endif } /* @@ -675,6 +743,55 @@ bufinit(void) bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED); + unmapped_buf = (caddr_t)kmem_alloc_nofault(kernel_map, MAXPHYS); +} + +#ifdef INVARIANTS +static inline void +vfs_buf_check_mapped(struct buf *bp) +{ + + KASSERT((bp->b_flags & B_UNMAPPED) == 0, + ("mapped buf %p %x", bp, bp->b_flags)); + KASSERT(bp->b_kvabase != unmapped_buf, + ("mapped buf: b_kvabase was not updated %p", bp)); + KASSERT(bp->b_data != unmapped_buf, + ("mapped buf: b_data was not updated %p", bp)); +} + +static inline void +vfs_buf_check_unmapped(struct buf *bp) +{ + + KASSERT((bp->b_flags & B_UNMAPPED) == B_UNMAPPED, + ("unmapped buf %p %x", bp, bp->b_flags)); + KASSERT(bp->b_kvabase == unmapped_buf, + ("unmapped buf: corrupted b_kvabase %p", bp)); + KASSERT(bp->b_data == unmapped_buf, + ("unmapped buf: corrupted b_data %p", bp)); +} + +#define BUF_CHECK_MAPPED(bp) vfs_buf_check_mapped(bp) +#define BUF_CHECK_UNMAPPED(bp) vfs_buf_check_unmapped(bp) +#else +#define BUF_CHECK_MAPPED(bp) do {} while (0) +#define BUF_CHECK_UNMAPPED(bp) do {} while (0) +#endif + +static void +bpmap_qenter(struct buf *bp) +{ + + BUF_CHECK_MAPPED(bp); + + /* + * bp->b_data is relative to bp->b_offset, but + * bp->b_offset may be offset into the first page. 
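The sizing logic in kern_vfs_bio_buffer_alloc() above splits the available KVA between the buffer map and the new transient bio submap. The arithmetic is easy to lose in the flattened diff, so here is a minimal userland sketch of just that computation; BKVASIZE and MAXPHYS are stand-in values, and this is illustrative C that mirrors the committed expressions (including the INT_MAX overflow guard), not kernel code.

/*
 * Illustrative userland sketch of the transient-map sizing done in
 * kern_vfs_bio_buffer_alloc() above.  Constants are stand-ins.
 */
#include <limits.h>
#include <stdio.h>

#define	BKVASIZE	(16 * 1024)	/* stand-in for the kernel constant */
#define	MAXPHYS		(128 * 1024)	/* stand-in for the kernel constant */

static int
size_transient_map(long nbuf, long maxbcache, long maxbuf)
{
	long maxbuf_sz, buf_sz, biotmap_sz;
	int bio_transient_maxcnt;

	/* Fall back to maxbuf * BKVASIZE when maxbcache is not set. */
	maxbuf_sz = maxbcache != 0 ? maxbcache : maxbuf * BKVASIZE;
	buf_sz = nbuf * BKVASIZE;
	if (buf_sz < maxbuf_sz / 10 * 9) {
		/* Plenty of KVA: give the leftover to the transient map. */
		biotmap_sz = maxbuf_sz - buf_sz;
	} else {
		/* Buffer map fills the KVA: carve out 10% of it. */
		biotmap_sz = buf_sz / 10;
		buf_sz -= biotmap_sz;
	}
	/* One transient slot per MAXPHYS-sized in-flight I/O, capped at 1024. */
	if (biotmap_sz / INT_MAX > MAXPHYS)
		bio_transient_maxcnt = INT_MAX;
	else
		bio_transient_maxcnt = (int)(biotmap_sz / MAXPHYS);
	if (bio_transient_maxcnt > 1024)
		bio_transient_maxcnt = 1024;
	return (bio_transient_maxcnt);
}

int
main(void)
{
	/* Hypothetical tuning values, for illustration only. */
	printf("%d\n", size_transient_map(32768, 0, 65536));
	return (0);
}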
+ */ + bp->b_data = (caddr_t)trunc_page((vm_offset_t)bp->b_data); + pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages); + bp->b_data = (caddr_t)((vm_offset_t)bp->b_data | + (vm_offset_t)(bp->b_offset & PAGE_MASK)); } /* @@ -686,14 +803,26 @@ static void bfreekva(struct buf *bp) { - if (bp->b_kvasize) { - atomic_add_int(&buffreekvacnt, 1); - atomic_subtract_long(&bufspace, bp->b_kvasize); - vm_map_remove(buffer_map, (vm_offset_t) bp->b_kvabase, - (vm_offset_t) bp->b_kvabase + bp->b_kvasize); - bp->b_kvasize = 0; - bufspacewakeup(); + if (bp->b_kvasize == 0) + return; + + atomic_add_int(&buffreekvacnt, 1); + atomic_subtract_long(&bufspace, bp->b_kvasize); + if ((bp->b_flags & B_UNMAPPED) == 0) { + BUF_CHECK_MAPPED(bp); + vm_map_remove(buffer_map, (vm_offset_t)bp->b_kvabase, + (vm_offset_t)bp->b_kvabase + bp->b_kvasize); + } else { + BUF_CHECK_UNMAPPED(bp); + if ((bp->b_flags & B_KVAALLOC) != 0) { + vm_map_remove(buffer_map, (vm_offset_t)bp->b_kvaalloc, + (vm_offset_t)bp->b_kvaalloc + bp->b_kvasize); + } + atomic_subtract_long(&unmapped_bufspace, bp->b_kvasize); + bp->b_flags &= ~(B_UNMAPPED | B_KVAALLOC); } + bp->b_kvasize = 0; + bufspacewakeup(); } /* @@ -760,6 +889,11 @@ bremfreel(struct buf *bp) mtx_assert(&bqlock, MA_OWNED); TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist); +#ifdef INVARIANTS + KASSERT(bq_len[bp->b_qindex] >= 1, ("queue %d underflow", + bp->b_qindex)); + bq_len[bp->b_qindex]--; +#endif bp->b_qindex = QUEUE_NONE; /* * If this was a delayed bremfree() we only need to remove the buffer @@ -1414,7 +1548,8 @@ brelse(struct buf *bp) } } - if ((bp->b_flags & B_INVAL) == 0) { + if ((bp->b_flags & (B_INVAL | B_UNMAPPED)) == 0) { + BUF_CHECK_MAPPED(bp); pmap_qenter( trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); @@ -1509,11 +1644,17 @@ brelse(struct buf *bp) bp->b_qindex = QUEUE_DIRTY; else bp->b_qindex = QUEUE_CLEAN; - if (bp->b_flags & B_AGE) - TAILQ_INSERT_HEAD(&bufqueues[bp->b_qindex], bp, b_freelist); - else - TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist); + if (bp->b_flags & B_AGE) { + TAILQ_INSERT_HEAD(&bufqueues[bp->b_qindex], bp, + b_freelist); + } else { + TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, + b_freelist); + } } +#ifdef INVARIANTS + bq_len[bp->b_qindex]++; +#endif mtx_unlock(&bqlock); /* @@ -1604,6 +1745,9 @@ bqrelse(struct buf *bp) if (bp->b_flags & B_DELWRI) { bp->b_qindex = QUEUE_DIRTY; TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist); +#ifdef INVARIANTS + bq_len[bp->b_qindex]++; +#endif } else { /* * The locking of the BO_LOCK for checking of the @@ -1616,6 +1760,9 @@ bqrelse(struct buf *bp) bp->b_qindex = QUEUE_CLEAN; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_CLEAN], bp, b_freelist); +#ifdef INVARIANTS + bq_len[QUEUE_CLEAN]++; +#endif } else { /* * We are too low on memory, we have to try to free @@ -1657,7 +1804,11 @@ vfs_vmio_release(struct buf *bp) int i; vm_page_t m; - pmap_qremove(trunc_page((vm_offset_t)bp->b_data), bp->b_npages); + if ((bp->b_flags & B_UNMAPPED) == 0) { + BUF_CHECK_MAPPED(bp); + pmap_qremove(trunc_page((vm_offset_t)bp->b_data), bp->b_npages); + } else + BUF_CHECK_UNMAPPED(bp); VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; @@ -1761,8 +1912,10 @@ vfs_bio_awrite(struct buf *bp) int nwritten; int size; int maxcl; + int gbflags; bo = &vp->v_bufobj; + gbflags = (bp->b_flags & B_UNMAPPED) != 0 ? GB_UNMAPPED : 0; /* * right now we support clustered writing only to regular files. 
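bpmap_qenter(), completed at the top of the hunk above, enters the buffer's pages into KVA and then re-encodes the intra-page part of b_offset into b_data. A small userland sketch of that pointer arithmetic, with PAGE_SIZE/trunc_page as stand-ins for the kernel macros and a hypothetical mapping address:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define	PAGE_SIZE	4096UL
#define	PAGE_MASK	(PAGE_SIZE - 1)
#define	trunc_page(x)	((x) & ~PAGE_MASK)

int
main(void)
{
	uintptr_t kva = 0x80200000UL;	/* hypothetical mapping base */
	uint64_t b_offset = 0x12345;	/* logical offset of the buffer */
	uintptr_t b_data;

	/* pmap_qenter() would be called with the page-truncated address ... */
	b_data = trunc_page(kva);
	/* ... then the low bits of b_offset are OR-ed back into b_data. */
	b_data |= (uintptr_t)(b_offset & PAGE_MASK);

	printf("b_data = %#lx (page offset %#lx)\n",
	    (unsigned long)b_data, (unsigned long)(b_data & PAGE_MASK));
	assert((b_data & PAGE_MASK) == (b_offset & PAGE_MASK));
	return (0);
}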
If * we find a clusterable block we could be in the middle of a cluster @@ -1794,7 +1947,7 @@ vfs_bio_awrite(struct buf *bp) if (ncl != 1) { BUF_UNLOCK(bp); nwritten = cluster_wbuild(vp, size, lblkno - j, ncl, - 0); + gbflags); return (nwritten); } } @@ -1811,46 +1964,207 @@ vfs_bio_awrite(struct buf *bp) return (nwritten); } +static void +setbufkva(struct buf *bp, vm_offset_t addr, int maxsize, int gbflags) +{ + + KASSERT((bp->b_flags & (B_UNMAPPED | B_KVAALLOC)) == 0 && + bp->b_kvasize == 0, ("call bfreekva(%p)", bp)); + if ((gbflags & GB_UNMAPPED) == 0) { + bp->b_kvabase = (caddr_t)addr; + } else if ((gbflags & GB_KVAALLOC) != 0) { + KASSERT((gbflags & GB_UNMAPPED) != 0, + ("GB_KVAALLOC without GB_UNMAPPED")); + bp->b_kvaalloc = (caddr_t)addr; + bp->b_flags |= B_UNMAPPED | B_KVAALLOC; + atomic_add_long(&unmapped_bufspace, bp->b_kvasize); + } + bp->b_kvasize = maxsize; +} + /* - * getnewbuf: - * - * Find and initialize a new buffer header, freeing up existing buffers - * in the bufqueues as necessary. The new buffer is returned locked. - * - * Important: B_INVAL is not set. If the caller wishes to throw the - * buffer away, the caller must set B_INVAL prior to calling brelse(). - * - * We block if: - * We have insufficient buffer headers - * We have insufficient buffer space - * buffer_map is too fragmented ( space reservation fails ) - * If we have to flush dirty buffers ( but we try to avoid this ) - * - * To avoid VFS layer recursion we do not flush dirty buffers ourselves. - * Instead we ask the buf daemon to do it for us. We attempt to - * avoid piecemeal wakeups of the pageout daemon. + * Allocate the buffer KVA and set b_kvasize. Also set b_kvabase if + * needed. */ +static int +allocbufkva(struct buf *bp, int maxsize, int gbflags) +{ + vm_offset_t addr; + int rv; -static struct buf * -getnewbuf(struct vnode *vp, int slpflag, int slptimeo, int size, int maxsize, - int gbflags) + bfreekva(bp); + addr = 0; + + vm_map_lock(buffer_map); + if (vm_map_findspace(buffer_map, vm_map_min(buffer_map), maxsize, + &addr)) { + vm_map_unlock(buffer_map); + /* + * Buffer map is too fragmented. Request the caller + * to defragment the map. + */ + atomic_add_int(&bufdefragcnt, 1); + return (1); + } + rv = vm_map_insert(buffer_map, NULL, 0, addr, addr + maxsize, + VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT); + KASSERT(rv == KERN_SUCCESS, ("vm_map_insert(buffer_map) rv %d", rv)); + vm_map_unlock(buffer_map); + setbufkva(bp, addr, maxsize, gbflags); + atomic_add_long(&bufspace, bp->b_kvasize); + return (0); +} + +/* + * Ask the bufdaemon for help, or act as bufdaemon itself, when a + * locked vnode is supplied. 
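setbufkva() and allocbufkva() above record a fresh KVA reservation differently depending on whether the caller asked for a mapped or an unmapped buffer. The resulting B_UNMAPPED/B_KVAALLOC combinations are easiest to see as a table; the sketch below uses illustrative flag values, not the kernel's.

#include <stdio.h>

#define	B_UNMAPPED	0x1	/* b_data does not point at a mapping */
#define	B_KVAALLOC	0x2	/* KVA reserved in b_kvaalloc, not entered */

static const char *
kva_state(int flags)
{
	switch (flags & (B_UNMAPPED | B_KVAALLOC)) {
	case 0:
		return ("mapped: b_kvabase/b_data usable");
	case B_UNMAPPED:
		return ("unmapped: b_data == unmapped_buf, no KVA held");
	case B_UNMAPPED | B_KVAALLOC:
		return ("unmapped, KVA reserved in b_kvaalloc for later use");
	default:
		return ("invalid: B_KVAALLOC without B_UNMAPPED");
	}
}

int
main(void)
{
	int i;

	for (i = 0; i < 4; i++)
		printf("flags %x: %s\n", i, kva_state(i));
	return (0);
}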
+ */ +static void +getnewbuf_bufd_help(struct vnode *vp, int gbflags, int slpflag, int slptimeo, + int defrag) { struct thread *td; - struct buf *bp; - struct buf *nbp; - int defrag = 0; - int nqindex; - static int flushingbufs; + char *waitmsg; + int fl, flags, norunbuf; + + mtx_assert(&bqlock, MA_OWNED); + + if (defrag) { + flags = VFS_BIO_NEED_BUFSPACE; + waitmsg = "nbufkv"; + } else if (bufspace >= hibufspace) { + waitmsg = "nbufbs"; + flags = VFS_BIO_NEED_BUFSPACE; + } else { + waitmsg = "newbuf"; + flags = VFS_BIO_NEED_ANY; + } + mtx_lock(&nblock); + needsbuffer |= flags; + mtx_unlock(&nblock); + mtx_unlock(&bqlock); + + bd_speedup(); /* heeeelp */ + if ((gbflags & GB_NOWAIT_BD) != 0) + return; td = curthread; + mtx_lock(&nblock); + while (needsbuffer & flags) { + if (vp != NULL && (td->td_pflags & TDP_BUFNEED) == 0) { + mtx_unlock(&nblock); + /* + * getblk() is called with a vnode locked, and + * some majority of the dirty buffers may as + * well belong to the vnode. Flushing the + * buffers there would make a progress that + * cannot be achieved by the buf_daemon, that + * cannot lock the vnode. + */ + norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) | + (td->td_pflags & TDP_NORUNNINGBUF); + /* play bufdaemon */ + td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF; + fl = buf_do_flush(vp); + td->td_pflags &= norunbuf; + mtx_lock(&nblock); + if (fl != 0) + continue; + if ((needsbuffer & flags) == 0) + break; + } + if (msleep(&needsbuffer, &nblock, (PRIBIO + 4) | slpflag, + waitmsg, slptimeo)) + break; + } + mtx_unlock(&nblock); +} + +static void +getnewbuf_reuse_bp(struct buf *bp, int qindex) +{ + + CTR6(KTR_BUF, "getnewbuf(%p) vp %p flags %X kvasize %d bufsize %d " + "queue %d (recycling)", bp, bp->b_vp, bp->b_flags, + bp->b_kvasize, bp->b_bufsize, qindex); + mtx_assert(&bqlock, MA_NOTOWNED); + /* - * We can't afford to block since we might be holding a vnode lock, - * which may prevent system daemons from running. We deal with - * low-memory situations by proactively returning memory and running - * async I/O rather then sync I/O. + * Note: we no longer distinguish between VMIO and non-VMIO + * buffers. */ - atomic_add_int(&getnewbufcalls, 1); - atomic_subtract_int(&getnewbufrestarts, 1); + KASSERT((bp->b_flags & B_DELWRI) == 0, + ("delwri buffer %p found in queue %d", bp, qindex)); + + if (qindex == QUEUE_CLEAN) { + if (bp->b_flags & B_VMIO) { + bp->b_flags &= ~B_ASYNC; + vfs_vmio_release(bp); + } + if (bp->b_vp != NULL) + brelvp(bp); + } + + /* + * Get the rest of the buffer freed up. b_kva* is still valid + * after this operation. + */ + + if (bp->b_rcred != NOCRED) { + crfree(bp->b_rcred); + bp->b_rcred = NOCRED; + } + if (bp->b_wcred != NOCRED) { + crfree(bp->b_wcred); + bp->b_wcred = NOCRED; + } + if (!LIST_EMPTY(&bp->b_dep)) + buf_deallocate(bp); + if (bp->b_vflags & BV_BKGRDINPROG) + panic("losing buffer 3"); + KASSERT(bp->b_vp == NULL, ("bp: %p still has vnode %p. qindex: %d", + bp, bp->b_vp, qindex)); + KASSERT((bp->b_xflags & (BX_VNCLEAN|BX_VNDIRTY)) == 0, + ("bp: %p still on a buffer list. 
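getnewbuf_bufd_help() above lets a thread that already holds a vnode lock flush that vnode's own dirty buffers instead of waiting on bufdaemon, which cannot take the vnode lock. The subtle part is the save/restore of td_pflags around the flush; a stand-in sketch of that pattern (flag values and the flush routine are hypothetical, not the kernel's):

#include <assert.h>
#include <stdio.h>

#define	TDP_BUFNEED		0x01
#define	TDP_NORUNNINGBUF	0x02

static int
flush_own_vnode(void)
{
	/* Stand-in for buf_do_flush(vp): pretend we flushed 3 buffers. */
	return (3);
}

int
main(void)
{
	int td_pflags = TDP_NORUNNINGBUF;	/* hypothetical initial state */
	int norunbuf, fl;

	/*
	 * Remember which bits to clear afterwards: TDP_BUFNEED always,
	 * TDP_NORUNNINGBUF only if it was not already set.
	 */
	norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) |
	    (td_pflags & TDP_NORUNNINGBUF);

	td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF;	/* play bufdaemon */
	fl = flush_own_vnode();
	td_pflags &= norunbuf;				/* restore */

	printf("flushed %d, pflags restored to %#x\n", fl, td_pflags);
	assert(td_pflags == TDP_NORUNNINGBUF);
	return (0);
}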
xflags %X", bp, bp->b_xflags)); + + if (bp->b_bufsize) + allocbuf(bp, 0); + + bp->b_flags &= B_UNMAPPED | B_KVAALLOC; + bp->b_ioflags = 0; + bp->b_xflags = 0; + KASSERT((bp->b_vflags & BV_INFREECNT) == 0, + ("buf %p still counted as free?", bp)); + bp->b_vflags = 0; + bp->b_vp = NULL; + bp->b_blkno = bp->b_lblkno = 0; + bp->b_offset = NOOFFSET; + bp->b_iodone = 0; + bp->b_error = 0; + bp->b_resid = 0; + bp->b_bcount = 0; + bp->b_npages = 0; + bp->b_dirtyoff = bp->b_dirtyend = 0; + bp->b_bufobj = NULL; + bp->b_pin_count = 0; + bp->b_fsprivate1 = NULL; + bp->b_fsprivate2 = NULL; + bp->b_fsprivate3 = NULL; + + LIST_INIT(&bp->b_dep); +} + +static int flushingbufs; + +static struct buf * +getnewbuf_scan(int maxsize, int defrag, int unmapped, int metadata) +{ + struct buf *bp, *nbp; + int nqindex, qindex, pass; + + KASSERT(!unmapped || !defrag, ("both unmapped and defrag")); + + pass = 1; restart: atomic_add_int(&getnewbufrestarts, 1); @@ -1860,66 +2174,90 @@ restart: * that if we are specially marked process, we are allowed to * dip into our reserves. * - * The scanning sequence is nominally: EMPTY->EMPTYKVA->CLEAN + * The scanning sequence is nominally: EMPTY->EMPTYKVA->CLEAN + * for the allocation of the mapped buffer. For unmapped, the + * easiest is to start with EMPTY outright. * * We start with EMPTYKVA. If the list is empty we backup to EMPTY. * However, there are a number of cases (defragging, reusing, ...) * where we cannot backup. */ + nbp = NULL; mtx_lock(&bqlock); - nqindex = QUEUE_EMPTYKVA; - nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]); - + if (!defrag && unmapped) { + nqindex = QUEUE_EMPTY; + nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]); + } if (nbp == NULL) { - /* - * If no EMPTYKVA buffers and we are either - * defragging or reusing, locate a CLEAN buffer - * to free or reuse. If bufspace useage is low - * skip this step so we can allocate a new buffer. - */ - if (defrag || bufspace >= lobufspace) { - nqindex = QUEUE_CLEAN; - nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]); - } + nqindex = QUEUE_EMPTYKVA; + nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]); + } - /* - * If we could not find or were not allowed to reuse a - * CLEAN buffer, check to see if it is ok to use an EMPTY - * buffer. We can only use an EMPTY buffer if allocating - * its KVA would not otherwise run us out of buffer space. - */ - if (nbp == NULL && defrag == 0 && - bufspace + maxsize < hibufspace) { - nqindex = QUEUE_EMPTY; - nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]); - } + /* + * If no EMPTYKVA buffers and we are either defragging or + * reusing, locate a CLEAN buffer to free or reuse. If + * bufspace useage is low skip this step so we can allocate a + * new buffer. + */ + if (nbp == NULL && (defrag || bufspace >= lobufspace)) { + nqindex = QUEUE_CLEAN; + nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]); + } + + /* + * If we could not find or were not allowed to reuse a CLEAN + * buffer, check to see if it is ok to use an EMPTY buffer. + * We can only use an EMPTY buffer if allocating its KVA would + * not otherwise run us out of buffer space. No KVA is needed + * for the unmapped allocation. + */ + if (nbp == NULL && defrag == 0 && (bufspace + maxsize < hibufspace || + metadata)) { + nqindex = QUEUE_EMPTY; + nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]); + } + + /* + * All available buffers might be clean, retry ignoring the + * lobufspace as the last resort. 
+ */ + if (nbp == NULL && !TAILQ_EMPTY(&bufqueues[QUEUE_CLEAN])) { + nqindex = QUEUE_CLEAN; + nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]); } /* * Run scan, possibly freeing data and/or kva mappings on the fly * depending. */ - while ((bp = nbp) != NULL) { - int qindex = nqindex; + qindex = nqindex; /* - * Calculate next bp ( we can only use it if we do not block - * or do other fancy things ). + * Calculate next bp (we can only use it if we do not + * block or do other fancy things). */ if ((nbp = TAILQ_NEXT(bp, b_freelist)) == NULL) { - switch(qindex) { + switch (qindex) { case QUEUE_EMPTY: nqindex = QUEUE_EMPTYKVA; - if ((nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]))) + nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]); + if (nbp != NULL) break; /* FALLTHROUGH */ case QUEUE_EMPTYKVA: nqindex = QUEUE_CLEAN; - if ((nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]))) + nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]); + if (nbp != NULL) break; /* FALLTHROUGH */ case QUEUE_CLEAN: + if (metadata && pass == 1) { + pass = 2; + nqindex = QUEUE_EMPTY; + nbp = TAILQ_FIRST( + &bufqueues[QUEUE_EMPTY]); + } /* * nbp is NULL. */ @@ -1952,22 +2290,9 @@ restart: } BO_UNLOCK(bp->b_bufobj); } - CTR6(KTR_BUF, - "getnewbuf(%p) vp %p flags %X kvasize %d bufsize %d " - "queue %d (recycling)", bp, bp->b_vp, bp->b_flags, - bp->b_kvasize, bp->b_bufsize, qindex); - - /* - * Sanity Checks - */ - KASSERT(bp->b_qindex == qindex, ("getnewbuf: inconsistant queue %d bp %p", qindex, bp)); - - /* - * Note: we no longer distinguish between VMIO and non-VMIO - * buffers. - */ - KASSERT((bp->b_flags & B_DELWRI) == 0, ("delwri buffer %p found in queue %d", bp, qindex)); + KASSERT(bp->b_qindex == qindex, + ("getnewbuf: inconsistent queue %d bp %p", qindex, bp)); if (bp->b_bufobj != NULL) BO_LOCK(bp->b_bufobj); @@ -1975,68 +2300,13 @@ restart: if (bp->b_bufobj != NULL) BO_UNLOCK(bp->b_bufobj); mtx_unlock(&bqlock); - - if (qindex == QUEUE_CLEAN) { - if (bp->b_flags & B_VMIO) { - bp->b_flags &= ~B_ASYNC; - vfs_vmio_release(bp); - } - if (bp->b_vp) - brelvp(bp); - } - /* * NOTE: nbp is now entirely invalid. We can only restart * the scan from this point on. - * - * Get the rest of the buffer freed up. b_kva* is still - * valid after this operation. */ - if (bp->b_rcred != NOCRED) { - crfree(bp->b_rcred); - bp->b_rcred = NOCRED; - } - if (bp->b_wcred != NOCRED) { - crfree(bp->b_wcred); - bp->b_wcred = NOCRED; - } - if (!LIST_EMPTY(&bp->b_dep)) - buf_deallocate(bp); - if (bp->b_vflags & BV_BKGRDINPROG) - panic("losing buffer 3"); - KASSERT(bp->b_vp == NULL, - ("bp: %p still has vnode %p. qindex: %d", - bp, bp->b_vp, qindex)); - KASSERT((bp->b_xflags & (BX_VNCLEAN|BX_VNDIRTY)) == 0, - ("bp: %p still on a buffer list. xflags %X", - bp, bp->b_xflags)); - - if (bp->b_bufsize) - allocbuf(bp, 0); - - bp->b_flags = 0; - bp->b_ioflags = 0; - bp->b_xflags = 0; - KASSERT((bp->b_vflags & BV_INFREECNT) == 0, - ("buf %p still counted as free?", bp)); - bp->b_vflags = 0; - bp->b_vp = NULL; - bp->b_blkno = bp->b_lblkno = 0; - bp->b_offset = NOOFFSET; - bp->b_iodone = 0; - bp->b_error = 0; - bp->b_resid = 0; - bp->b_bcount = 0; - bp->b_npages = 0; - bp->b_dirtyoff = bp->b_dirtyend = 0; - bp->b_bufobj = NULL; - bp->b_pin_count = 0; - bp->b_fsprivate1 = NULL; - bp->b_fsprivate2 = NULL; - bp->b_fsprivate3 = NULL; - - LIST_INIT(&bp->b_dep); + getnewbuf_reuse_bp(bp, qindex); + mtx_assert(&bqlock, MA_NOTOWNED); /* * If we are defragging then free the buffer. 
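Inside the scan loop above, the next candidate queue is computed before the current buffer can be invalidated, falling through EMPTY to EMPTYKVA to CLEAN, and metadata requests get a second pass that restarts at EMPTY. A simplified state-machine sketch of that fall-through; the real code also re-reads the queue heads, which this sketch does not model.

#include <stdio.h>

enum bq { QUEUE_NONE, QUEUE_EMPTY, QUEUE_EMPTYKVA, QUEUE_CLEAN };

static enum bq
next_queue(enum bq qindex, int metadata, int *pass)
{
	switch (qindex) {
	case QUEUE_EMPTY:
		return (QUEUE_EMPTYKVA);
	case QUEUE_EMPTYKVA:
		return (QUEUE_CLEAN);
	case QUEUE_CLEAN:
		/* Metadata requests make a second pass from EMPTY. */
		if (metadata && *pass == 1) {
			*pass = 2;
			return (QUEUE_EMPTY);
		}
		return (QUEUE_NONE);	/* nothing left to scan */
	default:
		return (QUEUE_NONE);
	}
}

int
main(void)
{
	int pass = 1;
	enum bq q = QUEUE_EMPTY;

	while ((q = next_queue(q, 1, &pass)) != QUEUE_NONE)
		printf("next queue %d (pass %d)\n", q, pass);
	return (0);
}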
@@ -2060,6 +2330,9 @@ restart: goto restart; } + if (metadata) + break; + /* * If we are overcomitted then recover the buffer and its * KVM space. This occurs in rare situations when multiple @@ -2077,6 +2350,59 @@ restart: flushingbufs = 0; break; } + return (bp); +} + +/* + * getnewbuf: + * + * Find and initialize a new buffer header, freeing up existing buffers + * in the bufqueues as necessary. The new buffer is returned locked. + * + * Important: B_INVAL is not set. If the caller wishes to throw the + * buffer away, the caller must set B_INVAL prior to calling brelse(). + * + * We block if: + * We have insufficient buffer headers + * We have insufficient buffer space + * buffer_map is too fragmented ( space reservation fails ) + * If we have to flush dirty buffers ( but we try to avoid this ) + * + * To avoid VFS layer recursion we do not flush dirty buffers ourselves. + * Instead we ask the buf daemon to do it for us. We attempt to + * avoid piecemeal wakeups of the pageout daemon. + */ +static struct buf * +getnewbuf(struct vnode *vp, int slpflag, int slptimeo, int size, int maxsize, + int gbflags) +{ + struct buf *bp; + int defrag, metadata; + + KASSERT((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC, + ("GB_KVAALLOC only makes sense with GB_UNMAPPED")); + if (!unmapped_buf_allowed) + gbflags &= ~(GB_UNMAPPED | GB_KVAALLOC); + + defrag = 0; + if (vp == NULL || (vp->v_vflag & (VV_MD | VV_SYSTEM)) != 0 || + vp->v_type == VCHR) + metadata = 1; + else + metadata = 0; + /* + * We can't afford to block since we might be holding a vnode lock, + * which may prevent system daemons from running. We deal with + * low-memory situations by proactively returning memory and running + * async I/O rather then sync I/O. + */ + atomic_add_int(&getnewbufcalls, 1); + atomic_subtract_int(&getnewbufrestarts, 1); +restart: + bp = getnewbuf_scan(maxsize, defrag, (gbflags & (GB_UNMAPPED | + GB_KVAALLOC)) == GB_UNMAPPED, metadata); + if (bp != NULL) + defrag = 0; /* * If we exhausted our list, sleep as appropriate. We may have to @@ -2084,65 +2410,23 @@ restart: * * Generally we are sleeping due to insufficient buffer space. */ - if (bp == NULL) { - int flags, norunbuf; - char *waitmsg; - int fl; - - if (defrag) { - flags = VFS_BIO_NEED_BUFSPACE; - waitmsg = "nbufkv"; - } else if (bufspace >= hibufspace) { - waitmsg = "nbufbs"; - flags = VFS_BIO_NEED_BUFSPACE; - } else { - waitmsg = "newbuf"; - flags = VFS_BIO_NEED_ANY; - } - mtx_lock(&nblock); - needsbuffer |= flags; - mtx_unlock(&nblock); - mtx_unlock(&bqlock); - - bd_speedup(); /* heeeelp */ - if (gbflags & GB_NOWAIT_BD) - return (NULL); - - mtx_lock(&nblock); - while (needsbuffer & flags) { - if (vp != NULL && (td->td_pflags & TDP_BUFNEED) == 0) { - mtx_unlock(&nblock); - /* - * getblk() is called with a vnode - * locked, and some majority of the - * dirty buffers may as well belong to - * the vnode. Flushing the buffers - * there would make a progress that - * cannot be achieved by the - * buf_daemon, that cannot lock the - * vnode. 
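The new getnewbuf() wrapper above first sanitizes the GB_* flags (GB_KVAALLOC only makes sense together with GB_UNMAPPED, and both are stripped when unmapped buffers are disabled) and then treats the request as metadata when there is no vnode, the vnode is a system or md vnode, or it is a character device. A stand-in sketch of those two checks with simplified vnode fields and flag values:

#include <stdio.h>

#define	GB_UNMAPPED	0x1
#define	GB_KVAALLOC	0x2
#define	VV_SYSTEM	0x1
#define	VV_MD		0x2

enum vtype { VREG, VCHR };

struct vnode {
	int		v_vflag;
	enum vtype	v_type;
};

static int unmapped_buf_allowed = 1;	/* stand-in for the tunable */

static int
classify(struct vnode *vp, int *gbflags)
{
	/* GB_KVAALLOC without GB_UNMAPPED is rejected by assertion. */
	if (!unmapped_buf_allowed)
		*gbflags &= ~(GB_UNMAPPED | GB_KVAALLOC);

	/* Metadata: no vnode, md/system vnodes, or character devices. */
	return (vp == NULL ||
	    (vp->v_vflag & (VV_MD | VV_SYSTEM)) != 0 || vp->v_type == VCHR);
}

int
main(void)
{
	struct vnode dev = { 0, VCHR };
	int flags = GB_UNMAPPED;

	printf("metadata=%d flags=%#x\n", classify(&dev, &flags), flags);
	return (0);
}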
- */ - norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) | - (td->td_pflags & TDP_NORUNNINGBUF); - /* play bufdaemon */ - td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF; - fl = buf_do_flush(vp); - td->td_pflags &= norunbuf; - mtx_lock(&nblock); - if (fl != 0) - continue; - if ((needsbuffer & flags) == 0) - break; - } - if (msleep(&needsbuffer, &nblock, - (PRIBIO + 4) | slpflag, waitmsg, slptimeo)) { - mtx_unlock(&nblock); - return (NULL); - } - } - mtx_unlock(&nblock); + mtx_assert(&bqlock, MA_OWNED); + getnewbuf_bufd_help(vp, gbflags, slpflag, slptimeo, defrag); + mtx_assert(&bqlock, MA_NOTOWNED); + } else if ((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) == GB_UNMAPPED) { + mtx_assert(&bqlock, MA_NOTOWNED); + + bfreekva(bp); + bp->b_flags |= B_UNMAPPED; + bp->b_kvabase = bp->b_data = unmapped_buf; + bp->b_kvasize = maxsize; + atomic_add_long(&bufspace, bp->b_kvasize); + atomic_add_long(&unmapped_bufspace, bp->b_kvasize); + atomic_add_int(&bufreusecnt, 1); } else { + mtx_assert(&bqlock, MA_NOTOWNED); + /* * We finally have a valid bp. We aren't quite out of the * woods, we still have to reserve kva space. In order @@ -2151,39 +2435,47 @@ restart: */ maxsize = (maxsize + BKVAMASK) & ~BKVAMASK; - if (maxsize != bp->b_kvasize) { - vm_offset_t addr = 0; - int rv; - - bfreekva(bp); - - vm_map_lock(buffer_map); - if (vm_map_findspace(buffer_map, - vm_map_min(buffer_map), maxsize, &addr)) { - /* - * Buffer map is too fragmented. - * We must defragment the map. - */ - atomic_add_int(&bufdefragcnt, 1); - vm_map_unlock(buffer_map); + if (maxsize != bp->b_kvasize || (bp->b_flags & (B_UNMAPPED | + B_KVAALLOC)) == B_UNMAPPED) { + if (allocbufkva(bp, maxsize, gbflags)) { defrag = 1; bp->b_flags |= B_INVAL; brelse(bp); goto restart; } - rv = vm_map_insert(buffer_map, NULL, 0, addr, - addr + maxsize, VM_PROT_ALL, VM_PROT_ALL, - MAP_NOFAULT); - KASSERT(rv == KERN_SUCCESS, - ("vm_map_insert(buffer_map) rv %d", rv)); - vm_map_unlock(buffer_map); - bp->b_kvabase = (caddr_t)addr; - bp->b_kvasize = maxsize; - atomic_add_long(&bufspace, bp->b_kvasize); + atomic_add_int(&bufreusecnt, 1); + } else if ((bp->b_flags & B_KVAALLOC) != 0 && + (gbflags & (GB_UNMAPPED | GB_KVAALLOC)) == 0) { + /* + * If the reused buffer has KVA allocated, + * reassign b_kvaalloc to b_kvabase. + */ + bp->b_kvabase = bp->b_kvaalloc; + bp->b_flags &= ~B_KVAALLOC; + atomic_subtract_long(&unmapped_bufspace, + bp->b_kvasize); + atomic_add_int(&bufreusecnt, 1); + } else if ((bp->b_flags & (B_UNMAPPED | B_KVAALLOC)) == 0 && + (gbflags & (GB_UNMAPPED | GB_KVAALLOC)) == (GB_UNMAPPED | + GB_KVAALLOC)) { + /* + * The case of reused buffer already have KVA + * mapped, but the request is for unmapped + * buffer with KVA allocated. + */ + bp->b_kvaalloc = bp->b_kvabase; + bp->b_data = bp->b_kvabase = unmapped_buf; + bp->b_flags |= B_UNMAPPED | B_KVAALLOC; + atomic_add_long(&unmapped_bufspace, + bp->b_kvasize); atomic_add_int(&bufreusecnt, 1); } - bp->b_saveaddr = bp->b_kvabase; - bp->b_data = bp->b_saveaddr; + if ((gbflags & GB_UNMAPPED) == 0) { + bp->b_saveaddr = bp->b_kvabase; + bp->b_data = bp->b_saveaddr; + bp->b_flags &= ~B_UNMAPPED; + BUF_CHECK_MAPPED(bp); + } } return (bp); } @@ -2594,6 +2886,90 @@ vfs_setdirty_locked_object(struct buf *bp) } /* + * Allocate the KVA mapping for an existing buffer. It handles the + * cases of both B_UNMAPPED buffer, and buffer with the preallocated + * KVA which is not mapped (B_KVAALLOC). 
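The reuse path above has to reconcile the KVA state a recycled buffer already carries (mapped, reserved but unmapped, or none) with what the caller requested. A sketch of just that decision; it omits the bufspace/unmapped_bufspace accounting and uses illustrative flag values rather than the kernel's.

#include <stdio.h>

#define	B_UNMAPPED	0x1
#define	B_KVAALLOC	0x2
#define	GB_UNMAPPED	0x1
#define	GB_KVAALLOC	0x2

static const char *
reuse_action(int bflags, int gbflags, int size_matches)
{
	if (!size_matches ||
	    (bflags & (B_UNMAPPED | B_KVAALLOC)) == B_UNMAPPED)
		return ("allocate fresh KVA (allocbufkva)");
	if ((bflags & B_KVAALLOC) != 0 &&
	    (gbflags & (GB_UNMAPPED | GB_KVAALLOC)) == 0)
		return ("promote reserved KVA: b_kvaalloc -> b_kvabase");
	if ((bflags & (B_UNMAPPED | B_KVAALLOC)) == 0 &&
	    (gbflags & (GB_UNMAPPED | GB_KVAALLOC)) ==
	    (GB_UNMAPPED | GB_KVAALLOC))
		return ("stash mapping: b_kvabase -> b_kvaalloc, go unmapped");
	return ("keep the existing KVA state");
}

int
main(void)
{
	printf("%s\n", reuse_action(B_UNMAPPED | B_KVAALLOC, 0, 1));
	printf("%s\n", reuse_action(0, GB_UNMAPPED | GB_KVAALLOC, 1));
	return (0);
}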
+ */ +static void +bp_unmapped_get_kva(struct buf *bp, daddr_t blkno, int size, int gbflags) +{ + struct buf *scratch_bp; + int bsize, maxsize, need_mapping, need_kva; + off_t offset; + + need_mapping = (bp->b_flags & B_UNMAPPED) != 0 && + (gbflags & GB_UNMAPPED) == 0; + need_kva = (bp->b_flags & (B_KVAALLOC | B_UNMAPPED)) == B_UNMAPPED && + (gbflags & GB_KVAALLOC) != 0; + if (!need_mapping && !need_kva) + return; + + BUF_CHECK_UNMAPPED(bp); + + if (need_mapping && (bp->b_flags & B_KVAALLOC) != 0) { + /* + * Buffer is not mapped, but the KVA was already + * reserved at the time of the instantiation. Use the + * allocated space. + */ + bp->b_flags &= ~B_KVAALLOC; + KASSERT(bp->b_kvaalloc != 0, ("kvaalloc == 0")); + bp->b_kvabase = bp->b_kvaalloc; + atomic_subtract_long(&unmapped_bufspace, bp->b_kvasize); + goto has_addr; + } + + /* + * Calculate the amount of the address space we would reserve + * if the buffer was mapped. + */ + bsize = vn_isdisk(bp->b_vp, NULL) ? DEV_BSIZE : bp->b_bufobj->bo_bsize; + offset = blkno * bsize; + maxsize = size + (offset & PAGE_MASK); + maxsize = imax(maxsize, bsize); + +mapping_loop: + if (allocbufkva(bp, maxsize, gbflags)) { + /* + * Request defragmentation. getnewbuf() returns us the + * allocated space by the scratch buffer KVA. + */ + scratch_bp = getnewbuf(bp->b_vp, 0, 0, size, maxsize, gbflags | + (GB_UNMAPPED | GB_KVAALLOC)); + if (scratch_bp == NULL) { + if ((gbflags & GB_NOWAIT_BD) != 0) { + /* + * XXXKIB: defragmentation cannot + * succeed, not sure what else to do. + */ + panic("GB_NOWAIT_BD and B_UNMAPPED %p", bp); + } + atomic_add_int(&mappingrestarts, 1); + goto mapping_loop; + } + KASSERT((scratch_bp->b_flags & B_KVAALLOC) != 0, + ("scratch bp !B_KVAALLOC %p", scratch_bp)); + setbufkva(bp, (vm_offset_t)scratch_bp->b_kvaalloc, + scratch_bp->b_kvasize, gbflags); + + /* Get rid of the scratch buffer. */ + scratch_bp->b_kvasize = 0; + scratch_bp->b_flags |= B_INVAL; + scratch_bp->b_flags &= ~(B_UNMAPPED | B_KVAALLOC); + brelse(scratch_bp); + } + if (!need_mapping) + return; + +has_addr: + bp->b_saveaddr = bp->b_kvabase; + bp->b_data = bp->b_saveaddr; /* b_offset is handled by bpmap_qenter */ + bp->b_flags &= ~B_UNMAPPED; + BUF_CHECK_MAPPED(bp); + bpmap_qenter(bp); +} + +/* * getblk: * * Get a block given a specified block and offset into a file/device. @@ -2635,12 +3011,17 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo, { struct buf *bp; struct bufobj *bo; - int error; + int bsize, error, maxsize, vmio; + off_t offset; CTR3(KTR_BUF, "getblk(%p, %ld, %d)", vp, (long)blkno, size); + KASSERT((flags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC, + ("GB_KVAALLOC only makes sense with GB_UNMAPPED")); ASSERT_VOP_LOCKED(vp, "getblk"); if (size > MAXBSIZE) panic("getblk: size(%d) > MAXBSIZE(%d)\n", size, MAXBSIZE); + if (!unmapped_buf_allowed) + flags &= ~(GB_UNMAPPED | GB_KVAALLOC); bo = &vp->v_bufobj; loop: @@ -2743,12 +3124,18 @@ loop: } /* + * Handle the case of unmapped buffer which should + * become mapped, or the buffer for which KVA + * reservation is requested. + */ + bp_unmapped_get_kva(bp, blkno, size, flags); + + /* * If the size is inconsistant in the VMIO case, we can resize * the buffer. This might lead to B_CACHE getting set or * cleared. If the size has not changed, B_CACHE remains * unchanged from its previous state. 
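bp_unmapped_get_kva() above recomputes the mapping size the buffer would have been given at instantiation and, if the buffer map is too fragmented, borrows KVA from a scratch buffer allocated with GB_UNMAPPED | GB_KVAALLOC. The size computation is sketched below with stand-in constants; the scratch-buffer dance itself needs the real allocator and is not reproduced.

#include <stdint.h>
#include <stdio.h>

#define	DEV_BSIZE	512
#define	PAGE_MASK	4095

static int
imax(int a, int b)
{
	return (a > b ? a : b);
}

static int
unmapped_kva_size(int64_t blkno, int size, int bsize)
{
	int64_t offset;
	int maxsize;

	/* Same arithmetic getblk() uses when it creates the buffer. */
	offset = blkno * bsize;
	maxsize = size + (int)(offset & PAGE_MASK);
	return (imax(maxsize, bsize));
}

int
main(void)
{
	/* e.g. a 16 KB buffer whose device offset is not page aligned */
	printf("%d\n", unmapped_kva_size(3, 16384, DEV_BSIZE));
	return (0);
}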
*/ - if (bp->b_bcount != size) allocbuf(bp, size); @@ -2789,9 +3176,6 @@ loop: } bp->b_flags &= ~B_DONE; } else { - int bsize, maxsize, vmio; - off_t offset; - /* * Buffer is not in-core, create new buffer. The buffer * returned by getnewbuf() is locked. Note that the returned @@ -2807,7 +3191,13 @@ loop: bsize = vn_isdisk(vp, NULL) ? DEV_BSIZE : bo->bo_bsize; offset = blkno * bsize; vmio = vp->v_object != NULL; - maxsize = vmio ? size + (offset & PAGE_MASK) : size; + if (vmio) { + maxsize = size + (offset & PAGE_MASK); + } else { + maxsize = size; + /* Do not allow non-VMIO notmapped buffers. */ + flags &= ~GB_UNMAPPED; + } maxsize = imax(maxsize, bsize); bp = getnewbuf(vp, slpflag, slptimeo, size, maxsize, flags); @@ -2863,6 +3253,7 @@ loop: KASSERT(bp->b_bufobj->bo_object == NULL, ("ARGH! has b_bufobj->bo_object %p %p\n", bp, bp->b_bufobj->bo_object)); + BUF_CHECK_MAPPED(bp); } allocbuf(bp, size); @@ -3038,10 +3429,14 @@ allocbuf(struct buf *bp, int size) if (desiredpages < bp->b_npages) { vm_page_t m; - pmap_qremove((vm_offset_t)trunc_page( - (vm_offset_t)bp->b_data) + - (desiredpages << PAGE_SHIFT), - (bp->b_npages - desiredpages)); + if ((bp->b_flags & B_UNMAPPED) == 0) { + BUF_CHECK_MAPPED(bp); + pmap_qremove((vm_offset_t)trunc_page( + (vm_offset_t)bp->b_data) + + (desiredpages << PAGE_SHIFT), + (bp->b_npages - desiredpages)); + } else + BUF_CHECK_UNMAPPED(bp); VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); for (i = desiredpages; i < bp->b_npages; i++) { /* @@ -3147,21 +3542,12 @@ allocbuf(struct buf *bp, int size) VM_OBJECT_WUNLOCK(obj); /* - * Step 3, fixup the KVM pmap. Remember that - * bp->b_data is relative to bp->b_offset, but - * bp->b_offset may be offset into the first page. + * Step 3, fixup the KVM pmap. */ - - bp->b_data = (caddr_t) - trunc_page((vm_offset_t)bp->b_data); - pmap_qenter( - (vm_offset_t)bp->b_data, - bp->b_pages, - bp->b_npages - ); - - bp->b_data = (caddr_t)((vm_offset_t)bp->b_data | - (vm_offset_t)(bp->b_offset & PAGE_MASK)); + if ((bp->b_flags & B_UNMAPPED) == 0) + bpmap_qenter(bp); + else + BUF_CHECK_UNMAPPED(bp); } } if (newbsize < bp->b_bufsize) @@ -3171,21 +3557,38 @@ allocbuf(struct buf *bp, int size) return 1; } +extern int inflight_transient_maps; + void biodone(struct bio *bp) { struct mtx *mtxp; void (*done)(struct bio *); + vm_offset_t start, end; + int transient; mtxp = mtx_pool_find(mtxpool_sleep, bp); mtx_lock(mtxp); bp->bio_flags |= BIO_DONE; + if ((bp->bio_flags & BIO_TRANSIENT_MAPPING) != 0) { + start = trunc_page((vm_offset_t)bp->bio_data); + end = round_page((vm_offset_t)bp->bio_data + bp->bio_length); + transient = 1; + } else { + transient = 0; + start = end = 0; + } done = bp->bio_done; if (done == NULL) wakeup(bp); mtx_unlock(mtxp); if (done != NULL) done(bp); + if (transient) { + pmap_qremove(start, OFF_TO_IDX(end - start)); + vm_map_remove(bio_transient_map, start, end); + atomic_add_int(&inflight_transient_maps, -1); + } } /* @@ -3288,7 +3691,7 @@ dev_strategy(struct cdev *dev, struct buf *bp) bip->bio_offset = bp->b_iooffset; bip->bio_length = bp->b_bcount; bip->bio_bcount = bp->b_bcount; /* XXX: remove */ - bip->bio_data = bp->b_data; + bdata2bio(bp, bip); bip->bio_done = bufdonebio; bip->bio_caller2 = bp; bip->bio_dev = dev; @@ -3442,9 +3845,11 @@ bufdone_finish(struct buf *bp) } vm_object_pip_wakeupn(obj, 0); VM_OBJECT_WUNLOCK(obj); - if (bogus) + if (bogus && (bp->b_flags & B_UNMAPPED) == 0) { + BUF_CHECK_MAPPED(bp); pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); + } } /* @@ -3487,8 +3892,12 @@ 
vfs_unbusy_pages(struct buf *bp) if (!m) panic("vfs_unbusy_pages: page missing\n"); bp->b_pages[i] = m; - pmap_qenter(trunc_page((vm_offset_t)bp->b_data), - bp->b_pages, bp->b_npages); + if ((bp->b_flags & B_UNMAPPED) == 0) { + BUF_CHECK_MAPPED(bp); + pmap_qenter(trunc_page((vm_offset_t)bp->b_data), + bp->b_pages, bp->b_npages); + } else + BUF_CHECK_UNMAPPED(bp); } vm_object_pip_subtract(obj, 1); vm_page_io_finish(m); @@ -3653,9 +4062,11 @@ vfs_busy_pages(struct buf *bp, int clear_modify) foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; } VM_OBJECT_WUNLOCK(obj); - if (bogus) + if (bogus && (bp->b_flags & B_UNMAPPED) == 0) { + BUF_CHECK_MAPPED(bp); pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); + } } /* @@ -3777,6 +4188,8 @@ vm_hold_load_pages(struct buf *bp, vm_offset_t from, vm_offset_t to) vm_page_t p; int index; + BUF_CHECK_MAPPED(bp); + to = round_page(to); from = round_page(from); index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT; @@ -3808,6 +4221,8 @@ vm_hold_free_pages(struct buf *bp, int newbsize) vm_page_t p; int index, newnpages; + BUF_CHECK_MAPPED(bp); + from = round_page((vm_offset_t)bp->b_data + newbsize); newnpages = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT; if (bp->b_npages > newnpages) @@ -4009,6 +4424,30 @@ bunpin_wait(struct buf *bp) mtx_unlock(mtxp); } +/* + * Set bio_data or bio_ma for struct bio from the struct buf. + */ +void +bdata2bio(struct buf *bp, struct bio *bip) +{ + + if ((bp->b_flags & B_UNMAPPED) != 0) { + KASSERT(unmapped_buf_allowed, ("unmapped")); + bip->bio_ma = bp->b_pages; + bip->bio_ma_n = bp->b_npages; + bip->bio_data = unmapped_buf; + bip->bio_ma_offset = (vm_offset_t)bp->b_offset & PAGE_MASK; + bip->bio_flags |= BIO_UNMAPPED; + KASSERT(round_page(bip->bio_ma_offset + bip->bio_length) / + PAGE_SIZE == bp->b_npages, + ("Buffer %p too short: %d %d %d", bp, bip->bio_ma_offset, + bip->bio_length, bip->bio_ma_n)); + } else { + bip->bio_data = bp->b_data; + bip->bio_ma = NULL; + } +} + #include "opt_ddb.h" #ifdef DDB #include <ddb/ddb.h> |
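The new bdata2bio() helper at the end of the diff is what drivers ultimately see: an unmapped buffer exports its vm_page array through bio_ma/bio_ma_offset and sets BIO_UNMAPPED, while a mapped buffer keeps using bio_data; biodone() then tears down any transient mapping a driver without unmapped support had to create. A self-contained sketch of that decision with simplified stand-in structures (the real code points bio_data at unmapped_buf rather than NULL):

#include <stddef.h>
#include <stdio.h>

#define	B_UNMAPPED	0x1
#define	BIO_UNMAPPED	0x1
#define	PAGE_MASK	4095

struct vm_page;				/* opaque in this sketch */

struct buf {
	int		 b_flags;
	char		*b_data;
	struct vm_page **b_pages;
	int		 b_npages;
	long long	 b_offset;
};

struct bio {
	int		 bio_flags;
	char		*bio_data;
	struct vm_page **bio_ma;
	int		 bio_ma_n;
	int		 bio_ma_offset;
};

static void
bdata2bio_sketch(struct buf *bp, struct bio *bip)
{
	if ((bp->b_flags & B_UNMAPPED) != 0) {
		/* Unmapped: the driver gets pages, not a kernel address. */
		bip->bio_ma = bp->b_pages;
		bip->bio_ma_n = bp->b_npages;
		bip->bio_data = NULL;
		bip->bio_ma_offset = (int)(bp->b_offset & PAGE_MASK);
		bip->bio_flags |= BIO_UNMAPPED;
	} else {
		bip->bio_data = bp->b_data;
		bip->bio_ma = NULL;
	}
}

int
main(void)
{
	struct buf bp = { B_UNMAPPED, NULL, NULL, 4, 0x1200 };
	struct bio bio = { 0, NULL, NULL, 0, 0 };

	bdata2bio_sketch(&bp, &bio);
	printf("unmapped=%d ma_n=%d ma_offset=%#x\n",
	    (bio.bio_flags & BIO_UNMAPPED) != 0, bio.bio_ma_n,
	    bio.bio_ma_offset);
	return (0);
}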