summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authormckusick <mckusick@FreeBSD.org>2002-07-19 07:29:39 +0000
committermckusick <mckusick@FreeBSD.org>2002-07-19 07:29:39 +0000
commitb44cb5787c401c1aaf5bbd0e42c211579b8efc31 (patch)
treeab6d723eb551696589894f8c55d99db82233b8c0 /sys
parent083a6fe2b0dce41b375be7f611b37e64ec218129 (diff)
downloadFreeBSD-src-b44cb5787c401c1aaf5bbd0e42c211579b8efc31.zip
FreeBSD-src-b44cb5787c401c1aaf5bbd0e42c211579b8efc31.tar.gz
Add support to UFS2 to provide storage for extended attributes.
As this code is not actually used by any of the existing interfaces, it seems unlikely to break anything (famous last words). The internal kernel interface to manipulate these attributes is invoked using two new IO_ flags: IO_NORMAL and IO_EXT. These flags may be specified in the ioflags word of VOP_READ, VOP_WRITE, and VOP_TRUNCATE. Specifying IO_NORMAL means that you want to do I/O to the normal data part of the file and IO_EXT means that you want to do I/O to the extended attributes part of the file. IO_NORMAL and IO_EXT are mutually exclusive for VOP_READ and VOP_WRITE, but may be specified individually or together in the case of VOP_TRUNCATE. For example, when removing a file, VOP_TRUNCATE is called with both IO_NORMAL and IO_EXT set. For backward compatibility, if neither IO_NORMAL nor IO_EXT is set, then IO_NORMAL is assumed. Note that the BA_ and IO_ flags have been "merged" so that they may both be used in the same flags word. This merger is possible by assigning the IO_ flags to the low sixteen bits and the BA_ flags to the high sixteen bits. This works because the high sixteen bits of the IO_ word are reserved for read-ahead and to help with write clustering, so they will never be used for flags. This merge lets us get away from code of the form: if (ioflags & IO_SYNC) flags |= BA_SYNC; For the future, I have considered adding a new field to the vattr structure, va_extsize. This addition could then be exported through the stat structure to allow applications to find out the size of the extended attribute storage and also would provide a more standard interface for truncating them (via VOP_SETATTR rather than VOP_TRUNCATE). I am also contemplating adding a pathconf parameter (for concreteness, let's call it _PC_MAX_EXTSIZE) which would let an application determine the maximum size of the extended attribute storage. Sponsored by: DARPA & NAI Labs.
Diffstat (limited to 'sys')
-rw-r--r--sys/kern/vfs_bio.c4
-rw-r--r--sys/kern/vfs_subr.c136
-rw-r--r--sys/sys/buf.h1
-rw-r--r--sys/sys/vnode.h24
-rw-r--r--sys/ufs/ffs/ffs_alloc.c7
-rw-r--r--sys/ufs/ffs/ffs_balloc.c190
-rw-r--r--sys/ufs/ffs/ffs_extern.h8
-rw-r--r--sys/ufs/ffs/ffs_inode.c102
-rw-r--r--sys/ufs/ffs/ffs_softdep.c521
-rw-r--r--sys/ufs/ffs/ffs_softdep_stub.c14
-rw-r--r--sys/ufs/ffs/softdep.h13
-rw-r--r--sys/ufs/ufs/ufs_bmap.c19
-rw-r--r--sys/ufs/ufs/ufs_extern.h18
-rw-r--r--sys/ufs/ufs/ufs_inode.c3
-rw-r--r--sys/ufs/ufs/ufs_lookup.c5
-rw-r--r--sys/ufs/ufs/ufs_readwrite.c341
-rw-r--r--sys/ufs/ufs/ufs_vnops.c13
17 files changed, 1145 insertions, 274 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 58a5adb..9c19863 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1311,7 +1311,7 @@ brelse(struct buf * bp)
/* buffers with no memory */
if (bp->b_bufsize == 0) {
bp->b_flags |= B_INVAL;
- bp->b_xflags &= ~BX_BKGRDWRITE;
+ bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA);
if (bp->b_xflags & BX_BKGRDINPROG)
panic("losing buffer 1");
if (bp->b_kvasize) {
@@ -1329,7 +1329,7 @@ brelse(struct buf * bp)
} else if (bp->b_flags & (B_INVAL | B_NOCACHE | B_RELBUF) ||
(bp->b_ioflags & BIO_ERROR)) {
bp->b_flags |= B_INVAL;
- bp->b_xflags &= ~BX_BKGRDWRITE;
+ bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA);
if (bp->b_xflags & BX_BKGRDINPROG)
panic("losing buffer 2");
bp->b_qindex = QUEUE_CLEAN;
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 9fd2b0f..e620d4a 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -76,6 +76,8 @@ static void addalias(struct vnode *vp, dev_t nvp_rdev);
static void insmntque(struct vnode *vp, struct mount *mp);
static void vclean(struct vnode *vp, int flags, struct thread *td);
static void vlruvp(struct vnode *vp);
+static int flushbuflist(struct buf *blist, int flags, struct vnode *vp,
+ int slpflag, int slptimeo, int *errorp);
/*
* Number of vnodes in existence. Increased whenever getnewvnode()
@@ -898,14 +900,13 @@ vwakeup(bp)
*/
int
vinvalbuf(vp, flags, cred, td, slpflag, slptimeo)
- register struct vnode *vp;
+ struct vnode *vp;
int flags;
struct ucred *cred;
struct thread *td;
int slpflag, slptimeo;
{
- register struct buf *bp;
- struct buf *nbp, *blist;
+ struct buf *blist;
int s, error;
vm_object_t object;
@@ -934,55 +935,24 @@ vinvalbuf(vp, flags, cred, td, slpflag, slptimeo)
splx(s);
}
s = splbio();
- for (;;) {
- blist = TAILQ_FIRST(&vp->v_cleanblkhd);
- if (!blist)
- blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
- if (!blist)
- break;
-
- for (bp = blist; bp; bp = nbp) {
- nbp = TAILQ_NEXT(bp, b_vnbufs);
- if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
- error = BUF_TIMELOCK(bp,
- LK_EXCLUSIVE | LK_SLEEPFAIL,
- "vinvalbuf", slpflag, slptimeo);
- if (error == ENOLCK)
- break;
- splx(s);
- return (error);
- }
- /*
- * XXX Since there are no node locks for NFS, I
- * believe there is a slight chance that a delayed
- * write will occur while sleeping just above, so
- * check for it. Note that vfs_bio_awrite expects
- * buffers to reside on a queue, while BUF_WRITE and
- * brelse do not.
- */
- if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
- (flags & V_SAVE)) {
-
- if (bp->b_vp == vp) {
- if (bp->b_flags & B_CLUSTEROK) {
- BUF_UNLOCK(bp);
- vfs_bio_awrite(bp);
- } else {
- bremfree(bp);
- bp->b_flags |= B_ASYNC;
- BUF_WRITE(bp);
- }
- } else {
- bremfree(bp);
- (void) BUF_WRITE(bp);
- }
+ for (error = 0;;) {
+ if ((blist = TAILQ_FIRST(&vp->v_cleanblkhd)) != 0 &&
+ flushbuflist(blist, flags, vp, slpflag, slptimeo, &error)) {
+ if (error)
break;
- }
- bremfree(bp);
- bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF);
- bp->b_flags &= ~B_ASYNC;
- brelse(bp);
+ continue;
+ }
+ if ((blist = TAILQ_FIRST(&vp->v_dirtyblkhd)) != 0 &&
+ flushbuflist(blist, flags, vp, slpflag, slptimeo, &error)) {
+ if (error)
+ break;
+ continue;
}
+ break;
+ }
+ if (error) {
+ splx(s);
+ return (error);
}
/*
@@ -1013,12 +983,76 @@ vinvalbuf(vp, flags, cred, td, slpflag, slptimeo)
}
mtx_unlock(&vp->v_interlock);
- if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
+ if ((flags & (V_ALT | V_NORMAL)) == 0 &&
+ (!TAILQ_EMPTY(&vp->v_dirtyblkhd) ||
+ !TAILQ_EMPTY(&vp->v_cleanblkhd)))
panic("vinvalbuf: flush failed");
return (0);
}
/*
+ * Flush out buffers on the specified list.
+ */
+static int
+flushbuflist(blist, flags, vp, slpflag, slptimeo, errorp)
+ struct buf *blist;
+ int flags;
+ struct vnode *vp;
+ int slpflag, slptimeo;
+ int *errorp;
+{
+ struct buf *bp, *nbp;
+ int found, error;
+
+ for (found = 0, bp = blist; bp; bp = nbp) {
+ nbp = TAILQ_NEXT(bp, b_vnbufs);
+ if (((flags & V_NORMAL) && (bp->b_xflags & BX_ALTDATA)) ||
+ ((flags & V_ALT) && (bp->b_xflags & BX_ALTDATA) == 0))
+ continue;
+ found += 1;
+ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
+ error = BUF_TIMELOCK(bp,
+ LK_EXCLUSIVE | LK_SLEEPFAIL,
+ "flushbuf", slpflag, slptimeo);
+ if (error != ENOLCK)
+ *errorp = error;
+ return (found);
+ }
+ /*
+ * XXX Since there are no node locks for NFS, I
+ * believe there is a slight chance that a delayed
+ * write will occur while sleeping just above, so
+ * check for it. Note that vfs_bio_awrite expects
+ * buffers to reside on a queue, while BUF_WRITE and
+ * brelse do not.
+ */
+ if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
+ (flags & V_SAVE)) {
+
+ if (bp->b_vp == vp) {
+ if (bp->b_flags & B_CLUSTEROK) {
+ BUF_UNLOCK(bp);
+ vfs_bio_awrite(bp);
+ } else {
+ bremfree(bp);
+ bp->b_flags |= B_ASYNC;
+ BUF_WRITE(bp);
+ }
+ } else {
+ bremfree(bp);
+ (void) BUF_WRITE(bp);
+ }
+ return (found);
+ }
+ bremfree(bp);
+ bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF);
+ bp->b_flags &= ~B_ASYNC;
+ brelse(bp);
+ }
+ return (found);
+}
+
+/*
* Truncate a file's buffer and pages to a specified length. This
* is in lieu of the old vinvalbuf mechanism, which performed unneeded
* sync activity.
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 91a803c..04f4ebe 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -257,6 +257,7 @@ struct buf {
#define BX_BKGRDINPROG 0x00000008 /* Background write in progress */
#define BX_BKGRDWAIT 0x00000010 /* Background write waiting */
#define BX_BKGRDMARKER 0x00000020 /* Mark buffer for splay tree */
+#define BX_ALTDATA 0x00000040 /* Holds extended data */
#define NOOFFSET (-1LL) /* No buffer offset calculated yet */
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 0c5523a..8339112 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -225,16 +225,18 @@ struct vattr {
* Flags for ioflag. (high 16 bits used to ask for read-ahead and
* help with write clustering)
*/
-#define IO_UNIT 0x01 /* do I/O as atomic unit */
-#define IO_APPEND 0x02 /* append write to end */
-#define IO_SYNC 0x04 /* do I/O synchronously */
-#define IO_NODELOCKED 0x08 /* underlying node already locked */
-#define IO_NDELAY 0x10 /* FNDELAY flag set in file table */
-#define IO_VMIO 0x20 /* data already in VMIO space */
-#define IO_INVAL 0x40 /* invalidate after I/O */
-#define IO_ASYNC 0x80 /* bawrite rather then bdwrite */
-#define IO_DIRECT 0x100 /* attempt to bypass buffer cache */
-#define IO_NOWDRAIN 0x200 /* do not block on wdrain */
+#define IO_UNIT 0x0001 /* do I/O as atomic unit */
+#define IO_APPEND 0x0002 /* append write to end */
+#define IO_SYNC 0x0004 /* do I/O synchronously */
+#define IO_NODELOCKED 0x0008 /* underlying node already locked */
+#define IO_NDELAY 0x0010 /* FNDELAY flag set in file table */
+#define IO_VMIO 0x0020 /* data already in VMIO space */
+#define IO_INVAL 0x0040 /* invalidate after I/O */
+#define IO_ASYNC 0x0080 /* bawrite rather then bdwrite */
+#define IO_DIRECT 0x0100 /* attempt to bypass buffer cache */
+#define IO_NOWDRAIN 0x0200 /* do not block on wdrain */
+#define IO_EXT 0x0400 /* operate on external attributes */
+#define IO_NORMAL 0x0800 /* operate on regular data */
/*
* Modes. Some values same as Ixxx entries from inode.h for now.
@@ -281,6 +283,8 @@ extern int vttoif_tab[];
#define WRITECLOSE 0x0004 /* vflush: only close writable files */
#define DOCLOSE 0x0008 /* vclean: close active files */
#define V_SAVE 0x0001 /* vinvalbuf: sync file first */
+#define V_ALT 0x0002 /* vinvalbuf: invalidate only alternate bufs */
+#define V_NORMAL 0x0004 /* vinvalbuf: invalidate only regular bufs */
#define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */
#define V_WAIT 0x0001 /* vn_start_write: sleep for suspend */
#define V_NOWAIT 0x0002 /* vn_start_write: don't sleep for suspend */
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 710d6d1..1360ec8 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -188,9 +188,10 @@ nospace:
* invoked to get an appropriate block.
*/
int
-ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
+ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, cred, bpp)
struct inode *ip;
ufs2_daddr_t lbprev;
+ ufs2_daddr_t bprev;
ufs2_daddr_t bpref;
int osize, nsize;
struct ucred *cred;
@@ -200,7 +201,7 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
struct fs *fs;
struct buf *bp;
int cg, request, error, reclaimed;
- ufs2_daddr_t bprev, bno;
+ ufs2_daddr_t bno;
*bpp = 0;
vp = ITOV(ip);
@@ -224,7 +225,7 @@ retry:
if (suser_cred(cred, PRISON_ROOT) &&
freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0)
goto nospace;
- if ((bprev = DIP(ip, i_db[lbprev])) == 0) {
+ if (bprev == 0) {
printf("dev = %s, bsize = %ld, bprev = %jd, fs = %s\n",
devtoname(ip->i_dev), (long)fs->fs_bsize, (intmax_t)bprev,
fs->fs_fsmnt);
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index 9b1c383..d9e8a08 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -73,6 +73,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
struct ucred *cred, int flags, struct buf **bpp)
{
struct inode *ip;
+ struct ufs1_dinode *dp;
ufs_lbn_t lbn, lastlbn;
struct fs *fs;
ufs1_daddr_t nb;
@@ -86,12 +87,15 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
struct thread *td = curthread; /* XXX */
ip = VTOI(vp);
+ dp = ip->i_din1;
fs = ip->i_fs;
lbn = lblkno(fs, startoffset);
size = blkoff(fs, startoffset) + size;
if (size > fs->fs_bsize)
panic("ffs_balloc_ufs1: blk too big");
*bpp = NULL;
+ if (flags & IO_EXT)
+ return (EOPNOTSUPP);
if (lbn < 0)
return (EFBIG);
@@ -105,22 +109,20 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
nb = lastlbn;
osize = blksize(fs, ip, nb);
if (osize < fs->fs_bsize && osize > 0) {
- error = ffs_realloccg(ip, nb,
- ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
- &ip->i_din1->di_db[0]),
- osize, (int)fs->fs_bsize, cred, &bp);
+ error = ffs_realloccg(ip, nb, dp->di_db[nb],
+ ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
+ &dp->di_db[0]), osize, (int)fs->fs_bsize, cred, &bp);
if (error)
return (error);
if (DOINGSOFTDEP(vp))
softdep_setup_allocdirect(ip, nb,
- dbtofsb(fs, bp->b_blkno),
- ip->i_din1->di_db[nb],
+ dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
fs->fs_bsize, osize, bp);
ip->i_size = smalllblktosize(fs, nb + 1);
- ip->i_din1->di_size = ip->i_size;
- ip->i_din1->di_db[nb] = dbtofsb(fs, bp->b_blkno);
+ dp->di_size = ip->i_size;
+ dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
- if (flags & BA_SYNC)
+ if (flags & IO_SYNC)
bwrite(bp);
else
bawrite(bp);
@@ -132,7 +134,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
if (lbn < NDADDR) {
if (flags & BA_METAONLY)
panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
- nb = ip->i_din1->di_db[lbn];
+ nb = dp->di_db[lbn];
if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
if (error) {
@@ -157,10 +159,9 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
}
bp->b_blkno = fsbtodb(fs, nb);
} else {
- error = ffs_realloccg(ip, lbn,
+ error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
ffs_blkpref_ufs1(ip, lbn, (int)lbn,
- &ip->i_din1->di_db[0]),
- osize, nsize, cred, &bp);
+ &dp->di_db[0]), osize, nsize, cred, &bp);
if (error)
return (error);
if (DOINGSOFTDEP(vp))
@@ -174,8 +175,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
else
nsize = fs->fs_bsize;
error = ffs_alloc(ip, lbn,
- ffs_blkpref_ufs1(ip, lbn, (int)lbn,
- &ip->i_din1->di_db[0]),
+ ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
nsize, cred, &newb);
if (error)
return (error);
@@ -187,7 +187,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
softdep_setup_allocdirect(ip, lbn, newb, 0,
nsize, 0, bp);
}
- ip->i_din1->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
+ dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
*bpp = bp;
return (0);
@@ -206,7 +206,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
* Fetch the first indirect block allocating if necessary.
*/
--num;
- nb = ip->i_din1->di_ib[indirs[0].in_off];
+ nb = dp->di_ib[indirs[0].in_off];
allocib = NULL;
allocblk = allociblk;
if (nb == 0) {
@@ -233,7 +233,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
else if ((error = bwrite(bp)) != 0)
goto fail;
}
- allocib = &ip->i_din1->di_ib[indirs[0].in_off];
+ allocib = &dp->di_ib[indirs[0].in_off];
*allocib = nb;
ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
@@ -289,7 +289,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
* If required, write synchronously, otherwise use
* delayed write.
*/
- if (flags & BA_SYNC) {
+ if (flags & IO_SYNC) {
bwrite(bp);
} else {
if (bp->b_bufsize == fs->fs_bsize)
@@ -329,7 +329,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
* If required, write synchronously, otherwise use
* delayed write.
*/
- if (flags & BA_SYNC) {
+ if (flags & IO_SYNC) {
bwrite(bp);
} else {
if (bp->b_bufsize == fs->fs_bsize)
@@ -382,7 +382,7 @@ fail:
} else {
bap = (ufs1_daddr_t *)bp->b_data;
bap[indirs[unwindidx].in_off] = 0;
- if (flags & BA_SYNC) {
+ if (flags & IO_SYNC) {
bwrite(bp);
} else {
if (bp->b_bufsize == fs->fs_bsize)
@@ -398,7 +398,7 @@ fail:
*/
(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
- ip->i_din1->di_blocks -= btodb(deallocated);
+ dp->di_blocks -= btodb(deallocated);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
@@ -417,6 +417,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
struct ucred *cred, int flags, struct buf **bpp)
{
struct inode *ip;
+ struct ufs2_dinode *dp;
ufs_lbn_t lbn, lastlbn;
struct fs *fs;
struct buf *bp, *nbp;
@@ -428,6 +429,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
struct thread *td = curthread; /* XXX */
ip = VTOI(vp);
+ dp = ip->i_din2;
fs = ip->i_fs;
lbn = lblkno(fs, startoffset);
size = blkoff(fs, startoffset) + size;
@@ -438,6 +440,112 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
return (EFBIG);
/*
+ * Check for allocating external data.
+ */
+ if (flags & IO_EXT) {
+ if (lbn >= NXADDR)
+ return (EFBIG);
+ /*
+ * If the next write will extend the data into a new block,
+ * and the data is currently composed of a fragment
+ * this fragment has to be extended to be a full block.
+ */
+ lastlbn = lblkno(fs, dp->di_extsize);
+ if (lastlbn < lbn) {
+ nb = lastlbn;
+ osize = sblksize(fs, dp->di_extsize, nb);
+ if (osize < fs->fs_bsize && osize > 0) {
+ error = ffs_realloccg(ip, -1 - nb,
+ dp->di_extb[nb],
+ ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
+ &dp->di_extb[0]), osize,
+ (int)fs->fs_bsize, cred, &bp);
+ if (error)
+ return (error);
+ if (DOINGSOFTDEP(vp))
+ softdep_setup_allocext(ip, nb,
+ dbtofsb(fs, bp->b_blkno),
+ dp->di_extb[nb],
+ fs->fs_bsize, osize, bp);
+ dp->di_extsize = smalllblktosize(fs, nb + 1);
+ dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
+ bp->b_xflags |= BX_ALTDATA;
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ if (flags & IO_SYNC)
+ bwrite(bp);
+ else
+ bawrite(bp);
+ }
+ }
+ /*
+ * All blocks are direct blocks
+ */
+ if (flags & BA_METAONLY)
+ panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
+ nb = dp->di_extb[lbn];
+ if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
+ error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ bp->b_blkno = fsbtodb(fs, nb);
+ bp->b_xflags |= BX_ALTDATA;
+ *bpp = bp;
+ return (0);
+ }
+ if (nb != 0) {
+ /*
+ * Consider need to reallocate a fragment.
+ */
+ osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
+ nsize = fragroundup(fs, size);
+ if (nsize <= osize) {
+ error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ bp->b_blkno = fsbtodb(fs, nb);
+ bp->b_xflags |= BX_ALTDATA;
+ } else {
+ error = ffs_realloccg(ip, -1 - lbn,
+ dp->di_extb[lbn],
+ ffs_blkpref_ufs2(ip, lbn, (int)lbn,
+ &dp->di_extb[0]), osize, nsize, cred, &bp);
+ if (error)
+ return (error);
+ bp->b_xflags |= BX_ALTDATA;
+ if (DOINGSOFTDEP(vp))
+ softdep_setup_allocext(ip, lbn,
+ dbtofsb(fs, bp->b_blkno), nb,
+ nsize, osize, bp);
+ }
+ } else {
+ if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
+ nsize = fragroundup(fs, size);
+ else
+ nsize = fs->fs_bsize;
+ error = ffs_alloc(ip, lbn,
+ ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
+ nsize, cred, &newb);
+ if (error)
+ return (error);
+ bp = getblk(vp, -1 - lbn, nsize, 0, 0);
+ bp->b_blkno = fsbtodb(fs, newb);
+ bp->b_xflags |= BX_ALTDATA;
+ if (flags & BA_CLRBUF)
+ vfs_bio_clrbuf(bp);
+ if (DOINGSOFTDEP(vp))
+ softdep_setup_allocext(ip, lbn, newb, 0,
+ nsize, 0, bp);
+ }
+ dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ *bpp = bp;
+ return (0);
+ }
+ /*
* If the next write will extend the file into a new block,
* and the file is currently composed of a fragment
* this fragment has to be extended to be a full block.
@@ -447,22 +555,22 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
nb = lastlbn;
osize = blksize(fs, ip, nb);
if (osize < fs->fs_bsize && osize > 0) {
- error = ffs_realloccg(ip, nb,
+ error = ffs_realloccg(ip, nb, dp->di_db[nb],
ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
- &ip->i_din2->di_db[0]),
- osize, (int)fs->fs_bsize, cred, &bp);
+ &dp->di_db[0]), osize, (int)fs->fs_bsize,
+ cred, &bp);
if (error)
return (error);
if (DOINGSOFTDEP(vp))
softdep_setup_allocdirect(ip, nb,
dbtofsb(fs, bp->b_blkno),
- ip->i_din2->di_db[nb],
+ dp->di_db[nb],
fs->fs_bsize, osize, bp);
ip->i_size = smalllblktosize(fs, nb + 1);
- ip->i_din2->di_size = ip->i_size;
- ip->i_din2->di_db[nb] = dbtofsb(fs, bp->b_blkno);
+ dp->di_size = ip->i_size;
+ dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
- if (flags & BA_SYNC)
+ if (flags & IO_SYNC)
bwrite(bp);
else
bawrite(bp);
@@ -474,7 +582,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
if (lbn < NDADDR) {
if (flags & BA_METAONLY)
panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
- nb = ip->i_din2->di_db[lbn];
+ nb = dp->di_db[lbn];
if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
if (error) {
@@ -499,10 +607,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
}
bp->b_blkno = fsbtodb(fs, nb);
} else {
- error = ffs_realloccg(ip, lbn,
+ error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
ffs_blkpref_ufs2(ip, lbn, (int)lbn,
- &ip->i_din2->di_db[0]),
- osize, nsize, cred, &bp);
+ &dp->di_db[0]), osize, nsize, cred, &bp);
if (error)
return (error);
if (DOINGSOFTDEP(vp))
@@ -517,8 +624,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
nsize = fs->fs_bsize;
error = ffs_alloc(ip, lbn,
ffs_blkpref_ufs2(ip, lbn, (int)lbn,
- &ip->i_din2->di_db[0]),
- nsize, cred, &newb);
+ &dp->di_db[0]), nsize, cred, &newb);
if (error)
return (error);
bp = getblk(vp, lbn, nsize, 0, 0);
@@ -529,7 +635,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
softdep_setup_allocdirect(ip, lbn, newb, 0,
nsize, 0, bp);
}
- ip->i_din2->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
+ dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
*bpp = bp;
return (0);
@@ -548,7 +654,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
* Fetch the first indirect block allocating if necessary.
*/
--num;
- nb = ip->i_din2->di_ib[indirs[0].in_off];
+ nb = dp->di_ib[indirs[0].in_off];
allocib = NULL;
allocblk = allociblk;
if (nb == 0) {
@@ -575,7 +681,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
else if ((error = bwrite(bp)) != 0)
goto fail;
}
- allocib = &ip->i_din2->di_ib[indirs[0].in_off];
+ allocib = &dp->di_ib[indirs[0].in_off];
*allocib = nb;
ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
@@ -631,7 +737,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
* If required, write synchronously, otherwise use
* delayed write.
*/
- if (flags & BA_SYNC) {
+ if (flags & IO_SYNC) {
bwrite(bp);
} else {
if (bp->b_bufsize == fs->fs_bsize)
@@ -671,7 +777,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
* If required, write synchronously, otherwise use
* delayed write.
*/
- if (flags & BA_SYNC) {
+ if (flags & IO_SYNC) {
bwrite(bp);
} else {
if (bp->b_bufsize == fs->fs_bsize)
@@ -724,7 +830,7 @@ fail:
} else {
bap = (ufs2_daddr_t *)bp->b_data;
bap[indirs[unwindidx].in_off] = 0;
- if (flags & BA_SYNC) {
+ if (flags & IO_SYNC) {
bwrite(bp);
} else {
if (bp->b_bufsize == fs->fs_bsize)
@@ -740,7 +846,7 @@ fail:
*/
(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
- ip->i_din2->di_blocks -= btodb(deallocated);
+ dp->di_blocks -= btodb(deallocated);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index ae59ca3..c2972c8 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -79,8 +79,8 @@ int ffs_mountroot(void);
int ffs_mount(struct mount *, char *, caddr_t, struct nameidata *,
struct thread *);
int ffs_reallocblks(struct vop_reallocblks_args *);
-int ffs_realloccg(struct inode *,
- ufs2_daddr_t, ufs2_daddr_t, int, int, struct ucred *, struct buf **);
+int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
+ ufs2_daddr_t, int, int, struct ucred *, struct buf **);
void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t);
int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t);
void ffs_snapremove(struct vnode *vp);
@@ -115,11 +115,13 @@ void softdep_update_inodeblock(struct inode *, struct buf *, int);
void softdep_load_inodeblock(struct inode *);
void softdep_freefile(struct vnode *, ino_t, int);
int softdep_request_cleanup(struct fs *, struct vnode *);
-void softdep_setup_freeblocks(struct inode *, off_t);
+void softdep_setup_freeblocks(struct inode *, off_t, int);
void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t);
void softdep_setup_blkmapdep(struct buf *, struct fs *, ufs2_daddr_t);
void softdep_setup_allocdirect(struct inode *, ufs_lbn_t, ufs2_daddr_t,
ufs2_daddr_t, long, long, struct buf *);
+void softdep_setup_allocext(struct inode *, ufs_lbn_t, ufs2_daddr_t,
+ ufs2_daddr_t, long, long, struct buf *);
void softdep_setup_allocindir_meta(struct buf *, struct inode *,
struct buf *, int, ufs2_daddr_t);
void softdep_setup_allocindir_page(struct inode *, ufs_lbn_t,
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index 08e5fdd..83fa66e 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -146,22 +146,81 @@ ffs_truncate(vp, length, flags, cred, td)
struct inode *oip;
ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR];
ufs2_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
- ufs2_daddr_t count, blocksreleased = 0;
+ ufs2_daddr_t count, blocksreleased = 0, datablocks;
struct fs *fs;
struct buf *bp;
+ int needextclean, softdepslowdown, extblocks;
int offset, size, level, nblocks;
- int i, aflags, error, allerror;
+ int i, error, allerror;
off_t osize;
oip = VTOI(ovp);
fs = oip->i_fs;
if (length < 0)
return (EINVAL);
+ /*
+ * Historically clients did not have to specify which data
+ * they were truncating. So, if not specified, we assume
+ * traditional behavior, e.g., just the normal data.
+ */
+ if ((flags & (IO_EXT | IO_NORMAL)) == 0)
+ flags |= IO_NORMAL;
+ /*
+ * If we are truncating the extended-attributes, and cannot
+ * do it with soft updates, then do it slowly here. If we are
+ * truncating both the extended attributes and the file contents
+ * (e.g., the file is being unlinked), then pick it off with
+ * soft updates below.
+ */
+ needextclean = 0;
+ softdepslowdown = softdep_slowdown(ovp);
+ extblocks = 0;
+ datablocks = DIP(oip, i_blocks);
+ if (fs->fs_magic == FS_UFS2_MAGIC && oip->i_din2->di_extsize > 0) {
+ extblocks = btodb(fragroundup(fs, oip->i_din2->di_extsize));
+ datablocks -= extblocks;
+ }
+ if ((flags & IO_EXT) && extblocks > 0) {
+ if (DOINGSOFTDEP(ovp) && softdepslowdown == 0 && length == 0) {
+ if ((flags & IO_NORMAL) == 0) {
+ softdep_setup_freeblocks(oip, length, IO_EXT);
+ return (0);
+ }
+ needextclean = 1;
+ } else {
+ if (length != 0)
+ panic("ffs_truncate: partial trunc of extdata");
+ if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, td)) != 0)
+ return (error);
+ osize = oip->i_din2->di_extsize;
+ oip->i_din2->di_blocks -= extblocks;
+#ifdef QUOTA
+ (void) chkdq(oip, -extblocks, NOCRED, 0);
+#endif
+ vinvalbuf(ovp, V_ALT, cred, td, 0, 0);
+ oip->i_din2->di_extsize = 0;
+ for (i = 0; i < NXADDR; i++) {
+ oldblks[i] = oip->i_din2->di_extb[i];
+ oip->i_din2->di_extb[i] = 0;
+ }
+ oip->i_flag |= IN_CHANGE | IN_UPDATE;
+ if ((error = ffs_update(ovp, 1)))
+ return (error);
+ for (i = 0; i < NXADDR; i++) {
+ if (oldblks[i] == 0)
+ continue;
+ ffs_blkfree(fs, oip->i_devvp, oldblks[i],
+ sblksize(fs, osize, i), oip->i_number);
+ }
+ }
+ }
+ if ((flags & IO_NORMAL) == 0)
+ return (0);
if (length > fs->fs_maxfilesize)
return (EFBIG);
if (ovp->v_type == VLNK &&
(oip->i_size < ovp->v_mount->mnt_maxsymlinklen ||
- DIP(oip, i_blocks) == 0)) {
+ datablocks == 0)) {
#ifdef DIAGNOSTIC
if (length != 0)
panic("ffs_truncate: partial truncate of symlink");
@@ -170,10 +229,14 @@ ffs_truncate(vp, length, flags, cred, td)
oip->i_size = 0;
DIP(oip, i_size) = 0;
oip->i_flag |= IN_CHANGE | IN_UPDATE;
+ if (needextclean)
+ softdep_setup_freeblocks(oip, length, IO_EXT);
return (UFS_UPDATE(ovp, 1));
}
if (oip->i_size == length) {
oip->i_flag |= IN_CHANGE | IN_UPDATE;
+ if (needextclean)
+ softdep_setup_freeblocks(oip, length, IO_EXT);
return (UFS_UPDATE(ovp, 0));
}
if (fs->fs_ronly)
@@ -187,7 +250,7 @@ ffs_truncate(vp, length, flags, cred, td)
ffs_snapremove(ovp);
ovp->v_lasta = ovp->v_clen = ovp->v_cstart = ovp->v_lastw = 0;
if (DOINGSOFTDEP(ovp)) {
- if (length > 0 || softdep_slowdown(ovp)) {
+ if (length > 0 || softdepslowdown) {
/*
* If a file is only partially truncated, then
* we have to clean up the data structures
@@ -197,17 +260,18 @@ ffs_truncate(vp, length, flags, cred, td)
* rarely, we solve the problem by syncing the file
* so that it will have no data structures left.
*/
- if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT,
- td)) != 0)
+ if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, td)) != 0)
return (error);
if (oip->i_flag & IN_SPACECOUNTED)
- fs->fs_pendingblocks -= DIP(oip, i_blocks);
+ fs->fs_pendingblocks -= datablocks;
} else {
#ifdef QUOTA
- (void) chkdq(oip, -DIP(oip, i_blocks), NOCRED, 0);
+ (void) chkdq(oip, -datablocks, NOCRED, 0);
#endif
- softdep_setup_freeblocks(oip, length);
- vinvalbuf(ovp, 0, cred, td, 0, 0);
+ softdep_setup_freeblocks(oip, length, needextclean ?
+ IO_EXT | IO_NORMAL : IO_NORMAL);
+ vinvalbuf(ovp, needextclean ? 0 : V_NORMAL,
+ cred, td, 0, 0);
oip->i_flag |= IN_CHANGE | IN_UPDATE;
return (ffs_update(ovp, 0));
}
@@ -220,18 +284,15 @@ ffs_truncate(vp, length, flags, cred, td)
*/
if (osize < length) {
vnode_pager_setsize(ovp, length);
- aflags = BA_CLRBUF;
- if (flags & IO_SYNC)
- aflags |= BA_SYNC;
- error = UFS_BALLOC(ovp, length - 1, 1,
- cred, aflags, &bp);
+ flags |= BA_CLRBUF;
+ error = UFS_BALLOC(ovp, length - 1, 1, cred, flags, &bp);
if (error)
return (error);
oip->i_size = length;
DIP(oip, i_size) = length;
if (bp->b_bufsize == fs->fs_bsize)
bp->b_flags |= B_CLUSTEROK;
- if (aflags & BA_SYNC)
+ if (flags & IO_SYNC)
bwrite(bp);
else
bawrite(bp);
@@ -252,10 +313,8 @@ ffs_truncate(vp, length, flags, cred, td)
DIP(oip, i_size) = length;
} else {
lbn = lblkno(fs, length);
- aflags = BA_CLRBUF;
- if (flags & IO_SYNC)
- aflags |= BA_SYNC;
- error = UFS_BALLOC(ovp, length - 1, 1, cred, aflags, &bp);
+ flags |= BA_CLRBUF;
+ error = UFS_BALLOC(ovp, length - 1, 1, cred, flags, &bp);
if (error) {
return (error);
}
@@ -281,7 +340,7 @@ ffs_truncate(vp, length, flags, cred, td)
allocbuf(bp, size);
if (bp->b_bufsize == fs->fs_bsize)
bp->b_flags |= B_CLUSTEROK;
- if (aflags & BA_SYNC)
+ if (flags & IO_SYNC)
bwrite(bp);
else
bawrite(bp);
@@ -420,6 +479,7 @@ done:
if (newblks[i] != DIP(oip, i_db[i]))
panic("ffs_truncate2");
if (length == 0 &&
+ (fs->fs_magic != FS_UFS2_MAGIC || oip->i_din2->di_extsize == 0) &&
(!TAILQ_EMPTY(&ovp->v_dirtyblkhd) ||
!TAILQ_EMPTY(&ovp->v_cleanblkhd)))
panic("ffs_truncate3");
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index f036150..631c82a 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -157,6 +157,7 @@ static void clear_inodedeps(struct thread *);
static int flush_pagedep_deps(struct vnode *, struct mount *,
struct diraddhd *);
static int flush_inodedep_deps(struct fs *, ino_t);
+static int flush_deplist(struct allocdirectlst *, int, int *);
static int handle_written_filepage(struct pagedep *, struct buf *);
static void diradd_inode_written(struct diradd *, struct inodedep *);
static int handle_written_inodeblock(struct inodedep *, struct buf *);
@@ -181,7 +182,7 @@ static void free_allocdirect(struct allocdirectlst *,
static int check_inode_unwritten(struct inodedep *);
static int free_inodedep(struct inodedep *);
static void handle_workitem_freeblocks(struct freeblks *, int);
-static void merge_inode_lists(struct inodedep *);
+static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *);
static void setup_allocindir_phase2(struct buf *, struct inode *,
struct allocindir *);
static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t,
@@ -1041,12 +1042,15 @@ top:
inodedep->id_nlinkdelta = 0;
inodedep->id_savedino1 = NULL;
inodedep->id_savedsize = -1;
+ inodedep->id_savedextsize = -1;
inodedep->id_buf = NULL;
LIST_INIT(&inodedep->id_pendinghd);
LIST_INIT(&inodedep->id_inowait);
LIST_INIT(&inodedep->id_bufwait);
TAILQ_INIT(&inodedep->id_inoupdt);
TAILQ_INIT(&inodedep->id_newinoupdt);
+ TAILQ_INIT(&inodedep->id_extupdt);
+ TAILQ_INIT(&inodedep->id_newextupdt);
ACQUIRE_LOCK(&lk);
LIST_INSERT_HEAD(inodedephd, inodedep, id_hash);
sema_release(&inodedep_in_progress);
@@ -1566,6 +1570,103 @@ handle_workitem_freefrag(freefrag)
}
/*
+ * Set up a dependency structure for an extended attributes data block.
+ * This routine follows much of the structure of softdep_setup_allocdirect.
+ * See the description of softdep_setup_allocdirect above for details.
+ */
+void
+softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
+	struct inode *ip;	/* inode that owns the ext data block */
+	ufs_lbn_t lbn;		/* ext data block index; must be < NXADDR */
+	ufs2_daddr_t newblkno;	/* block number recorded as ad_newblkno */
+	ufs2_daddr_t oldblkno;	/* block number recorded as ad_oldblkno */
+	long newsize;		/* size recorded as ad_newsize */
+	long oldsize;		/* size recorded as ad_oldsize */
+	struct buf *bp;		/* buffer whose b_dep list gets the allocdirect */
+{
+	struct allocdirect *adp, *oldadp;
+	struct allocdirectlst *adphead;
+	struct bmsafemap *bmsafemap;
+	struct inodedep *inodedep;
+	struct newblk *newblk;
+
+	MALLOC(adp, struct allocdirect *, sizeof(struct allocdirect),
+		M_ALLOCDIRECT, M_SOFTDEP_FLAGS|M_ZERO);
+	adp->ad_list.wk_type = D_ALLOCDIRECT;
+	adp->ad_lbn = lbn;
+	adp->ad_newblkno = newblkno;
+	adp->ad_oldblkno = oldblkno;
+	adp->ad_newsize = newsize;
+	adp->ad_oldsize = oldsize;
+	/*
+	 * EXTDATA lets handle_allocdirect_partdone pick the ext data
+	 * lists (id_extupdt/id_newextupdt) rather than the file data lists.
+	 */
+	adp->ad_state = ATTACHED | EXTDATA;
+	LIST_INIT(&adp->ad_newdirblk);
+	/* A freefrag for the old block is set up only if the block moved. */
+	if (newblkno == oldblkno)
+		adp->ad_freefrag = NULL;
+	else
+		adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize);
+
+	if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0)
+		panic("softdep_setup_allocext: lost block");
+
+	ACQUIRE_LOCK(&lk);
+	inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC | NODELAY, &inodedep);
+	adp->ad_inodedep = inodedep;
+
+	/*
+	 * Take over the newblk's bitmap dependency state: either it is
+	 * already DEPCOMPLETE, or the allocdirect is hung on the same
+	 * bmsafemap in place of the newblk.
+	 */
+	if (newblk->nb_state == DEPCOMPLETE) {
+		adp->ad_state |= DEPCOMPLETE;
+		adp->ad_buf = NULL;
+	} else {
+		bmsafemap = newblk->nb_bmsafemap;
+		adp->ad_buf = bmsafemap->sm_buf;
+		LIST_REMOVE(newblk, nb_deps);
+		LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps);
+	}
+	/* The newblk has served its purpose; unhash and release it. */
+	LIST_REMOVE(newblk, nb_hash);
+	FREE(newblk, M_NEWBLK);
+
+	WORKLIST_INSERT(&bp->b_dep, &adp->ad_list);
+	if (lbn >= NXADDR) {
+		FREE_LOCK(&lk);
+		/*
+		 * lbn is a ufs_lbn_t; print it the way the rest of this
+		 * file prints lbn values (%jd with an intmax_t cast)
+		 * instead of passing it to a plain %d.
+		 */
+		panic("softdep_setup_allocext: lbn %jd >= NXADDR",
+		    (intmax_t)lbn);
+	}
+	/*
+	 * The list of allocdirects must be kept in sorted and ascending
+	 * order so that the rollback routines can quickly determine the
+	 * first uncommitted block (the size of the file stored on disk
+	 * ends at the end of the lowest committed fragment, or if there
+	 * are no fragments, at the end of the highest committed block).
+	 * Since files generally grow, the typical case is that the new
+	 * block is to be added at the end of the list. We speed this
+	 * special case by checking against the last allocdirect in the
+	 * list before laboriously traversing the list looking for the
+	 * insertion point.
+	 */
+	adphead = &inodedep->id_newextupdt;
+	oldadp = TAILQ_LAST(adphead, allocdirectlst);
+	if (oldadp == NULL || oldadp->ad_lbn <= lbn) {
+		/* insert at end of list */
+		TAILQ_INSERT_TAIL(adphead, adp, ad_next);
+		/* An existing entry for the same lbn is merged into adp. */
+		if (oldadp != NULL && oldadp->ad_lbn == lbn)
+			allocdirect_merge(adphead, adp, oldadp);
+		FREE_LOCK(&lk);
+		return;
+	}
+	/* Find the first entry whose lbn is not below the new one. */
+	TAILQ_FOREACH(oldadp, adphead, ad_next) {
+		if (oldadp->ad_lbn >= lbn)
+			break;
+	}
+	if (oldadp == NULL) {
+		FREE_LOCK(&lk);
+		panic("softdep_setup_allocext: lost entry");
+	}
+	/* insert in middle of list */
+	TAILQ_INSERT_BEFORE(oldadp, adp, ad_next);
+	if (oldadp->ad_lbn == lbn)
+		allocdirect_merge(adphead, adp, oldadp);
+	FREE_LOCK(&lk);
+}
+
+/*
* Indirect block allocation dependencies.
*
* The same dependencies that exist for a direct block also exist when
@@ -1769,7 +1870,8 @@ setup_allocindir_phase2(bp, ip, aip)
LIST_INIT(&newindirdep->ir_deplisthd);
LIST_INIT(&newindirdep->ir_donehd);
if (bp->b_blkno == bp->b_lblkno) {
- ufs_bmaparray(bp->b_vp, bp->b_lblkno, &blkno, NULL, NULL);
+ ufs_bmaparray(bp->b_vp, bp->b_lblkno, &blkno, bp,
+ NULL, NULL);
bp->b_blkno = blkno;
}
newindirdep->ir_savebp =
@@ -1809,9 +1911,10 @@ setup_allocindir_phase2(bp, ip, aip)
* can release it.
*/
void
-softdep_setup_freeblocks(ip, length)
+softdep_setup_freeblocks(ip, length, flags)
struct inode *ip; /* The inode whose length is to be reduced */
off_t length; /* The new length for the file */
+ int flags; /* IO_EXT and/or IO_NORMAL */
{
struct freeblks *freeblks;
struct inodedep *inodedep;
@@ -1819,6 +1922,7 @@ softdep_setup_freeblocks(ip, length)
struct vnode *vp;
struct buf *bp;
struct fs *fs;
+ ufs2_daddr_t extblocks, datablocks;
int i, delay, error;
fs = ip->i_fs;
@@ -1831,27 +1935,46 @@ softdep_setup_freeblocks(ip, length)
freeblks->fb_previousinum = ip->i_number;
freeblks->fb_devvp = ip->i_devvp;
freeblks->fb_mnt = ITOV(ip)->v_mount;
- freeblks->fb_oldsize = ip->i_size;
- freeblks->fb_newsize = length;
- freeblks->fb_chkcnt = DIP(ip, i_blocks);
- for (i = 0; i < NDADDR; i++) {
- freeblks->fb_dblks[i] = DIP(ip, i_db[i]);
- DIP(ip, i_db[i]) = 0;
- }
- for (i = 0; i < NIADDR; i++) {
- freeblks->fb_iblks[i] = DIP(ip, i_ib[i]);
- DIP(ip, i_ib[i]) = 0;
- }
- DIP(ip, i_blocks) = 0;
- ip->i_size = 0;
- DIP(ip, i_size) = 0;
- /*
- * If the file was removed, then the space being freed was
- * accounted for then (see softdep_filereleased()). If the
- * file is merely being truncated, then we account for it now.
- */
- if ((ip->i_flag & IN_SPACECOUNTED) == 0)
- fs->fs_pendingblocks += freeblks->fb_chkcnt;
+ extblocks = 0;
+ if (fs->fs_magic == FS_UFS2_MAGIC)
+ extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
+ datablocks = DIP(ip, i_blocks) - extblocks;
+ if ((flags & IO_NORMAL) == 0) {
+ freeblks->fb_oldsize = 0;
+ freeblks->fb_chkcnt = 0;
+ } else {
+ freeblks->fb_oldsize = ip->i_size;
+ ip->i_size = 0;
+ DIP(ip, i_size) = 0;
+ freeblks->fb_chkcnt = datablocks;
+ for (i = 0; i < NDADDR; i++) {
+ freeblks->fb_dblks[i] = DIP(ip, i_db[i]);
+ DIP(ip, i_db[i]) = 0;
+ }
+ for (i = 0; i < NIADDR; i++) {
+ freeblks->fb_iblks[i] = DIP(ip, i_ib[i]);
+ DIP(ip, i_ib[i]) = 0;
+ }
+ /*
+ * If the file was removed, then the space being freed was
+ * accounted for then (see softdep_filereleased()). If the
+ * file is merely being truncated, then we account for it now.
+ */
+ if ((ip->i_flag & IN_SPACECOUNTED) == 0)
+ fs->fs_pendingblocks += datablocks;
+ }
+ if ((flags & IO_EXT) == 0) {
+ freeblks->fb_oldextsize = 0;
+ } else {
+ freeblks->fb_oldextsize = ip->i_din2->di_extsize;
+ ip->i_din2->di_extsize = 0;
+ freeblks->fb_chkcnt += extblocks;
+ for (i = 0; i < NXADDR; i++) {
+ freeblks->fb_eblks[i] = ip->i_din2->di_extb[i];
+ ip->i_din2->di_extb[i] = 0;
+ }
+ }
+ DIP(ip, i_blocks) -= freeblks->fb_chkcnt;
/*
* Push the zero'ed inode to its disk buffer so that we are free
* to delete its dependencies below. Once the dependencies are gone
@@ -1897,9 +2020,18 @@ softdep_setup_freeblocks(ip, length)
* If we still have a bitmap dependency, then the inode has never
* been written to disk, so we can free any fragments without delay.
*/
- merge_inode_lists(inodedep);
- while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
- free_allocdirect(&inodedep->id_inoupdt, adp, delay);
+ if (flags & IO_NORMAL) {
+ merge_inode_lists(&inodedep->id_newinoupdt,
+ &inodedep->id_inoupdt);
+ while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
+ free_allocdirect(&inodedep->id_inoupdt, adp, delay);
+ }
+ if (flags & IO_EXT) {
+ merge_inode_lists(&inodedep->id_newextupdt,
+ &inodedep->id_extupdt);
+ while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != 0)
+ free_allocdirect(&inodedep->id_extupdt, adp, delay);
+ }
FREE_LOCK(&lk);
bdwrite(bp);
/*
@@ -1911,14 +2043,21 @@ softdep_setup_freeblocks(ip, length)
vp = ITOV(ip);
ACQUIRE_LOCK(&lk);
drain_output(vp, 1);
- while (getdirtybuf(&TAILQ_FIRST(&vp->v_dirtyblkhd), MNT_WAIT)) {
- bp = TAILQ_FIRST(&vp->v_dirtyblkhd);
+restart:
+ TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
+ if (((flags & IO_EXT) == 0 && (bp->b_xflags & BX_ALTDATA)) ||
+ ((flags & IO_NORMAL) == 0 &&
+ (bp->b_xflags & BX_ALTDATA) == 0))
+ continue;
+ if (getdirtybuf(&bp, MNT_WAIT) == 0)
+ goto restart;
(void) inodedep_lookup(fs, ip->i_number, 0, &inodedep);
deallocate_dependencies(bp, inodedep);
bp->b_flags |= B_INVAL | B_NOCACHE;
FREE_LOCK(&lk);
brelse(bp);
ACQUIRE_LOCK(&lk);
+ goto restart;
}
if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0)
(void) free_inodedep(inodedep);
@@ -2216,6 +2355,8 @@ check_inode_unwritten(inodedep)
LIST_FIRST(&inodedep->id_inowait) != NULL ||
TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
+ TAILQ_FIRST(&inodedep->id_extupdt) != NULL ||
+ TAILQ_FIRST(&inodedep->id_newextupdt) != NULL ||
inodedep->id_nlinkdelta != 0)
return (0);
inodedep->id_state |= ALLCOMPLETE;
@@ -2249,6 +2390,8 @@ free_inodedep(inodedep)
LIST_FIRST(&inodedep->id_inowait) != NULL ||
TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
+ TAILQ_FIRST(&inodedep->id_extupdt) != NULL ||
+ TAILQ_FIRST(&inodedep->id_newextupdt) != NULL ||
inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL)
return (0);
LIST_REMOVE(inodedep, id_hash);
@@ -2288,30 +2431,48 @@ handle_workitem_freeblocks(freeblks, flags)
nblocks = btodb(fs->fs_bsize);
blocksreleased = 0;
/*
- * Indirect blocks first.
+ * Release all extended attribute blocks or frags.
*/
- for (level = (NIADDR - 1); level >= 0; level--) {
- if ((bn = freeblks->fb_iblks[level]) == 0)
- continue;
- if ((error = indir_trunc(freeblks, fsbtodb(fs, bn), level,
- baselbns[level], &blocksreleased)) == 0)
- allerror = error;
- ffs_blkfree(fs, freeblks->fb_devvp, bn, fs->fs_bsize,
- freeblks->fb_previousinum);
- fs->fs_pendingblocks -= nblocks;
- blocksreleased += nblocks;
+ if (freeblks->fb_oldextsize > 0) {
+ for (i = (NXADDR - 1); i >= 0; i--) {
+ if ((bn = freeblks->fb_eblks[i]) == 0)
+ continue;
+ bsize = sblksize(fs, freeblks->fb_oldextsize, i);
+ ffs_blkfree(fs, freeblks->fb_devvp, bn, bsize,
+ freeblks->fb_previousinum);
+ blocksreleased += btodb(bsize);
+ }
}
/*
- * All direct blocks or frags.
+ * Release all data blocks or frags.
*/
- for (i = (NDADDR - 1); i >= 0; i--) {
- if ((bn = freeblks->fb_dblks[i]) == 0)
- continue;
- bsize = sblksize(fs, freeblks->fb_oldsize, i);
- ffs_blkfree(fs, freeblks->fb_devvp, bn, bsize,
- freeblks->fb_previousinum);
- fs->fs_pendingblocks -= btodb(bsize);
- blocksreleased += btodb(bsize);
+ if (freeblks->fb_oldsize > 0) {
+ /*
+ * Indirect blocks first.
+ */
+ for (level = (NIADDR - 1); level >= 0; level--) {
+ if ((bn = freeblks->fb_iblks[level]) == 0)
+ continue;
+ if ((error = indir_trunc(freeblks, fsbtodb(fs, bn),
+ level, baselbns[level], &blocksreleased)) == 0)
+ allerror = error;
+ ffs_blkfree(fs, freeblks->fb_devvp, bn, fs->fs_bsize,
+ freeblks->fb_previousinum);
+ fs->fs_pendingblocks -= nblocks;
+ blocksreleased += nblocks;
+ }
+ /*
+ * All direct blocks or frags.
+ */
+ for (i = (NDADDR - 1); i >= 0; i--) {
+ if ((bn = freeblks->fb_dblks[i]) == 0)
+ continue;
+ bsize = sblksize(fs, freeblks->fb_oldsize, i);
+ ffs_blkfree(fs, freeblks->fb_devvp, bn, bsize,
+ freeblks->fb_previousinum);
+ fs->fs_pendingblocks -= btodb(bsize);
+ blocksreleased += btodb(bsize);
+ }
}
/*
* If we still have not finished background cleanup, then check
@@ -3049,6 +3210,8 @@ softdep_releasefile(ip)
struct inode *ip; /* inode with the zero effective link count */
{
struct inodedep *inodedep;
+ struct fs *fs;
+ int extblocks;
if (ip->i_effnlink > 0)
panic("softdep_filerelease: file still referenced");
@@ -3073,7 +3236,11 @@ softdep_releasefile(ip)
if ((inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep)))
inodedep->id_state |= SPACECOUNTED;
FREE_LOCK(&lk);
- ip->i_fs->fs_pendingblocks += DIP(ip, i_blocks);
+ fs = ip->i_fs;
+ extblocks = 0;
+ if (fs->fs_magic == FS_UFS2_MAGIC)
+ extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
+ ip->i_fs->fs_pendingblocks += DIP(ip, i_blocks) - extblocks;
ip->i_fs->fs_pendinginodes += 1;
ip->i_flag |= IN_SPACECOUNTED;
}
@@ -3404,6 +3571,7 @@ initiate_write_inodeblock_ufs1(inodedep, bp)
* If no dependencies, then there is nothing to roll back.
*/
inodedep->id_savedsize = dp->di_size;
+ inodedep->id_savedextsize = 0;
if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL)
return;
/*
@@ -3556,12 +3724,81 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
* If no dependencies, then there is nothing to roll back.
*/
inodedep->id_savedsize = dp->di_size;
- if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL)
+ inodedep->id_savedextsize = dp->di_extsize;
+ if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL &&
+ TAILQ_FIRST(&inodedep->id_extupdt) == NULL)
return;
/*
- * Set the dependencies to busy.
+ * Set the ext data dependencies to busy.
*/
ACQUIRE_LOCK(&lk);
+ for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp;
+ adp = TAILQ_NEXT(adp, ad_next)) {
+#ifdef DIAGNOSTIC
+ if (deplist != 0 && prevlbn >= adp->ad_lbn) {
+ FREE_LOCK(&lk);
+ panic("softdep_write_inodeblock: lbn order");
+ }
+ prevlbn = adp->ad_lbn;
+ if (dp->di_extb[adp->ad_lbn] != adp->ad_newblkno) {
+ FREE_LOCK(&lk);
+ panic("%s: direct pointer #%jd mismatch %jd != %jd",
+ "softdep_write_inodeblock",
+ (intmax_t)adp->ad_lbn,
+ (intmax_t)dp->di_extb[adp->ad_lbn],
+ (intmax_t)adp->ad_newblkno);
+ }
+ deplist |= 1 << adp->ad_lbn;
+ if ((adp->ad_state & ATTACHED) == 0) {
+ FREE_LOCK(&lk);
+ panic("softdep_write_inodeblock: Unknown state 0x%x",
+ adp->ad_state);
+ }
+#endif /* DIAGNOSTIC */
+ adp->ad_state &= ~ATTACHED;
+ adp->ad_state |= UNDONE;
+ }
+ /*
+ * The on-disk inode cannot claim to be any larger than the last
+ * fragment that has been written. Otherwise, the on-disk inode
+ * might have fragments that were not the last block in the ext
+ * data which would corrupt the filesystem.
+ */
+ for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp;
+ lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
+ dp->di_extb[adp->ad_lbn] = adp->ad_oldblkno;
+ /* keep going until hitting a rollback to a frag */
+ if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
+ continue;
+ dp->di_extsize = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
+ for (i = adp->ad_lbn + 1; i < NXADDR; i++) {
+#ifdef DIAGNOSTIC
+ if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0) {
+ FREE_LOCK(&lk);
+ panic("softdep_write_inodeblock: lost dep1");
+ }
+#endif /* DIAGNOSTIC */
+ dp->di_extb[i] = 0;
+ }
+ lastadp = NULL;
+ break;
+ }
+ /*
+ * If we have zero'ed out the last allocated block of the ext
+ * data, roll back the size to the last currently allocated block.
+ * We know that this last allocated block is full-sized as
+ * we already checked for fragments in the loop above.
+ */
+ if (lastadp != NULL &&
+ dp->di_extsize <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
+ for (i = lastadp->ad_lbn; i >= 0; i--)
+ if (dp->di_extb[i] != 0)
+ break;
+ dp->di_extsize = (i + 1) * fs->fs_bsize;
+ }
+ /*
+ * Set the file data dependencies to busy.
+ */
for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
adp = TAILQ_NEXT(adp, ad_next)) {
#ifdef DIAGNOSTIC
@@ -3617,7 +3854,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
#ifdef DIAGNOSTIC
if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) {
FREE_LOCK(&lk);
- panic("softdep_write_inodeblock: lost dep1");
+ panic("softdep_write_inodeblock: lost dep2");
}
#endif /* DIAGNOSTIC */
dp->di_db[i] = 0;
@@ -3627,7 +3864,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
if (dp->di_ib[i] != 0 &&
(deplist & ((1 << NDADDR) << i)) == 0) {
FREE_LOCK(&lk);
- panic("softdep_write_inodeblock: lost dep2");
+ panic("softdep_write_inodeblock: lost dep3");
}
#endif /* DIAGNOSTIC */
dp->di_ib[i] = 0;
@@ -3805,6 +4042,7 @@ static void
handle_allocdirect_partdone(adp)
struct allocdirect *adp; /* the completed allocdirect */
{
+ struct allocdirectlst *listhead;
struct allocdirect *listadp;
struct inodedep *inodedep;
long bsize, delay;
@@ -3822,11 +4060,16 @@ handle_allocdirect_partdone(adp)
* which would corrupt the filesystem. Thus, we cannot free any
* allocdirects after one whose ad_oldblkno claims a fragment as
* these blocks must be rolled back to zero before writing the inode.
- * We check the currently active set of allocdirects in id_inoupdt.
+ * We check the currently active set of allocdirects in id_inoupdt
+ * or id_extupdt as appropriate.
*/
inodedep = adp->ad_inodedep;
bsize = inodedep->id_fs->fs_bsize;
- TAILQ_FOREACH(listadp, &inodedep->id_inoupdt, ad_next) {
+ if (adp->ad_state & EXTDATA)
+ listhead = &inodedep->id_extupdt;
+ else
+ listhead = &inodedep->id_inoupdt;
+ TAILQ_FOREACH(listadp, listhead, ad_next) {
/* found our block */
if (listadp == adp)
break;
@@ -3845,7 +4088,11 @@ handle_allocdirect_partdone(adp)
*/
if (listadp == NULL) {
#ifdef DEBUG
- TAILQ_FOREACH(listadp, &inodedep->id_newinoupdt, ad_next)
+ if (adp->ad_state & EXTDATA)
+ listhead = &inodedep->id_newextupdt;
+ else
+ listhead = &inodedep->id_newinoupdt;
+ TAILQ_FOREACH(listadp, listhead, ad_next)
/* found our block */
if (listadp == adp)
break;
@@ -3868,7 +4115,7 @@ handle_allocdirect_partdone(adp)
listadp = TAILQ_NEXT(adp, ad_next);
if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
return;
- free_allocdirect(&inodedep->id_inoupdt, adp, delay);
+ free_allocdirect(listhead, adp, delay);
}
}
@@ -4023,12 +4270,31 @@ handle_written_inodeblock(inodedep, bp)
adp->ad_state |= ATTACHED;
hadchanges = 1;
}
+ for (adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; adp = nextadp) {
+ nextadp = TAILQ_NEXT(adp, ad_next);
+ if (adp->ad_state & ATTACHED) {
+ lk.lkt_held = NOHOLDER;
+ panic("handle_written_inodeblock: new entry");
+ }
+ if (dp2->di_extb[adp->ad_lbn] != adp->ad_oldblkno) {
+ lk.lkt_held = NOHOLDER;
+ panic("%s: direct pointers #%jd %s %jd != %jd",
+ "handle_written_inodeblock",
+ (intmax_t)adp->ad_lbn, "mismatch",
+ (intmax_t)dp2->di_extb[adp->ad_lbn],
+ (intmax_t)adp->ad_oldblkno);
+ }
+ dp2->di_extb[adp->ad_lbn] = adp->ad_newblkno;
+ adp->ad_state &= ~UNDONE;
+ adp->ad_state |= ATTACHED;
+ hadchanges = 1;
+ }
if (hadchanges && (bp->b_flags & B_DELWRI) == 0)
stat_direct_blk_ptrs++;
/*
* Reset the file size to its most up-to-date value.
*/
- if (inodedep->id_savedsize == -1) {
+ if (inodedep->id_savedsize == -1 || inodedep->id_savedextsize == -1) {
lk.lkt_held = NOHOLDER;
panic("handle_written_inodeblock: bad size");
}
@@ -4042,8 +4308,13 @@ handle_written_inodeblock(inodedep, bp)
dp2->di_size = inodedep->id_savedsize;
hadchanges = 1;
}
+ if (dp2->di_extsize != inodedep->id_savedextsize) {
+ dp2->di_extsize = inodedep->id_savedextsize;
+ hadchanges = 1;
+ }
}
inodedep->id_savedsize = -1;
+ inodedep->id_savedextsize = -1;
/*
* If there were any rollbacks in the inode block, then it must be
* marked dirty so that it will eventually get written back in
@@ -4056,6 +4327,8 @@ handle_written_inodeblock(inodedep, bp)
*/
if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
handle_allocdirect_partdone(adp);
+ if ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL)
+ handle_allocdirect_partdone(adp);
/*
* Process deallocations that were held pending until the
* inode had been written to disk. Freeing of the inode
@@ -4119,7 +4392,9 @@ handle_written_inodeblock(inodedep, bp)
/*
* If no outstanding dependencies, free it.
*/
- if (free_inodedep(inodedep) || TAILQ_FIRST(&inodedep->id_inoupdt) == 0)
+ if (free_inodedep(inodedep) ||
+ (TAILQ_FIRST(&inodedep->id_inoupdt) == 0 &&
+ TAILQ_FIRST(&inodedep->id_extupdt) == 0))
return (0);
return (hadchanges);
}
@@ -4358,9 +4633,12 @@ softdep_update_inodeblock(ip, bp, waitfor)
* the in-memory copy of the inode. Once merged process any
* allocdirects that are completed by the merger.
*/
- merge_inode_lists(inodedep);
+ merge_inode_lists(&inodedep->id_newinoupdt, &inodedep->id_inoupdt);
if (TAILQ_FIRST(&inodedep->id_inoupdt) != NULL)
handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt));
+ merge_inode_lists(&inodedep->id_newextupdt, &inodedep->id_extupdt);
+ if (TAILQ_FIRST(&inodedep->id_extupdt) != NULL)
+ handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_extupdt));
/*
* Now that the inode has been pushed into the buffer, the
* operations dependent on the inode being written to disk
@@ -4392,34 +4670,35 @@ softdep_update_inodeblock(ip, bp, waitfor)
}
/*
- * Merge the new inode dependency list (id_newinoupdt) into the old
- * inode dependency list (id_inoupdt). This routine must be called
- * with splbio interrupts blocked.
+ * Merge a new inode dependency list (such as id_newinoupdt) into an
+ * old inode dependency list (such as id_inoupdt). This routine must be
+ * called with splbio interrupts blocked.
*/
static void
-merge_inode_lists(inodedep)
- struct inodedep *inodedep;
+merge_inode_lists(newlisthead, oldlisthead)
+ struct allocdirectlst *newlisthead;
+ struct allocdirectlst *oldlisthead;
{
struct allocdirect *listadp, *newadp;
- newadp = TAILQ_FIRST(&inodedep->id_newinoupdt);
- for (listadp = TAILQ_FIRST(&inodedep->id_inoupdt); listadp && newadp;) {
+ newadp = TAILQ_FIRST(newlisthead);
+ for (listadp = TAILQ_FIRST(oldlisthead); listadp && newadp;) {
if (listadp->ad_lbn < newadp->ad_lbn) {
listadp = TAILQ_NEXT(listadp, ad_next);
continue;
}
- TAILQ_REMOVE(&inodedep->id_newinoupdt, newadp, ad_next);
+ TAILQ_REMOVE(newlisthead, newadp, ad_next);
TAILQ_INSERT_BEFORE(listadp, newadp, ad_next);
if (listadp->ad_lbn == newadp->ad_lbn) {
- allocdirect_merge(&inodedep->id_inoupdt, newadp,
+ allocdirect_merge(oldlisthead, newadp,
listadp);
listadp = newadp;
}
- newadp = TAILQ_FIRST(&inodedep->id_newinoupdt);
+ newadp = TAILQ_FIRST(newlisthead);
}
- while ((newadp = TAILQ_FIRST(&inodedep->id_newinoupdt)) != NULL) {
- TAILQ_REMOVE(&inodedep->id_newinoupdt, newadp, ad_next);
- TAILQ_INSERT_TAIL(&inodedep->id_inoupdt, newadp, ad_next);
+ while ((newadp = TAILQ_FIRST(newlisthead)) != NULL) {
+ TAILQ_REMOVE(newlisthead, newadp, ad_next);
+ TAILQ_INSERT_TAIL(oldlisthead, newadp, ad_next);
}
}
@@ -4454,6 +4733,8 @@ softdep_fsync(vp)
}
if (LIST_FIRST(&inodedep->id_inowait) != NULL ||
LIST_FIRST(&inodedep->id_bufwait) != NULL ||
+ TAILQ_FIRST(&inodedep->id_extupdt) != NULL ||
+ TAILQ_FIRST(&inodedep->id_newextupdt) != NULL ||
TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL) {
FREE_LOCK(&lk);
@@ -4877,9 +5158,7 @@ flush_inodedep_deps(fs, ino)
ino_t ino;
{
struct inodedep *inodedep;
- struct allocdirect *adp;
int error, waitfor;
- struct buf *bp;
/*
* This work is done in two passes. The first pass grabs most
@@ -4894,52 +5173,17 @@ flush_inodedep_deps(fs, ino)
* We give a brief window at the top of the loop to allow
* any pending I/O to complete.
*/
- for (waitfor = MNT_NOWAIT; ; ) {
+ for (error = 0, waitfor = MNT_NOWAIT; ; ) {
+ if (error)
+ return (error);
FREE_LOCK(&lk);
ACQUIRE_LOCK(&lk);
if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
return (0);
- TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) {
- if (adp->ad_state & DEPCOMPLETE)
- continue;
- bp = adp->ad_buf;
- if (getdirtybuf(&bp, waitfor) == 0) {
- if (waitfor == MNT_NOWAIT)
- continue;
- break;
- }
- FREE_LOCK(&lk);
- if (waitfor == MNT_NOWAIT) {
- bawrite(bp);
- } else if ((error = BUF_WRITE(bp)) != 0) {
- ACQUIRE_LOCK(&lk);
- return (error);
- }
- ACQUIRE_LOCK(&lk);
- break;
- }
- if (adp != NULL)
- continue;
- TAILQ_FOREACH(adp, &inodedep->id_newinoupdt, ad_next) {
- if (adp->ad_state & DEPCOMPLETE)
- continue;
- bp = adp->ad_buf;
- if (getdirtybuf(&bp, waitfor) == 0) {
- if (waitfor == MNT_NOWAIT)
- continue;
- break;
- }
- FREE_LOCK(&lk);
- if (waitfor == MNT_NOWAIT) {
- bawrite(bp);
- } else if ((error = BUF_WRITE(bp)) != 0) {
- ACQUIRE_LOCK(&lk);
- return (error);
- }
- ACQUIRE_LOCK(&lk);
- break;
- }
- if (adp != NULL)
+ if (flush_deplist(&inodedep->id_inoupdt, waitfor, &error) ||
+ flush_deplist(&inodedep->id_newinoupdt, waitfor, &error) ||
+ flush_deplist(&inodedep->id_extupdt, waitfor, &error) ||
+ flush_deplist(&inodedep->id_newextupdt, waitfor, &error))
continue;
/*
* If pass2, we are done, otherwise do pass 2.
@@ -4957,6 +5201,41 @@ flush_inodedep_deps(fs, ino)
}
/*
+ * Flush an inode dependency list.
+ * Called with splbio blocked.
+ */
+static int
+flush_deplist(listhead, waitfor, errorp)
+ struct allocdirectlst *listhead; /* allocdirect list to scan */
+ int waitfor; /* MNT_NOWAIT or a waiting mode; also handed to getdirtybuf */
+ int *errorp; /* written only on the non-MNT_NOWAIT path, with BUF_WRITE's result */
+{
+ struct allocdirect *adp;
+ struct buf *bp;
+
+ TAILQ_FOREACH(adp, listhead, ad_next) {
+ if (adp->ad_state & DEPCOMPLETE)
+ continue; /* entries already marked DEPCOMPLETE are skipped */
+ bp = adp->ad_buf;
+ if (getdirtybuf(&bp, waitfor) == 0) {
+ if (waitfor == MNT_NOWAIT)
+ continue; /* non-waiting pass: try the next entry instead */
+ return (1); /* waiting pass: nonzero makes the caller restart its loop */
+ }
+ FREE_LOCK(&lk); /* lk is released around the buffer write */
+ if (waitfor == MNT_NOWAIT) {
+ bawrite(bp);
+ } else if ((*errorp = BUF_WRITE(bp)) != 0) {
+ ACQUIRE_LOCK(&lk);
+ return (1); /* caller checks its error variable at the top of its loop */
+ }
+ ACQUIRE_LOCK(&lk);
+ return (1); /* a write was issued; stop walking this list */
+ }
+ return (0); /* loop finished without issuing a write from this list */
+}
+
+/*
* Eliminate a pagedep dependency by flushing out all its diradd dependencies.
* Called with splbio blocked.
*/
@@ -5406,6 +5685,12 @@ softdep_count_dependencies(bp, wantcount)
if (!wantcount)
goto out;
}
+ if (TAILQ_FIRST(&inodedep->id_extupdt)) {
+ /* direct block pointer dependency */
+ retval += 1;
+ if (!wantcount)
+ goto out;
+ }
continue;
case D_INDIRDEP:
diff --git a/sys/ufs/ffs/ffs_softdep_stub.c b/sys/ufs/ffs/ffs_softdep_stub.c
index df084c7..c20b53c 100644
--- a/sys/ufs/ffs/ffs_softdep_stub.c
+++ b/sys/ufs/ffs/ffs_softdep_stub.c
@@ -123,6 +123,20 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
panic("softdep_setup_allocdirect called");
}
+void
+softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
+	struct inode *ip;
+	ufs_lbn_t lbn;
+	ufs2_daddr_t newblkno;
+	ufs2_daddr_t oldblkno;
+	long newsize;
+	long oldsize;
+	struct buf *bp;
+{
+
+	/* Stub version: unconditionally panics, naming this function. */
+	panic("softdep_setup_allocext called");
+}
+
void
softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp)
struct inode *ip;
diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h
index cbee51b..f29e89f 100644
--- a/sys/ufs/ffs/softdep.h
+++ b/sys/ufs/ffs/softdep.h
@@ -89,8 +89,10 @@
* dependencies are complete. The INPROGRESS flag marks worklist
* structures that are still on the worklist, but are being considered
* for action by some process. The UFS1FMT flag indicates that the
- * inode being processed is a ufs1 format. The ONWORKLIST flag shows
- * whether the structure is currently linked onto a worklist.
+ * inode being processed is a ufs1 format. The EXTDATA flag indicates
+ * that the allocdirect describes an extended-attributes dependency.
+ * The ONWORKLIST flag shows whether the structure is currently linked
+ * onto a worklist.
*/
#define ATTACHED 0x0001
#define UNDONE 0x0002
@@ -106,6 +108,7 @@
#define NEWBLOCK 0x0800 /* pagedep only */
#define INPROGRESS 0x1000 /* dirrem, freeblks, freefrag, freefile only */
#define UFS1FMT 0x2000 /* indirdep only */
+#define EXTDATA 0x4000 /* allocdirect only */
#define ONWORKLIST 0x8000
#define ALLCOMPLETE (ATTACHED | COMPLETE | DEPCOMPLETE)
@@ -251,12 +254,15 @@ struct inodedep {
nlink_t id_nlinkdelta; /* saved effective link count */
LIST_ENTRY(inodedep) id_deps; /* bmsafemap's list of inodedep's */
struct buf *id_buf; /* related bmsafemap (if pending) */
+ long id_savedextsize; /* ext size saved during rollback */
off_t id_savedsize; /* file size saved during rollback */
struct workhead id_pendinghd; /* entries awaiting directory write */
struct workhead id_bufwait; /* operations after inode written */
struct workhead id_inowait; /* operations waiting inode update */
struct allocdirectlst id_inoupdt; /* updates before inode written */
struct allocdirectlst id_newinoupdt; /* updates when inode written */
+ struct allocdirectlst id_extupdt; /* extdata updates pre-inode write */
+ struct allocdirectlst id_newextupdt; /* extdata updates at ino write */
union {
struct ufs1_dinode *idu_savedino1; /* saved ufs1_dinode contents */
struct ufs2_dinode *idu_savedino2; /* saved ufs2_dinode contents */
@@ -427,11 +433,12 @@ struct freeblks {
uid_t fb_uid; /* uid of previous owner of blocks */
struct vnode *fb_devvp; /* filesystem device vnode */
struct mount *fb_mnt; /* associated mount point */
+ long fb_oldextsize; /* previous ext data size */
off_t fb_oldsize; /* previous file size */
- off_t fb_newsize; /* new file size */
ufs2_daddr_t fb_chkcnt; /* used to check cnt of blks released */
ufs2_daddr_t fb_dblks[NDADDR]; /* direct blk ptrs to deallocate */
ufs2_daddr_t fb_iblks[NIADDR]; /* indirect blk ptrs to deallocate */
+ ufs2_daddr_t fb_eblks[NXADDR]; /* ext data blk ptrs to deallocate */
};
/*
diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c
index abe2bea..731354e 100644
--- a/sys/ufs/ufs/ufs_bmap.c
+++ b/sys/ufs/ufs/ufs_bmap.c
@@ -83,7 +83,7 @@ ufs_bmap(ap)
if (ap->a_bnp == NULL)
return (0);
- error = ufs_bmaparray(ap->a_vp, ap->a_bn, &blkno,
+ error = ufs_bmaparray(ap->a_vp, ap->a_bn, &blkno, NULL,
ap->a_runp, ap->a_runb);
*ap->a_bnp = blkno;
return (error);
@@ -104,10 +104,11 @@ ufs_bmap(ap)
*/
int
-ufs_bmaparray(vp, bn, bnp, runp, runb)
+ufs_bmaparray(vp, bn, bnp, nbp, runp, runb)
struct vnode *vp;
ufs2_daddr_t bn;
ufs2_daddr_t *bnp;
+ struct buf *nbp;
int *runp;
int *runb;
{
@@ -146,7 +147,19 @@ ufs_bmaparray(vp, bn, bnp, runp, runb)
num = *nump;
if (num == 0) {
- *bnp = blkptrtodb(ump, DIP(ip, i_db[bn]));
+ if (bn >= 0 && bn < NDADDR) {
+ *bnp = blkptrtodb(ump, DIP(ip, i_db[bn]));
+ } else if (bn < 0 && bn >= -NXADDR) {
+ *bnp = blkptrtodb(ump, ip->i_din2->di_extb[-1 - bn]);
+ if (*bnp == 0)
+ *bnp = -1;
+ if (nbp == NULL)
+ panic("ufs_bmaparray: mapping ext data");
+ nbp->b_xflags |= BX_ALTDATA;
+ return (0);
+ } else {
+ panic("ufs_bmaparray: blkno out of range");
+ }
/*
* Since this is FFS independent code, we are out of
* scope for the definitions of BLK_NOCOPY and
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
index 85b508a..d4e333c 100644
--- a/sys/ufs/ufs/ufs_extern.h
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -60,13 +60,15 @@ int ufs_vnoperatefifo(struct vop_generic_args *);
int ufs_vnoperatespec(struct vop_generic_args *);
int ufs_bmap(struct vop_bmap_args *);
-int ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *, int *,
- int *);
+int ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *,
+ struct buf *, int *, int *);
int ufs_fhtovp(struct mount *, struct ufid *, struct vnode **);
int ufs_checkpath(struct inode *, struct inode *, struct ucred *);
void ufs_dirbad(struct inode *, doff_t, char *);
int ufs_dirbadentry(struct vnode *, struct direct *, int);
int ufs_dirempty(struct inode *, ino_t, struct ucred *);
+int ufs_extread(struct vop_read_args *);
+int ufs_extwrite(struct vop_write_args *);
void ufs_makedirentry(struct inode *, struct componentname *,
struct direct *);
int ufs_direnter(struct vnode *, struct vnode *, struct direct *,
@@ -107,10 +109,12 @@ void softdep_change_linkcnt(struct inode *);
void softdep_releasefile(struct inode *);
int softdep_slowdown(struct vnode *);
-/* Flags to low-level allocation routines. */
-#define BA_CLRBUF 0x01 /* Request allocated buffer be cleared. */
-#define BA_SYNC 0x02 /* Do all allocations synchronously. */
-#define BA_METAONLY 0x04 /* Return indirect block buffer. */
-#define BA_NOWAIT 0x08 /* do not sleep to await lock */
+/*
+ * Flags to low-level allocation routines.
+ * The low 16 bits are reserved for IO_ flags from vnode.h.
+ */
+#define BA_CLRBUF 0x00010000 /* Request alloced buffer be cleared. */
+#define BA_METAONLY 0x00020000 /* Return indirect block buffer. */
+#define BA_NOWAIT 0x00040000 /* Do not sleep to await lock. */
#endif /* !_UFS_UFS_EXTERN_H_ */
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c
index c9ac362..3166fec 100644
--- a/sys/ufs/ufs/ufs_inode.c
+++ b/sys/ufs/ufs/ufs_inode.c
@@ -95,7 +95,8 @@ ufs_inactive(ap)
#ifdef UFS_EXTATTR
ufs_extattr_vnode_inactive(ap->a_vp, ap->a_td);
#endif
- error = UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED, td);
+ error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL,
+ NOCRED, td);
/*
* Setting the mode to zero needs to wait for the inode
* to be written just as does a change to the link count.
diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c
index 1df9146..4515b6d 100644
--- a/sys/ufs/ufs/ufs_lookup.c
+++ b/sys/ufs/ufs/ufs_lookup.c
@@ -752,7 +752,7 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp)
panic("ufs_direnter: newblk");
flags = BA_CLRBUF;
if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp))
- flags |= BA_SYNC;
+ flags |= IO_SYNC;
if ((error = UFS_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ,
cr, flags, &bp)) != 0) {
if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
@@ -961,7 +961,8 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp)
if (dp->i_dirhash != NULL)
ufsdirhash_dirtrunc(dp, dp->i_endoff);
#endif
- (void) UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, td);
+ (void) UFS_TRUNCATE(dvp, (off_t)dp->i_endoff,
+ IO_NORMAL | IO_SYNC, cr, td);
if (tvp != NULL)
vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, td);
}
diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c
index 406832e..9db4e87 100644
--- a/sys/ufs/ufs/ufs_readwrite.c
+++ b/sys/ufs/ufs/ufs_readwrite.c
@@ -1,4 +1,13 @@
/*-
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Marshall
+ * Kirk McKusick and Network Associates Laboratories, the Security
+ * Research Division of Network Associates, Inc. under DARPA/SPAWAR
+ * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
+ * research program
+ *
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
@@ -77,6 +86,9 @@ READ(ap)
int ioflag;
vm_object_t object;
+ if (ap->a_ioflag & IO_EXT)
+ return (ufs_extread(ap));
+
GIANT_REQUIRED;
vp = ap->a_vp;
@@ -400,6 +412,9 @@ WRITE(ap)
int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
vm_object_t object;
+ if (ap->a_ioflag & IO_EXT)
+ return (ufs_extwrite(ap));
+
GIANT_REQUIRED;
extended = 0;
@@ -471,7 +486,7 @@ WRITE(ap)
osize = ip->i_size;
flags = 0;
if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
- flags = BA_SYNC;
+ flags = IO_SYNC;
#ifdef ENABLE_VFS_IOOPT
if (object && (object->flags & OBJ_OPT)) {
@@ -581,7 +596,8 @@ WRITE(ap)
if (error) {
if (ioflag & IO_UNIT) {
(void)UFS_TRUNCATE(vp, osize,
- ioflag & IO_SYNC, ap->a_cred, uio->uio_td);
+ IO_NORMAL | (ioflag & IO_SYNC),
+ ap->a_cred, uio->uio_td);
uio->uio_offset -= resid - uio->uio_resid;
uio->uio_resid = resid;
}
@@ -595,7 +611,6 @@ WRITE(ap)
return (error);
}
-
/*
* get page routine
*/
@@ -661,7 +676,7 @@ ffs_getpages(ap)
poff = (foff % bsize) / PAGE_SIZE;
dp = VTOI(vp)->i_devvp;
- if (ufs_bmaparray(vp, reqlblkno, &reqblkno, &bforwards, &bbackwards)
+ if (ufs_bmaparray(vp, reqlblkno, &reqblkno, 0, &bforwards, &bbackwards)
|| (reqblkno == -1)) {
for(i = 0; i < pcount; i++) {
if (i != ap->a_reqpage)
@@ -730,3 +745,321 @@ ffs_getpages(ap)
return (rtval);
}
+
+/*
+ * Vnode op for reading the extended attribute area of a UFS2 inode.
+ * Reached from READ() when IO_EXT is set in a_ioflag.  Extended
+ * attribute block N is accessed through the buffer cache as logical
+ * block -1 - N of the vnode.
+ */
+/* ARGSUSED */
+int
+ufs_extread(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp;
+	struct inode *ip;
+	struct ufs2_dinode *dp;
+	struct uio *uio;
+	struct fs *fs;
+	struct buf *bp;
+	ufs_lbn_t lbn, nextlbn;
+	off_t bytesinfile;
+	long size, xfersize, blkoffset;
+	int error, orig_resid;
+	int ioflag;
+
+	GIANT_REQUIRED;
+
+	vp = ap->a_vp;
+	ip = VTOI(vp);
+	fs = ip->i_fs;
+	dp = ip->i_din2;
+	uio = ap->a_uio;
+	ioflag = ap->a_ioflag;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
+		panic("ufs_extread: mode");
+#endif
+	orig_resid = uio->uio_resid;
+	if (orig_resid <= 0)
+		return (0);
+
+	bytesinfile = dp->di_extsize - uio->uio_offset;
+	if (bytesinfile <= 0) {
+		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
+			ip->i_flag |= IN_ACCESS;
+		return (0);
+	}
+
+	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
+		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
+			break;
+
+		lbn = lblkno(fs, uio->uio_offset);
+		nextlbn = lbn + 1;
+
+		/*
+		 * size of buffer.  The buffer representing the
+		 * end of the file is rounded up to the size of
+		 * the block type ( fragment or full block,
+		 * depending ).
+		 */
+		size = sblksize(fs, dp->di_extsize, lbn);
+		blkoffset = blkoff(fs, uio->uio_offset);
+
+		/*
+		 * The amount we want to transfer in this iteration is
+		 * one FS block less the amount of the data before
+		 * our startpoint (duh!)
+		 */
+		xfersize = fs->fs_bsize - blkoffset;
+
+		/*
+		 * But if we actually want less than the block,
+		 * or the file doesn't have a whole block more of data,
+		 * then use the lesser number.
+		 */
+		if (uio->uio_resid < xfersize)
+			xfersize = uio->uio_resid;
+		if (bytesinfile < xfersize)
+			xfersize = bytesinfile;
+
+		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
+			/*
+			 * Don't do readahead if this is the end of the info.
+			 */
+			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
+		} else {
+			/*
+			 * If we have a second block, then
+			 * fire off a request for a readahead
+			 * as well as a read. Note that the 4th and 5th
+			 * arguments point to arrays of the size specified in
+			 * the 6th argument.
+			 */
+			int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
+
+			nextlbn = -1 - nextlbn;
+			error = breadn(vp, -1 - lbn,
+			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
+		}
+		if (error) {
+			brelse(bp);
+			bp = NULL;
+			break;
+		}
+
+		/*
+		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
+		 * will cause us to attempt to release the buffer later on
+		 * and will cause the buffer cache to attempt to free the
+		 * underlying pages.
+		 */
+		if (ioflag & IO_DIRECT)
+			bp->b_flags |= B_DIRECT;
+
+		/*
+		 * We should only get non-zero b_resid when an I/O error
+		 * has occurred, which should cause us to break above.
+		 * However, if the short read did not cause an error,
+		 * then we want to ensure that we do not uiomove bad
+		 * or uninitialized data.
+		 */
+		size -= bp->b_resid;
+		if (size < xfersize) {
+			if (size == 0)
+				break;
+			xfersize = size;
+		}
+
+		error = uiomove((char *)bp->b_data + blkoffset,
+		    (int)xfersize, uio);
+		if (error)
+			break;
+
+		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
+		    (LIST_FIRST(&bp->b_dep) == NULL)) {
+			/*
+			 * If there are no dependencies, and it's VMIO,
+			 * then we don't need the buf, mark it available
+			 * for freeing. The VM has the data.
+			 */
+			bp->b_flags |= B_RELBUF;
+			brelse(bp);
+		} else {
+			/*
+			 * Otherwise let whoever
+			 * made the request take care of
+			 * freeing it. We just queue
+			 * it onto another list.
+			 */
+			bqrelse(bp);
+		}
+	}
+
+	/*
+	 * This can only happen in the case of an error
+	 * because the loop above resets bp to NULL on each iteration
+	 * and on normal completion has not set a new value into it.
+	 * so it must have come from a 'break' statement
+	 */
+	if (bp != NULL) {
+		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
+		    (LIST_FIRST(&bp->b_dep) == NULL)) {
+			bp->b_flags |= B_RELBUF;
+			brelse(bp);
+		} else {
+			bqrelse(bp);
+		}
+	}
+
+	if ((error == 0 || uio->uio_resid != orig_resid) &&
+	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
+		ip->i_flag |= IN_ACCESS;
+	return (error);
+}
+
+/*
+ * Vnode op for extended attribute writing (IO_EXT requests from WRITE).
+ */
+int
+ufs_extwrite(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp;
+	struct uio *uio;
+	struct inode *ip;
+	struct ufs2_dinode *dp;
+	struct fs *fs;
+	struct buf *bp;
+	ufs_lbn_t lbn;
+	off_t osize;
+	int blkoffset, error, flags, ioflag, resid, size, xfersize;
+
+	GIANT_REQUIRED;
+
+	vp = ap->a_vp;
+	ip = VTOI(vp);
+	fs = ip->i_fs;
+	dp = ip->i_din2;
+	uio = ap->a_uio;
+	ioflag = ap->a_ioflag;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
+		panic("ufs_extwrite: mode");
+#endif
+
+	if (ioflag & IO_APPEND)
+		uio->uio_offset = dp->di_extsize;
+
+	if (uio->uio_offset < 0 ||
+	    (u_int64_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
+		return (EFBIG);
+
+	resid = uio->uio_resid;
+	osize = dp->di_extsize;
+	flags = IO_EXT;
+	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
+		flags |= IO_SYNC;
+
+	for (error = 0; uio->uio_resid > 0;) {
+		lbn = lblkno(fs, uio->uio_offset);
+		blkoffset = blkoff(fs, uio->uio_offset);
+		xfersize = fs->fs_bsize - blkoffset;
+		if (uio->uio_resid < xfersize)
+			xfersize = uio->uio_resid;
+
+		/*
+		 * We must perform a read-before-write if the transfer size
+		 * does not cover the entire buffer.
+		 */
+		if (fs->fs_bsize > xfersize)
+			flags |= BA_CLRBUF;
+		else
+			flags &= ~BA_CLRBUF;
+		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
+		    ap->a_cred, flags, &bp);
+		if (error != 0)
+			break;
+		/*
+		 * If the buffer is not valid we have to clear out any
+		 * garbage data from the pages instantiated for the buffer.
+		 * If we do not, a failed uiomove() during a write can leave
+		 * the prior contents of the pages exposed to a userland
+		 * mmap().  XXX deal with uiomove() errors a better way.
+		 */
+		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
+			vfs_bio_clrbuf(bp);
+		if (ioflag & IO_DIRECT)
+			bp->b_flags |= B_DIRECT;
+		if (ioflag & IO_NOWDRAIN)
+			bp->b_flags |= B_NOWDRAIN;
+
+		if (uio->uio_offset + xfersize > dp->di_extsize)
+			dp->di_extsize = uio->uio_offset + xfersize;
+
+		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
+		if (size < xfersize)
+			xfersize = size;
+
+		error =
+		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
+		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
+		    (LIST_FIRST(&bp->b_dep) == NULL)) {
+			bp->b_flags |= B_RELBUF;
+		}
+
+		/*
+		 * If IO_SYNC each buffer is written synchronously.  Otherwise
+		 * if we have a severe page deficiency write the buffer
+		 * asynchronously.  Otherwise try to cluster, and if that
+		 * doesn't do it then either do an async write (if O_DIRECT),
+		 * or a delayed write (if not).
+		 */
+		if (ioflag & IO_SYNC) {
+			(void)bwrite(bp);
+		} else if (vm_page_count_severe() ||
+		    buf_dirty_count_severe() ||
+		    xfersize + blkoffset == fs->fs_bsize ||
+		    (ioflag & (IO_ASYNC | IO_DIRECT)))
+			bawrite(bp);
+		else
+			bdwrite(bp);
+		if (error || xfersize == 0)
+			break;
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	}
+	/*
+	 * If we successfully wrote any data, and we are not the superuser
+	 * we clear the setuid and setgid bits as a precaution against
+	 * tampering.
+	 */
+	if (resid > uio->uio_resid && ap->a_cred &&
+	    suser_cred(ap->a_cred, PRISON_ROOT)) {
+		ip->i_mode &= ~(ISUID | ISGID);
+		dp->di_mode = ip->i_mode;
+	}
+	if (error) {
+		if (ioflag & IO_UNIT) {
+			(void)UFS_TRUNCATE(vp, osize,
+			    IO_EXT | (ioflag & IO_SYNC), ap->a_cred, uio->uio_td);
+			uio->uio_offset -= resid - uio->uio_resid;
+			uio->uio_resid = resid;
+		}
+	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
+		error = UFS_UPDATE(vp, 1);
+	return (error);
+}
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 0ef9ed2..66d8319 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -555,7 +555,8 @@ ufs_setattr(ap)
default:
break;
}
- if ((error = UFS_TRUNCATE(vp, vap->va_size, 0, cred, td)) != 0)
+ if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL,
+ cred, td)) != 0)
return (error);
}
if (vap->va_atime.tv_sec != VNOVAL ||
@@ -1268,7 +1269,9 @@ abortit:
xp->i_nlink--;
DIP(xp, i_nlink) = xp->i_nlink;
xp->i_flag |= IN_CHANGE;
- ioflag = DOINGASYNC(tvp) ? 0 : IO_SYNC;
+ ioflag = IO_NORMAL;
+ if (!DOINGASYNC(tvp))
+ ioflag |= IO_SYNC;
if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag,
tcnp->cn_cred, tcnp->cn_thread)) != 0)
goto bad;
@@ -1762,7 +1765,9 @@ ufs_rmdir(ap)
ip->i_nlink--;
DIP(ip, i_nlink) = ip->i_nlink;
ip->i_flag |= IN_CHANGE;
- ioflag = DOINGASYNC(vp) ? 0 : IO_SYNC;
+ ioflag = IO_NORMAL;
+ if (!DOINGASYNC(vp))
+ ioflag |= IO_SYNC;
error = UFS_TRUNCATE(vp, (off_t)0, ioflag, cnp->cn_cred,
cnp->cn_thread);
}
@@ -1980,7 +1985,7 @@ ufs_strategy(ap)
if (vp->v_type == VBLK || vp->v_type == VCHR)
panic("ufs_strategy: spec");
if (bp->b_blkno == bp->b_lblkno) {
- error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, NULL, NULL);
+ error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL);
bp->b_blkno = blkno;
if (error) {
bp->b_error = error;
OpenPOWER on IntegriCloud